diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 50.0, - "global_step": 17273600, + "epoch": 60.0, + "global_step": 20728320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -207838,11 +207838,41575 @@ "eval_samples_per_second": 1314.633, "eval_steps_per_second": 54.777, "step": 17273600 + }, + { + "epoch": 50.0, + "learning_rate": 2.5008411680251946e-05, + "loss": 1.9799, + "step": 17274000 + }, + { + "epoch": 50.0, + "learning_rate": 2.500768803260467e-05, + "loss": 1.9725, + "step": 17274500 + }, + { + "epoch": 50.0, + "learning_rate": 2.5006964384957394e-05, + "loss": 1.9746, + "step": 17275000 + }, + { + "epoch": 50.01, + "learning_rate": 2.5006240737310116e-05, + "loss": 2.0099, + "step": 17275500 + }, + { + "epoch": 50.01, + "learning_rate": 2.5005517089662838e-05, + "loss": 1.9809, + "step": 17276000 + }, + { + "epoch": 50.01, + "learning_rate": 2.500479344201556e-05, + "loss": 1.9807, + "step": 17276500 + }, + { + "epoch": 50.01, + "learning_rate": 2.5004069794368286e-05, + "loss": 1.9837, + "step": 17277000 + }, + { + "epoch": 50.01, + "learning_rate": 2.5003346146721008e-05, + "loss": 1.9915, + "step": 17277500 + }, + { + "epoch": 50.01, + "learning_rate": 2.500262249907373e-05, + "loss": 2.0015, + "step": 17278000 + }, + { + "epoch": 50.01, + "learning_rate": 2.5001898851426453e-05, + "loss": 1.9679, + "step": 17278500 + }, + { + "epoch": 50.02, + "learning_rate": 2.500117665107447e-05, + "loss": 1.9543, + "step": 17279000 + }, + { + "epoch": 50.02, + "learning_rate": 2.5000453003427197e-05, + "loss": 1.973, + "step": 17279500 + }, + { + "epoch": 50.02, + "learning_rate": 2.499972935577992e-05, + "loss": 2.0089, + "step": 17280000 + }, + { + "epoch": 50.02, + "learning_rate": 2.4999005708132642e-05, + "loss": 1.9815, + "step": 17280500 + }, + { + "epoch": 50.02, + "learning_rate": 2.499828350778066e-05, + "loss": 1.9694, + "step": 17281000 + }, + { + "epoch": 50.02, + "learning_rate": 2.4997559860133386e-05, + "loss": 2.0003, + "step": 17281500 + }, + { + "epoch": 50.02, + "learning_rate": 2.499683621248611e-05, + "loss": 1.9927, + "step": 17282000 + }, + { + "epoch": 50.03, + "learning_rate": 2.499611256483883e-05, + "loss": 1.9785, + "step": 17282500 + }, + { + "epoch": 50.03, + "learning_rate": 2.4995388917191553e-05, + "loss": 2.0075, + "step": 17283000 + }, + { + "epoch": 50.03, + "learning_rate": 2.4994665269544275e-05, + "loss": 1.9742, + "step": 17283500 + }, + { + "epoch": 50.03, + "learning_rate": 2.4993943069192294e-05, + "loss": 1.9941, + "step": 17284000 + }, + { + "epoch": 50.03, + "learning_rate": 2.4993219421545016e-05, + "loss": 1.9981, + "step": 17284500 + }, + { + "epoch": 50.03, + "learning_rate": 2.4992495773897742e-05, + "loss": 1.9847, + "step": 17285000 + }, + { + "epoch": 50.03, + "learning_rate": 2.4991772126250464e-05, + "loss": 1.9996, + "step": 17285500 + }, + { + "epoch": 50.04, + "learning_rate": 2.4991048478603187e-05, + "loss": 1.9901, + "step": 17286000 + }, + { + "epoch": 50.04, + "learning_rate": 2.4990324830955912e-05, + "loss": 1.9742, + "step": 17286500 + }, + { + "epoch": 50.04, + "learning_rate": 2.4989601183308634e-05, + "loss": 1.9722, + "step": 17287000 + }, + { + "epoch": 50.04, + "learning_rate": 2.4988877535661357e-05, + "loss": 1.9862, + "step": 17287500 + }, + { + "epoch": 50.04, + "learning_rate": 2.498815388801408e-05, + "loss": 1.9744, + "step": 17288000 + }, + { + "epoch": 50.04, + "learning_rate": 2.498743313495739e-05, + "loss": 1.9983, + "step": 17288500 + }, + { + "epoch": 50.04, + "learning_rate": 2.4986709487310117e-05, + "loss": 1.9922, + "step": 17289000 + }, + { + "epoch": 50.05, + "learning_rate": 2.4985987286958136e-05, + "loss": 2.0188, + "step": 17289500 + }, + { + "epoch": 50.05, + "learning_rate": 2.4985263639310858e-05, + "loss": 2.0132, + "step": 17290000 + }, + { + "epoch": 50.05, + "learning_rate": 2.498453999166358e-05, + "loss": 1.965, + "step": 17290500 + }, + { + "epoch": 50.05, + "learning_rate": 2.4983816344016302e-05, + "loss": 1.9674, + "step": 17291000 + }, + { + "epoch": 50.05, + "learning_rate": 2.4983092696369025e-05, + "loss": 1.9928, + "step": 17291500 + }, + { + "epoch": 50.05, + "learning_rate": 2.498236904872175e-05, + "loss": 1.9651, + "step": 17292000 + }, + { + "epoch": 50.05, + "learning_rate": 2.4981645401074476e-05, + "loss": 2.0029, + "step": 17292500 + }, + { + "epoch": 50.06, + "learning_rate": 2.4980921753427198e-05, + "loss": 1.9575, + "step": 17293000 + }, + { + "epoch": 50.06, + "learning_rate": 2.4980199553075214e-05, + "loss": 1.989, + "step": 17293500 + }, + { + "epoch": 50.06, + "learning_rate": 2.4979475905427936e-05, + "loss": 1.9848, + "step": 17294000 + }, + { + "epoch": 50.06, + "learning_rate": 2.4978753705075955e-05, + "loss": 1.9803, + "step": 17294500 + }, + { + "epoch": 50.06, + "learning_rate": 2.4978030057428677e-05, + "loss": 1.988, + "step": 17295000 + }, + { + "epoch": 50.06, + "learning_rate": 2.49773064097814e-05, + "loss": 1.9715, + "step": 17295500 + }, + { + "epoch": 50.06, + "learning_rate": 2.4976582762134125e-05, + "loss": 2.0055, + "step": 17296000 + }, + { + "epoch": 50.07, + "learning_rate": 2.497585911448685e-05, + "loss": 2.0013, + "step": 17296500 + }, + { + "epoch": 50.07, + "learning_rate": 2.4975135466839573e-05, + "loss": 1.9806, + "step": 17297000 + }, + { + "epoch": 50.07, + "learning_rate": 2.4974411819192295e-05, + "loss": 2.0164, + "step": 17297500 + }, + { + "epoch": 50.07, + "learning_rate": 2.4973688171545017e-05, + "loss": 1.9793, + "step": 17298000 + }, + { + "epoch": 50.07, + "learning_rate": 2.497296452389774e-05, + "loss": 1.9851, + "step": 17298500 + }, + { + "epoch": 50.07, + "learning_rate": 2.4972240876250462e-05, + "loss": 1.9913, + "step": 17299000 + }, + { + "epoch": 50.07, + "learning_rate": 2.4971517228603187e-05, + "loss": 1.9892, + "step": 17299500 + }, + { + "epoch": 50.08, + "learning_rate": 2.4970793580955913e-05, + "loss": 1.9841, + "step": 17300000 + }, + { + "epoch": 50.08, + "learning_rate": 2.4970069933308635e-05, + "loss": 1.9887, + "step": 17300500 + }, + { + "epoch": 50.08, + "learning_rate": 2.4969346285661357e-05, + "loss": 1.961, + "step": 17301000 + }, + { + "epoch": 50.08, + "learning_rate": 2.496862263801408e-05, + "loss": 1.9903, + "step": 17301500 + }, + { + "epoch": 50.08, + "learning_rate": 2.4967898990366802e-05, + "loss": 1.9844, + "step": 17302000 + }, + { + "epoch": 50.08, + "learning_rate": 2.4967175342719528e-05, + "loss": 1.9927, + "step": 17302500 + }, + { + "epoch": 50.09, + "learning_rate": 2.496645169507225e-05, + "loss": 2.0015, + "step": 17303000 + }, + { + "epoch": 50.09, + "learning_rate": 2.496572949472027e-05, + "loss": 1.9786, + "step": 17303500 + }, + { + "epoch": 50.09, + "learning_rate": 2.496500584707299e-05, + "loss": 1.9586, + "step": 17304000 + }, + { + "epoch": 50.09, + "learning_rate": 2.4964282199425713e-05, + "loss": 1.9639, + "step": 17304500 + }, + { + "epoch": 50.09, + "learning_rate": 2.496355855177844e-05, + "loss": 1.9983, + "step": 17305000 + }, + { + "epoch": 50.09, + "learning_rate": 2.4962836351426454e-05, + "loss": 1.9841, + "step": 17305500 + }, + { + "epoch": 50.09, + "learning_rate": 2.4962112703779177e-05, + "loss": 2.013, + "step": 17306000 + }, + { + "epoch": 50.1, + "learning_rate": 2.4961389056131902e-05, + "loss": 1.9718, + "step": 17306500 + }, + { + "epoch": 50.1, + "learning_rate": 2.4960666855779918e-05, + "loss": 1.9945, + "step": 17307000 + }, + { + "epoch": 50.1, + "learning_rate": 2.4959943208132643e-05, + "loss": 2.0052, + "step": 17307500 + }, + { + "epoch": 50.1, + "learning_rate": 2.4959221007780662e-05, + "loss": 1.9896, + "step": 17308000 + }, + { + "epoch": 50.1, + "learning_rate": 2.4958497360133385e-05, + "loss": 2.0044, + "step": 17308500 + }, + { + "epoch": 50.1, + "learning_rate": 2.4957773712486107e-05, + "loss": 1.9897, + "step": 17309000 + }, + { + "epoch": 50.1, + "learning_rate": 2.4957051512134126e-05, + "loss": 1.9821, + "step": 17309500 + }, + { + "epoch": 50.11, + "learning_rate": 2.4956327864486848e-05, + "loss": 1.996, + "step": 17310000 + }, + { + "epoch": 50.11, + "learning_rate": 2.495560421683957e-05, + "loss": 1.9854, + "step": 17310500 + }, + { + "epoch": 50.11, + "learning_rate": 2.4954880569192296e-05, + "loss": 2.0065, + "step": 17311000 + }, + { + "epoch": 50.11, + "learning_rate": 2.4954156921545018e-05, + "loss": 2.0063, + "step": 17311500 + }, + { + "epoch": 50.11, + "learning_rate": 2.495343327389774e-05, + "loss": 1.97, + "step": 17312000 + }, + { + "epoch": 50.11, + "learning_rate": 2.4952709626250466e-05, + "loss": 1.9786, + "step": 17312500 + }, + { + "epoch": 50.11, + "learning_rate": 2.4951985978603188e-05, + "loss": 2.0034, + "step": 17313000 + }, + { + "epoch": 50.12, + "learning_rate": 2.4951263778251204e-05, + "loss": 1.9928, + "step": 17313500 + }, + { + "epoch": 50.12, + "learning_rate": 2.495054013060393e-05, + "loss": 1.994, + "step": 17314000 + }, + { + "epoch": 50.12, + "learning_rate": 2.494981648295665e-05, + "loss": 1.9931, + "step": 17314500 + }, + { + "epoch": 50.12, + "learning_rate": 2.4949092835309377e-05, + "loss": 1.9751, + "step": 17315000 + }, + { + "epoch": 50.12, + "learning_rate": 2.49483691876621e-05, + "loss": 1.9952, + "step": 17315500 + }, + { + "epoch": 50.12, + "learning_rate": 2.4947646987310115e-05, + "loss": 2.0058, + "step": 17316000 + }, + { + "epoch": 50.12, + "learning_rate": 2.494692333966284e-05, + "loss": 1.9842, + "step": 17316500 + }, + { + "epoch": 50.13, + "learning_rate": 2.4946199692015563e-05, + "loss": 2.0002, + "step": 17317000 + }, + { + "epoch": 50.13, + "learning_rate": 2.4945476044368285e-05, + "loss": 1.9815, + "step": 17317500 + }, + { + "epoch": 50.13, + "learning_rate": 2.4944752396721007e-05, + "loss": 1.9977, + "step": 17318000 + }, + { + "epoch": 50.13, + "learning_rate": 2.4944028749073733e-05, + "loss": 1.9983, + "step": 17318500 + }, + { + "epoch": 50.13, + "learning_rate": 2.4943305101426455e-05, + "loss": 1.9859, + "step": 17319000 + }, + { + "epoch": 50.13, + "learning_rate": 2.494258145377918e-05, + "loss": 1.9796, + "step": 17319500 + }, + { + "epoch": 50.13, + "learning_rate": 2.4941857806131903e-05, + "loss": 1.9982, + "step": 17320000 + }, + { + "epoch": 50.14, + "learning_rate": 2.494113560577992e-05, + "loss": 1.9927, + "step": 17320500 + }, + { + "epoch": 50.14, + "learning_rate": 2.494041195813264e-05, + "loss": 1.9698, + "step": 17321000 + }, + { + "epoch": 50.14, + "learning_rate": 2.493968975778066e-05, + "loss": 1.979, + "step": 17321500 + }, + { + "epoch": 50.14, + "learning_rate": 2.4938966110133382e-05, + "loss": 1.9691, + "step": 17322000 + }, + { + "epoch": 50.14, + "learning_rate": 2.4938242462486108e-05, + "loss": 1.9898, + "step": 17322500 + }, + { + "epoch": 50.14, + "learning_rate": 2.493751881483883e-05, + "loss": 1.9711, + "step": 17323000 + }, + { + "epoch": 50.14, + "learning_rate": 2.493679661448685e-05, + "loss": 1.993, + "step": 17323500 + }, + { + "epoch": 50.15, + "learning_rate": 2.493607296683957e-05, + "loss": 1.9847, + "step": 17324000 + }, + { + "epoch": 50.15, + "learning_rate": 2.4935349319192293e-05, + "loss": 1.9873, + "step": 17324500 + }, + { + "epoch": 50.15, + "learning_rate": 2.493462567154502e-05, + "loss": 1.998, + "step": 17325000 + }, + { + "epoch": 50.15, + "learning_rate": 2.493390202389774e-05, + "loss": 1.9979, + "step": 17325500 + }, + { + "epoch": 50.15, + "learning_rate": 2.4933178376250467e-05, + "loss": 2.009, + "step": 17326000 + }, + { + "epoch": 50.15, + "learning_rate": 2.493245472860319e-05, + "loss": 2.0006, + "step": 17326500 + }, + { + "epoch": 50.15, + "learning_rate": 2.493173108095591e-05, + "loss": 1.9862, + "step": 17327000 + }, + { + "epoch": 50.16, + "learning_rate": 2.4931007433308633e-05, + "loss": 1.9925, + "step": 17327500 + }, + { + "epoch": 50.16, + "learning_rate": 2.4930283785661356e-05, + "loss": 2.0133, + "step": 17328000 + }, + { + "epoch": 50.16, + "learning_rate": 2.492956013801408e-05, + "loss": 1.9986, + "step": 17328500 + }, + { + "epoch": 50.16, + "learning_rate": 2.4928836490366803e-05, + "loss": 1.9809, + "step": 17329000 + }, + { + "epoch": 50.16, + "learning_rate": 2.492811284271953e-05, + "loss": 1.9832, + "step": 17329500 + }, + { + "epoch": 50.16, + "learning_rate": 2.492738919507225e-05, + "loss": 1.9843, + "step": 17330000 + }, + { + "epoch": 50.16, + "learning_rate": 2.4926665547424974e-05, + "loss": 1.9872, + "step": 17330500 + }, + { + "epoch": 50.17, + "learning_rate": 2.4925943347072993e-05, + "loss": 1.9958, + "step": 17331000 + }, + { + "epoch": 50.17, + "learning_rate": 2.4925219699425715e-05, + "loss": 1.9842, + "step": 17331500 + }, + { + "epoch": 50.17, + "learning_rate": 2.4924496051778437e-05, + "loss": 1.9749, + "step": 17332000 + }, + { + "epoch": 50.17, + "learning_rate": 2.492377240413116e-05, + "loss": 2.0018, + "step": 17332500 + }, + { + "epoch": 50.17, + "learning_rate": 2.4923048756483885e-05, + "loss": 2.0097, + "step": 17333000 + }, + { + "epoch": 50.17, + "learning_rate": 2.4922325108836607e-05, + "loss": 1.99, + "step": 17333500 + }, + { + "epoch": 50.17, + "learning_rate": 2.4921601461189333e-05, + "loss": 2.0028, + "step": 17334000 + }, + { + "epoch": 50.18, + "learning_rate": 2.4920879260837348e-05, + "loss": 1.9941, + "step": 17334500 + }, + { + "epoch": 50.18, + "learning_rate": 2.492015561319007e-05, + "loss": 1.9876, + "step": 17335000 + }, + { + "epoch": 50.18, + "learning_rate": 2.4919431965542793e-05, + "loss": 1.9999, + "step": 17335500 + }, + { + "epoch": 50.18, + "learning_rate": 2.491870831789552e-05, + "loss": 2.0008, + "step": 17336000 + }, + { + "epoch": 50.18, + "learning_rate": 2.491798467024824e-05, + "loss": 1.9921, + "step": 17336500 + }, + { + "epoch": 50.18, + "learning_rate": 2.4917261022600966e-05, + "loss": 2.0039, + "step": 17337000 + }, + { + "epoch": 50.18, + "learning_rate": 2.4916538822248982e-05, + "loss": 2.0004, + "step": 17337500 + }, + { + "epoch": 50.19, + "learning_rate": 2.4915815174601707e-05, + "loss": 1.997, + "step": 17338000 + }, + { + "epoch": 50.19, + "learning_rate": 2.491509152695443e-05, + "loss": 1.9888, + "step": 17338500 + }, + { + "epoch": 50.19, + "learning_rate": 2.4914367879307152e-05, + "loss": 1.9952, + "step": 17339000 + }, + { + "epoch": 50.19, + "learning_rate": 2.4913644231659874e-05, + "loss": 1.9574, + "step": 17339500 + }, + { + "epoch": 50.19, + "learning_rate": 2.4912922031307893e-05, + "loss": 1.9946, + "step": 17340000 + }, + { + "epoch": 50.19, + "learning_rate": 2.4912198383660615e-05, + "loss": 2.0033, + "step": 17340500 + }, + { + "epoch": 50.2, + "learning_rate": 2.491147473601334e-05, + "loss": 1.9669, + "step": 17341000 + }, + { + "epoch": 50.2, + "learning_rate": 2.4910751088366063e-05, + "loss": 1.9864, + "step": 17341500 + }, + { + "epoch": 50.2, + "learning_rate": 2.4910027440718785e-05, + "loss": 1.9639, + "step": 17342000 + }, + { + "epoch": 50.2, + "learning_rate": 2.4909305240366804e-05, + "loss": 1.9888, + "step": 17342500 + }, + { + "epoch": 50.2, + "learning_rate": 2.4908581592719527e-05, + "loss": 1.9918, + "step": 17343000 + }, + { + "epoch": 50.2, + "learning_rate": 2.490785794507225e-05, + "loss": 1.9835, + "step": 17343500 + }, + { + "epoch": 50.2, + "learning_rate": 2.490713429742497e-05, + "loss": 1.9576, + "step": 17344000 + }, + { + "epoch": 50.21, + "learning_rate": 2.4906410649777697e-05, + "loss": 1.9886, + "step": 17344500 + }, + { + "epoch": 50.21, + "learning_rate": 2.4905687002130422e-05, + "loss": 1.9862, + "step": 17345000 + }, + { + "epoch": 50.21, + "learning_rate": 2.4904963354483144e-05, + "loss": 1.9952, + "step": 17345500 + }, + { + "epoch": 50.21, + "learning_rate": 2.4904239706835867e-05, + "loss": 1.9939, + "step": 17346000 + }, + { + "epoch": 50.21, + "learning_rate": 2.490351605918859e-05, + "loss": 2.0038, + "step": 17346500 + }, + { + "epoch": 50.21, + "learning_rate": 2.490279241154131e-05, + "loss": 1.9934, + "step": 17347000 + }, + { + "epoch": 50.21, + "learning_rate": 2.4902068763894033e-05, + "loss": 1.992, + "step": 17347500 + }, + { + "epoch": 50.22, + "learning_rate": 2.490134511624676e-05, + "loss": 1.9906, + "step": 17348000 + }, + { + "epoch": 50.22, + "learning_rate": 2.4900621468599485e-05, + "loss": 2.0035, + "step": 17348500 + }, + { + "epoch": 50.22, + "learning_rate": 2.4899897820952207e-05, + "loss": 1.9983, + "step": 17349000 + }, + { + "epoch": 50.22, + "learning_rate": 2.489917417330493e-05, + "loss": 2.007, + "step": 17349500 + }, + { + "epoch": 50.22, + "learning_rate": 2.489845052565765e-05, + "loss": 1.9917, + "step": 17350000 + }, + { + "epoch": 50.22, + "learning_rate": 2.4897729772600964e-05, + "loss": 1.9712, + "step": 17350500 + }, + { + "epoch": 50.22, + "learning_rate": 2.4897007572248983e-05, + "loss": 1.9944, + "step": 17351000 + }, + { + "epoch": 50.23, + "learning_rate": 2.4896283924601705e-05, + "loss": 1.9806, + "step": 17351500 + }, + { + "epoch": 50.23, + "learning_rate": 2.489556027695443e-05, + "loss": 2.0172, + "step": 17352000 + }, + { + "epoch": 50.23, + "learning_rate": 2.4894836629307153e-05, + "loss": 2.0283, + "step": 17352500 + }, + { + "epoch": 50.23, + "learning_rate": 2.489411442895517e-05, + "loss": 2.0039, + "step": 17353000 + }, + { + "epoch": 50.23, + "learning_rate": 2.4893390781307894e-05, + "loss": 1.9857, + "step": 17353500 + }, + { + "epoch": 50.23, + "learning_rate": 2.4892667133660616e-05, + "loss": 1.9803, + "step": 17354000 + }, + { + "epoch": 50.23, + "learning_rate": 2.4891943486013338e-05, + "loss": 2.0028, + "step": 17354500 + }, + { + "epoch": 50.24, + "learning_rate": 2.489121983836606e-05, + "loss": 2.0014, + "step": 17355000 + }, + { + "epoch": 50.24, + "learning_rate": 2.4890496190718786e-05, + "loss": 1.9846, + "step": 17355500 + }, + { + "epoch": 50.24, + "learning_rate": 2.4889772543071512e-05, + "loss": 1.993, + "step": 17356000 + }, + { + "epoch": 50.24, + "learning_rate": 2.4889048895424234e-05, + "loss": 1.9771, + "step": 17356500 + }, + { + "epoch": 50.24, + "learning_rate": 2.4888325247776956e-05, + "loss": 1.9963, + "step": 17357000 + }, + { + "epoch": 50.24, + "learning_rate": 2.488760160012968e-05, + "loss": 1.9877, + "step": 17357500 + }, + { + "epoch": 50.24, + "learning_rate": 2.48868779524824e-05, + "loss": 1.9948, + "step": 17358000 + }, + { + "epoch": 50.25, + "learning_rate": 2.4886154304835123e-05, + "loss": 1.9968, + "step": 17358500 + }, + { + "epoch": 50.25, + "learning_rate": 2.488543065718785e-05, + "loss": 1.9894, + "step": 17359000 + }, + { + "epoch": 50.25, + "learning_rate": 2.4884707009540574e-05, + "loss": 2.0012, + "step": 17359500 + }, + { + "epoch": 50.25, + "learning_rate": 2.4883983361893296e-05, + "loss": 2.0122, + "step": 17360000 + }, + { + "epoch": 50.25, + "learning_rate": 2.4883261161541312e-05, + "loss": 1.9951, + "step": 17360500 + }, + { + "epoch": 50.25, + "learning_rate": 2.4882537513894034e-05, + "loss": 1.9909, + "step": 17361000 + }, + { + "epoch": 50.25, + "learning_rate": 2.488181386624676e-05, + "loss": 1.972, + "step": 17361500 + }, + { + "epoch": 50.26, + "learning_rate": 2.4881090218599482e-05, + "loss": 1.9815, + "step": 17362000 + }, + { + "epoch": 50.26, + "learning_rate": 2.4880368018247498e-05, + "loss": 1.9966, + "step": 17362500 + }, + { + "epoch": 50.26, + "learning_rate": 2.4879644370600223e-05, + "loss": 2.0009, + "step": 17363000 + }, + { + "epoch": 50.26, + "learning_rate": 2.487892072295295e-05, + "loss": 1.9903, + "step": 17363500 + }, + { + "epoch": 50.26, + "learning_rate": 2.487819707530567e-05, + "loss": 1.9868, + "step": 17364000 + }, + { + "epoch": 50.26, + "learning_rate": 2.4877474874953687e-05, + "loss": 1.9914, + "step": 17364500 + }, + { + "epoch": 50.26, + "learning_rate": 2.4876751227306412e-05, + "loss": 1.9963, + "step": 17365000 + }, + { + "epoch": 50.27, + "learning_rate": 2.4876027579659134e-05, + "loss": 1.9841, + "step": 17365500 + }, + { + "epoch": 50.27, + "learning_rate": 2.487530537930715e-05, + "loss": 1.9952, + "step": 17366000 + }, + { + "epoch": 50.27, + "learning_rate": 2.4874581731659872e-05, + "loss": 2.0083, + "step": 17366500 + }, + { + "epoch": 50.27, + "learning_rate": 2.4873858084012598e-05, + "loss": 1.9895, + "step": 17367000 + }, + { + "epoch": 50.27, + "learning_rate": 2.4873134436365324e-05, + "loss": 2.0331, + "step": 17367500 + }, + { + "epoch": 50.27, + "learning_rate": 2.4872410788718046e-05, + "loss": 1.9786, + "step": 17368000 + }, + { + "epoch": 50.27, + "learning_rate": 2.4871690035661358e-05, + "loss": 1.9931, + "step": 17368500 + }, + { + "epoch": 50.28, + "learning_rate": 2.487096638801408e-05, + "loss": 1.9892, + "step": 17369000 + }, + { + "epoch": 50.28, + "learning_rate": 2.4870242740366802e-05, + "loss": 1.9965, + "step": 17369500 + }, + { + "epoch": 50.28, + "learning_rate": 2.4869519092719525e-05, + "loss": 1.9833, + "step": 17370000 + }, + { + "epoch": 50.28, + "learning_rate": 2.486879544507225e-05, + "loss": 1.9753, + "step": 17370500 + }, + { + "epoch": 50.28, + "learning_rate": 2.4868071797424976e-05, + "loss": 1.9702, + "step": 17371000 + }, + { + "epoch": 50.28, + "learning_rate": 2.486734959707299e-05, + "loss": 2.0033, + "step": 17371500 + }, + { + "epoch": 50.28, + "learning_rate": 2.4866625949425714e-05, + "loss": 1.9789, + "step": 17372000 + }, + { + "epoch": 50.29, + "learning_rate": 2.4865902301778436e-05, + "loss": 1.9754, + "step": 17372500 + }, + { + "epoch": 50.29, + "learning_rate": 2.486517865413116e-05, + "loss": 1.9817, + "step": 17373000 + }, + { + "epoch": 50.29, + "learning_rate": 2.4864455006483884e-05, + "loss": 1.99, + "step": 17373500 + }, + { + "epoch": 50.29, + "learning_rate": 2.4863734253427196e-05, + "loss": 1.9934, + "step": 17374000 + }, + { + "epoch": 50.29, + "learning_rate": 2.486301060577992e-05, + "loss": 1.9881, + "step": 17374500 + }, + { + "epoch": 50.29, + "learning_rate": 2.4862286958132644e-05, + "loss": 1.9784, + "step": 17375000 + }, + { + "epoch": 50.29, + "learning_rate": 2.4861563310485366e-05, + "loss": 2.0021, + "step": 17375500 + }, + { + "epoch": 50.3, + "learning_rate": 2.486083966283809e-05, + "loss": 1.9886, + "step": 17376000 + }, + { + "epoch": 50.3, + "learning_rate": 2.4860116015190814e-05, + "loss": 1.9914, + "step": 17376500 + }, + { + "epoch": 50.3, + "learning_rate": 2.4859392367543536e-05, + "loss": 1.972, + "step": 17377000 + }, + { + "epoch": 50.3, + "learning_rate": 2.485866871989626e-05, + "loss": 1.9976, + "step": 17377500 + }, + { + "epoch": 50.3, + "learning_rate": 2.485794507224898e-05, + "loss": 1.9722, + "step": 17378000 + }, + { + "epoch": 50.3, + "learning_rate": 2.4857221424601706e-05, + "loss": 1.9912, + "step": 17378500 + }, + { + "epoch": 50.31, + "learning_rate": 2.485649777695443e-05, + "loss": 1.9858, + "step": 17379000 + }, + { + "epoch": 50.31, + "learning_rate": 2.485577412930715e-05, + "loss": 1.9874, + "step": 17379500 + }, + { + "epoch": 50.31, + "learning_rate": 2.4855050481659876e-05, + "loss": 2.0062, + "step": 17380000 + }, + { + "epoch": 50.31, + "learning_rate": 2.48543268340126e-05, + "loss": 1.9686, + "step": 17380500 + }, + { + "epoch": 50.31, + "learning_rate": 2.4853604633660614e-05, + "loss": 1.9991, + "step": 17381000 + }, + { + "epoch": 50.31, + "learning_rate": 2.485288098601334e-05, + "loss": 2.0058, + "step": 17381500 + }, + { + "epoch": 50.31, + "learning_rate": 2.485215878566136e-05, + "loss": 1.974, + "step": 17382000 + }, + { + "epoch": 50.32, + "learning_rate": 2.485143513801408e-05, + "loss": 1.9929, + "step": 17382500 + }, + { + "epoch": 50.32, + "learning_rate": 2.4850711490366803e-05, + "loss": 1.9743, + "step": 17383000 + }, + { + "epoch": 50.32, + "learning_rate": 2.4849987842719526e-05, + "loss": 1.981, + "step": 17383500 + }, + { + "epoch": 50.32, + "learning_rate": 2.484926419507225e-05, + "loss": 1.9751, + "step": 17384000 + }, + { + "epoch": 50.32, + "learning_rate": 2.4848540547424973e-05, + "loss": 1.9955, + "step": 17384500 + }, + { + "epoch": 50.32, + "learning_rate": 2.4847816899777696e-05, + "loss": 1.9925, + "step": 17385000 + }, + { + "epoch": 50.32, + "learning_rate": 2.484709325213042e-05, + "loss": 2.0191, + "step": 17385500 + }, + { + "epoch": 50.33, + "learning_rate": 2.4846369604483143e-05, + "loss": 1.9942, + "step": 17386000 + }, + { + "epoch": 50.33, + "learning_rate": 2.4845647404131162e-05, + "loss": 1.9974, + "step": 17386500 + }, + { + "epoch": 50.33, + "learning_rate": 2.4844923756483885e-05, + "loss": 2.0031, + "step": 17387000 + }, + { + "epoch": 50.33, + "learning_rate": 2.4844201556131904e-05, + "loss": 1.9792, + "step": 17387500 + }, + { + "epoch": 50.33, + "learning_rate": 2.4843477908484626e-05, + "loss": 2.0051, + "step": 17388000 + }, + { + "epoch": 50.33, + "learning_rate": 2.4842754260837348e-05, + "loss": 1.9902, + "step": 17388500 + }, + { + "epoch": 50.33, + "learning_rate": 2.484203061319007e-05, + "loss": 2.0088, + "step": 17389000 + }, + { + "epoch": 50.34, + "learning_rate": 2.4841306965542796e-05, + "loss": 1.9952, + "step": 17389500 + }, + { + "epoch": 50.34, + "learning_rate": 2.4840583317895518e-05, + "loss": 1.9923, + "step": 17390000 + }, + { + "epoch": 50.34, + "learning_rate": 2.483985967024824e-05, + "loss": 1.9908, + "step": 17390500 + }, + { + "epoch": 50.34, + "learning_rate": 2.4839136022600966e-05, + "loss": 1.9692, + "step": 17391000 + }, + { + "epoch": 50.34, + "learning_rate": 2.4838412374953688e-05, + "loss": 1.9869, + "step": 17391500 + }, + { + "epoch": 50.34, + "learning_rate": 2.483768872730641e-05, + "loss": 2.0331, + "step": 17392000 + }, + { + "epoch": 50.34, + "learning_rate": 2.4836965079659133e-05, + "loss": 1.9672, + "step": 17392500 + }, + { + "epoch": 50.35, + "learning_rate": 2.483624143201186e-05, + "loss": 1.9638, + "step": 17393000 + }, + { + "epoch": 50.35, + "learning_rate": 2.4835519231659877e-05, + "loss": 1.991, + "step": 17393500 + }, + { + "epoch": 50.35, + "learning_rate": 2.48347955840126e-05, + "loss": 2.0156, + "step": 17394000 + }, + { + "epoch": 50.35, + "learning_rate": 2.4834071936365322e-05, + "loss": 1.9986, + "step": 17394500 + }, + { + "epoch": 50.35, + "learning_rate": 2.4833348288718044e-05, + "loss": 2.0033, + "step": 17395000 + }, + { + "epoch": 50.35, + "learning_rate": 2.4832624641070766e-05, + "loss": 2.0009, + "step": 17395500 + }, + { + "epoch": 50.35, + "learning_rate": 2.4831900993423492e-05, + "loss": 2.0046, + "step": 17396000 + }, + { + "epoch": 50.36, + "learning_rate": 2.4831177345776217e-05, + "loss": 2.0006, + "step": 17396500 + }, + { + "epoch": 50.36, + "learning_rate": 2.483045369812894e-05, + "loss": 1.9968, + "step": 17397000 + }, + { + "epoch": 50.36, + "learning_rate": 2.4829731497776955e-05, + "loss": 1.9734, + "step": 17397500 + }, + { + "epoch": 50.36, + "learning_rate": 2.4829007850129677e-05, + "loss": 1.9864, + "step": 17398000 + }, + { + "epoch": 50.36, + "learning_rate": 2.4828284202482403e-05, + "loss": 2.0261, + "step": 17398500 + }, + { + "epoch": 50.36, + "learning_rate": 2.4827560554835125e-05, + "loss": 2.0025, + "step": 17399000 + }, + { + "epoch": 50.36, + "learning_rate": 2.482683835448314e-05, + "loss": 1.9791, + "step": 17399500 + }, + { + "epoch": 50.37, + "learning_rate": 2.4826114706835866e-05, + "loss": 2.0009, + "step": 17400000 + }, + { + "epoch": 50.37, + "learning_rate": 2.4825391059188592e-05, + "loss": 2.0112, + "step": 17400500 + }, + { + "epoch": 50.37, + "learning_rate": 2.4824667411541314e-05, + "loss": 1.9896, + "step": 17401000 + }, + { + "epoch": 50.37, + "learning_rate": 2.4823943763894037e-05, + "loss": 2.0145, + "step": 17401500 + }, + { + "epoch": 50.37, + "learning_rate": 2.4823221563542056e-05, + "loss": 2.0149, + "step": 17402000 + }, + { + "epoch": 50.37, + "learning_rate": 2.4822500810485368e-05, + "loss": 2.0097, + "step": 17402500 + }, + { + "epoch": 50.37, + "learning_rate": 2.482177716283809e-05, + "loss": 1.9965, + "step": 17403000 + }, + { + "epoch": 50.38, + "learning_rate": 2.4821053515190812e-05, + "loss": 1.9762, + "step": 17403500 + }, + { + "epoch": 50.38, + "learning_rate": 2.4820329867543534e-05, + "loss": 1.9651, + "step": 17404000 + }, + { + "epoch": 50.38, + "learning_rate": 2.481960621989626e-05, + "loss": 1.9967, + "step": 17404500 + }, + { + "epoch": 50.38, + "learning_rate": 2.4818882572248982e-05, + "loss": 1.9935, + "step": 17405000 + }, + { + "epoch": 50.38, + "learning_rate": 2.4818158924601705e-05, + "loss": 2.0024, + "step": 17405500 + }, + { + "epoch": 50.38, + "learning_rate": 2.481743527695443e-05, + "loss": 1.9951, + "step": 17406000 + }, + { + "epoch": 50.38, + "learning_rate": 2.4816711629307152e-05, + "loss": 2.0151, + "step": 17406500 + }, + { + "epoch": 50.39, + "learning_rate": 2.4815989428955168e-05, + "loss": 1.9846, + "step": 17407000 + }, + { + "epoch": 50.39, + "learning_rate": 2.4815265781307894e-05, + "loss": 1.988, + "step": 17407500 + }, + { + "epoch": 50.39, + "learning_rate": 2.4814542133660616e-05, + "loss": 2.0073, + "step": 17408000 + }, + { + "epoch": 50.39, + "learning_rate": 2.481381848601334e-05, + "loss": 1.9632, + "step": 17408500 + }, + { + "epoch": 50.39, + "learning_rate": 2.4813094838366064e-05, + "loss": 1.9713, + "step": 17409000 + }, + { + "epoch": 50.39, + "learning_rate": 2.4812371190718786e-05, + "loss": 1.9914, + "step": 17409500 + }, + { + "epoch": 50.39, + "learning_rate": 2.4811648990366805e-05, + "loss": 1.9628, + "step": 17410000 + }, + { + "epoch": 50.4, + "learning_rate": 2.4810925342719527e-05, + "loss": 2.0066, + "step": 17410500 + }, + { + "epoch": 50.4, + "learning_rate": 2.481020169507225e-05, + "loss": 1.9854, + "step": 17411000 + }, + { + "epoch": 50.4, + "learning_rate": 2.480947804742497e-05, + "loss": 1.9945, + "step": 17411500 + }, + { + "epoch": 50.4, + "learning_rate": 2.4808754399777697e-05, + "loss": 1.9988, + "step": 17412000 + }, + { + "epoch": 50.4, + "learning_rate": 2.480803364672101e-05, + "loss": 1.9925, + "step": 17412500 + }, + { + "epoch": 50.4, + "learning_rate": 2.480730999907373e-05, + "loss": 2.0012, + "step": 17413000 + }, + { + "epoch": 50.4, + "learning_rate": 2.4806586351426457e-05, + "loss": 1.9747, + "step": 17413500 + }, + { + "epoch": 50.41, + "learning_rate": 2.480586270377918e-05, + "loss": 2.0006, + "step": 17414000 + }, + { + "epoch": 50.41, + "learning_rate": 2.4805139056131902e-05, + "loss": 1.9911, + "step": 17414500 + }, + { + "epoch": 50.41, + "learning_rate": 2.4804415408484624e-05, + "loss": 1.9953, + "step": 17415000 + }, + { + "epoch": 50.41, + "learning_rate": 2.4803691760837346e-05, + "loss": 2.001, + "step": 17415500 + }, + { + "epoch": 50.41, + "learning_rate": 2.4802968113190072e-05, + "loss": 1.9964, + "step": 17416000 + }, + { + "epoch": 50.41, + "learning_rate": 2.480224591283809e-05, + "loss": 1.9944, + "step": 17416500 + }, + { + "epoch": 50.42, + "learning_rate": 2.4801522265190813e-05, + "loss": 1.9922, + "step": 17417000 + }, + { + "epoch": 50.42, + "learning_rate": 2.4800798617543535e-05, + "loss": 1.9922, + "step": 17417500 + }, + { + "epoch": 50.42, + "learning_rate": 2.4800074969896258e-05, + "loss": 1.9827, + "step": 17418000 + }, + { + "epoch": 50.42, + "learning_rate": 2.4799351322248983e-05, + "loss": 1.9929, + "step": 17418500 + }, + { + "epoch": 50.42, + "learning_rate": 2.4798627674601705e-05, + "loss": 1.9796, + "step": 17419000 + }, + { + "epoch": 50.42, + "learning_rate": 2.479790402695443e-05, + "loss": 1.9813, + "step": 17419500 + }, + { + "epoch": 50.42, + "learning_rate": 2.4797181826602447e-05, + "loss": 2.0144, + "step": 17420000 + }, + { + "epoch": 50.43, + "learning_rate": 2.479645817895517e-05, + "loss": 1.9745, + "step": 17420500 + }, + { + "epoch": 50.43, + "learning_rate": 2.4795734531307894e-05, + "loss": 2.0111, + "step": 17421000 + }, + { + "epoch": 50.43, + "learning_rate": 2.4795010883660617e-05, + "loss": 1.9904, + "step": 17421500 + }, + { + "epoch": 50.43, + "learning_rate": 2.479428723601334e-05, + "loss": 1.9971, + "step": 17422000 + }, + { + "epoch": 50.43, + "learning_rate": 2.479356358836606e-05, + "loss": 1.9603, + "step": 17422500 + }, + { + "epoch": 50.43, + "learning_rate": 2.4792839940718787e-05, + "loss": 2.0042, + "step": 17423000 + }, + { + "epoch": 50.43, + "learning_rate": 2.479211629307151e-05, + "loss": 2.0064, + "step": 17423500 + }, + { + "epoch": 50.44, + "learning_rate": 2.4791392645424235e-05, + "loss": 1.9844, + "step": 17424000 + }, + { + "epoch": 50.44, + "learning_rate": 2.4790668997776957e-05, + "loss": 1.9765, + "step": 17424500 + }, + { + "epoch": 50.44, + "learning_rate": 2.478994535012968e-05, + "loss": 1.9938, + "step": 17425000 + }, + { + "epoch": 50.44, + "learning_rate": 2.4789223149777695e-05, + "loss": 1.982, + "step": 17425500 + }, + { + "epoch": 50.44, + "learning_rate": 2.478849950213042e-05, + "loss": 1.9949, + "step": 17426000 + }, + { + "epoch": 50.44, + "learning_rate": 2.4787775854483142e-05, + "loss": 2.0092, + "step": 17426500 + }, + { + "epoch": 50.44, + "learning_rate": 2.4787052206835868e-05, + "loss": 2.0001, + "step": 17427000 + }, + { + "epoch": 50.45, + "learning_rate": 2.478632855918859e-05, + "loss": 1.9664, + "step": 17427500 + }, + { + "epoch": 50.45, + "learning_rate": 2.4785604911541313e-05, + "loss": 2.0078, + "step": 17428000 + }, + { + "epoch": 50.45, + "learning_rate": 2.4784881263894035e-05, + "loss": 1.9871, + "step": 17428500 + }, + { + "epoch": 50.45, + "learning_rate": 2.4784157616246757e-05, + "loss": 1.9873, + "step": 17429000 + }, + { + "epoch": 50.45, + "learning_rate": 2.4783435415894776e-05, + "loss": 1.9728, + "step": 17429500 + }, + { + "epoch": 50.45, + "learning_rate": 2.4782711768247498e-05, + "loss": 1.9873, + "step": 17430000 + }, + { + "epoch": 50.45, + "learning_rate": 2.4781988120600224e-05, + "loss": 1.9805, + "step": 17430500 + }, + { + "epoch": 50.46, + "learning_rate": 2.4781264472952946e-05, + "loss": 1.9878, + "step": 17431000 + }, + { + "epoch": 50.46, + "learning_rate": 2.478054082530567e-05, + "loss": 1.9835, + "step": 17431500 + }, + { + "epoch": 50.46, + "learning_rate": 2.4779818624953687e-05, + "loss": 1.984, + "step": 17432000 + }, + { + "epoch": 50.46, + "learning_rate": 2.477909497730641e-05, + "loss": 1.9968, + "step": 17432500 + }, + { + "epoch": 50.46, + "learning_rate": 2.4778371329659135e-05, + "loss": 2.0091, + "step": 17433000 + }, + { + "epoch": 50.46, + "learning_rate": 2.4777647682011857e-05, + "loss": 1.976, + "step": 17433500 + }, + { + "epoch": 50.46, + "learning_rate": 2.4776925481659873e-05, + "loss": 2.0087, + "step": 17434000 + }, + { + "epoch": 50.47, + "learning_rate": 2.47762018340126e-05, + "loss": 1.9791, + "step": 17434500 + }, + { + "epoch": 50.47, + "learning_rate": 2.477547818636532e-05, + "loss": 1.9951, + "step": 17435000 + }, + { + "epoch": 50.47, + "learning_rate": 2.4774754538718046e-05, + "loss": 2.0234, + "step": 17435500 + }, + { + "epoch": 50.47, + "learning_rate": 2.4774032338366062e-05, + "loss": 2.0091, + "step": 17436000 + }, + { + "epoch": 50.47, + "learning_rate": 2.477331013801408e-05, + "loss": 1.9887, + "step": 17436500 + }, + { + "epoch": 50.47, + "learning_rate": 2.4772586490366803e-05, + "loss": 1.9927, + "step": 17437000 + }, + { + "epoch": 50.47, + "learning_rate": 2.4771862842719525e-05, + "loss": 1.9992, + "step": 17437500 + }, + { + "epoch": 50.48, + "learning_rate": 2.4771139195072248e-05, + "loss": 2.0106, + "step": 17438000 + }, + { + "epoch": 50.48, + "learning_rate": 2.4770415547424973e-05, + "loss": 1.9927, + "step": 17438500 + }, + { + "epoch": 50.48, + "learning_rate": 2.4769694794368285e-05, + "loss": 2.0006, + "step": 17439000 + }, + { + "epoch": 50.48, + "learning_rate": 2.4768972594016304e-05, + "loss": 2.0174, + "step": 17439500 + }, + { + "epoch": 50.48, + "learning_rate": 2.4768250393664323e-05, + "loss": 1.9971, + "step": 17440000 + }, + { + "epoch": 50.48, + "learning_rate": 2.4767526746017046e-05, + "loss": 1.9879, + "step": 17440500 + }, + { + "epoch": 50.48, + "learning_rate": 2.4766803098369768e-05, + "loss": 1.9943, + "step": 17441000 + }, + { + "epoch": 50.49, + "learning_rate": 2.476607945072249e-05, + "loss": 1.9902, + "step": 17441500 + }, + { + "epoch": 50.49, + "learning_rate": 2.4765355803075212e-05, + "loss": 2.0023, + "step": 17442000 + }, + { + "epoch": 50.49, + "learning_rate": 2.4764632155427938e-05, + "loss": 1.9824, + "step": 17442500 + }, + { + "epoch": 50.49, + "learning_rate": 2.476390850778066e-05, + "loss": 1.985, + "step": 17443000 + }, + { + "epoch": 50.49, + "learning_rate": 2.4763184860133386e-05, + "loss": 2.016, + "step": 17443500 + }, + { + "epoch": 50.49, + "learning_rate": 2.4762461212486108e-05, + "loss": 2.0225, + "step": 17444000 + }, + { + "epoch": 50.49, + "learning_rate": 2.476173756483883e-05, + "loss": 1.9983, + "step": 17444500 + }, + { + "epoch": 50.5, + "learning_rate": 2.4761013917191552e-05, + "loss": 1.9952, + "step": 17445000 + }, + { + "epoch": 50.5, + "learning_rate": 2.4760290269544275e-05, + "loss": 2.0148, + "step": 17445500 + }, + { + "epoch": 50.5, + "learning_rate": 2.4759566621897e-05, + "loss": 1.9865, + "step": 17446000 + }, + { + "epoch": 50.5, + "learning_rate": 2.4758842974249723e-05, + "loss": 2.0025, + "step": 17446500 + }, + { + "epoch": 50.5, + "learning_rate": 2.4758119326602448e-05, + "loss": 2.0154, + "step": 17447000 + }, + { + "epoch": 50.5, + "learning_rate": 2.475739567895517e-05, + "loss": 1.9968, + "step": 17447500 + }, + { + "epoch": 50.5, + "learning_rate": 2.4756672031307893e-05, + "loss": 1.9791, + "step": 17448000 + }, + { + "epoch": 50.51, + "learning_rate": 2.4755948383660615e-05, + "loss": 1.9893, + "step": 17448500 + }, + { + "epoch": 50.51, + "learning_rate": 2.4755226183308634e-05, + "loss": 2.0045, + "step": 17449000 + }, + { + "epoch": 50.51, + "learning_rate": 2.475450253566136e-05, + "loss": 2.0072, + "step": 17449500 + }, + { + "epoch": 50.51, + "learning_rate": 2.4753780335309375e-05, + "loss": 1.9908, + "step": 17450000 + }, + { + "epoch": 50.51, + "learning_rate": 2.47530566876621e-05, + "loss": 1.9955, + "step": 17450500 + }, + { + "epoch": 50.51, + "learning_rate": 2.4752333040014823e-05, + "loss": 1.9845, + "step": 17451000 + }, + { + "epoch": 50.51, + "learning_rate": 2.4751609392367545e-05, + "loss": 2.0091, + "step": 17451500 + }, + { + "epoch": 50.52, + "learning_rate": 2.4750885744720267e-05, + "loss": 2.0017, + "step": 17452000 + }, + { + "epoch": 50.52, + "learning_rate": 2.475016209707299e-05, + "loss": 1.9792, + "step": 17452500 + }, + { + "epoch": 50.52, + "learning_rate": 2.4749438449425712e-05, + "loss": 2.0255, + "step": 17453000 + }, + { + "epoch": 50.52, + "learning_rate": 2.4748714801778437e-05, + "loss": 1.9892, + "step": 17453500 + }, + { + "epoch": 50.52, + "learning_rate": 2.4747991154131163e-05, + "loss": 2.0207, + "step": 17454000 + }, + { + "epoch": 50.52, + "learning_rate": 2.4747267506483885e-05, + "loss": 1.9995, + "step": 17454500 + }, + { + "epoch": 50.53, + "learning_rate": 2.4746543858836607e-05, + "loss": 1.9952, + "step": 17455000 + }, + { + "epoch": 50.53, + "learning_rate": 2.474582021118933e-05, + "loss": 1.9979, + "step": 17455500 + }, + { + "epoch": 50.53, + "learning_rate": 2.4745096563542052e-05, + "loss": 1.9859, + "step": 17456000 + }, + { + "epoch": 50.53, + "learning_rate": 2.4744372915894774e-05, + "loss": 1.9939, + "step": 17456500 + }, + { + "epoch": 50.53, + "learning_rate": 2.474365216283809e-05, + "loss": 1.9905, + "step": 17457000 + }, + { + "epoch": 50.53, + "learning_rate": 2.4742928515190812e-05, + "loss": 1.9933, + "step": 17457500 + }, + { + "epoch": 50.53, + "learning_rate": 2.4742204867543538e-05, + "loss": 2.001, + "step": 17458000 + }, + { + "epoch": 50.54, + "learning_rate": 2.474148121989626e-05, + "loss": 1.9928, + "step": 17458500 + }, + { + "epoch": 50.54, + "learning_rate": 2.4740757572248982e-05, + "loss": 1.9765, + "step": 17459000 + }, + { + "epoch": 50.54, + "learning_rate": 2.4740033924601704e-05, + "loss": 2.002, + "step": 17459500 + }, + { + "epoch": 50.54, + "learning_rate": 2.4739310276954427e-05, + "loss": 1.9911, + "step": 17460000 + }, + { + "epoch": 50.54, + "learning_rate": 2.4738586629307152e-05, + "loss": 1.9832, + "step": 17460500 + }, + { + "epoch": 50.54, + "learning_rate": 2.4737862981659874e-05, + "loss": 2.0018, + "step": 17461000 + }, + { + "epoch": 50.54, + "learning_rate": 2.47371393340126e-05, + "loss": 1.9946, + "step": 17461500 + }, + { + "epoch": 50.55, + "learning_rate": 2.4736415686365322e-05, + "loss": 1.9628, + "step": 17462000 + }, + { + "epoch": 50.55, + "learning_rate": 2.4735692038718045e-05, + "loss": 1.9984, + "step": 17462500 + }, + { + "epoch": 50.55, + "learning_rate": 2.4734969838366063e-05, + "loss": 2.0152, + "step": 17463000 + }, + { + "epoch": 50.55, + "learning_rate": 2.4734246190718786e-05, + "loss": 1.9665, + "step": 17463500 + }, + { + "epoch": 50.55, + "learning_rate": 2.4733522543071508e-05, + "loss": 1.9968, + "step": 17464000 + }, + { + "epoch": 50.55, + "learning_rate": 2.4732800342719527e-05, + "loss": 2.0006, + "step": 17464500 + }, + { + "epoch": 50.55, + "learning_rate": 2.4732076695072253e-05, + "loss": 1.9942, + "step": 17465000 + }, + { + "epoch": 50.56, + "learning_rate": 2.4731353047424975e-05, + "loss": 2.0033, + "step": 17465500 + }, + { + "epoch": 50.56, + "learning_rate": 2.4730629399777697e-05, + "loss": 1.9885, + "step": 17466000 + }, + { + "epoch": 50.56, + "learning_rate": 2.472990575213042e-05, + "loss": 1.9931, + "step": 17466500 + }, + { + "epoch": 50.56, + "learning_rate": 2.4729183551778438e-05, + "loss": 2.0058, + "step": 17467000 + }, + { + "epoch": 50.56, + "learning_rate": 2.472845990413116e-05, + "loss": 2.009, + "step": 17467500 + }, + { + "epoch": 50.56, + "learning_rate": 2.4727736256483886e-05, + "loss": 1.9898, + "step": 17468000 + }, + { + "epoch": 50.56, + "learning_rate": 2.4727012608836608e-05, + "loss": 2.0043, + "step": 17468500 + }, + { + "epoch": 50.57, + "learning_rate": 2.472628896118933e-05, + "loss": 1.9885, + "step": 17469000 + }, + { + "epoch": 50.57, + "learning_rate": 2.472556676083735e-05, + "loss": 2.0047, + "step": 17469500 + }, + { + "epoch": 50.57, + "learning_rate": 2.472484311319007e-05, + "loss": 2.007, + "step": 17470000 + }, + { + "epoch": 50.57, + "learning_rate": 2.4724119465542794e-05, + "loss": 1.9803, + "step": 17470500 + }, + { + "epoch": 50.57, + "learning_rate": 2.4723395817895516e-05, + "loss": 1.9892, + "step": 17471000 + }, + { + "epoch": 50.57, + "learning_rate": 2.472267217024824e-05, + "loss": 1.9896, + "step": 17471500 + }, + { + "epoch": 50.57, + "learning_rate": 2.472195141719155e-05, + "loss": 1.9931, + "step": 17472000 + }, + { + "epoch": 50.58, + "learning_rate": 2.4721227769544276e-05, + "loss": 1.9777, + "step": 17472500 + }, + { + "epoch": 50.58, + "learning_rate": 2.4720504121897002e-05, + "loss": 1.97, + "step": 17473000 + }, + { + "epoch": 50.58, + "learning_rate": 2.4719780474249724e-05, + "loss": 1.9771, + "step": 17473500 + }, + { + "epoch": 50.58, + "learning_rate": 2.4719056826602446e-05, + "loss": 1.994, + "step": 17474000 + }, + { + "epoch": 50.58, + "learning_rate": 2.471833317895517e-05, + "loss": 1.9899, + "step": 17474500 + }, + { + "epoch": 50.58, + "learning_rate": 2.471760953130789e-05, + "loss": 1.9945, + "step": 17475000 + }, + { + "epoch": 50.58, + "learning_rate": 2.4716885883660616e-05, + "loss": 1.9872, + "step": 17475500 + }, + { + "epoch": 50.59, + "learning_rate": 2.4716162236013342e-05, + "loss": 1.9854, + "step": 17476000 + }, + { + "epoch": 50.59, + "learning_rate": 2.4715438588366064e-05, + "loss": 2.0142, + "step": 17476500 + }, + { + "epoch": 50.59, + "learning_rate": 2.4714714940718787e-05, + "loss": 2.0106, + "step": 17477000 + }, + { + "epoch": 50.59, + "learning_rate": 2.471399129307151e-05, + "loss": 2.0082, + "step": 17477500 + }, + { + "epoch": 50.59, + "learning_rate": 2.471326764542423e-05, + "loss": 1.9982, + "step": 17478000 + }, + { + "epoch": 50.59, + "learning_rate": 2.4712543997776953e-05, + "loss": 1.9827, + "step": 17478500 + }, + { + "epoch": 50.59, + "learning_rate": 2.4711823244720265e-05, + "loss": 1.998, + "step": 17479000 + }, + { + "epoch": 50.6, + "learning_rate": 2.471109959707299e-05, + "loss": 1.9942, + "step": 17479500 + }, + { + "epoch": 50.6, + "learning_rate": 2.4710375949425717e-05, + "loss": 1.9935, + "step": 17480000 + }, + { + "epoch": 50.6, + "learning_rate": 2.470965230177844e-05, + "loss": 2.0113, + "step": 17480500 + }, + { + "epoch": 50.6, + "learning_rate": 2.470892865413116e-05, + "loss": 1.9715, + "step": 17481000 + }, + { + "epoch": 50.6, + "learning_rate": 2.4708205006483883e-05, + "loss": 2.0057, + "step": 17481500 + }, + { + "epoch": 50.6, + "learning_rate": 2.4707481358836606e-05, + "loss": 1.9884, + "step": 17482000 + }, + { + "epoch": 50.6, + "learning_rate": 2.4706757711189328e-05, + "loss": 1.9807, + "step": 17482500 + }, + { + "epoch": 50.61, + "learning_rate": 2.4706034063542054e-05, + "loss": 1.9627, + "step": 17483000 + }, + { + "epoch": 50.61, + "learning_rate": 2.4705311863190072e-05, + "loss": 2.0058, + "step": 17483500 + }, + { + "epoch": 50.61, + "learning_rate": 2.4704588215542795e-05, + "loss": 2.0196, + "step": 17484000 + }, + { + "epoch": 50.61, + "learning_rate": 2.4703864567895517e-05, + "loss": 1.9879, + "step": 17484500 + }, + { + "epoch": 50.61, + "learning_rate": 2.4703140920248243e-05, + "loss": 1.984, + "step": 17485000 + }, + { + "epoch": 50.61, + "learning_rate": 2.4702417272600965e-05, + "loss": 1.9933, + "step": 17485500 + }, + { + "epoch": 50.61, + "learning_rate": 2.470169507224898e-05, + "loss": 1.9807, + "step": 17486000 + }, + { + "epoch": 50.62, + "learning_rate": 2.4700971424601706e-05, + "loss": 1.9985, + "step": 17486500 + }, + { + "epoch": 50.62, + "learning_rate": 2.470024777695443e-05, + "loss": 2.0027, + "step": 17487000 + }, + { + "epoch": 50.62, + "learning_rate": 2.4699524129307154e-05, + "loss": 1.9925, + "step": 17487500 + }, + { + "epoch": 50.62, + "learning_rate": 2.4698800481659876e-05, + "loss": 1.9926, + "step": 17488000 + }, + { + "epoch": 50.62, + "learning_rate": 2.469807828130789e-05, + "loss": 2.0012, + "step": 17488500 + }, + { + "epoch": 50.62, + "learning_rate": 2.469735608095591e-05, + "loss": 1.9997, + "step": 17489000 + }, + { + "epoch": 50.62, + "learning_rate": 2.4696632433308633e-05, + "loss": 2.0088, + "step": 17489500 + }, + { + "epoch": 50.63, + "learning_rate": 2.4695908785661355e-05, + "loss": 1.9827, + "step": 17490000 + }, + { + "epoch": 50.63, + "learning_rate": 2.469518513801408e-05, + "loss": 2.0116, + "step": 17490500 + }, + { + "epoch": 50.63, + "learning_rate": 2.4694461490366806e-05, + "loss": 1.9905, + "step": 17491000 + }, + { + "epoch": 50.63, + "learning_rate": 2.469373784271953e-05, + "loss": 2.0062, + "step": 17491500 + }, + { + "epoch": 50.63, + "learning_rate": 2.469301419507225e-05, + "loss": 1.9968, + "step": 17492000 + }, + { + "epoch": 50.63, + "learning_rate": 2.4692290547424973e-05, + "loss": 2.0161, + "step": 17492500 + }, + { + "epoch": 50.64, + "learning_rate": 2.4691566899777695e-05, + "loss": 1.9858, + "step": 17493000 + }, + { + "epoch": 50.64, + "learning_rate": 2.4690846146721007e-05, + "loss": 2.0133, + "step": 17493500 + }, + { + "epoch": 50.64, + "learning_rate": 2.469012249907373e-05, + "loss": 2.0, + "step": 17494000 + }, + { + "epoch": 50.64, + "learning_rate": 2.4689398851426455e-05, + "loss": 1.9779, + "step": 17494500 + }, + { + "epoch": 50.64, + "learning_rate": 2.468867520377918e-05, + "loss": 1.9836, + "step": 17495000 + }, + { + "epoch": 50.64, + "learning_rate": 2.4687951556131903e-05, + "loss": 2.0027, + "step": 17495500 + }, + { + "epoch": 50.64, + "learning_rate": 2.4687227908484625e-05, + "loss": 1.9955, + "step": 17496000 + }, + { + "epoch": 50.65, + "learning_rate": 2.4686504260837348e-05, + "loss": 2.0085, + "step": 17496500 + }, + { + "epoch": 50.65, + "learning_rate": 2.4685782060485367e-05, + "loss": 2.0006, + "step": 17497000 + }, + { + "epoch": 50.65, + "learning_rate": 2.468505841283809e-05, + "loss": 2.0057, + "step": 17497500 + }, + { + "epoch": 50.65, + "learning_rate": 2.468433476519081e-05, + "loss": 1.9994, + "step": 17498000 + }, + { + "epoch": 50.65, + "learning_rate": 2.4683611117543537e-05, + "loss": 2.0058, + "step": 17498500 + }, + { + "epoch": 50.65, + "learning_rate": 2.468288746989626e-05, + "loss": 1.9991, + "step": 17499000 + }, + { + "epoch": 50.65, + "learning_rate": 2.468216382224898e-05, + "loss": 2.0089, + "step": 17499500 + }, + { + "epoch": 50.66, + "learning_rate": 2.4681440174601707e-05, + "loss": 1.9931, + "step": 17500000 + }, + { + "epoch": 50.66, + "learning_rate": 2.4680717974249722e-05, + "loss": 2.0089, + "step": 17500500 + }, + { + "epoch": 50.66, + "learning_rate": 2.4679994326602445e-05, + "loss": 2.0205, + "step": 17501000 + }, + { + "epoch": 50.66, + "learning_rate": 2.467927067895517e-05, + "loss": 1.9868, + "step": 17501500 + }, + { + "epoch": 50.66, + "learning_rate": 2.4678547031307896e-05, + "loss": 1.9947, + "step": 17502000 + }, + { + "epoch": 50.66, + "learning_rate": 2.4677823383660618e-05, + "loss": 2.0023, + "step": 17502500 + }, + { + "epoch": 50.66, + "learning_rate": 2.467709973601334e-05, + "loss": 2.0296, + "step": 17503000 + }, + { + "epoch": 50.67, + "learning_rate": 2.4676377535661356e-05, + "loss": 1.9838, + "step": 17503500 + }, + { + "epoch": 50.67, + "learning_rate": 2.467565388801408e-05, + "loss": 1.9932, + "step": 17504000 + }, + { + "epoch": 50.67, + "learning_rate": 2.4674930240366804e-05, + "loss": 1.9686, + "step": 17504500 + }, + { + "epoch": 50.67, + "learning_rate": 2.4674206592719526e-05, + "loss": 2.0352, + "step": 17505000 + }, + { + "epoch": 50.67, + "learning_rate": 2.4673484392367545e-05, + "loss": 1.99, + "step": 17505500 + }, + { + "epoch": 50.67, + "learning_rate": 2.4672762192015564e-05, + "loss": 2.0125, + "step": 17506000 + }, + { + "epoch": 50.67, + "learning_rate": 2.4672038544368286e-05, + "loss": 1.9879, + "step": 17506500 + }, + { + "epoch": 50.68, + "learning_rate": 2.4671314896721008e-05, + "loss": 1.977, + "step": 17507000 + }, + { + "epoch": 50.68, + "learning_rate": 2.4670591249073734e-05, + "loss": 2.0234, + "step": 17507500 + }, + { + "epoch": 50.68, + "learning_rate": 2.4669867601426456e-05, + "loss": 2.0016, + "step": 17508000 + }, + { + "epoch": 50.68, + "learning_rate": 2.466914395377918e-05, + "loss": 1.9841, + "step": 17508500 + }, + { + "epoch": 50.68, + "learning_rate": 2.4668421753427194e-05, + "loss": 1.9496, + "step": 17509000 + }, + { + "epoch": 50.68, + "learning_rate": 2.466769810577992e-05, + "loss": 1.9724, + "step": 17509500 + }, + { + "epoch": 50.68, + "learning_rate": 2.4666974458132645e-05, + "loss": 2.005, + "step": 17510000 + }, + { + "epoch": 50.69, + "learning_rate": 2.466625225778066e-05, + "loss": 1.9969, + "step": 17510500 + }, + { + "epoch": 50.69, + "learning_rate": 2.466553005742868e-05, + "loss": 2.0198, + "step": 17511000 + }, + { + "epoch": 50.69, + "learning_rate": 2.4664806409781402e-05, + "loss": 1.9979, + "step": 17511500 + }, + { + "epoch": 50.69, + "learning_rate": 2.4664082762134124e-05, + "loss": 2.0014, + "step": 17512000 + }, + { + "epoch": 50.69, + "learning_rate": 2.4663359114486846e-05, + "loss": 1.9969, + "step": 17512500 + }, + { + "epoch": 50.69, + "learning_rate": 2.4662635466839572e-05, + "loss": 1.9972, + "step": 17513000 + }, + { + "epoch": 50.69, + "learning_rate": 2.4661911819192298e-05, + "loss": 1.984, + "step": 17513500 + }, + { + "epoch": 50.7, + "learning_rate": 2.466118817154502e-05, + "loss": 1.9894, + "step": 17514000 + }, + { + "epoch": 50.7, + "learning_rate": 2.4660464523897742e-05, + "loss": 1.9843, + "step": 17514500 + }, + { + "epoch": 50.7, + "learning_rate": 2.4659740876250464e-05, + "loss": 1.9978, + "step": 17515000 + }, + { + "epoch": 50.7, + "learning_rate": 2.4659017228603187e-05, + "loss": 1.9942, + "step": 17515500 + }, + { + "epoch": 50.7, + "learning_rate": 2.465829358095591e-05, + "loss": 2.0056, + "step": 17516000 + }, + { + "epoch": 50.7, + "learning_rate": 2.4657569933308634e-05, + "loss": 2.0077, + "step": 17516500 + }, + { + "epoch": 50.7, + "learning_rate": 2.465684628566136e-05, + "loss": 2.0216, + "step": 17517000 + }, + { + "epoch": 50.71, + "learning_rate": 2.4656124085309376e-05, + "loss": 2.0021, + "step": 17517500 + }, + { + "epoch": 50.71, + "learning_rate": 2.4655400437662098e-05, + "loss": 1.9934, + "step": 17518000 + }, + { + "epoch": 50.71, + "learning_rate": 2.4654678237310117e-05, + "loss": 2.0218, + "step": 17518500 + }, + { + "epoch": 50.71, + "learning_rate": 2.465395458966284e-05, + "loss": 2.0018, + "step": 17519000 + }, + { + "epoch": 50.71, + "learning_rate": 2.465323094201556e-05, + "loss": 1.9822, + "step": 17519500 + }, + { + "epoch": 50.71, + "learning_rate": 2.4652507294368283e-05, + "loss": 1.9795, + "step": 17520000 + }, + { + "epoch": 50.71, + "learning_rate": 2.465178364672101e-05, + "loss": 2.0216, + "step": 17520500 + }, + { + "epoch": 50.72, + "learning_rate": 2.4651059999073735e-05, + "loss": 1.9912, + "step": 17521000 + }, + { + "epoch": 50.72, + "learning_rate": 2.465033779872175e-05, + "loss": 1.9999, + "step": 17521500 + }, + { + "epoch": 50.72, + "learning_rate": 2.4649614151074472e-05, + "loss": 2.004, + "step": 17522000 + }, + { + "epoch": 50.72, + "learning_rate": 2.4648890503427198e-05, + "loss": 1.9926, + "step": 17522500 + }, + { + "epoch": 50.72, + "learning_rate": 2.464816685577992e-05, + "loss": 2.0017, + "step": 17523000 + }, + { + "epoch": 50.72, + "learning_rate": 2.4647444655427936e-05, + "loss": 2.0184, + "step": 17523500 + }, + { + "epoch": 50.72, + "learning_rate": 2.464672100778066e-05, + "loss": 2.0029, + "step": 17524000 + }, + { + "epoch": 50.73, + "learning_rate": 2.4645997360133384e-05, + "loss": 2.0148, + "step": 17524500 + }, + { + "epoch": 50.73, + "learning_rate": 2.464527371248611e-05, + "loss": 2.0069, + "step": 17525000 + }, + { + "epoch": 50.73, + "learning_rate": 2.4644551512134125e-05, + "loss": 1.9907, + "step": 17525500 + }, + { + "epoch": 50.73, + "learning_rate": 2.4643827864486847e-05, + "loss": 2.0446, + "step": 17526000 + }, + { + "epoch": 50.73, + "learning_rate": 2.4643104216839573e-05, + "loss": 1.9987, + "step": 17526500 + }, + { + "epoch": 50.73, + "learning_rate": 2.4642380569192295e-05, + "loss": 2.0077, + "step": 17527000 + }, + { + "epoch": 50.73, + "learning_rate": 2.4641656921545017e-05, + "loss": 2.0126, + "step": 17527500 + }, + { + "epoch": 50.74, + "learning_rate": 2.464093327389774e-05, + "loss": 2.0036, + "step": 17528000 + }, + { + "epoch": 50.74, + "learning_rate": 2.4640209626250465e-05, + "loss": 1.9733, + "step": 17528500 + }, + { + "epoch": 50.74, + "learning_rate": 2.4639485978603187e-05, + "loss": 1.9847, + "step": 17529000 + }, + { + "epoch": 50.74, + "learning_rate": 2.4638762330955913e-05, + "loss": 1.9902, + "step": 17529500 + }, + { + "epoch": 50.74, + "learning_rate": 2.4638038683308635e-05, + "loss": 2.0017, + "step": 17530000 + }, + { + "epoch": 50.74, + "learning_rate": 2.4637315035661357e-05, + "loss": 1.9753, + "step": 17530500 + }, + { + "epoch": 50.75, + "learning_rate": 2.463659138801408e-05, + "loss": 1.9906, + "step": 17531000 + }, + { + "epoch": 50.75, + "learning_rate": 2.46358691876621e-05, + "loss": 1.9932, + "step": 17531500 + }, + { + "epoch": 50.75, + "learning_rate": 2.463514554001482e-05, + "loss": 1.9649, + "step": 17532000 + }, + { + "epoch": 50.75, + "learning_rate": 2.4634421892367546e-05, + "loss": 2.0006, + "step": 17532500 + }, + { + "epoch": 50.75, + "learning_rate": 2.463369824472027e-05, + "loss": 1.9979, + "step": 17533000 + }, + { + "epoch": 50.75, + "learning_rate": 2.463297459707299e-05, + "loss": 2.0129, + "step": 17533500 + }, + { + "epoch": 50.75, + "learning_rate": 2.4632250949425713e-05, + "loss": 1.9912, + "step": 17534000 + }, + { + "epoch": 50.76, + "learning_rate": 2.4631527301778435e-05, + "loss": 1.991, + "step": 17534500 + }, + { + "epoch": 50.76, + "learning_rate": 2.463080365413116e-05, + "loss": 1.9867, + "step": 17535000 + }, + { + "epoch": 50.76, + "learning_rate": 2.4630080006483887e-05, + "loss": 2.0029, + "step": 17535500 + }, + { + "epoch": 50.76, + "learning_rate": 2.462935635883661e-05, + "loss": 2.0012, + "step": 17536000 + }, + { + "epoch": 50.76, + "learning_rate": 2.4628634158484624e-05, + "loss": 1.9926, + "step": 17536500 + }, + { + "epoch": 50.76, + "learning_rate": 2.462791051083735e-05, + "loss": 2.0055, + "step": 17537000 + }, + { + "epoch": 50.76, + "learning_rate": 2.4627186863190072e-05, + "loss": 2.0242, + "step": 17537500 + }, + { + "epoch": 50.77, + "learning_rate": 2.4626463215542794e-05, + "loss": 1.988, + "step": 17538000 + }, + { + "epoch": 50.77, + "learning_rate": 2.4625739567895517e-05, + "loss": 2.0085, + "step": 17538500 + }, + { + "epoch": 50.77, + "learning_rate": 2.4625017367543536e-05, + "loss": 2.0176, + "step": 17539000 + }, + { + "epoch": 50.77, + "learning_rate": 2.462429516719155e-05, + "loss": 2.0005, + "step": 17539500 + }, + { + "epoch": 50.77, + "learning_rate": 2.4623571519544277e-05, + "loss": 1.9808, + "step": 17540000 + }, + { + "epoch": 50.77, + "learning_rate": 2.4622847871897e-05, + "loss": 1.9914, + "step": 17540500 + }, + { + "epoch": 50.77, + "learning_rate": 2.4622124224249725e-05, + "loss": 2.0064, + "step": 17541000 + }, + { + "epoch": 50.78, + "learning_rate": 2.4621400576602447e-05, + "loss": 1.9927, + "step": 17541500 + }, + { + "epoch": 50.78, + "learning_rate": 2.462067692895517e-05, + "loss": 2.0006, + "step": 17542000 + }, + { + "epoch": 50.78, + "learning_rate": 2.461995328130789e-05, + "loss": 1.9874, + "step": 17542500 + }, + { + "epoch": 50.78, + "learning_rate": 2.4619229633660614e-05, + "loss": 2.0076, + "step": 17543000 + }, + { + "epoch": 50.78, + "learning_rate": 2.4618507433308636e-05, + "loss": 1.9975, + "step": 17543500 + }, + { + "epoch": 50.78, + "learning_rate": 2.4617783785661358e-05, + "loss": 1.9847, + "step": 17544000 + }, + { + "epoch": 50.78, + "learning_rate": 2.4617061585309377e-05, + "loss": 2.0011, + "step": 17544500 + }, + { + "epoch": 50.79, + "learning_rate": 2.46163379376621e-05, + "loss": 1.9955, + "step": 17545000 + }, + { + "epoch": 50.79, + "learning_rate": 2.461561429001482e-05, + "loss": 1.9921, + "step": 17545500 + }, + { + "epoch": 50.79, + "learning_rate": 2.4614890642367544e-05, + "loss": 2.0053, + "step": 17546000 + }, + { + "epoch": 50.79, + "learning_rate": 2.4614166994720266e-05, + "loss": 1.9896, + "step": 17546500 + }, + { + "epoch": 50.79, + "learning_rate": 2.4613444794368285e-05, + "loss": 2.0138, + "step": 17547000 + }, + { + "epoch": 50.79, + "learning_rate": 2.461272114672101e-05, + "loss": 1.9994, + "step": 17547500 + }, + { + "epoch": 50.79, + "learning_rate": 2.4611997499073733e-05, + "loss": 2.0149, + "step": 17548000 + }, + { + "epoch": 50.8, + "learning_rate": 2.4611273851426455e-05, + "loss": 2.0131, + "step": 17548500 + }, + { + "epoch": 50.8, + "learning_rate": 2.4610550203779177e-05, + "loss": 1.9967, + "step": 17549000 + }, + { + "epoch": 50.8, + "learning_rate": 2.4609826556131903e-05, + "loss": 2.0265, + "step": 17549500 + }, + { + "epoch": 50.8, + "learning_rate": 2.4609102908484625e-05, + "loss": 1.9856, + "step": 17550000 + }, + { + "epoch": 50.8, + "learning_rate": 2.460838070813264e-05, + "loss": 2.001, + "step": 17550500 + }, + { + "epoch": 50.8, + "learning_rate": 2.4607657060485366e-05, + "loss": 1.994, + "step": 17551000 + }, + { + "epoch": 50.8, + "learning_rate": 2.460693341283809e-05, + "loss": 1.9973, + "step": 17551500 + }, + { + "epoch": 50.81, + "learning_rate": 2.4606211212486108e-05, + "loss": 1.983, + "step": 17552000 + }, + { + "epoch": 50.81, + "learning_rate": 2.460548756483883e-05, + "loss": 1.978, + "step": 17552500 + }, + { + "epoch": 50.81, + "learning_rate": 2.4604763917191552e-05, + "loss": 2.0205, + "step": 17553000 + }, + { + "epoch": 50.81, + "learning_rate": 2.4604040269544278e-05, + "loss": 1.9772, + "step": 17553500 + }, + { + "epoch": 50.81, + "learning_rate": 2.4603316621897e-05, + "loss": 1.9826, + "step": 17554000 + }, + { + "epoch": 50.81, + "learning_rate": 2.4602592974249725e-05, + "loss": 2.0124, + "step": 17554500 + }, + { + "epoch": 50.81, + "learning_rate": 2.4601869326602448e-05, + "loss": 1.9836, + "step": 17555000 + }, + { + "epoch": 50.82, + "learning_rate": 2.460114567895517e-05, + "loss": 1.9941, + "step": 17555500 + }, + { + "epoch": 50.82, + "learning_rate": 2.4600422031307892e-05, + "loss": 2.0124, + "step": 17556000 + }, + { + "epoch": 50.82, + "learning_rate": 2.459969983095591e-05, + "loss": 2.0018, + "step": 17556500 + }, + { + "epoch": 50.82, + "learning_rate": 2.4598976183308633e-05, + "loss": 2.0123, + "step": 17557000 + }, + { + "epoch": 50.82, + "learning_rate": 2.4598252535661356e-05, + "loss": 2.023, + "step": 17557500 + }, + { + "epoch": 50.82, + "learning_rate": 2.4597528888014078e-05, + "loss": 1.9853, + "step": 17558000 + }, + { + "epoch": 50.82, + "learning_rate": 2.4596805240366803e-05, + "loss": 2.0241, + "step": 17558500 + }, + { + "epoch": 50.83, + "learning_rate": 2.459608159271953e-05, + "loss": 1.983, + "step": 17559000 + }, + { + "epoch": 50.83, + "learning_rate": 2.459535794507225e-05, + "loss": 2.0119, + "step": 17559500 + }, + { + "epoch": 50.83, + "learning_rate": 2.4594634297424974e-05, + "loss": 1.994, + "step": 17560000 + }, + { + "epoch": 50.83, + "learning_rate": 2.4593910649777696e-05, + "loss": 1.9932, + "step": 17560500 + }, + { + "epoch": 50.83, + "learning_rate": 2.4593187002130418e-05, + "loss": 1.9749, + "step": 17561000 + }, + { + "epoch": 50.83, + "learning_rate": 2.4592464801778437e-05, + "loss": 1.9964, + "step": 17561500 + }, + { + "epoch": 50.83, + "learning_rate": 2.4591741154131163e-05, + "loss": 1.9923, + "step": 17562000 + }, + { + "epoch": 50.84, + "learning_rate": 2.4591017506483885e-05, + "loss": 1.9847, + "step": 17562500 + }, + { + "epoch": 50.84, + "learning_rate": 2.4590293858836607e-05, + "loss": 1.9737, + "step": 17563000 + }, + { + "epoch": 50.84, + "learning_rate": 2.458957021118933e-05, + "loss": 1.9936, + "step": 17563500 + }, + { + "epoch": 50.84, + "learning_rate": 2.4588846563542055e-05, + "loss": 2.0146, + "step": 17564000 + }, + { + "epoch": 50.84, + "learning_rate": 2.4588122915894777e-05, + "loss": 1.9861, + "step": 17564500 + }, + { + "epoch": 50.84, + "learning_rate": 2.4587400715542793e-05, + "loss": 2.015, + "step": 17565000 + }, + { + "epoch": 50.84, + "learning_rate": 2.4586679962486105e-05, + "loss": 2.0106, + "step": 17565500 + }, + { + "epoch": 50.85, + "learning_rate": 2.4585957762134124e-05, + "loss": 1.9886, + "step": 17566000 + }, + { + "epoch": 50.85, + "learning_rate": 2.458523411448685e-05, + "loss": 2.0031, + "step": 17566500 + }, + { + "epoch": 50.85, + "learning_rate": 2.4584510466839572e-05, + "loss": 2.0058, + "step": 17567000 + }, + { + "epoch": 50.85, + "learning_rate": 2.4583786819192294e-05, + "loss": 1.9887, + "step": 17567500 + }, + { + "epoch": 50.85, + "learning_rate": 2.4583063171545016e-05, + "loss": 2.0092, + "step": 17568000 + }, + { + "epoch": 50.85, + "learning_rate": 2.4582339523897742e-05, + "loss": 1.9855, + "step": 17568500 + }, + { + "epoch": 50.86, + "learning_rate": 2.4581615876250464e-05, + "loss": 1.9857, + "step": 17569000 + }, + { + "epoch": 50.86, + "learning_rate": 2.4580892228603186e-05, + "loss": 1.995, + "step": 17569500 + }, + { + "epoch": 50.86, + "learning_rate": 2.4580168580955912e-05, + "loss": 2.0041, + "step": 17570000 + }, + { + "epoch": 50.86, + "learning_rate": 2.4579444933308634e-05, + "loss": 2.0195, + "step": 17570500 + }, + { + "epoch": 50.86, + "learning_rate": 2.4578721285661356e-05, + "loss": 1.9652, + "step": 17571000 + }, + { + "epoch": 50.86, + "learning_rate": 2.457799763801408e-05, + "loss": 1.9895, + "step": 17571500 + }, + { + "epoch": 50.86, + "learning_rate": 2.4577273990366804e-05, + "loss": 1.9836, + "step": 17572000 + }, + { + "epoch": 50.87, + "learning_rate": 2.4576550342719526e-05, + "loss": 2.0032, + "step": 17572500 + }, + { + "epoch": 50.87, + "learning_rate": 2.457582669507225e-05, + "loss": 1.9937, + "step": 17573000 + }, + { + "epoch": 50.87, + "learning_rate": 2.4575104494720268e-05, + "loss": 2.0251, + "step": 17573500 + }, + { + "epoch": 50.87, + "learning_rate": 2.4574382294368287e-05, + "loss": 2.0081, + "step": 17574000 + }, + { + "epoch": 50.87, + "learning_rate": 2.4573660094016306e-05, + "loss": 2.0161, + "step": 17574500 + }, + { + "epoch": 50.87, + "learning_rate": 2.4572936446369028e-05, + "loss": 1.9937, + "step": 17575000 + }, + { + "epoch": 50.87, + "learning_rate": 2.457221279872175e-05, + "loss": 2.0087, + "step": 17575500 + }, + { + "epoch": 50.88, + "learning_rate": 2.4571489151074472e-05, + "loss": 2.0195, + "step": 17576000 + }, + { + "epoch": 50.88, + "learning_rate": 2.457076695072249e-05, + "loss": 2.0174, + "step": 17576500 + }, + { + "epoch": 50.88, + "learning_rate": 2.4570043303075213e-05, + "loss": 2.0127, + "step": 17577000 + }, + { + "epoch": 50.88, + "learning_rate": 2.456931965542794e-05, + "loss": 1.9987, + "step": 17577500 + }, + { + "epoch": 50.88, + "learning_rate": 2.456859600778066e-05, + "loss": 2.0063, + "step": 17578000 + }, + { + "epoch": 50.88, + "learning_rate": 2.4567872360133384e-05, + "loss": 2.0057, + "step": 17578500 + }, + { + "epoch": 50.88, + "learning_rate": 2.4567148712486106e-05, + "loss": 1.9862, + "step": 17579000 + }, + { + "epoch": 50.89, + "learning_rate": 2.456642506483883e-05, + "loss": 1.9999, + "step": 17579500 + }, + { + "epoch": 50.89, + "learning_rate": 2.4565701417191554e-05, + "loss": 2.0094, + "step": 17580000 + }, + { + "epoch": 50.89, + "learning_rate": 2.4564977769544276e-05, + "loss": 2.0148, + "step": 17580500 + }, + { + "epoch": 50.89, + "learning_rate": 2.4564254121897e-05, + "loss": 1.9963, + "step": 17581000 + }, + { + "epoch": 50.89, + "learning_rate": 2.456353192154502e-05, + "loss": 2.0104, + "step": 17581500 + }, + { + "epoch": 50.89, + "learning_rate": 2.4562808273897743e-05, + "loss": 2.0072, + "step": 17582000 + }, + { + "epoch": 50.89, + "learning_rate": 2.4562084626250465e-05, + "loss": 2.0132, + "step": 17582500 + }, + { + "epoch": 50.9, + "learning_rate": 2.4561360978603187e-05, + "loss": 2.0153, + "step": 17583000 + }, + { + "epoch": 50.9, + "learning_rate": 2.456063733095591e-05, + "loss": 2.0013, + "step": 17583500 + }, + { + "epoch": 50.9, + "learning_rate": 2.455991368330863e-05, + "loss": 1.9907, + "step": 17584000 + }, + { + "epoch": 50.9, + "learning_rate": 2.4559190035661357e-05, + "loss": 2.0022, + "step": 17584500 + }, + { + "epoch": 50.9, + "learning_rate": 2.4558466388014083e-05, + "loss": 1.9937, + "step": 17585000 + }, + { + "epoch": 50.9, + "learning_rate": 2.4557742740366805e-05, + "loss": 2.0182, + "step": 17585500 + }, + { + "epoch": 50.9, + "learning_rate": 2.4557019092719527e-05, + "loss": 2.0162, + "step": 17586000 + }, + { + "epoch": 50.91, + "learning_rate": 2.455629544507225e-05, + "loss": 1.9928, + "step": 17586500 + }, + { + "epoch": 50.91, + "learning_rate": 2.4555571797424972e-05, + "loss": 2.0108, + "step": 17587000 + }, + { + "epoch": 50.91, + "learning_rate": 2.455484959707299e-05, + "loss": 2.0005, + "step": 17587500 + }, + { + "epoch": 50.91, + "learning_rate": 2.4554127396721006e-05, + "loss": 1.99, + "step": 17588000 + }, + { + "epoch": 50.91, + "learning_rate": 2.4553403749073732e-05, + "loss": 2.0026, + "step": 17588500 + }, + { + "epoch": 50.91, + "learning_rate": 2.4552680101426457e-05, + "loss": 2.0008, + "step": 17589000 + }, + { + "epoch": 50.91, + "learning_rate": 2.455195645377918e-05, + "loss": 1.9975, + "step": 17589500 + }, + { + "epoch": 50.92, + "learning_rate": 2.4551232806131902e-05, + "loss": 2.0039, + "step": 17590000 + }, + { + "epoch": 50.92, + "learning_rate": 2.455051060577992e-05, + "loss": 2.0302, + "step": 17590500 + }, + { + "epoch": 50.92, + "learning_rate": 2.4549788405427936e-05, + "loss": 2.0174, + "step": 17591000 + }, + { + "epoch": 50.92, + "learning_rate": 2.454906475778066e-05, + "loss": 2.0144, + "step": 17591500 + }, + { + "epoch": 50.92, + "learning_rate": 2.4548341110133384e-05, + "loss": 1.9996, + "step": 17592000 + }, + { + "epoch": 50.92, + "learning_rate": 2.454761746248611e-05, + "loss": 2.0079, + "step": 17592500 + }, + { + "epoch": 50.92, + "learning_rate": 2.4546893814838832e-05, + "loss": 1.9956, + "step": 17593000 + }, + { + "epoch": 50.93, + "learning_rate": 2.4546170167191554e-05, + "loss": 2.0057, + "step": 17593500 + }, + { + "epoch": 50.93, + "learning_rate": 2.4545446519544277e-05, + "loss": 2.0034, + "step": 17594000 + }, + { + "epoch": 50.93, + "learning_rate": 2.4544722871897e-05, + "loss": 2.0071, + "step": 17594500 + }, + { + "epoch": 50.93, + "learning_rate": 2.454399922424972e-05, + "loss": 2.007, + "step": 17595000 + }, + { + "epoch": 50.93, + "learning_rate": 2.4543275576602447e-05, + "loss": 1.9867, + "step": 17595500 + }, + { + "epoch": 50.93, + "learning_rate": 2.4542551928955172e-05, + "loss": 2.0045, + "step": 17596000 + }, + { + "epoch": 50.93, + "learning_rate": 2.4541829728603188e-05, + "loss": 2.0055, + "step": 17596500 + }, + { + "epoch": 50.94, + "learning_rate": 2.454110608095591e-05, + "loss": 1.9932, + "step": 17597000 + }, + { + "epoch": 50.94, + "learning_rate": 2.4540382433308636e-05, + "loss": 1.9901, + "step": 17597500 + }, + { + "epoch": 50.94, + "learning_rate": 2.4539658785661358e-05, + "loss": 2.0109, + "step": 17598000 + }, + { + "epoch": 50.94, + "learning_rate": 2.4538936585309374e-05, + "loss": 1.9969, + "step": 17598500 + }, + { + "epoch": 50.94, + "learning_rate": 2.4538214384957392e-05, + "loss": 2.0186, + "step": 17599000 + }, + { + "epoch": 50.94, + "learning_rate": 2.4537490737310115e-05, + "loss": 2.0254, + "step": 17599500 + }, + { + "epoch": 50.94, + "learning_rate": 2.453676708966284e-05, + "loss": 2.0075, + "step": 17600000 + }, + { + "epoch": 50.95, + "learning_rate": 2.4536043442015563e-05, + "loss": 1.981, + "step": 17600500 + }, + { + "epoch": 50.95, + "learning_rate": 2.4535319794368285e-05, + "loss": 1.997, + "step": 17601000 + }, + { + "epoch": 50.95, + "learning_rate": 2.453459614672101e-05, + "loss": 1.9839, + "step": 17601500 + }, + { + "epoch": 50.95, + "learning_rate": 2.4533872499073733e-05, + "loss": 1.9963, + "step": 17602000 + }, + { + "epoch": 50.95, + "learning_rate": 2.4533148851426455e-05, + "loss": 1.987, + "step": 17602500 + }, + { + "epoch": 50.95, + "learning_rate": 2.4532425203779177e-05, + "loss": 1.9847, + "step": 17603000 + }, + { + "epoch": 50.95, + "learning_rate": 2.4531701556131903e-05, + "loss": 2.0164, + "step": 17603500 + }, + { + "epoch": 50.96, + "learning_rate": 2.4530980803075215e-05, + "loss": 1.9937, + "step": 17604000 + }, + { + "epoch": 50.96, + "learning_rate": 2.4530257155427937e-05, + "loss": 2.0042, + "step": 17604500 + }, + { + "epoch": 50.96, + "learning_rate": 2.452953350778066e-05, + "loss": 1.9596, + "step": 17605000 + }, + { + "epoch": 50.96, + "learning_rate": 2.4528809860133385e-05, + "loss": 2.0091, + "step": 17605500 + }, + { + "epoch": 50.96, + "learning_rate": 2.4528086212486107e-05, + "loss": 2.0024, + "step": 17606000 + }, + { + "epoch": 50.96, + "learning_rate": 2.452736256483883e-05, + "loss": 2.0047, + "step": 17606500 + }, + { + "epoch": 50.97, + "learning_rate": 2.4526638917191552e-05, + "loss": 2.0183, + "step": 17607000 + }, + { + "epoch": 50.97, + "learning_rate": 2.4525915269544277e-05, + "loss": 2.0066, + "step": 17607500 + }, + { + "epoch": 50.97, + "learning_rate": 2.4525193069192296e-05, + "loss": 2.0064, + "step": 17608000 + }, + { + "epoch": 50.97, + "learning_rate": 2.452446942154502e-05, + "loss": 2.0116, + "step": 17608500 + }, + { + "epoch": 50.97, + "learning_rate": 2.452374577389774e-05, + "loss": 1.9679, + "step": 17609000 + }, + { + "epoch": 50.97, + "learning_rate": 2.4523022126250463e-05, + "loss": 1.9919, + "step": 17609500 + }, + { + "epoch": 50.97, + "learning_rate": 2.4522298478603185e-05, + "loss": 2.0007, + "step": 17610000 + }, + { + "epoch": 50.98, + "learning_rate": 2.452157483095591e-05, + "loss": 1.9881, + "step": 17610500 + }, + { + "epoch": 50.98, + "learning_rate": 2.4520851183308637e-05, + "loss": 1.9894, + "step": 17611000 + }, + { + "epoch": 50.98, + "learning_rate": 2.452012753566136e-05, + "loss": 1.997, + "step": 17611500 + }, + { + "epoch": 50.98, + "learning_rate": 2.451940388801408e-05, + "loss": 1.9965, + "step": 17612000 + }, + { + "epoch": 50.98, + "learning_rate": 2.4518680240366803e-05, + "loss": 1.9939, + "step": 17612500 + }, + { + "epoch": 50.98, + "learning_rate": 2.4517956592719525e-05, + "loss": 1.9864, + "step": 17613000 + }, + { + "epoch": 50.98, + "learning_rate": 2.4517232945072248e-05, + "loss": 1.9933, + "step": 17613500 + }, + { + "epoch": 50.99, + "learning_rate": 2.4516509297424973e-05, + "loss": 2.0052, + "step": 17614000 + }, + { + "epoch": 50.99, + "learning_rate": 2.45157856497777e-05, + "loss": 2.0182, + "step": 17614500 + }, + { + "epoch": 50.99, + "learning_rate": 2.451506200213042e-05, + "loss": 2.0023, + "step": 17615000 + }, + { + "epoch": 50.99, + "learning_rate": 2.4514339801778437e-05, + "loss": 1.99, + "step": 17615500 + }, + { + "epoch": 50.99, + "learning_rate": 2.4513616154131162e-05, + "loss": 1.9817, + "step": 17616000 + }, + { + "epoch": 50.99, + "learning_rate": 2.4512893953779178e-05, + "loss": 2.0061, + "step": 17616500 + }, + { + "epoch": 50.99, + "learning_rate": 2.45121703061319e-05, + "loss": 1.9763, + "step": 17617000 + }, + { + "epoch": 51.0, + "learning_rate": 2.4511446658484626e-05, + "loss": 2.0107, + "step": 17617500 + }, + { + "epoch": 51.0, + "learning_rate": 2.4510723010837348e-05, + "loss": 1.9947, + "step": 17618000 + }, + { + "epoch": 51.0, + "learning_rate": 2.4509999363190074e-05, + "loss": 1.9891, + "step": 17618500 + }, + { + "epoch": 51.0, + "learning_rate": 2.4509275715542796e-05, + "loss": 1.9701, + "step": 17619000 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.6732738776787731, + "eval_accuracy_mlm": 0.6393918868388159, + "eval_accuracy_nsp": 0.8549586833674774, + "eval_loss": 2.174137830734253, + "eval_runtime": 331.935, + "eval_samples_per_second": 1314.673, + "eval_steps_per_second": 54.779, + "step": 17619072 + }, + { + "epoch": 51.0, + "learning_rate": 2.4508552067895518e-05, + "loss": 1.9949, + "step": 17619500 + }, + { + "epoch": 51.0, + "learning_rate": 2.450782842024824e-05, + "loss": 1.9747, + "step": 17620000 + }, + { + "epoch": 51.0, + "learning_rate": 2.4507104772600963e-05, + "loss": 1.9979, + "step": 17620500 + }, + { + "epoch": 51.01, + "learning_rate": 2.4506381124953688e-05, + "loss": 1.9735, + "step": 17621000 + }, + { + "epoch": 51.01, + "learning_rate": 2.4505658924601704e-05, + "loss": 1.9927, + "step": 17621500 + }, + { + "epoch": 51.01, + "learning_rate": 2.450493527695443e-05, + "loss": 1.9978, + "step": 17622000 + }, + { + "epoch": 51.01, + "learning_rate": 2.450421162930715e-05, + "loss": 1.9624, + "step": 17622500 + }, + { + "epoch": 51.01, + "learning_rate": 2.4503487981659877e-05, + "loss": 2.0011, + "step": 17623000 + }, + { + "epoch": 51.01, + "learning_rate": 2.45027643340126e-05, + "loss": 1.9851, + "step": 17623500 + }, + { + "epoch": 51.01, + "learning_rate": 2.4502040686365322e-05, + "loss": 1.9784, + "step": 17624000 + }, + { + "epoch": 51.02, + "learning_rate": 2.4501317038718044e-05, + "loss": 1.9831, + "step": 17624500 + }, + { + "epoch": 51.02, + "learning_rate": 2.4500596285661356e-05, + "loss": 1.9701, + "step": 17625000 + }, + { + "epoch": 51.02, + "learning_rate": 2.449987263801408e-05, + "loss": 1.9709, + "step": 17625500 + }, + { + "epoch": 51.02, + "learning_rate": 2.4499148990366804e-05, + "loss": 1.9816, + "step": 17626000 + }, + { + "epoch": 51.02, + "learning_rate": 2.4498425342719526e-05, + "loss": 1.9544, + "step": 17626500 + }, + { + "epoch": 51.02, + "learning_rate": 2.4497701695072252e-05, + "loss": 1.9795, + "step": 17627000 + }, + { + "epoch": 51.02, + "learning_rate": 2.4496978047424974e-05, + "loss": 1.967, + "step": 17627500 + }, + { + "epoch": 51.03, + "learning_rate": 2.4496254399777696e-05, + "loss": 1.992, + "step": 17628000 + }, + { + "epoch": 51.03, + "learning_rate": 2.449553075213042e-05, + "loss": 1.9772, + "step": 17628500 + }, + { + "epoch": 51.03, + "learning_rate": 2.4494808551778438e-05, + "loss": 1.9825, + "step": 17629000 + }, + { + "epoch": 51.03, + "learning_rate": 2.4494084904131163e-05, + "loss": 1.9858, + "step": 17629500 + }, + { + "epoch": 51.03, + "learning_rate": 2.4493361256483885e-05, + "loss": 1.9852, + "step": 17630000 + }, + { + "epoch": 51.03, + "learning_rate": 2.4492637608836608e-05, + "loss": 1.9825, + "step": 17630500 + }, + { + "epoch": 51.03, + "learning_rate": 2.449191396118933e-05, + "loss": 1.9753, + "step": 17631000 + }, + { + "epoch": 51.04, + "learning_rate": 2.4491190313542052e-05, + "loss": 2.0039, + "step": 17631500 + }, + { + "epoch": 51.04, + "learning_rate": 2.4490466665894778e-05, + "loss": 1.9847, + "step": 17632000 + }, + { + "epoch": 51.04, + "learning_rate": 2.44897430182475e-05, + "loss": 1.9872, + "step": 17632500 + }, + { + "epoch": 51.04, + "learning_rate": 2.4489019370600226e-05, + "loss": 1.9923, + "step": 17633000 + }, + { + "epoch": 51.04, + "learning_rate": 2.4488295722952948e-05, + "loss": 1.9524, + "step": 17633500 + }, + { + "epoch": 51.04, + "learning_rate": 2.4487573522600963e-05, + "loss": 1.9673, + "step": 17634000 + }, + { + "epoch": 51.04, + "learning_rate": 2.448684987495369e-05, + "loss": 1.9886, + "step": 17634500 + }, + { + "epoch": 51.05, + "learning_rate": 2.448612622730641e-05, + "loss": 1.9651, + "step": 17635000 + }, + { + "epoch": 51.05, + "learning_rate": 2.4485402579659133e-05, + "loss": 1.9806, + "step": 17635500 + }, + { + "epoch": 51.05, + "learning_rate": 2.4484678932011856e-05, + "loss": 1.9819, + "step": 17636000 + }, + { + "epoch": 51.05, + "learning_rate": 2.448395528436458e-05, + "loss": 2.0005, + "step": 17636500 + }, + { + "epoch": 51.05, + "learning_rate": 2.4483231636717304e-05, + "loss": 1.9665, + "step": 17637000 + }, + { + "epoch": 51.05, + "learning_rate": 2.448250798907003e-05, + "loss": 1.9687, + "step": 17637500 + }, + { + "epoch": 51.05, + "learning_rate": 2.448178434142275e-05, + "loss": 1.9729, + "step": 17638000 + }, + { + "epoch": 51.06, + "learning_rate": 2.4481062141070767e-05, + "loss": 1.9739, + "step": 17638500 + }, + { + "epoch": 51.06, + "learning_rate": 2.448033849342349e-05, + "loss": 2.0044, + "step": 17639000 + }, + { + "epoch": 51.06, + "learning_rate": 2.4479614845776215e-05, + "loss": 1.975, + "step": 17639500 + }, + { + "epoch": 51.06, + "learning_rate": 2.4478891198128937e-05, + "loss": 1.984, + "step": 17640000 + }, + { + "epoch": 51.06, + "learning_rate": 2.4478167550481663e-05, + "loss": 1.9874, + "step": 17640500 + }, + { + "epoch": 51.06, + "learning_rate": 2.4477443902834385e-05, + "loss": 2.0056, + "step": 17641000 + }, + { + "epoch": 51.06, + "learning_rate": 2.4476720255187107e-05, + "loss": 1.984, + "step": 17641500 + }, + { + "epoch": 51.07, + "learning_rate": 2.447599660753983e-05, + "loss": 1.9732, + "step": 17642000 + }, + { + "epoch": 51.07, + "learning_rate": 2.4475272959892555e-05, + "loss": 1.9892, + "step": 17642500 + }, + { + "epoch": 51.07, + "learning_rate": 2.4474552206835867e-05, + "loss": 1.9649, + "step": 17643000 + }, + { + "epoch": 51.07, + "learning_rate": 2.447382855918859e-05, + "loss": 1.9791, + "step": 17643500 + }, + { + "epoch": 51.07, + "learning_rate": 2.4473104911541315e-05, + "loss": 1.9715, + "step": 17644000 + }, + { + "epoch": 51.07, + "learning_rate": 2.4472381263894037e-05, + "loss": 1.9926, + "step": 17644500 + }, + { + "epoch": 51.08, + "learning_rate": 2.447165761624676e-05, + "loss": 2.0049, + "step": 17645000 + }, + { + "epoch": 51.08, + "learning_rate": 2.4470933968599482e-05, + "loss": 2.0015, + "step": 17645500 + }, + { + "epoch": 51.08, + "learning_rate": 2.4470210320952204e-05, + "loss": 1.9711, + "step": 17646000 + }, + { + "epoch": 51.08, + "learning_rate": 2.446948667330493e-05, + "loss": 1.9765, + "step": 17646500 + }, + { + "epoch": 51.08, + "learning_rate": 2.4468763025657652e-05, + "loss": 1.9781, + "step": 17647000 + }, + { + "epoch": 51.08, + "learning_rate": 2.4468039378010378e-05, + "loss": 1.981, + "step": 17647500 + }, + { + "epoch": 51.08, + "learning_rate": 2.44673157303631e-05, + "loss": 1.9917, + "step": 17648000 + }, + { + "epoch": 51.09, + "learning_rate": 2.446659353001112e-05, + "loss": 1.9621, + "step": 17648500 + }, + { + "epoch": 51.09, + "learning_rate": 2.446586988236384e-05, + "loss": 1.9682, + "step": 17649000 + }, + { + "epoch": 51.09, + "learning_rate": 2.4465147682011856e-05, + "loss": 1.9912, + "step": 17649500 + }, + { + "epoch": 51.09, + "learning_rate": 2.446442403436458e-05, + "loss": 1.9995, + "step": 17650000 + }, + { + "epoch": 51.09, + "learning_rate": 2.4463700386717304e-05, + "loss": 1.9759, + "step": 17650500 + }, + { + "epoch": 51.09, + "learning_rate": 2.4462976739070027e-05, + "loss": 1.9904, + "step": 17651000 + }, + { + "epoch": 51.09, + "learning_rate": 2.4462253091422752e-05, + "loss": 1.9909, + "step": 17651500 + }, + { + "epoch": 51.1, + "learning_rate": 2.4461530891070768e-05, + "loss": 1.9844, + "step": 17652000 + }, + { + "epoch": 51.1, + "learning_rate": 2.4460807243423493e-05, + "loss": 1.9696, + "step": 17652500 + }, + { + "epoch": 51.1, + "learning_rate": 2.4460083595776216e-05, + "loss": 1.9827, + "step": 17653000 + }, + { + "epoch": 51.1, + "learning_rate": 2.445936139542423e-05, + "loss": 1.995, + "step": 17653500 + }, + { + "epoch": 51.1, + "learning_rate": 2.4458637747776957e-05, + "loss": 1.9909, + "step": 17654000 + }, + { + "epoch": 51.1, + "learning_rate": 2.445791410012968e-05, + "loss": 1.9711, + "step": 17654500 + }, + { + "epoch": 51.1, + "learning_rate": 2.44571904524824e-05, + "loss": 1.9805, + "step": 17655000 + }, + { + "epoch": 51.11, + "learning_rate": 2.445646825213042e-05, + "loss": 1.994, + "step": 17655500 + }, + { + "epoch": 51.11, + "learning_rate": 2.4455744604483142e-05, + "loss": 1.9783, + "step": 17656000 + }, + { + "epoch": 51.11, + "learning_rate": 2.445502240413116e-05, + "loss": 1.984, + "step": 17656500 + }, + { + "epoch": 51.11, + "learning_rate": 2.4454298756483884e-05, + "loss": 1.9605, + "step": 17657000 + }, + { + "epoch": 51.11, + "learning_rate": 2.4453575108836606e-05, + "loss": 1.9961, + "step": 17657500 + }, + { + "epoch": 51.11, + "learning_rate": 2.445285146118933e-05, + "loss": 1.9765, + "step": 17658000 + }, + { + "epoch": 51.11, + "learning_rate": 2.4452127813542054e-05, + "loss": 1.9907, + "step": 17658500 + }, + { + "epoch": 51.12, + "learning_rate": 2.4451404165894776e-05, + "loss": 2.0014, + "step": 17659000 + }, + { + "epoch": 51.12, + "learning_rate": 2.44506805182475e-05, + "loss": 1.9877, + "step": 17659500 + }, + { + "epoch": 51.12, + "learning_rate": 2.4449956870600224e-05, + "loss": 1.9856, + "step": 17660000 + }, + { + "epoch": 51.12, + "learning_rate": 2.4449233222952946e-05, + "loss": 1.9659, + "step": 17660500 + }, + { + "epoch": 51.12, + "learning_rate": 2.4448509575305668e-05, + "loss": 1.9546, + "step": 17661000 + }, + { + "epoch": 51.12, + "learning_rate": 2.4447785927658394e-05, + "loss": 2.0012, + "step": 17661500 + }, + { + "epoch": 51.12, + "learning_rate": 2.4447062280011116e-05, + "loss": 1.9893, + "step": 17662000 + }, + { + "epoch": 51.13, + "learning_rate": 2.444633863236384e-05, + "loss": 1.9668, + "step": 17662500 + }, + { + "epoch": 51.13, + "learning_rate": 2.4445614984716564e-05, + "loss": 1.9966, + "step": 17663000 + }, + { + "epoch": 51.13, + "learning_rate": 2.4444891337069286e-05, + "loss": 1.987, + "step": 17663500 + }, + { + "epoch": 51.13, + "learning_rate": 2.444416768942201e-05, + "loss": 1.9552, + "step": 17664000 + }, + { + "epoch": 51.13, + "learning_rate": 2.444344404177473e-05, + "loss": 1.964, + "step": 17664500 + }, + { + "epoch": 51.13, + "learning_rate": 2.4442723288718043e-05, + "loss": 1.9699, + "step": 17665000 + }, + { + "epoch": 51.13, + "learning_rate": 2.4442001088366062e-05, + "loss": 2.0116, + "step": 17665500 + }, + { + "epoch": 51.14, + "learning_rate": 2.4441277440718784e-05, + "loss": 1.987, + "step": 17666000 + }, + { + "epoch": 51.14, + "learning_rate": 2.4440553793071506e-05, + "loss": 1.9843, + "step": 17666500 + }, + { + "epoch": 51.14, + "learning_rate": 2.4439830145424232e-05, + "loss": 1.9891, + "step": 17667000 + }, + { + "epoch": 51.14, + "learning_rate": 2.4439106497776958e-05, + "loss": 2.0022, + "step": 17667500 + }, + { + "epoch": 51.14, + "learning_rate": 2.443838285012968e-05, + "loss": 1.9888, + "step": 17668000 + }, + { + "epoch": 51.14, + "learning_rate": 2.4437659202482402e-05, + "loss": 2.0083, + "step": 17668500 + }, + { + "epoch": 51.14, + "learning_rate": 2.443693700213042e-05, + "loss": 1.9891, + "step": 17669000 + }, + { + "epoch": 51.15, + "learning_rate": 2.4436213354483143e-05, + "loss": 1.9905, + "step": 17669500 + }, + { + "epoch": 51.15, + "learning_rate": 2.4435489706835865e-05, + "loss": 1.9767, + "step": 17670000 + }, + { + "epoch": 51.15, + "learning_rate": 2.443476605918859e-05, + "loss": 2.0048, + "step": 17670500 + }, + { + "epoch": 51.15, + "learning_rate": 2.4434042411541313e-05, + "loss": 1.9986, + "step": 17671000 + }, + { + "epoch": 51.15, + "learning_rate": 2.4433318763894036e-05, + "loss": 1.9624, + "step": 17671500 + }, + { + "epoch": 51.15, + "learning_rate": 2.4432596563542054e-05, + "loss": 1.9885, + "step": 17672000 + }, + { + "epoch": 51.15, + "learning_rate": 2.4431872915894777e-05, + "loss": 1.9892, + "step": 17672500 + }, + { + "epoch": 51.16, + "learning_rate": 2.44311492682475e-05, + "loss": 1.9868, + "step": 17673000 + }, + { + "epoch": 51.16, + "learning_rate": 2.443042562060022e-05, + "loss": 1.9986, + "step": 17673500 + }, + { + "epoch": 51.16, + "learning_rate": 2.4429701972952947e-05, + "loss": 1.9867, + "step": 17674000 + }, + { + "epoch": 51.16, + "learning_rate": 2.4428978325305672e-05, + "loss": 1.988, + "step": 17674500 + }, + { + "epoch": 51.16, + "learning_rate": 2.4428256124953688e-05, + "loss": 1.9801, + "step": 17675000 + }, + { + "epoch": 51.16, + "learning_rate": 2.442753247730641e-05, + "loss": 1.951, + "step": 17675500 + }, + { + "epoch": 51.16, + "learning_rate": 2.4426808829659132e-05, + "loss": 1.977, + "step": 17676000 + }, + { + "epoch": 51.17, + "learning_rate": 2.4426085182011858e-05, + "loss": 2.0077, + "step": 17676500 + }, + { + "epoch": 51.17, + "learning_rate": 2.442536153436458e-05, + "loss": 1.9678, + "step": 17677000 + }, + { + "epoch": 51.17, + "learning_rate": 2.4424637886717303e-05, + "loss": 1.9969, + "step": 17677500 + }, + { + "epoch": 51.17, + "learning_rate": 2.4423914239070028e-05, + "loss": 2.0112, + "step": 17678000 + }, + { + "epoch": 51.17, + "learning_rate": 2.442319059142275e-05, + "loss": 1.9616, + "step": 17678500 + }, + { + "epoch": 51.17, + "learning_rate": 2.442246839107077e-05, + "loss": 2.0111, + "step": 17679000 + }, + { + "epoch": 51.17, + "learning_rate": 2.442174474342349e-05, + "loss": 2.0043, + "step": 17679500 + }, + { + "epoch": 51.18, + "learning_rate": 2.4421021095776214e-05, + "loss": 1.9938, + "step": 17680000 + }, + { + "epoch": 51.18, + "learning_rate": 2.4420298895424233e-05, + "loss": 1.9844, + "step": 17680500 + }, + { + "epoch": 51.18, + "learning_rate": 2.4419575247776955e-05, + "loss": 2.0036, + "step": 17681000 + }, + { + "epoch": 51.18, + "learning_rate": 2.441885304742497e-05, + "loss": 2.0096, + "step": 17681500 + }, + { + "epoch": 51.18, + "learning_rate": 2.4418129399777696e-05, + "loss": 1.9985, + "step": 17682000 + }, + { + "epoch": 51.18, + "learning_rate": 2.4417405752130422e-05, + "loss": 1.9815, + "step": 17682500 + }, + { + "epoch": 51.19, + "learning_rate": 2.4416682104483144e-05, + "loss": 1.9834, + "step": 17683000 + }, + { + "epoch": 51.19, + "learning_rate": 2.4415958456835866e-05, + "loss": 1.9797, + "step": 17683500 + }, + { + "epoch": 51.19, + "learning_rate": 2.441523480918859e-05, + "loss": 1.9725, + "step": 17684000 + }, + { + "epoch": 51.19, + "learning_rate": 2.441451116154131e-05, + "loss": 1.9793, + "step": 17684500 + }, + { + "epoch": 51.19, + "learning_rate": 2.4413787513894036e-05, + "loss": 1.9821, + "step": 17685000 + }, + { + "epoch": 51.19, + "learning_rate": 2.4413063866246762e-05, + "loss": 1.9998, + "step": 17685500 + }, + { + "epoch": 51.19, + "learning_rate": 2.4412340218599484e-05, + "loss": 1.9707, + "step": 17686000 + }, + { + "epoch": 51.2, + "learning_rate": 2.4411616570952206e-05, + "loss": 1.9765, + "step": 17686500 + }, + { + "epoch": 51.2, + "learning_rate": 2.441089292330493e-05, + "loss": 1.9816, + "step": 17687000 + }, + { + "epoch": 51.2, + "learning_rate": 2.441016927565765e-05, + "loss": 1.98, + "step": 17687500 + }, + { + "epoch": 51.2, + "learning_rate": 2.4409445628010373e-05, + "loss": 1.9581, + "step": 17688000 + }, + { + "epoch": 51.2, + "learning_rate": 2.44087219803631e-05, + "loss": 1.9771, + "step": 17688500 + }, + { + "epoch": 51.2, + "learning_rate": 2.4407998332715824e-05, + "loss": 2.0041, + "step": 17689000 + }, + { + "epoch": 51.2, + "learning_rate": 2.4407274685068547e-05, + "loss": 1.9989, + "step": 17689500 + }, + { + "epoch": 51.21, + "learning_rate": 2.4406552484716562e-05, + "loss": 1.994, + "step": 17690000 + }, + { + "epoch": 51.21, + "learning_rate": 2.440583028436458e-05, + "loss": 1.9754, + "step": 17690500 + }, + { + "epoch": 51.21, + "learning_rate": 2.4405106636717303e-05, + "loss": 1.9891, + "step": 17691000 + }, + { + "epoch": 51.21, + "learning_rate": 2.4404382989070026e-05, + "loss": 2.0084, + "step": 17691500 + }, + { + "epoch": 51.21, + "learning_rate": 2.4403659341422748e-05, + "loss": 1.9854, + "step": 17692000 + }, + { + "epoch": 51.21, + "learning_rate": 2.4402935693775473e-05, + "loss": 2.0098, + "step": 17692500 + }, + { + "epoch": 51.21, + "learning_rate": 2.44022120461282e-05, + "loss": 1.9977, + "step": 17693000 + }, + { + "epoch": 51.22, + "learning_rate": 2.440148839848092e-05, + "loss": 1.9687, + "step": 17693500 + }, + { + "epoch": 51.22, + "learning_rate": 2.4400764750833644e-05, + "loss": 1.9837, + "step": 17694000 + }, + { + "epoch": 51.22, + "learning_rate": 2.4400041103186366e-05, + "loss": 2.0025, + "step": 17694500 + }, + { + "epoch": 51.22, + "learning_rate": 2.4399317455539088e-05, + "loss": 1.986, + "step": 17695000 + }, + { + "epoch": 51.22, + "learning_rate": 2.4398593807891814e-05, + "loss": 1.9921, + "step": 17695500 + }, + { + "epoch": 51.22, + "learning_rate": 2.4397870160244536e-05, + "loss": 1.9585, + "step": 17696000 + }, + { + "epoch": 51.22, + "learning_rate": 2.4397147959892555e-05, + "loss": 1.9823, + "step": 17696500 + }, + { + "epoch": 51.23, + "learning_rate": 2.4396424312245277e-05, + "loss": 1.9956, + "step": 17697000 + }, + { + "epoch": 51.23, + "learning_rate": 2.4395700664598e-05, + "loss": 1.9764, + "step": 17697500 + }, + { + "epoch": 51.23, + "learning_rate": 2.4394978464246018e-05, + "loss": 2.0184, + "step": 17698000 + }, + { + "epoch": 51.23, + "learning_rate": 2.439425481659874e-05, + "loss": 1.9898, + "step": 17698500 + }, + { + "epoch": 51.23, + "learning_rate": 2.4393531168951463e-05, + "loss": 1.9678, + "step": 17699000 + }, + { + "epoch": 51.23, + "learning_rate": 2.4392807521304188e-05, + "loss": 1.9788, + "step": 17699500 + }, + { + "epoch": 51.23, + "learning_rate": 2.4392083873656914e-05, + "loss": 1.983, + "step": 17700000 + }, + { + "epoch": 51.24, + "learning_rate": 2.4391360226009636e-05, + "loss": 1.9922, + "step": 17700500 + }, + { + "epoch": 51.24, + "learning_rate": 2.439063657836236e-05, + "loss": 1.9816, + "step": 17701000 + }, + { + "epoch": 51.24, + "learning_rate": 2.438991293071508e-05, + "loss": 1.9732, + "step": 17701500 + }, + { + "epoch": 51.24, + "learning_rate": 2.4389189283067803e-05, + "loss": 1.9948, + "step": 17702000 + }, + { + "epoch": 51.24, + "learning_rate": 2.4388465635420525e-05, + "loss": 1.9982, + "step": 17702500 + }, + { + "epoch": 51.24, + "learning_rate": 2.438774198777325e-05, + "loss": 2.0041, + "step": 17703000 + }, + { + "epoch": 51.24, + "learning_rate": 2.4387018340125976e-05, + "loss": 1.9898, + "step": 17703500 + }, + { + "epoch": 51.25, + "learning_rate": 2.4386296139773992e-05, + "loss": 1.9815, + "step": 17704000 + }, + { + "epoch": 51.25, + "learning_rate": 2.4385572492126714e-05, + "loss": 1.98, + "step": 17704500 + }, + { + "epoch": 51.25, + "learning_rate": 2.438484884447944e-05, + "loss": 1.9783, + "step": 17705000 + }, + { + "epoch": 51.25, + "learning_rate": 2.4384126644127455e-05, + "loss": 1.9731, + "step": 17705500 + }, + { + "epoch": 51.25, + "learning_rate": 2.4383402996480178e-05, + "loss": 1.9765, + "step": 17706000 + }, + { + "epoch": 51.25, + "learning_rate": 2.43826793488329e-05, + "loss": 1.9773, + "step": 17706500 + }, + { + "epoch": 51.25, + "learning_rate": 2.4381955701185625e-05, + "loss": 1.9804, + "step": 17707000 + }, + { + "epoch": 51.26, + "learning_rate": 2.438123205353835e-05, + "loss": 1.9992, + "step": 17707500 + }, + { + "epoch": 51.26, + "learning_rate": 2.4380508405891073e-05, + "loss": 1.9816, + "step": 17708000 + }, + { + "epoch": 51.26, + "learning_rate": 2.437978620553909e-05, + "loss": 1.9938, + "step": 17708500 + }, + { + "epoch": 51.26, + "learning_rate": 2.4379062557891814e-05, + "loss": 1.9932, + "step": 17709000 + }, + { + "epoch": 51.26, + "learning_rate": 2.437834035753983e-05, + "loss": 2.0002, + "step": 17709500 + }, + { + "epoch": 51.26, + "learning_rate": 2.4377616709892552e-05, + "loss": 1.9774, + "step": 17710000 + }, + { + "epoch": 51.26, + "learning_rate": 2.4376893062245278e-05, + "loss": 2.0023, + "step": 17710500 + }, + { + "epoch": 51.27, + "learning_rate": 2.4376169414598e-05, + "loss": 1.9971, + "step": 17711000 + }, + { + "epoch": 51.27, + "learning_rate": 2.4375445766950726e-05, + "loss": 2.0062, + "step": 17711500 + }, + { + "epoch": 51.27, + "learning_rate": 2.4374722119303448e-05, + "loss": 1.9993, + "step": 17712000 + }, + { + "epoch": 51.27, + "learning_rate": 2.437399847165617e-05, + "loss": 1.993, + "step": 17712500 + }, + { + "epoch": 51.27, + "learning_rate": 2.4373274824008892e-05, + "loss": 1.9738, + "step": 17713000 + }, + { + "epoch": 51.27, + "learning_rate": 2.4372551176361615e-05, + "loss": 1.9924, + "step": 17713500 + }, + { + "epoch": 51.27, + "learning_rate": 2.437182752871434e-05, + "loss": 2.0012, + "step": 17714000 + }, + { + "epoch": 51.28, + "learning_rate": 2.4371103881067062e-05, + "loss": 1.9822, + "step": 17714500 + }, + { + "epoch": 51.28, + "learning_rate": 2.4370380233419788e-05, + "loss": 1.9882, + "step": 17715000 + }, + { + "epoch": 51.28, + "learning_rate": 2.436965658577251e-05, + "loss": 2.0018, + "step": 17715500 + }, + { + "epoch": 51.28, + "learning_rate": 2.436893438542053e-05, + "loss": 1.9936, + "step": 17716000 + }, + { + "epoch": 51.28, + "learning_rate": 2.4368212185068545e-05, + "loss": 1.9701, + "step": 17716500 + }, + { + "epoch": 51.28, + "learning_rate": 2.4367488537421267e-05, + "loss": 1.9812, + "step": 17717000 + }, + { + "epoch": 51.28, + "learning_rate": 2.436676488977399e-05, + "loss": 1.9801, + "step": 17717500 + }, + { + "epoch": 51.29, + "learning_rate": 2.4366041242126715e-05, + "loss": 1.9927, + "step": 17718000 + }, + { + "epoch": 51.29, + "learning_rate": 2.436531759447944e-05, + "loss": 2.0044, + "step": 17718500 + }, + { + "epoch": 51.29, + "learning_rate": 2.4364595394127456e-05, + "loss": 1.9808, + "step": 17719000 + }, + { + "epoch": 51.29, + "learning_rate": 2.436387174648018e-05, + "loss": 2.0154, + "step": 17719500 + }, + { + "epoch": 51.29, + "learning_rate": 2.4363149546128197e-05, + "loss": 1.9955, + "step": 17720000 + }, + { + "epoch": 51.29, + "learning_rate": 2.436242589848092e-05, + "loss": 1.9788, + "step": 17720500 + }, + { + "epoch": 51.3, + "learning_rate": 2.436170369812894e-05, + "loss": 1.9964, + "step": 17721000 + }, + { + "epoch": 51.3, + "learning_rate": 2.436098005048166e-05, + "loss": 1.9775, + "step": 17721500 + }, + { + "epoch": 51.3, + "learning_rate": 2.4360256402834383e-05, + "loss": 1.9969, + "step": 17722000 + }, + { + "epoch": 51.3, + "learning_rate": 2.435953275518711e-05, + "loss": 1.9881, + "step": 17722500 + }, + { + "epoch": 51.3, + "learning_rate": 2.435880910753983e-05, + "loss": 1.9744, + "step": 17723000 + }, + { + "epoch": 51.3, + "learning_rate": 2.4358085459892553e-05, + "loss": 1.9911, + "step": 17723500 + }, + { + "epoch": 51.3, + "learning_rate": 2.435736181224528e-05, + "loss": 1.9726, + "step": 17724000 + }, + { + "epoch": 51.31, + "learning_rate": 2.4356638164598e-05, + "loss": 1.9767, + "step": 17724500 + }, + { + "epoch": 51.31, + "learning_rate": 2.4355914516950723e-05, + "loss": 1.9795, + "step": 17725000 + }, + { + "epoch": 51.31, + "learning_rate": 2.4355192316598742e-05, + "loss": 1.9973, + "step": 17725500 + }, + { + "epoch": 51.31, + "learning_rate": 2.4354468668951464e-05, + "loss": 1.982, + "step": 17726000 + }, + { + "epoch": 51.31, + "learning_rate": 2.435374502130419e-05, + "loss": 1.9871, + "step": 17726500 + }, + { + "epoch": 51.31, + "learning_rate": 2.4353021373656912e-05, + "loss": 1.9876, + "step": 17727000 + }, + { + "epoch": 51.31, + "learning_rate": 2.4352297726009634e-05, + "loss": 2.001, + "step": 17727500 + }, + { + "epoch": 51.32, + "learning_rate": 2.4351574078362357e-05, + "loss": 2.0063, + "step": 17728000 + }, + { + "epoch": 51.32, + "learning_rate": 2.435085043071508e-05, + "loss": 1.9989, + "step": 17728500 + }, + { + "epoch": 51.32, + "learning_rate": 2.4350126783067804e-05, + "loss": 1.9569, + "step": 17729000 + }, + { + "epoch": 51.32, + "learning_rate": 2.434940458271582e-05, + "loss": 1.9762, + "step": 17729500 + }, + { + "epoch": 51.32, + "learning_rate": 2.4348680935068546e-05, + "loss": 2.0073, + "step": 17730000 + }, + { + "epoch": 51.32, + "learning_rate": 2.4347957287421268e-05, + "loss": 1.9871, + "step": 17730500 + }, + { + "epoch": 51.32, + "learning_rate": 2.4347233639773993e-05, + "loss": 1.9912, + "step": 17731000 + }, + { + "epoch": 51.33, + "learning_rate": 2.4346509992126716e-05, + "loss": 2.0098, + "step": 17731500 + }, + { + "epoch": 51.33, + "learning_rate": 2.4345786344479438e-05, + "loss": 1.9744, + "step": 17732000 + }, + { + "epoch": 51.33, + "learning_rate": 2.434506269683216e-05, + "loss": 1.9985, + "step": 17732500 + }, + { + "epoch": 51.33, + "learning_rate": 2.4344339049184882e-05, + "loss": 1.9773, + "step": 17733000 + }, + { + "epoch": 51.33, + "learning_rate": 2.4343616848832905e-05, + "loss": 1.9958, + "step": 17733500 + }, + { + "epoch": 51.33, + "learning_rate": 2.4342893201185627e-05, + "loss": 1.9777, + "step": 17734000 + }, + { + "epoch": 51.33, + "learning_rate": 2.434216955353835e-05, + "loss": 1.9991, + "step": 17734500 + }, + { + "epoch": 51.34, + "learning_rate": 2.4341447353186368e-05, + "loss": 2.0075, + "step": 17735000 + }, + { + "epoch": 51.34, + "learning_rate": 2.434072370553909e-05, + "loss": 1.9932, + "step": 17735500 + }, + { + "epoch": 51.34, + "learning_rate": 2.4340001505187106e-05, + "loss": 1.982, + "step": 17736000 + }, + { + "epoch": 51.34, + "learning_rate": 2.433927785753983e-05, + "loss": 1.9842, + "step": 17736500 + }, + { + "epoch": 51.34, + "learning_rate": 2.4338554209892554e-05, + "loss": 1.9956, + "step": 17737000 + }, + { + "epoch": 51.34, + "learning_rate": 2.433783056224528e-05, + "loss": 1.9695, + "step": 17737500 + }, + { + "epoch": 51.34, + "learning_rate": 2.4337106914598e-05, + "loss": 1.9812, + "step": 17738000 + }, + { + "epoch": 51.35, + "learning_rate": 2.4336383266950724e-05, + "loss": 2.002, + "step": 17738500 + }, + { + "epoch": 51.35, + "learning_rate": 2.4335659619303446e-05, + "loss": 1.9895, + "step": 17739000 + }, + { + "epoch": 51.35, + "learning_rate": 2.433493597165617e-05, + "loss": 1.9969, + "step": 17739500 + }, + { + "epoch": 51.35, + "learning_rate": 2.4334212324008894e-05, + "loss": 1.9887, + "step": 17740000 + }, + { + "epoch": 51.35, + "learning_rate": 2.4333488676361616e-05, + "loss": 1.9793, + "step": 17740500 + }, + { + "epoch": 51.35, + "learning_rate": 2.4332766476009632e-05, + "loss": 2.0083, + "step": 17741000 + }, + { + "epoch": 51.35, + "learning_rate": 2.4332042828362357e-05, + "loss": 2.0087, + "step": 17741500 + }, + { + "epoch": 51.36, + "learning_rate": 2.4331319180715083e-05, + "loss": 2.0044, + "step": 17742000 + }, + { + "epoch": 51.36, + "learning_rate": 2.4330595533067805e-05, + "loss": 2.0007, + "step": 17742500 + }, + { + "epoch": 51.36, + "learning_rate": 2.432987333271582e-05, + "loss": 1.9892, + "step": 17743000 + }, + { + "epoch": 51.36, + "learning_rate": 2.4329149685068543e-05, + "loss": 2.0041, + "step": 17743500 + }, + { + "epoch": 51.36, + "learning_rate": 2.432842603742127e-05, + "loss": 2.0129, + "step": 17744000 + }, + { + "epoch": 51.36, + "learning_rate": 2.432770238977399e-05, + "loss": 1.9825, + "step": 17744500 + }, + { + "epoch": 51.36, + "learning_rate": 2.4326978742126716e-05, + "loss": 1.991, + "step": 17745000 + }, + { + "epoch": 51.37, + "learning_rate": 2.432625509447944e-05, + "loss": 2.0086, + "step": 17745500 + }, + { + "epoch": 51.37, + "learning_rate": 2.432553434142275e-05, + "loss": 1.9807, + "step": 17746000 + }, + { + "epoch": 51.37, + "learning_rate": 2.4324813588366063e-05, + "loss": 2.0073, + "step": 17746500 + }, + { + "epoch": 51.37, + "learning_rate": 2.4324089940718785e-05, + "loss": 2.0274, + "step": 17747000 + }, + { + "epoch": 51.37, + "learning_rate": 2.4323366293071508e-05, + "loss": 2.0009, + "step": 17747500 + }, + { + "epoch": 51.37, + "learning_rate": 2.4322642645424233e-05, + "loss": 1.9813, + "step": 17748000 + }, + { + "epoch": 51.37, + "learning_rate": 2.4321918997776956e-05, + "loss": 1.992, + "step": 17748500 + }, + { + "epoch": 51.38, + "learning_rate": 2.432119535012968e-05, + "loss": 1.9664, + "step": 17749000 + }, + { + "epoch": 51.38, + "learning_rate": 2.4320471702482403e-05, + "loss": 1.9831, + "step": 17749500 + }, + { + "epoch": 51.38, + "learning_rate": 2.4319748054835126e-05, + "loss": 1.9782, + "step": 17750000 + }, + { + "epoch": 51.38, + "learning_rate": 2.4319024407187848e-05, + "loss": 1.9753, + "step": 17750500 + }, + { + "epoch": 51.38, + "learning_rate": 2.431830075954057e-05, + "loss": 1.9954, + "step": 17751000 + }, + { + "epoch": 51.38, + "learning_rate": 2.4317577111893296e-05, + "loss": 1.9961, + "step": 17751500 + }, + { + "epoch": 51.38, + "learning_rate": 2.431685491154131e-05, + "loss": 1.9791, + "step": 17752000 + }, + { + "epoch": 51.39, + "learning_rate": 2.4316131263894034e-05, + "loss": 1.9557, + "step": 17752500 + }, + { + "epoch": 51.39, + "learning_rate": 2.431540761624676e-05, + "loss": 1.9933, + "step": 17753000 + }, + { + "epoch": 51.39, + "learning_rate": 2.4314683968599485e-05, + "loss": 1.9728, + "step": 17753500 + }, + { + "epoch": 51.39, + "learning_rate": 2.4313960320952207e-05, + "loss": 1.9737, + "step": 17754000 + }, + { + "epoch": 51.39, + "learning_rate": 2.431323667330493e-05, + "loss": 2.006, + "step": 17754500 + }, + { + "epoch": 51.39, + "learning_rate": 2.431251302565765e-05, + "loss": 1.9818, + "step": 17755000 + }, + { + "epoch": 51.39, + "learning_rate": 2.4311789378010374e-05, + "loss": 2.0053, + "step": 17755500 + }, + { + "epoch": 51.4, + "learning_rate": 2.4311065730363096e-05, + "loss": 1.9741, + "step": 17756000 + }, + { + "epoch": 51.4, + "learning_rate": 2.431034208271582e-05, + "loss": 2.0018, + "step": 17756500 + }, + { + "epoch": 51.4, + "learning_rate": 2.430961988236384e-05, + "loss": 1.9887, + "step": 17757000 + }, + { + "epoch": 51.4, + "learning_rate": 2.4308896234716563e-05, + "loss": 1.9965, + "step": 17757500 + }, + { + "epoch": 51.4, + "learning_rate": 2.4308172587069285e-05, + "loss": 1.9966, + "step": 17758000 + }, + { + "epoch": 51.4, + "learning_rate": 2.430744893942201e-05, + "loss": 2.0037, + "step": 17758500 + }, + { + "epoch": 51.41, + "learning_rate": 2.4306725291774733e-05, + "loss": 2.0014, + "step": 17759000 + }, + { + "epoch": 51.41, + "learning_rate": 2.4306001644127455e-05, + "loss": 1.9779, + "step": 17759500 + }, + { + "epoch": 51.41, + "learning_rate": 2.430527799648018e-05, + "loss": 1.9963, + "step": 17760000 + }, + { + "epoch": 51.41, + "learning_rate": 2.4304554348832903e-05, + "loss": 1.9884, + "step": 17760500 + }, + { + "epoch": 51.41, + "learning_rate": 2.4303832148480922e-05, + "loss": 2.0144, + "step": 17761000 + }, + { + "epoch": 51.41, + "learning_rate": 2.4303108500833644e-05, + "loss": 2.0232, + "step": 17761500 + }, + { + "epoch": 51.41, + "learning_rate": 2.430238630048166e-05, + "loss": 1.9989, + "step": 17762000 + }, + { + "epoch": 51.42, + "learning_rate": 2.4301662652834385e-05, + "loss": 1.9807, + "step": 17762500 + }, + { + "epoch": 51.42, + "learning_rate": 2.4300939005187108e-05, + "loss": 2.0134, + "step": 17763000 + }, + { + "epoch": 51.42, + "learning_rate": 2.430021535753983e-05, + "loss": 1.9752, + "step": 17763500 + }, + { + "epoch": 51.42, + "learning_rate": 2.4299491709892555e-05, + "loss": 1.979, + "step": 17764000 + }, + { + "epoch": 51.42, + "learning_rate": 2.4298768062245278e-05, + "loss": 1.9745, + "step": 17764500 + }, + { + "epoch": 51.42, + "learning_rate": 2.4298045861893297e-05, + "loss": 1.9798, + "step": 17765000 + }, + { + "epoch": 51.42, + "learning_rate": 2.429732221424602e-05, + "loss": 1.9645, + "step": 17765500 + }, + { + "epoch": 51.43, + "learning_rate": 2.429659856659874e-05, + "loss": 1.9786, + "step": 17766000 + }, + { + "epoch": 51.43, + "learning_rate": 2.4295874918951463e-05, + "loss": 2.0019, + "step": 17766500 + }, + { + "epoch": 51.43, + "learning_rate": 2.4295151271304185e-05, + "loss": 1.9875, + "step": 17767000 + }, + { + "epoch": 51.43, + "learning_rate": 2.429442762365691e-05, + "loss": 1.9936, + "step": 17767500 + }, + { + "epoch": 51.43, + "learning_rate": 2.4293703976009637e-05, + "loss": 1.9964, + "step": 17768000 + }, + { + "epoch": 51.43, + "learning_rate": 2.429298032836236e-05, + "loss": 2.006, + "step": 17768500 + }, + { + "epoch": 51.43, + "learning_rate": 2.429225668071508e-05, + "loss": 1.9675, + "step": 17769000 + }, + { + "epoch": 51.44, + "learning_rate": 2.4291533033067803e-05, + "loss": 1.9867, + "step": 17769500 + }, + { + "epoch": 51.44, + "learning_rate": 2.4290810832715822e-05, + "loss": 2.0065, + "step": 17770000 + }, + { + "epoch": 51.44, + "learning_rate": 2.4290088632363838e-05, + "loss": 1.971, + "step": 17770500 + }, + { + "epoch": 51.44, + "learning_rate": 2.428936498471656e-05, + "loss": 2.0023, + "step": 17771000 + }, + { + "epoch": 51.44, + "learning_rate": 2.4288641337069286e-05, + "loss": 2.0077, + "step": 17771500 + }, + { + "epoch": 51.44, + "learning_rate": 2.428791768942201e-05, + "loss": 1.9849, + "step": 17772000 + }, + { + "epoch": 51.44, + "learning_rate": 2.4287194041774734e-05, + "loss": 1.981, + "step": 17772500 + }, + { + "epoch": 51.45, + "learning_rate": 2.4286470394127456e-05, + "loss": 2.0022, + "step": 17773000 + }, + { + "epoch": 51.45, + "learning_rate": 2.4285746746480178e-05, + "loss": 1.9873, + "step": 17773500 + }, + { + "epoch": 51.45, + "learning_rate": 2.42850230988329e-05, + "loss": 1.9903, + "step": 17774000 + }, + { + "epoch": 51.45, + "learning_rate": 2.4284299451185623e-05, + "loss": 1.9959, + "step": 17774500 + }, + { + "epoch": 51.45, + "learning_rate": 2.4283575803538348e-05, + "loss": 1.994, + "step": 17775000 + }, + { + "epoch": 51.45, + "learning_rate": 2.4282852155891074e-05, + "loss": 2.0203, + "step": 17775500 + }, + { + "epoch": 51.45, + "learning_rate": 2.4282128508243796e-05, + "loss": 1.9736, + "step": 17776000 + }, + { + "epoch": 51.46, + "learning_rate": 2.428140630789181e-05, + "loss": 1.9931, + "step": 17776500 + }, + { + "epoch": 51.46, + "learning_rate": 2.4280682660244537e-05, + "loss": 1.9829, + "step": 17777000 + }, + { + "epoch": 51.46, + "learning_rate": 2.4279960459892553e-05, + "loss": 1.9807, + "step": 17777500 + }, + { + "epoch": 51.46, + "learning_rate": 2.4279236812245275e-05, + "loss": 2.0, + "step": 17778000 + }, + { + "epoch": 51.46, + "learning_rate": 2.4278514611893294e-05, + "loss": 1.9775, + "step": 17778500 + }, + { + "epoch": 51.46, + "learning_rate": 2.427779096424602e-05, + "loss": 1.997, + "step": 17779000 + }, + { + "epoch": 51.46, + "learning_rate": 2.4277067316598742e-05, + "loss": 1.9909, + "step": 17779500 + }, + { + "epoch": 51.47, + "learning_rate": 2.4276343668951464e-05, + "loss": 2.0104, + "step": 17780000 + }, + { + "epoch": 51.47, + "learning_rate": 2.4275620021304186e-05, + "loss": 1.9832, + "step": 17780500 + }, + { + "epoch": 51.47, + "learning_rate": 2.4274896373656912e-05, + "loss": 1.9882, + "step": 17781000 + }, + { + "epoch": 51.47, + "learning_rate": 2.4274172726009634e-05, + "loss": 1.9843, + "step": 17781500 + }, + { + "epoch": 51.47, + "learning_rate": 2.4273449078362356e-05, + "loss": 1.9743, + "step": 17782000 + }, + { + "epoch": 51.47, + "learning_rate": 2.4272725430715082e-05, + "loss": 1.9883, + "step": 17782500 + }, + { + "epoch": 51.47, + "learning_rate": 2.4272001783067804e-05, + "loss": 1.9852, + "step": 17783000 + }, + { + "epoch": 51.48, + "learning_rate": 2.4271278135420526e-05, + "loss": 1.9665, + "step": 17783500 + }, + { + "epoch": 51.48, + "learning_rate": 2.4270554487773252e-05, + "loss": 1.9924, + "step": 17784000 + }, + { + "epoch": 51.48, + "learning_rate": 2.4269830840125974e-05, + "loss": 1.9915, + "step": 17784500 + }, + { + "epoch": 51.48, + "learning_rate": 2.426910863977399e-05, + "loss": 1.9815, + "step": 17785000 + }, + { + "epoch": 51.48, + "learning_rate": 2.4268384992126712e-05, + "loss": 1.9967, + "step": 17785500 + }, + { + "epoch": 51.48, + "learning_rate": 2.4267661344479438e-05, + "loss": 1.9839, + "step": 17786000 + }, + { + "epoch": 51.48, + "learning_rate": 2.4266937696832163e-05, + "loss": 1.992, + "step": 17786500 + }, + { + "epoch": 51.49, + "learning_rate": 2.4266214049184886e-05, + "loss": 1.9891, + "step": 17787000 + }, + { + "epoch": 51.49, + "learning_rate": 2.42654918488329e-05, + "loss": 1.9864, + "step": 17787500 + }, + { + "epoch": 51.49, + "learning_rate": 2.4264768201185627e-05, + "loss": 1.9949, + "step": 17788000 + }, + { + "epoch": 51.49, + "learning_rate": 2.426404455353835e-05, + "loss": 1.9861, + "step": 17788500 + }, + { + "epoch": 51.49, + "learning_rate": 2.426332090589107e-05, + "loss": 1.9972, + "step": 17789000 + }, + { + "epoch": 51.49, + "learning_rate": 2.426259870553909e-05, + "loss": 1.979, + "step": 17789500 + }, + { + "epoch": 51.49, + "learning_rate": 2.426187650518711e-05, + "loss": 2.0037, + "step": 17790000 + }, + { + "epoch": 51.5, + "learning_rate": 2.426115285753983e-05, + "loss": 2.0009, + "step": 17790500 + }, + { + "epoch": 51.5, + "learning_rate": 2.4260429209892554e-05, + "loss": 1.9909, + "step": 17791000 + }, + { + "epoch": 51.5, + "learning_rate": 2.4259707009540573e-05, + "loss": 1.9745, + "step": 17791500 + }, + { + "epoch": 51.5, + "learning_rate": 2.4258983361893295e-05, + "loss": 1.9729, + "step": 17792000 + }, + { + "epoch": 51.5, + "learning_rate": 2.4258259714246017e-05, + "loss": 1.9919, + "step": 17792500 + }, + { + "epoch": 51.5, + "learning_rate": 2.425753606659874e-05, + "loss": 1.984, + "step": 17793000 + }, + { + "epoch": 51.5, + "learning_rate": 2.4256812418951465e-05, + "loss": 1.9853, + "step": 17793500 + }, + { + "epoch": 51.51, + "learning_rate": 2.425608877130419e-05, + "loss": 1.9979, + "step": 17794000 + }, + { + "epoch": 51.51, + "learning_rate": 2.4255366570952206e-05, + "loss": 2.0018, + "step": 17794500 + }, + { + "epoch": 51.51, + "learning_rate": 2.4254642923304928e-05, + "loss": 2.0158, + "step": 17795000 + }, + { + "epoch": 51.51, + "learning_rate": 2.4253920722952947e-05, + "loss": 1.9974, + "step": 17795500 + }, + { + "epoch": 51.51, + "learning_rate": 2.425319707530567e-05, + "loss": 1.9711, + "step": 17796000 + }, + { + "epoch": 51.51, + "learning_rate": 2.425247342765839e-05, + "loss": 1.9994, + "step": 17796500 + }, + { + "epoch": 51.52, + "learning_rate": 2.4251749780011114e-05, + "loss": 1.9739, + "step": 17797000 + }, + { + "epoch": 51.52, + "learning_rate": 2.425102613236384e-05, + "loss": 1.9888, + "step": 17797500 + }, + { + "epoch": 51.52, + "learning_rate": 2.4250302484716565e-05, + "loss": 1.999, + "step": 17798000 + }, + { + "epoch": 51.52, + "learning_rate": 2.4249578837069287e-05, + "loss": 1.9923, + "step": 17798500 + }, + { + "epoch": 51.52, + "learning_rate": 2.424885518942201e-05, + "loss": 1.9993, + "step": 17799000 + }, + { + "epoch": 51.52, + "learning_rate": 2.4248131541774732e-05, + "loss": 2.0076, + "step": 17799500 + }, + { + "epoch": 51.52, + "learning_rate": 2.4247407894127454e-05, + "loss": 1.9969, + "step": 17800000 + }, + { + "epoch": 51.53, + "learning_rate": 2.4246684246480176e-05, + "loss": 2.008, + "step": 17800500 + }, + { + "epoch": 51.53, + "learning_rate": 2.4245960598832902e-05, + "loss": 1.969, + "step": 17801000 + }, + { + "epoch": 51.53, + "learning_rate": 2.4245236951185628e-05, + "loss": 2.01, + "step": 17801500 + }, + { + "epoch": 51.53, + "learning_rate": 2.424451330353835e-05, + "loss": 1.9812, + "step": 17802000 + }, + { + "epoch": 51.53, + "learning_rate": 2.4243789655891072e-05, + "loss": 2.0148, + "step": 17802500 + }, + { + "epoch": 51.53, + "learning_rate": 2.4243066008243794e-05, + "loss": 1.994, + "step": 17803000 + }, + { + "epoch": 51.53, + "learning_rate": 2.4242343807891813e-05, + "loss": 1.987, + "step": 17803500 + }, + { + "epoch": 51.54, + "learning_rate": 2.424162160753983e-05, + "loss": 2.0125, + "step": 17804000 + }, + { + "epoch": 51.54, + "learning_rate": 2.4240897959892554e-05, + "loss": 2.0064, + "step": 17804500 + }, + { + "epoch": 51.54, + "learning_rate": 2.424017431224528e-05, + "loss": 1.9766, + "step": 17805000 + }, + { + "epoch": 51.54, + "learning_rate": 2.4239452111893296e-05, + "loss": 1.9909, + "step": 17805500 + }, + { + "epoch": 51.54, + "learning_rate": 2.4238728464246018e-05, + "loss": 1.9909, + "step": 17806000 + }, + { + "epoch": 51.54, + "learning_rate": 2.423800481659874e-05, + "loss": 2.0074, + "step": 17806500 + }, + { + "epoch": 51.54, + "learning_rate": 2.4237281168951466e-05, + "loss": 1.9958, + "step": 17807000 + }, + { + "epoch": 51.55, + "learning_rate": 2.4236557521304188e-05, + "loss": 1.995, + "step": 17807500 + }, + { + "epoch": 51.55, + "learning_rate": 2.423583387365691e-05, + "loss": 1.982, + "step": 17808000 + }, + { + "epoch": 51.55, + "learning_rate": 2.4235110226009632e-05, + "loss": 1.9904, + "step": 17808500 + }, + { + "epoch": 51.55, + "learning_rate": 2.4234386578362358e-05, + "loss": 1.9952, + "step": 17809000 + }, + { + "epoch": 51.55, + "learning_rate": 2.423366293071508e-05, + "loss": 1.9846, + "step": 17809500 + }, + { + "epoch": 51.55, + "learning_rate": 2.4232939283067806e-05, + "loss": 2.0042, + "step": 17810000 + }, + { + "epoch": 51.55, + "learning_rate": 2.4232215635420528e-05, + "loss": 1.9878, + "step": 17810500 + }, + { + "epoch": 51.56, + "learning_rate": 2.423149198777325e-05, + "loss": 1.9853, + "step": 17811000 + }, + { + "epoch": 51.56, + "learning_rate": 2.4230768340125973e-05, + "loss": 1.9883, + "step": 17811500 + }, + { + "epoch": 51.56, + "learning_rate": 2.4230044692478698e-05, + "loss": 1.9842, + "step": 17812000 + }, + { + "epoch": 51.56, + "learning_rate": 2.422932104483142e-05, + "loss": 2.0059, + "step": 17812500 + }, + { + "epoch": 51.56, + "learning_rate": 2.4228597397184143e-05, + "loss": 1.9831, + "step": 17813000 + }, + { + "epoch": 51.56, + "learning_rate": 2.422787519683216e-05, + "loss": 2.0047, + "step": 17813500 + }, + { + "epoch": 51.56, + "learning_rate": 2.4227151549184884e-05, + "loss": 2.0043, + "step": 17814000 + }, + { + "epoch": 51.57, + "learning_rate": 2.4226429348832903e-05, + "loss": 2.002, + "step": 17814500 + }, + { + "epoch": 51.57, + "learning_rate": 2.4225705701185625e-05, + "loss": 1.9772, + "step": 17815000 + }, + { + "epoch": 51.57, + "learning_rate": 2.4224983500833644e-05, + "loss": 1.9984, + "step": 17815500 + }, + { + "epoch": 51.57, + "learning_rate": 2.4224259853186366e-05, + "loss": 1.9742, + "step": 17816000 + }, + { + "epoch": 51.57, + "learning_rate": 2.4223537652834385e-05, + "loss": 1.9888, + "step": 17816500 + }, + { + "epoch": 51.57, + "learning_rate": 2.4222814005187107e-05, + "loss": 2.0104, + "step": 17817000 + }, + { + "epoch": 51.57, + "learning_rate": 2.422209035753983e-05, + "loss": 1.9822, + "step": 17817500 + }, + { + "epoch": 51.58, + "learning_rate": 2.4221366709892555e-05, + "loss": 1.9954, + "step": 17818000 + }, + { + "epoch": 51.58, + "learning_rate": 2.4220643062245277e-05, + "loss": 1.9771, + "step": 17818500 + }, + { + "epoch": 51.58, + "learning_rate": 2.4219919414598e-05, + "loss": 2.004, + "step": 17819000 + }, + { + "epoch": 51.58, + "learning_rate": 2.4219195766950722e-05, + "loss": 2.0019, + "step": 17819500 + }, + { + "epoch": 51.58, + "learning_rate": 2.4218472119303447e-05, + "loss": 1.9826, + "step": 17820000 + }, + { + "epoch": 51.58, + "learning_rate": 2.421774847165617e-05, + "loss": 1.9801, + "step": 17820500 + }, + { + "epoch": 51.58, + "learning_rate": 2.4217024824008895e-05, + "loss": 1.994, + "step": 17821000 + }, + { + "epoch": 51.59, + "learning_rate": 2.4216301176361618e-05, + "loss": 2.0118, + "step": 17821500 + }, + { + "epoch": 51.59, + "learning_rate": 2.421557752871434e-05, + "loss": 1.975, + "step": 17822000 + }, + { + "epoch": 51.59, + "learning_rate": 2.4214855328362355e-05, + "loss": 1.9759, + "step": 17822500 + }, + { + "epoch": 51.59, + "learning_rate": 2.421413168071508e-05, + "loss": 1.9796, + "step": 17823000 + }, + { + "epoch": 51.59, + "learning_rate": 2.4213408033067807e-05, + "loss": 1.9897, + "step": 17823500 + }, + { + "epoch": 51.59, + "learning_rate": 2.421268438542053e-05, + "loss": 1.9582, + "step": 17824000 + }, + { + "epoch": 51.59, + "learning_rate": 2.421196073777325e-05, + "loss": 1.9928, + "step": 17824500 + }, + { + "epoch": 51.6, + "learning_rate": 2.4211237090125973e-05, + "loss": 2.009, + "step": 17825000 + }, + { + "epoch": 51.6, + "learning_rate": 2.4210513442478696e-05, + "loss": 1.9929, + "step": 17825500 + }, + { + "epoch": 51.6, + "learning_rate": 2.420978979483142e-05, + "loss": 1.9976, + "step": 17826000 + }, + { + "epoch": 51.6, + "learning_rate": 2.4209066147184143e-05, + "loss": 1.9962, + "step": 17826500 + }, + { + "epoch": 51.6, + "learning_rate": 2.420834394683216e-05, + "loss": 2.0236, + "step": 17827000 + }, + { + "epoch": 51.6, + "learning_rate": 2.4207620299184885e-05, + "loss": 1.9801, + "step": 17827500 + }, + { + "epoch": 51.6, + "learning_rate": 2.4206898098832904e-05, + "loss": 1.9704, + "step": 17828000 + }, + { + "epoch": 51.61, + "learning_rate": 2.4206174451185626e-05, + "loss": 1.9973, + "step": 17828500 + }, + { + "epoch": 51.61, + "learning_rate": 2.4205450803538348e-05, + "loss": 1.9994, + "step": 17829000 + }, + { + "epoch": 51.61, + "learning_rate": 2.420472715589107e-05, + "loss": 1.9647, + "step": 17829500 + }, + { + "epoch": 51.61, + "learning_rate": 2.420400495553909e-05, + "loss": 1.9988, + "step": 17830000 + }, + { + "epoch": 51.61, + "learning_rate": 2.4203282755187108e-05, + "loss": 1.9832, + "step": 17830500 + }, + { + "epoch": 51.61, + "learning_rate": 2.420255910753983e-05, + "loss": 1.9955, + "step": 17831000 + }, + { + "epoch": 51.61, + "learning_rate": 2.4201835459892556e-05, + "loss": 1.9977, + "step": 17831500 + }, + { + "epoch": 51.62, + "learning_rate": 2.4201111812245278e-05, + "loss": 1.9805, + "step": 17832000 + }, + { + "epoch": 51.62, + "learning_rate": 2.4200388164598e-05, + "loss": 2.0149, + "step": 17832500 + }, + { + "epoch": 51.62, + "learning_rate": 2.419966596424602e-05, + "loss": 1.9917, + "step": 17833000 + }, + { + "epoch": 51.62, + "learning_rate": 2.419894231659874e-05, + "loss": 2.0017, + "step": 17833500 + }, + { + "epoch": 51.62, + "learning_rate": 2.4198218668951464e-05, + "loss": 1.9985, + "step": 17834000 + }, + { + "epoch": 51.62, + "learning_rate": 2.4197495021304186e-05, + "loss": 1.9916, + "step": 17834500 + }, + { + "epoch": 51.63, + "learning_rate": 2.4196771373656912e-05, + "loss": 2.0182, + "step": 17835000 + }, + { + "epoch": 51.63, + "learning_rate": 2.4196047726009634e-05, + "loss": 1.9854, + "step": 17835500 + }, + { + "epoch": 51.63, + "learning_rate": 2.419532407836236e-05, + "loss": 1.9664, + "step": 17836000 + }, + { + "epoch": 51.63, + "learning_rate": 2.4194601878010375e-05, + "loss": 2.0408, + "step": 17836500 + }, + { + "epoch": 51.63, + "learning_rate": 2.4193878230363097e-05, + "loss": 1.9784, + "step": 17837000 + }, + { + "epoch": 51.63, + "learning_rate": 2.4193156030011116e-05, + "loss": 1.9788, + "step": 17837500 + }, + { + "epoch": 51.63, + "learning_rate": 2.419243238236384e-05, + "loss": 1.9918, + "step": 17838000 + }, + { + "epoch": 51.64, + "learning_rate": 2.419170873471656e-05, + "loss": 1.9885, + "step": 17838500 + }, + { + "epoch": 51.64, + "learning_rate": 2.4190985087069286e-05, + "loss": 1.9747, + "step": 17839000 + }, + { + "epoch": 51.64, + "learning_rate": 2.4190262886717305e-05, + "loss": 2.0008, + "step": 17839500 + }, + { + "epoch": 51.64, + "learning_rate": 2.4189539239070028e-05, + "loss": 2.0073, + "step": 17840000 + }, + { + "epoch": 51.64, + "learning_rate": 2.418881559142275e-05, + "loss": 1.9941, + "step": 17840500 + }, + { + "epoch": 51.64, + "learning_rate": 2.4188091943775472e-05, + "loss": 1.9926, + "step": 17841000 + }, + { + "epoch": 51.64, + "learning_rate": 2.4187368296128198e-05, + "loss": 1.9846, + "step": 17841500 + }, + { + "epoch": 51.65, + "learning_rate": 2.418664464848092e-05, + "loss": 2.0055, + "step": 17842000 + }, + { + "epoch": 51.65, + "learning_rate": 2.4185921000833645e-05, + "loss": 1.9837, + "step": 17842500 + }, + { + "epoch": 51.65, + "learning_rate": 2.4185197353186368e-05, + "loss": 2.0073, + "step": 17843000 + }, + { + "epoch": 51.65, + "learning_rate": 2.418447370553909e-05, + "loss": 2.0092, + "step": 17843500 + }, + { + "epoch": 51.65, + "learning_rate": 2.4183750057891812e-05, + "loss": 2.0056, + "step": 17844000 + }, + { + "epoch": 51.65, + "learning_rate": 2.4183026410244534e-05, + "loss": 1.9885, + "step": 17844500 + }, + { + "epoch": 51.65, + "learning_rate": 2.418230276259726e-05, + "loss": 1.9733, + "step": 17845000 + }, + { + "epoch": 51.66, + "learning_rate": 2.4181579114949982e-05, + "loss": 1.9846, + "step": 17845500 + }, + { + "epoch": 51.66, + "learning_rate": 2.4180855467302708e-05, + "loss": 1.9828, + "step": 17846000 + }, + { + "epoch": 51.66, + "learning_rate": 2.418013181965543e-05, + "loss": 1.9824, + "step": 17846500 + }, + { + "epoch": 51.66, + "learning_rate": 2.417940961930345e-05, + "loss": 1.9769, + "step": 17847000 + }, + { + "epoch": 51.66, + "learning_rate": 2.417868597165617e-05, + "loss": 1.9933, + "step": 17847500 + }, + { + "epoch": 51.66, + "learning_rate": 2.4177962324008894e-05, + "loss": 1.9941, + "step": 17848000 + }, + { + "epoch": 51.66, + "learning_rate": 2.4177238676361616e-05, + "loss": 1.9797, + "step": 17848500 + }, + { + "epoch": 51.67, + "learning_rate": 2.4176515028714338e-05, + "loss": 1.9926, + "step": 17849000 + }, + { + "epoch": 51.67, + "learning_rate": 2.417579138106706e-05, + "loss": 2.0036, + "step": 17849500 + }, + { + "epoch": 51.67, + "learning_rate": 2.4175067733419786e-05, + "loss": 1.9825, + "step": 17850000 + }, + { + "epoch": 51.67, + "learning_rate": 2.417434408577251e-05, + "loss": 2.0047, + "step": 17850500 + }, + { + "epoch": 51.67, + "learning_rate": 2.4173623332715824e-05, + "loss": 2.0039, + "step": 17851000 + }, + { + "epoch": 51.67, + "learning_rate": 2.4172899685068546e-05, + "loss": 2.0111, + "step": 17851500 + }, + { + "epoch": 51.67, + "learning_rate": 2.4172176037421268e-05, + "loss": 2.007, + "step": 17852000 + }, + { + "epoch": 51.68, + "learning_rate": 2.417145238977399e-05, + "loss": 1.994, + "step": 17852500 + }, + { + "epoch": 51.68, + "learning_rate": 2.4170728742126713e-05, + "loss": 2.0155, + "step": 17853000 + }, + { + "epoch": 51.68, + "learning_rate": 2.417000509447944e-05, + "loss": 1.9994, + "step": 17853500 + }, + { + "epoch": 51.68, + "learning_rate": 2.416928144683216e-05, + "loss": 1.9968, + "step": 17854000 + }, + { + "epoch": 51.68, + "learning_rate": 2.4168557799184886e-05, + "loss": 2.0092, + "step": 17854500 + }, + { + "epoch": 51.68, + "learning_rate": 2.416783415153761e-05, + "loss": 1.9643, + "step": 17855000 + }, + { + "epoch": 51.68, + "learning_rate": 2.416711050389033e-05, + "loss": 2.0068, + "step": 17855500 + }, + { + "epoch": 51.69, + "learning_rate": 2.4166386856243053e-05, + "loss": 1.9979, + "step": 17856000 + }, + { + "epoch": 51.69, + "learning_rate": 2.4165663208595775e-05, + "loss": 2.0112, + "step": 17856500 + }, + { + "epoch": 51.69, + "learning_rate": 2.41649395609485e-05, + "loss": 2.0116, + "step": 17857000 + }, + { + "epoch": 51.69, + "learning_rate": 2.4164215913301226e-05, + "loss": 1.9697, + "step": 17857500 + }, + { + "epoch": 51.69, + "learning_rate": 2.4163493712949242e-05, + "loss": 1.9926, + "step": 17858000 + }, + { + "epoch": 51.69, + "learning_rate": 2.4162770065301964e-05, + "loss": 1.9828, + "step": 17858500 + }, + { + "epoch": 51.69, + "learning_rate": 2.4162046417654686e-05, + "loss": 1.9876, + "step": 17859000 + }, + { + "epoch": 51.7, + "learning_rate": 2.4161322770007412e-05, + "loss": 1.9986, + "step": 17859500 + }, + { + "epoch": 51.7, + "learning_rate": 2.4160599122360134e-05, + "loss": 1.9905, + "step": 17860000 + }, + { + "epoch": 51.7, + "learning_rate": 2.4159875474712856e-05, + "loss": 2.0239, + "step": 17860500 + }, + { + "epoch": 51.7, + "learning_rate": 2.4159153274360875e-05, + "loss": 1.993, + "step": 17861000 + }, + { + "epoch": 51.7, + "learning_rate": 2.41584296267136e-05, + "loss": 1.9816, + "step": 17861500 + }, + { + "epoch": 51.7, + "learning_rate": 2.4157705979066323e-05, + "loss": 1.9968, + "step": 17862000 + }, + { + "epoch": 51.7, + "learning_rate": 2.4156982331419045e-05, + "loss": 1.9858, + "step": 17862500 + }, + { + "epoch": 51.71, + "learning_rate": 2.4156258683771768e-05, + "loss": 1.9657, + "step": 17863000 + }, + { + "epoch": 51.71, + "learning_rate": 2.415553503612449e-05, + "loss": 1.9779, + "step": 17863500 + }, + { + "epoch": 51.71, + "learning_rate": 2.415481283577251e-05, + "loss": 1.9815, + "step": 17864000 + }, + { + "epoch": 51.71, + "learning_rate": 2.4154089188125235e-05, + "loss": 2.0093, + "step": 17864500 + }, + { + "epoch": 51.71, + "learning_rate": 2.415336698777325e-05, + "loss": 2.0089, + "step": 17865000 + }, + { + "epoch": 51.71, + "learning_rate": 2.4152643340125976e-05, + "loss": 1.9634, + "step": 17865500 + }, + { + "epoch": 51.71, + "learning_rate": 2.4151919692478698e-05, + "loss": 2.0003, + "step": 17866000 + }, + { + "epoch": 51.72, + "learning_rate": 2.415119604483142e-05, + "loss": 2.0055, + "step": 17866500 + }, + { + "epoch": 51.72, + "learning_rate": 2.4150472397184142e-05, + "loss": 1.9846, + "step": 17867000 + }, + { + "epoch": 51.72, + "learning_rate": 2.4149748749536865e-05, + "loss": 2.0077, + "step": 17867500 + }, + { + "epoch": 51.72, + "learning_rate": 2.4149025101889587e-05, + "loss": 1.9702, + "step": 17868000 + }, + { + "epoch": 51.72, + "learning_rate": 2.4148301454242312e-05, + "loss": 2.0114, + "step": 17868500 + }, + { + "epoch": 51.72, + "learning_rate": 2.4147577806595038e-05, + "loss": 1.9846, + "step": 17869000 + }, + { + "epoch": 51.72, + "learning_rate": 2.414685415894776e-05, + "loss": 1.9885, + "step": 17869500 + }, + { + "epoch": 51.73, + "learning_rate": 2.4146130511300483e-05, + "loss": 1.9884, + "step": 17870000 + }, + { + "epoch": 51.73, + "learning_rate": 2.4145406863653205e-05, + "loss": 1.9826, + "step": 17870500 + }, + { + "epoch": 51.73, + "learning_rate": 2.4144684663301224e-05, + "loss": 1.9927, + "step": 17871000 + }, + { + "epoch": 51.73, + "learning_rate": 2.4143961015653946e-05, + "loss": 1.9944, + "step": 17871500 + }, + { + "epoch": 51.73, + "learning_rate": 2.4143238815301965e-05, + "loss": 2.0158, + "step": 17872000 + }, + { + "epoch": 51.73, + "learning_rate": 2.414251516765469e-05, + "loss": 1.9885, + "step": 17872500 + }, + { + "epoch": 51.74, + "learning_rate": 2.4141792967302706e-05, + "loss": 1.962, + "step": 17873000 + }, + { + "epoch": 51.74, + "learning_rate": 2.414106931965543e-05, + "loss": 1.9864, + "step": 17873500 + }, + { + "epoch": 51.74, + "learning_rate": 2.414034567200815e-05, + "loss": 1.9758, + "step": 17874000 + }, + { + "epoch": 51.74, + "learning_rate": 2.4139622024360876e-05, + "loss": 1.9842, + "step": 17874500 + }, + { + "epoch": 51.74, + "learning_rate": 2.41388983767136e-05, + "loss": 1.9943, + "step": 17875000 + }, + { + "epoch": 51.74, + "learning_rate": 2.413817472906632e-05, + "loss": 2.0157, + "step": 17875500 + }, + { + "epoch": 51.74, + "learning_rate": 2.4137451081419046e-05, + "loss": 1.9879, + "step": 17876000 + }, + { + "epoch": 51.75, + "learning_rate": 2.413672743377177e-05, + "loss": 2.0002, + "step": 17876500 + }, + { + "epoch": 51.75, + "learning_rate": 2.413600378612449e-05, + "loss": 1.9943, + "step": 17877000 + }, + { + "epoch": 51.75, + "learning_rate": 2.413528158577251e-05, + "loss": 1.9848, + "step": 17877500 + }, + { + "epoch": 51.75, + "learning_rate": 2.4134557938125232e-05, + "loss": 1.9746, + "step": 17878000 + }, + { + "epoch": 51.75, + "learning_rate": 2.4133834290477954e-05, + "loss": 2.009, + "step": 17878500 + }, + { + "epoch": 51.75, + "learning_rate": 2.4133110642830676e-05, + "loss": 2.0211, + "step": 17879000 + }, + { + "epoch": 51.75, + "learning_rate": 2.4132386995183402e-05, + "loss": 1.9892, + "step": 17879500 + }, + { + "epoch": 51.76, + "learning_rate": 2.4131663347536128e-05, + "loss": 1.9654, + "step": 17880000 + }, + { + "epoch": 51.76, + "learning_rate": 2.4130941147184143e-05, + "loss": 2.0012, + "step": 17880500 + }, + { + "epoch": 51.76, + "learning_rate": 2.4130217499536865e-05, + "loss": 1.9971, + "step": 17881000 + }, + { + "epoch": 51.76, + "learning_rate": 2.412949385188959e-05, + "loss": 1.9865, + "step": 17881500 + }, + { + "epoch": 51.76, + "learning_rate": 2.4128770204242313e-05, + "loss": 2.0005, + "step": 17882000 + }, + { + "epoch": 51.76, + "learning_rate": 2.4128046556595036e-05, + "loss": 2.0104, + "step": 17882500 + }, + { + "epoch": 51.76, + "learning_rate": 2.412732290894776e-05, + "loss": 2.015, + "step": 17883000 + }, + { + "epoch": 51.77, + "learning_rate": 2.412660070859578e-05, + "loss": 2.0063, + "step": 17883500 + }, + { + "epoch": 51.77, + "learning_rate": 2.4125878508243796e-05, + "loss": 1.9833, + "step": 17884000 + }, + { + "epoch": 51.77, + "learning_rate": 2.4125154860596518e-05, + "loss": 1.985, + "step": 17884500 + }, + { + "epoch": 51.77, + "learning_rate": 2.412443121294924e-05, + "loss": 1.9923, + "step": 17885000 + }, + { + "epoch": 51.77, + "learning_rate": 2.4123707565301966e-05, + "loss": 1.9756, + "step": 17885500 + }, + { + "epoch": 51.77, + "learning_rate": 2.4122983917654688e-05, + "loss": 1.9833, + "step": 17886000 + }, + { + "epoch": 51.77, + "learning_rate": 2.412226027000741e-05, + "loss": 1.9929, + "step": 17886500 + }, + { + "epoch": 51.78, + "learning_rate": 2.412153806965543e-05, + "loss": 1.9785, + "step": 17887000 + }, + { + "epoch": 51.78, + "learning_rate": 2.4120814422008155e-05, + "loss": 2.0035, + "step": 17887500 + }, + { + "epoch": 51.78, + "learning_rate": 2.412009222165617e-05, + "loss": 1.97, + "step": 17888000 + }, + { + "epoch": 51.78, + "learning_rate": 2.4119368574008893e-05, + "loss": 1.9909, + "step": 17888500 + }, + { + "epoch": 51.78, + "learning_rate": 2.4118644926361618e-05, + "loss": 1.9951, + "step": 17889000 + }, + { + "epoch": 51.78, + "learning_rate": 2.411792127871434e-05, + "loss": 1.9855, + "step": 17889500 + }, + { + "epoch": 51.78, + "learning_rate": 2.4117197631067063e-05, + "loss": 1.9831, + "step": 17890000 + }, + { + "epoch": 51.79, + "learning_rate": 2.4116473983419785e-05, + "loss": 1.999, + "step": 17890500 + }, + { + "epoch": 51.79, + "learning_rate": 2.411575033577251e-05, + "loss": 1.9675, + "step": 17891000 + }, + { + "epoch": 51.79, + "learning_rate": 2.4115026688125233e-05, + "loss": 1.9946, + "step": 17891500 + }, + { + "epoch": 51.79, + "learning_rate": 2.4114303040477955e-05, + "loss": 1.9948, + "step": 17892000 + }, + { + "epoch": 51.79, + "learning_rate": 2.411357939283068e-05, + "loss": 1.9847, + "step": 17892500 + }, + { + "epoch": 51.79, + "learning_rate": 2.4112855745183403e-05, + "loss": 2.0055, + "step": 17893000 + }, + { + "epoch": 51.79, + "learning_rate": 2.4112132097536125e-05, + "loss": 2.0054, + "step": 17893500 + }, + { + "epoch": 51.8, + "learning_rate": 2.4111408449888847e-05, + "loss": 2.0047, + "step": 17894000 + }, + { + "epoch": 51.8, + "learning_rate": 2.411068624953687e-05, + "loss": 2.0053, + "step": 17894500 + }, + { + "epoch": 51.8, + "learning_rate": 2.4109964049184885e-05, + "loss": 1.9783, + "step": 17895000 + }, + { + "epoch": 51.8, + "learning_rate": 2.4109240401537607e-05, + "loss": 1.9767, + "step": 17895500 + }, + { + "epoch": 51.8, + "learning_rate": 2.410851675389033e-05, + "loss": 1.9955, + "step": 17896000 + }, + { + "epoch": 51.8, + "learning_rate": 2.4107793106243055e-05, + "loss": 1.9758, + "step": 17896500 + }, + { + "epoch": 51.8, + "learning_rate": 2.4107069458595777e-05, + "loss": 2.0083, + "step": 17897000 + }, + { + "epoch": 51.81, + "learning_rate": 2.41063458109485e-05, + "loss": 1.9777, + "step": 17897500 + }, + { + "epoch": 51.81, + "learning_rate": 2.4105622163301222e-05, + "loss": 2.0355, + "step": 17898000 + }, + { + "epoch": 51.81, + "learning_rate": 2.4104898515653948e-05, + "loss": 1.9953, + "step": 17898500 + }, + { + "epoch": 51.81, + "learning_rate": 2.410417486800667e-05, + "loss": 1.999, + "step": 17899000 + }, + { + "epoch": 51.81, + "learning_rate": 2.4103451220359392e-05, + "loss": 2.0195, + "step": 17899500 + }, + { + "epoch": 51.81, + "learning_rate": 2.410272902000741e-05, + "loss": 1.9829, + "step": 17900000 + }, + { + "epoch": 51.81, + "learning_rate": 2.4102005372360133e-05, + "loss": 1.9678, + "step": 17900500 + }, + { + "epoch": 51.82, + "learning_rate": 2.4101281724712855e-05, + "loss": 1.9793, + "step": 17901000 + }, + { + "epoch": 51.82, + "learning_rate": 2.4100559524360874e-05, + "loss": 2.0003, + "step": 17901500 + }, + { + "epoch": 51.82, + "learning_rate": 2.40998358767136e-05, + "loss": 1.987, + "step": 17902000 + }, + { + "epoch": 51.82, + "learning_rate": 2.4099112229066322e-05, + "loss": 2.0019, + "step": 17902500 + }, + { + "epoch": 51.82, + "learning_rate": 2.409839002871434e-05, + "loss": 1.9471, + "step": 17903000 + }, + { + "epoch": 51.82, + "learning_rate": 2.4097666381067063e-05, + "loss": 1.9942, + "step": 17903500 + }, + { + "epoch": 51.82, + "learning_rate": 2.4096942733419786e-05, + "loss": 2.0126, + "step": 17904000 + }, + { + "epoch": 51.83, + "learning_rate": 2.4096219085772508e-05, + "loss": 1.9953, + "step": 17904500 + }, + { + "epoch": 51.83, + "learning_rate": 2.409549543812523e-05, + "loss": 1.992, + "step": 17905000 + }, + { + "epoch": 51.83, + "learning_rate": 2.4094771790477956e-05, + "loss": 1.9968, + "step": 17905500 + }, + { + "epoch": 51.83, + "learning_rate": 2.4094049590125975e-05, + "loss": 1.9949, + "step": 17906000 + }, + { + "epoch": 51.83, + "learning_rate": 2.4093325942478697e-05, + "loss": 1.9983, + "step": 17906500 + }, + { + "epoch": 51.83, + "learning_rate": 2.409260229483142e-05, + "loss": 1.9753, + "step": 17907000 + }, + { + "epoch": 51.83, + "learning_rate": 2.4091878647184145e-05, + "loss": 1.9995, + "step": 17907500 + }, + { + "epoch": 51.84, + "learning_rate": 2.4091154999536867e-05, + "loss": 1.9909, + "step": 17908000 + }, + { + "epoch": 51.84, + "learning_rate": 2.4090432799184883e-05, + "loss": 1.9879, + "step": 17908500 + }, + { + "epoch": 51.84, + "learning_rate": 2.40897105988329e-05, + "loss": 2.0082, + "step": 17909000 + }, + { + "epoch": 51.84, + "learning_rate": 2.4088986951185624e-05, + "loss": 1.9986, + "step": 17909500 + }, + { + "epoch": 51.84, + "learning_rate": 2.408826330353835e-05, + "loss": 2.014, + "step": 17910000 + }, + { + "epoch": 51.84, + "learning_rate": 2.408753965589107e-05, + "loss": 1.988, + "step": 17910500 + }, + { + "epoch": 51.85, + "learning_rate": 2.4086816008243794e-05, + "loss": 2.0019, + "step": 17911000 + }, + { + "epoch": 51.85, + "learning_rate": 2.408609236059652e-05, + "loss": 1.972, + "step": 17911500 + }, + { + "epoch": 51.85, + "learning_rate": 2.4085368712949242e-05, + "loss": 1.9818, + "step": 17912000 + }, + { + "epoch": 51.85, + "learning_rate": 2.4084645065301964e-05, + "loss": 2.0015, + "step": 17912500 + }, + { + "epoch": 51.85, + "learning_rate": 2.4083921417654686e-05, + "loss": 2.0172, + "step": 17913000 + }, + { + "epoch": 51.85, + "learning_rate": 2.4083197770007412e-05, + "loss": 2.0059, + "step": 17913500 + }, + { + "epoch": 51.85, + "learning_rate": 2.4082474122360134e-05, + "loss": 1.9894, + "step": 17914000 + }, + { + "epoch": 51.86, + "learning_rate": 2.408175047471286e-05, + "loss": 1.9931, + "step": 17914500 + }, + { + "epoch": 51.86, + "learning_rate": 2.4081026827065582e-05, + "loss": 2.0055, + "step": 17915000 + }, + { + "epoch": 51.86, + "learning_rate": 2.4080303179418304e-05, + "loss": 1.9908, + "step": 17915500 + }, + { + "epoch": 51.86, + "learning_rate": 2.4079579531771026e-05, + "loss": 1.9894, + "step": 17916000 + }, + { + "epoch": 51.86, + "learning_rate": 2.407885588412375e-05, + "loss": 2.0038, + "step": 17916500 + }, + { + "epoch": 51.86, + "learning_rate": 2.4078132236476474e-05, + "loss": 1.9793, + "step": 17917000 + }, + { + "epoch": 51.86, + "learning_rate": 2.4077408588829196e-05, + "loss": 1.982, + "step": 17917500 + }, + { + "epoch": 51.87, + "learning_rate": 2.4076684941181922e-05, + "loss": 2.0041, + "step": 17918000 + }, + { + "epoch": 51.87, + "learning_rate": 2.4075961293534644e-05, + "loss": 1.9675, + "step": 17918500 + }, + { + "epoch": 51.87, + "learning_rate": 2.407523909318266e-05, + "loss": 1.9917, + "step": 17919000 + }, + { + "epoch": 51.87, + "learning_rate": 2.4074515445535385e-05, + "loss": 2.0117, + "step": 17919500 + }, + { + "epoch": 51.87, + "learning_rate": 2.4073794692478698e-05, + "loss": 1.9785, + "step": 17920000 + }, + { + "epoch": 51.87, + "learning_rate": 2.407307104483142e-05, + "loss": 1.9916, + "step": 17920500 + }, + { + "epoch": 51.87, + "learning_rate": 2.4072347397184146e-05, + "loss": 2.0303, + "step": 17921000 + }, + { + "epoch": 51.88, + "learning_rate": 2.4071623749536868e-05, + "loss": 1.9962, + "step": 17921500 + }, + { + "epoch": 51.88, + "learning_rate": 2.407090010188959e-05, + "loss": 1.9977, + "step": 17922000 + }, + { + "epoch": 51.88, + "learning_rate": 2.4070176454242312e-05, + "loss": 1.9996, + "step": 17922500 + }, + { + "epoch": 51.88, + "learning_rate": 2.4069452806595035e-05, + "loss": 1.9966, + "step": 17923000 + }, + { + "epoch": 51.88, + "learning_rate": 2.4068730606243053e-05, + "loss": 2.0157, + "step": 17923500 + }, + { + "epoch": 51.88, + "learning_rate": 2.4068006958595776e-05, + "loss": 2.0132, + "step": 17924000 + }, + { + "epoch": 51.88, + "learning_rate": 2.4067284758243795e-05, + "loss": 1.9959, + "step": 17924500 + }, + { + "epoch": 51.89, + "learning_rate": 2.406656111059652e-05, + "loss": 2.0228, + "step": 17925000 + }, + { + "epoch": 51.89, + "learning_rate": 2.4065837462949242e-05, + "loss": 1.9954, + "step": 17925500 + }, + { + "epoch": 51.89, + "learning_rate": 2.4065113815301965e-05, + "loss": 1.9962, + "step": 17926000 + }, + { + "epoch": 51.89, + "learning_rate": 2.4064390167654687e-05, + "loss": 1.9922, + "step": 17926500 + }, + { + "epoch": 51.89, + "learning_rate": 2.406366652000741e-05, + "loss": 2.0128, + "step": 17927000 + }, + { + "epoch": 51.89, + "learning_rate": 2.4062942872360135e-05, + "loss": 1.9784, + "step": 17927500 + }, + { + "epoch": 51.89, + "learning_rate": 2.406222067200815e-05, + "loss": 1.9981, + "step": 17928000 + }, + { + "epoch": 51.9, + "learning_rate": 2.4061497024360876e-05, + "loss": 2.0066, + "step": 17928500 + }, + { + "epoch": 51.9, + "learning_rate": 2.4060773376713598e-05, + "loss": 1.9799, + "step": 17929000 + }, + { + "epoch": 51.9, + "learning_rate": 2.4060049729066324e-05, + "loss": 1.9834, + "step": 17929500 + }, + { + "epoch": 51.9, + "learning_rate": 2.4059326081419046e-05, + "loss": 1.9965, + "step": 17930000 + }, + { + "epoch": 51.9, + "learning_rate": 2.405860243377177e-05, + "loss": 1.9857, + "step": 17930500 + }, + { + "epoch": 51.9, + "learning_rate": 2.405787878612449e-05, + "loss": 1.989, + "step": 17931000 + }, + { + "epoch": 51.9, + "learning_rate": 2.4057155138477213e-05, + "loss": 1.9924, + "step": 17931500 + }, + { + "epoch": 51.91, + "learning_rate": 2.405643149082994e-05, + "loss": 1.9879, + "step": 17932000 + }, + { + "epoch": 51.91, + "learning_rate": 2.4055709290477957e-05, + "loss": 1.9851, + "step": 17932500 + }, + { + "epoch": 51.91, + "learning_rate": 2.405498564283068e-05, + "loss": 1.9867, + "step": 17933000 + }, + { + "epoch": 51.91, + "learning_rate": 2.40542634424787e-05, + "loss": 1.9812, + "step": 17933500 + }, + { + "epoch": 51.91, + "learning_rate": 2.405353979483142e-05, + "loss": 1.9995, + "step": 17934000 + }, + { + "epoch": 51.91, + "learning_rate": 2.4052816147184143e-05, + "loss": 1.9925, + "step": 17934500 + }, + { + "epoch": 51.91, + "learning_rate": 2.4052092499536865e-05, + "loss": 2.0056, + "step": 17935000 + }, + { + "epoch": 51.92, + "learning_rate": 2.4051368851889587e-05, + "loss": 2.0094, + "step": 17935500 + }, + { + "epoch": 51.92, + "learning_rate": 2.4050645204242313e-05, + "loss": 1.9854, + "step": 17936000 + }, + { + "epoch": 51.92, + "learning_rate": 2.4049921556595035e-05, + "loss": 1.9946, + "step": 17936500 + }, + { + "epoch": 51.92, + "learning_rate": 2.404919790894776e-05, + "loss": 1.9916, + "step": 17937000 + }, + { + "epoch": 51.92, + "learning_rate": 2.4048474261300483e-05, + "loss": 1.9931, + "step": 17937500 + }, + { + "epoch": 51.92, + "learning_rate": 2.4047753508243795e-05, + "loss": 2.0235, + "step": 17938000 + }, + { + "epoch": 51.92, + "learning_rate": 2.4047029860596518e-05, + "loss": 2.0159, + "step": 17938500 + }, + { + "epoch": 51.93, + "learning_rate": 2.404630621294924e-05, + "loss": 2.0153, + "step": 17939000 + }, + { + "epoch": 51.93, + "learning_rate": 2.4045582565301966e-05, + "loss": 1.9665, + "step": 17939500 + }, + { + "epoch": 51.93, + "learning_rate": 2.4044858917654688e-05, + "loss": 2.0157, + "step": 17940000 + }, + { + "epoch": 51.93, + "learning_rate": 2.4044135270007413e-05, + "loss": 1.9814, + "step": 17940500 + }, + { + "epoch": 51.93, + "learning_rate": 2.4043411622360136e-05, + "loss": 1.9936, + "step": 17941000 + }, + { + "epoch": 51.93, + "learning_rate": 2.4042687974712858e-05, + "loss": 1.9912, + "step": 17941500 + }, + { + "epoch": 51.93, + "learning_rate": 2.404196432706558e-05, + "loss": 1.9819, + "step": 17942000 + }, + { + "epoch": 51.94, + "learning_rate": 2.4041240679418302e-05, + "loss": 1.997, + "step": 17942500 + }, + { + "epoch": 51.94, + "learning_rate": 2.4040517031771028e-05, + "loss": 2.0217, + "step": 17943000 + }, + { + "epoch": 51.94, + "learning_rate": 2.4039794831419047e-05, + "loss": 2.0166, + "step": 17943500 + }, + { + "epoch": 51.94, + "learning_rate": 2.403907118377177e-05, + "loss": 1.9938, + "step": 17944000 + }, + { + "epoch": 51.94, + "learning_rate": 2.403834753612449e-05, + "loss": 1.9768, + "step": 17944500 + }, + { + "epoch": 51.94, + "learning_rate": 2.403762533577251e-05, + "loss": 1.9886, + "step": 17945000 + }, + { + "epoch": 51.94, + "learning_rate": 2.4036903135420526e-05, + "loss": 1.9923, + "step": 17945500 + }, + { + "epoch": 51.95, + "learning_rate": 2.403617948777325e-05, + "loss": 2.0024, + "step": 17946000 + }, + { + "epoch": 51.95, + "learning_rate": 2.4035455840125974e-05, + "loss": 1.9859, + "step": 17946500 + }, + { + "epoch": 51.95, + "learning_rate": 2.40347321924787e-05, + "loss": 1.9866, + "step": 17947000 + }, + { + "epoch": 51.95, + "learning_rate": 2.403400854483142e-05, + "loss": 1.9836, + "step": 17947500 + }, + { + "epoch": 51.95, + "learning_rate": 2.4033284897184144e-05, + "loss": 1.9876, + "step": 17948000 + }, + { + "epoch": 51.95, + "learning_rate": 2.4032562696832163e-05, + "loss": 1.9725, + "step": 17948500 + }, + { + "epoch": 51.96, + "learning_rate": 2.4031839049184885e-05, + "loss": 1.9994, + "step": 17949000 + }, + { + "epoch": 51.96, + "learning_rate": 2.4031115401537607e-05, + "loss": 1.9762, + "step": 17949500 + }, + { + "epoch": 51.96, + "learning_rate": 2.403039175389033e-05, + "loss": 2.0033, + "step": 17950000 + }, + { + "epoch": 51.96, + "learning_rate": 2.402966810624305e-05, + "loss": 1.9961, + "step": 17950500 + }, + { + "epoch": 51.96, + "learning_rate": 2.4028944458595777e-05, + "loss": 2.0289, + "step": 17951000 + }, + { + "epoch": 51.96, + "learning_rate": 2.4028220810948503e-05, + "loss": 1.9946, + "step": 17951500 + }, + { + "epoch": 51.96, + "learning_rate": 2.4027497163301225e-05, + "loss": 1.9834, + "step": 17952000 + }, + { + "epoch": 51.97, + "learning_rate": 2.4026773515653947e-05, + "loss": 1.9938, + "step": 17952500 + }, + { + "epoch": 51.97, + "learning_rate": 2.402604986800667e-05, + "loss": 2.005, + "step": 17953000 + }, + { + "epoch": 51.97, + "learning_rate": 2.4025326220359392e-05, + "loss": 2.0028, + "step": 17953500 + }, + { + "epoch": 51.97, + "learning_rate": 2.4024602572712114e-05, + "loss": 2.0058, + "step": 17954000 + }, + { + "epoch": 51.97, + "learning_rate": 2.402387892506484e-05, + "loss": 1.9653, + "step": 17954500 + }, + { + "epoch": 51.97, + "learning_rate": 2.402315672471286e-05, + "loss": 2.0039, + "step": 17955000 + }, + { + "epoch": 51.97, + "learning_rate": 2.402243307706558e-05, + "loss": 2.0044, + "step": 17955500 + }, + { + "epoch": 51.98, + "learning_rate": 2.4021709429418303e-05, + "loss": 2.0221, + "step": 17956000 + }, + { + "epoch": 51.98, + "learning_rate": 2.402098578177103e-05, + "loss": 1.9855, + "step": 17956500 + }, + { + "epoch": 51.98, + "learning_rate": 2.402026213412375e-05, + "loss": 2.014, + "step": 17957000 + }, + { + "epoch": 51.98, + "learning_rate": 2.4019538486476473e-05, + "loss": 1.9889, + "step": 17957500 + }, + { + "epoch": 51.98, + "learning_rate": 2.40188148388292e-05, + "loss": 2.0233, + "step": 17958000 + }, + { + "epoch": 51.98, + "learning_rate": 2.401809408577251e-05, + "loss": 2.002, + "step": 17958500 + }, + { + "epoch": 51.98, + "learning_rate": 2.4017371885420527e-05, + "loss": 1.9845, + "step": 17959000 + }, + { + "epoch": 51.99, + "learning_rate": 2.4016649685068546e-05, + "loss": 2.0269, + "step": 17959500 + }, + { + "epoch": 51.99, + "learning_rate": 2.4015926037421268e-05, + "loss": 2.0001, + "step": 17960000 + }, + { + "epoch": 51.99, + "learning_rate": 2.401520238977399e-05, + "loss": 1.9809, + "step": 17960500 + }, + { + "epoch": 51.99, + "learning_rate": 2.4014478742126716e-05, + "loss": 2.0207, + "step": 17961000 + }, + { + "epoch": 51.99, + "learning_rate": 2.401375654177473e-05, + "loss": 2.0085, + "step": 17961500 + }, + { + "epoch": 51.99, + "learning_rate": 2.4013032894127453e-05, + "loss": 1.9825, + "step": 17962000 + }, + { + "epoch": 51.99, + "learning_rate": 2.401230924648018e-05, + "loss": 2.0076, + "step": 17962500 + }, + { + "epoch": 52.0, + "learning_rate": 2.4011585598832905e-05, + "loss": 2.0011, + "step": 17963000 + }, + { + "epoch": 52.0, + "learning_rate": 2.4010861951185627e-05, + "loss": 1.9912, + "step": 17963500 + }, + { + "epoch": 52.0, + "learning_rate": 2.401013830353835e-05, + "loss": 2.023, + "step": 17964000 + }, + { + "epoch": 52.0, + "learning_rate": 2.400941465589107e-05, + "loss": 2.0113, + "step": 17964500 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.6733575656076577, + "eval_accuracy_mlm": 0.6398535085439098, + "eval_accuracy_nsp": 0.8529352454020065, + "eval_loss": 2.1604368686676025, + "eval_runtime": 331.8083, + "eval_samples_per_second": 1315.175, + "eval_steps_per_second": 54.8, + "step": 17964544 + }, + { + "epoch": 52.0, + "learning_rate": 2.4008691008243794e-05, + "loss": 1.9692, + "step": 17965000 + }, + { + "epoch": 52.0, + "learning_rate": 2.4007967360596516e-05, + "loss": 1.9905, + "step": 17965500 + }, + { + "epoch": 52.0, + "learning_rate": 2.400724371294924e-05, + "loss": 1.9939, + "step": 17966000 + }, + { + "epoch": 52.01, + "learning_rate": 2.400652151259726e-05, + "loss": 1.9971, + "step": 17966500 + }, + { + "epoch": 52.01, + "learning_rate": 2.4005797864949983e-05, + "loss": 1.9688, + "step": 17967000 + }, + { + "epoch": 52.01, + "learning_rate": 2.4005074217302705e-05, + "loss": 1.9806, + "step": 17967500 + }, + { + "epoch": 52.01, + "learning_rate": 2.4004350569655427e-05, + "loss": 1.9852, + "step": 17968000 + }, + { + "epoch": 52.01, + "learning_rate": 2.4003626922008153e-05, + "loss": 1.9637, + "step": 17968500 + }, + { + "epoch": 52.01, + "learning_rate": 2.400290472165617e-05, + "loss": 1.9664, + "step": 17969000 + }, + { + "epoch": 52.01, + "learning_rate": 2.400218107400889e-05, + "loss": 1.9848, + "step": 17969500 + }, + { + "epoch": 52.02, + "learning_rate": 2.4001457426361616e-05, + "loss": 1.9547, + "step": 17970000 + }, + { + "epoch": 52.02, + "learning_rate": 2.4000733778714342e-05, + "loss": 1.9873, + "step": 17970500 + }, + { + "epoch": 52.02, + "learning_rate": 2.4000011578362357e-05, + "loss": 1.9764, + "step": 17971000 + }, + { + "epoch": 52.02, + "learning_rate": 2.399928793071508e-05, + "loss": 1.9571, + "step": 17971500 + }, + { + "epoch": 52.02, + "learning_rate": 2.3998564283067805e-05, + "loss": 1.9932, + "step": 17972000 + }, + { + "epoch": 52.02, + "learning_rate": 2.3997840635420527e-05, + "loss": 2.0068, + "step": 17972500 + }, + { + "epoch": 52.02, + "learning_rate": 2.399711698777325e-05, + "loss": 1.9844, + "step": 17973000 + }, + { + "epoch": 52.03, + "learning_rate": 2.3996393340125975e-05, + "loss": 1.9721, + "step": 17973500 + }, + { + "epoch": 52.03, + "learning_rate": 2.3995669692478698e-05, + "loss": 1.9976, + "step": 17974000 + }, + { + "epoch": 52.03, + "learning_rate": 2.399494604483142e-05, + "loss": 1.9776, + "step": 17974500 + }, + { + "epoch": 52.03, + "learning_rate": 2.399422384447944e-05, + "loss": 1.9746, + "step": 17975000 + }, + { + "epoch": 52.03, + "learning_rate": 2.399350019683216e-05, + "loss": 1.9654, + "step": 17975500 + }, + { + "epoch": 52.03, + "learning_rate": 2.3992776549184883e-05, + "loss": 1.9853, + "step": 17976000 + }, + { + "epoch": 52.03, + "learning_rate": 2.3992052901537605e-05, + "loss": 1.9811, + "step": 17976500 + }, + { + "epoch": 52.04, + "learning_rate": 2.399132925389033e-05, + "loss": 1.9706, + "step": 17977000 + }, + { + "epoch": 52.04, + "learning_rate": 2.3990605606243057e-05, + "loss": 1.9728, + "step": 17977500 + }, + { + "epoch": 52.04, + "learning_rate": 2.398988195859578e-05, + "loss": 1.9912, + "step": 17978000 + }, + { + "epoch": 52.04, + "learning_rate": 2.39891583109485e-05, + "loss": 1.9626, + "step": 17978500 + }, + { + "epoch": 52.04, + "learning_rate": 2.3988434663301223e-05, + "loss": 1.9793, + "step": 17979000 + }, + { + "epoch": 52.04, + "learning_rate": 2.3987711015653946e-05, + "loss": 1.9737, + "step": 17979500 + }, + { + "epoch": 52.04, + "learning_rate": 2.3986987368006668e-05, + "loss": 1.9903, + "step": 17980000 + }, + { + "epoch": 52.05, + "learning_rate": 2.3986265167654687e-05, + "loss": 1.9732, + "step": 17980500 + }, + { + "epoch": 52.05, + "learning_rate": 2.3985541520007412e-05, + "loss": 1.9862, + "step": 17981000 + }, + { + "epoch": 52.05, + "learning_rate": 2.3984817872360135e-05, + "loss": 1.9599, + "step": 17981500 + }, + { + "epoch": 52.05, + "learning_rate": 2.3984094224712857e-05, + "loss": 1.9863, + "step": 17982000 + }, + { + "epoch": 52.05, + "learning_rate": 2.3983370577065582e-05, + "loss": 1.9879, + "step": 17982500 + }, + { + "epoch": 52.05, + "learning_rate": 2.3982648376713598e-05, + "loss": 1.9867, + "step": 17983000 + }, + { + "epoch": 52.05, + "learning_rate": 2.398192472906632e-05, + "loss": 2.0022, + "step": 17983500 + }, + { + "epoch": 52.06, + "learning_rate": 2.3981201081419042e-05, + "loss": 1.9627, + "step": 17984000 + }, + { + "epoch": 52.06, + "learning_rate": 2.3980477433771768e-05, + "loss": 1.9772, + "step": 17984500 + }, + { + "epoch": 52.06, + "learning_rate": 2.3979755233419787e-05, + "loss": 1.9667, + "step": 17985000 + }, + { + "epoch": 52.06, + "learning_rate": 2.397903158577251e-05, + "loss": 1.9771, + "step": 17985500 + }, + { + "epoch": 52.06, + "learning_rate": 2.397830793812523e-05, + "loss": 1.9553, + "step": 17986000 + }, + { + "epoch": 52.06, + "learning_rate": 2.3977584290477957e-05, + "loss": 1.9609, + "step": 17986500 + }, + { + "epoch": 52.07, + "learning_rate": 2.397686064283068e-05, + "loss": 1.9716, + "step": 17987000 + }, + { + "epoch": 52.07, + "learning_rate": 2.39761369951834e-05, + "loss": 1.9825, + "step": 17987500 + }, + { + "epoch": 52.07, + "learning_rate": 2.3975413347536127e-05, + "loss": 1.9891, + "step": 17988000 + }, + { + "epoch": 52.07, + "learning_rate": 2.397468969988885e-05, + "loss": 1.9685, + "step": 17988500 + }, + { + "epoch": 52.07, + "learning_rate": 2.3973966052241572e-05, + "loss": 1.9624, + "step": 17989000 + }, + { + "epoch": 52.07, + "learning_rate": 2.3973242404594294e-05, + "loss": 1.97, + "step": 17989500 + }, + { + "epoch": 52.07, + "learning_rate": 2.397251875694702e-05, + "loss": 1.9468, + "step": 17990000 + }, + { + "epoch": 52.08, + "learning_rate": 2.3971795109299742e-05, + "loss": 1.9825, + "step": 17990500 + }, + { + "epoch": 52.08, + "learning_rate": 2.3971071461652464e-05, + "loss": 1.9772, + "step": 17991000 + }, + { + "epoch": 52.08, + "learning_rate": 2.397034781400519e-05, + "loss": 1.9851, + "step": 17991500 + }, + { + "epoch": 52.08, + "learning_rate": 2.396962561365321e-05, + "loss": 1.9617, + "step": 17992000 + }, + { + "epoch": 52.08, + "learning_rate": 2.396890196600593e-05, + "loss": 1.9892, + "step": 17992500 + }, + { + "epoch": 52.08, + "learning_rate": 2.3968178318358653e-05, + "loss": 1.9438, + "step": 17993000 + }, + { + "epoch": 52.08, + "learning_rate": 2.3967454670711375e-05, + "loss": 1.9785, + "step": 17993500 + }, + { + "epoch": 52.09, + "learning_rate": 2.3966733917654688e-05, + "loss": 1.9942, + "step": 17994000 + }, + { + "epoch": 52.09, + "learning_rate": 2.396601027000741e-05, + "loss": 1.9652, + "step": 17994500 + }, + { + "epoch": 52.09, + "learning_rate": 2.396528806965543e-05, + "loss": 1.9555, + "step": 17995000 + }, + { + "epoch": 52.09, + "learning_rate": 2.396456442200815e-05, + "loss": 1.9545, + "step": 17995500 + }, + { + "epoch": 52.09, + "learning_rate": 2.3963840774360877e-05, + "loss": 1.9717, + "step": 17996000 + }, + { + "epoch": 52.09, + "learning_rate": 2.39631171267136e-05, + "loss": 1.9879, + "step": 17996500 + }, + { + "epoch": 52.09, + "learning_rate": 2.396239347906632e-05, + "loss": 1.9426, + "step": 17997000 + }, + { + "epoch": 52.1, + "learning_rate": 2.3961669831419047e-05, + "loss": 1.9578, + "step": 17997500 + }, + { + "epoch": 52.1, + "learning_rate": 2.396094618377177e-05, + "loss": 1.9701, + "step": 17998000 + }, + { + "epoch": 52.1, + "learning_rate": 2.396022253612449e-05, + "loss": 2.0016, + "step": 17998500 + }, + { + "epoch": 52.1, + "learning_rate": 2.395950033577251e-05, + "loss": 1.9863, + "step": 17999000 + }, + { + "epoch": 52.1, + "learning_rate": 2.3958776688125236e-05, + "loss": 1.9981, + "step": 17999500 + }, + { + "epoch": 52.1, + "learning_rate": 2.3958053040477958e-05, + "loss": 1.9824, + "step": 18000000 + }, + { + "epoch": 52.1, + "learning_rate": 2.395732939283068e-05, + "loss": 1.9898, + "step": 18000500 + }, + { + "epoch": 52.11, + "learning_rate": 2.3956605745183402e-05, + "loss": 1.9961, + "step": 18001000 + }, + { + "epoch": 52.11, + "learning_rate": 2.3955882097536125e-05, + "loss": 1.9645, + "step": 18001500 + }, + { + "epoch": 52.11, + "learning_rate": 2.3955158449888847e-05, + "loss": 1.9396, + "step": 18002000 + }, + { + "epoch": 52.11, + "learning_rate": 2.3954434802241572e-05, + "loss": 1.9814, + "step": 18002500 + }, + { + "epoch": 52.11, + "learning_rate": 2.3953711154594298e-05, + "loss": 1.9939, + "step": 18003000 + }, + { + "epoch": 52.11, + "learning_rate": 2.3952988954242314e-05, + "loss": 1.9722, + "step": 18003500 + }, + { + "epoch": 52.11, + "learning_rate": 2.3952265306595036e-05, + "loss": 1.9796, + "step": 18004000 + }, + { + "epoch": 52.12, + "learning_rate": 2.3951541658947758e-05, + "loss": 1.9788, + "step": 18004500 + }, + { + "epoch": 52.12, + "learning_rate": 2.3950818011300484e-05, + "loss": 1.9689, + "step": 18005000 + }, + { + "epoch": 52.12, + "learning_rate": 2.3950094363653206e-05, + "loss": 1.9648, + "step": 18005500 + }, + { + "epoch": 52.12, + "learning_rate": 2.3949370716005928e-05, + "loss": 1.9719, + "step": 18006000 + }, + { + "epoch": 52.12, + "learning_rate": 2.3948648515653947e-05, + "loss": 1.9751, + "step": 18006500 + }, + { + "epoch": 52.12, + "learning_rate": 2.3947924868006673e-05, + "loss": 1.9792, + "step": 18007000 + }, + { + "epoch": 52.12, + "learning_rate": 2.3947201220359395e-05, + "loss": 1.9894, + "step": 18007500 + }, + { + "epoch": 52.13, + "learning_rate": 2.3946477572712117e-05, + "loss": 1.982, + "step": 18008000 + }, + { + "epoch": 52.13, + "learning_rate": 2.394575392506484e-05, + "loss": 1.9997, + "step": 18008500 + }, + { + "epoch": 52.13, + "learning_rate": 2.3945030277417562e-05, + "loss": 1.982, + "step": 18009000 + }, + { + "epoch": 52.13, + "learning_rate": 2.3944306629770284e-05, + "loss": 1.9933, + "step": 18009500 + }, + { + "epoch": 52.13, + "learning_rate": 2.394358298212301e-05, + "loss": 1.963, + "step": 18010000 + }, + { + "epoch": 52.13, + "learning_rate": 2.394286078177103e-05, + "loss": 1.9744, + "step": 18010500 + }, + { + "epoch": 52.13, + "learning_rate": 2.394213713412375e-05, + "loss": 1.9675, + "step": 18011000 + }, + { + "epoch": 52.14, + "learning_rate": 2.3941413486476473e-05, + "loss": 1.979, + "step": 18011500 + }, + { + "epoch": 52.14, + "learning_rate": 2.39406898388292e-05, + "loss": 1.9752, + "step": 18012000 + }, + { + "epoch": 52.14, + "learning_rate": 2.393996619118192e-05, + "loss": 1.9485, + "step": 18012500 + }, + { + "epoch": 52.14, + "learning_rate": 2.3939243990829936e-05, + "loss": 1.9757, + "step": 18013000 + }, + { + "epoch": 52.14, + "learning_rate": 2.3938520343182662e-05, + "loss": 1.9781, + "step": 18013500 + }, + { + "epoch": 52.14, + "learning_rate": 2.3937798142830678e-05, + "loss": 1.9736, + "step": 18014000 + }, + { + "epoch": 52.14, + "learning_rate": 2.3937074495183403e-05, + "loss": 1.9807, + "step": 18014500 + }, + { + "epoch": 52.15, + "learning_rate": 2.3936350847536125e-05, + "loss": 1.9986, + "step": 18015000 + }, + { + "epoch": 52.15, + "learning_rate": 2.3935627199888848e-05, + "loss": 1.9802, + "step": 18015500 + }, + { + "epoch": 52.15, + "learning_rate": 2.3934903552241573e-05, + "loss": 1.9634, + "step": 18016000 + }, + { + "epoch": 52.15, + "learning_rate": 2.3934179904594296e-05, + "loss": 2.0083, + "step": 18016500 + }, + { + "epoch": 52.15, + "learning_rate": 2.3933456256947018e-05, + "loss": 1.9671, + "step": 18017000 + }, + { + "epoch": 52.15, + "learning_rate": 2.393273260929974e-05, + "loss": 1.9985, + "step": 18017500 + }, + { + "epoch": 52.15, + "learning_rate": 2.3932008961652466e-05, + "loss": 1.9947, + "step": 18018000 + }, + { + "epoch": 52.16, + "learning_rate": 2.3931286761300485e-05, + "loss": 1.9817, + "step": 18018500 + }, + { + "epoch": 52.16, + "learning_rate": 2.3930563113653207e-05, + "loss": 1.9745, + "step": 18019000 + }, + { + "epoch": 52.16, + "learning_rate": 2.392984236059652e-05, + "loss": 1.9691, + "step": 18019500 + }, + { + "epoch": 52.16, + "learning_rate": 2.392911871294924e-05, + "loss": 1.9734, + "step": 18020000 + }, + { + "epoch": 52.16, + "learning_rate": 2.3928395065301964e-05, + "loss": 1.9939, + "step": 18020500 + }, + { + "epoch": 52.16, + "learning_rate": 2.3927671417654686e-05, + "loss": 1.9912, + "step": 18021000 + }, + { + "epoch": 52.16, + "learning_rate": 2.392694777000741e-05, + "loss": 2.0022, + "step": 18021500 + }, + { + "epoch": 52.17, + "learning_rate": 2.3926224122360137e-05, + "loss": 1.9792, + "step": 18022000 + }, + { + "epoch": 52.17, + "learning_rate": 2.392550047471286e-05, + "loss": 1.9674, + "step": 18022500 + }, + { + "epoch": 52.17, + "learning_rate": 2.392477682706558e-05, + "loss": 1.9699, + "step": 18023000 + }, + { + "epoch": 52.17, + "learning_rate": 2.3924053179418304e-05, + "loss": 1.9815, + "step": 18023500 + }, + { + "epoch": 52.17, + "learning_rate": 2.3923330979066323e-05, + "loss": 1.9926, + "step": 18024000 + }, + { + "epoch": 52.17, + "learning_rate": 2.3922607331419045e-05, + "loss": 2.0032, + "step": 18024500 + }, + { + "epoch": 52.17, + "learning_rate": 2.3921883683771767e-05, + "loss": 1.9983, + "step": 18025000 + }, + { + "epoch": 52.18, + "learning_rate": 2.3921160036124493e-05, + "loss": 1.9787, + "step": 18025500 + }, + { + "epoch": 52.18, + "learning_rate": 2.392043783577251e-05, + "loss": 1.9775, + "step": 18026000 + }, + { + "epoch": 52.18, + "learning_rate": 2.3919714188125234e-05, + "loss": 1.9896, + "step": 18026500 + }, + { + "epoch": 52.18, + "learning_rate": 2.3918990540477956e-05, + "loss": 1.9954, + "step": 18027000 + }, + { + "epoch": 52.18, + "learning_rate": 2.391826689283068e-05, + "loss": 1.9906, + "step": 18027500 + }, + { + "epoch": 52.18, + "learning_rate": 2.39175432451834e-05, + "loss": 1.9733, + "step": 18028000 + }, + { + "epoch": 52.19, + "learning_rate": 2.3916819597536126e-05, + "loss": 1.9725, + "step": 18028500 + }, + { + "epoch": 52.19, + "learning_rate": 2.391609594988885e-05, + "loss": 1.9827, + "step": 18029000 + }, + { + "epoch": 52.19, + "learning_rate": 2.3915372302241574e-05, + "loss": 1.9928, + "step": 18029500 + }, + { + "epoch": 52.19, + "learning_rate": 2.391465010188959e-05, + "loss": 1.9937, + "step": 18030000 + }, + { + "epoch": 52.19, + "learning_rate": 2.3913926454242315e-05, + "loss": 1.9943, + "step": 18030500 + }, + { + "epoch": 52.19, + "learning_rate": 2.3913202806595037e-05, + "loss": 1.994, + "step": 18031000 + }, + { + "epoch": 52.19, + "learning_rate": 2.391247915894776e-05, + "loss": 1.9545, + "step": 18031500 + }, + { + "epoch": 52.2, + "learning_rate": 2.3911755511300482e-05, + "loss": 1.9868, + "step": 18032000 + }, + { + "epoch": 52.2, + "learning_rate": 2.3911031863653204e-05, + "loss": 1.9876, + "step": 18032500 + }, + { + "epoch": 52.2, + "learning_rate": 2.3910309663301223e-05, + "loss": 1.9849, + "step": 18033000 + }, + { + "epoch": 52.2, + "learning_rate": 2.390958601565395e-05, + "loss": 1.9906, + "step": 18033500 + }, + { + "epoch": 52.2, + "learning_rate": 2.390886236800667e-05, + "loss": 1.9954, + "step": 18034000 + }, + { + "epoch": 52.2, + "learning_rate": 2.3908138720359393e-05, + "loss": 1.9781, + "step": 18034500 + }, + { + "epoch": 52.2, + "learning_rate": 2.3907415072712115e-05, + "loss": 1.9885, + "step": 18035000 + }, + { + "epoch": 52.21, + "learning_rate": 2.3906691425064838e-05, + "loss": 1.9517, + "step": 18035500 + }, + { + "epoch": 52.21, + "learning_rate": 2.3905969224712857e-05, + "loss": 1.9748, + "step": 18036000 + }, + { + "epoch": 52.21, + "learning_rate": 2.390524557706558e-05, + "loss": 1.9728, + "step": 18036500 + }, + { + "epoch": 52.21, + "learning_rate": 2.3904521929418304e-05, + "loss": 1.9622, + "step": 18037000 + }, + { + "epoch": 52.21, + "learning_rate": 2.3903798281771027e-05, + "loss": 1.9897, + "step": 18037500 + }, + { + "epoch": 52.21, + "learning_rate": 2.3903074634123752e-05, + "loss": 1.9903, + "step": 18038000 + }, + { + "epoch": 52.21, + "learning_rate": 2.3902352433771768e-05, + "loss": 1.961, + "step": 18038500 + }, + { + "epoch": 52.22, + "learning_rate": 2.390162878612449e-05, + "loss": 1.9802, + "step": 18039000 + }, + { + "epoch": 52.22, + "learning_rate": 2.3900905138477216e-05, + "loss": 1.9555, + "step": 18039500 + }, + { + "epoch": 52.22, + "learning_rate": 2.3900181490829938e-05, + "loss": 1.9787, + "step": 18040000 + }, + { + "epoch": 52.22, + "learning_rate": 2.3899457843182664e-05, + "loss": 1.9865, + "step": 18040500 + }, + { + "epoch": 52.22, + "learning_rate": 2.3898734195535386e-05, + "loss": 1.9977, + "step": 18041000 + }, + { + "epoch": 52.22, + "learning_rate": 2.3898010547888108e-05, + "loss": 1.9654, + "step": 18041500 + }, + { + "epoch": 52.22, + "learning_rate": 2.389728690024083e-05, + "loss": 1.9967, + "step": 18042000 + }, + { + "epoch": 52.23, + "learning_rate": 2.389656469988885e-05, + "loss": 1.9846, + "step": 18042500 + }, + { + "epoch": 52.23, + "learning_rate": 2.389584105224157e-05, + "loss": 1.995, + "step": 18043000 + }, + { + "epoch": 52.23, + "learning_rate": 2.3895117404594294e-05, + "loss": 1.974, + "step": 18043500 + }, + { + "epoch": 52.23, + "learning_rate": 2.3894395204242313e-05, + "loss": 2.0012, + "step": 18044000 + }, + { + "epoch": 52.23, + "learning_rate": 2.3893671556595038e-05, + "loss": 1.9901, + "step": 18044500 + }, + { + "epoch": 52.23, + "learning_rate": 2.389294790894776e-05, + "loss": 1.9788, + "step": 18045000 + }, + { + "epoch": 52.23, + "learning_rate": 2.3892224261300483e-05, + "loss": 1.973, + "step": 18045500 + }, + { + "epoch": 52.24, + "learning_rate": 2.3891500613653205e-05, + "loss": 1.9799, + "step": 18046000 + }, + { + "epoch": 52.24, + "learning_rate": 2.3890776966005927e-05, + "loss": 1.9983, + "step": 18046500 + }, + { + "epoch": 52.24, + "learning_rate": 2.3890053318358653e-05, + "loss": 2.0009, + "step": 18047000 + }, + { + "epoch": 52.24, + "learning_rate": 2.3889329670711375e-05, + "loss": 1.9982, + "step": 18047500 + }, + { + "epoch": 52.24, + "learning_rate": 2.38886060230641e-05, + "loss": 1.9898, + "step": 18048000 + }, + { + "epoch": 52.24, + "learning_rate": 2.3887882375416823e-05, + "loss": 1.9818, + "step": 18048500 + }, + { + "epoch": 52.24, + "learning_rate": 2.3887160175064842e-05, + "loss": 1.9552, + "step": 18049000 + }, + { + "epoch": 52.25, + "learning_rate": 2.3886436527417564e-05, + "loss": 1.9755, + "step": 18049500 + }, + { + "epoch": 52.25, + "learning_rate": 2.3885712879770286e-05, + "loss": 1.977, + "step": 18050000 + }, + { + "epoch": 52.25, + "learning_rate": 2.3884990679418305e-05, + "loss": 1.9645, + "step": 18050500 + }, + { + "epoch": 52.25, + "learning_rate": 2.3884267031771028e-05, + "loss": 1.9909, + "step": 18051000 + }, + { + "epoch": 52.25, + "learning_rate": 2.388354338412375e-05, + "loss": 1.9592, + "step": 18051500 + }, + { + "epoch": 52.25, + "learning_rate": 2.3882819736476475e-05, + "loss": 1.9839, + "step": 18052000 + }, + { + "epoch": 52.25, + "learning_rate": 2.3882096088829198e-05, + "loss": 1.9772, + "step": 18052500 + }, + { + "epoch": 52.26, + "learning_rate": 2.388137244118192e-05, + "loss": 1.9695, + "step": 18053000 + }, + { + "epoch": 52.26, + "learning_rate": 2.3880648793534642e-05, + "loss": 1.987, + "step": 18053500 + }, + { + "epoch": 52.26, + "learning_rate": 2.3879925145887368e-05, + "loss": 1.9771, + "step": 18054000 + }, + { + "epoch": 52.26, + "learning_rate": 2.3879202945535383e-05, + "loss": 2.0041, + "step": 18054500 + }, + { + "epoch": 52.26, + "learning_rate": 2.3878480745183402e-05, + "loss": 1.9763, + "step": 18055000 + }, + { + "epoch": 52.26, + "learning_rate": 2.3877757097536128e-05, + "loss": 1.9817, + "step": 18055500 + }, + { + "epoch": 52.26, + "learning_rate": 2.387703344988885e-05, + "loss": 1.9894, + "step": 18056000 + }, + { + "epoch": 52.27, + "learning_rate": 2.3876309802241572e-05, + "loss": 1.97, + "step": 18056500 + }, + { + "epoch": 52.27, + "learning_rate": 2.3875586154594295e-05, + "loss": 1.9891, + "step": 18057000 + }, + { + "epoch": 52.27, + "learning_rate": 2.3874862506947017e-05, + "loss": 2.0176, + "step": 18057500 + }, + { + "epoch": 52.27, + "learning_rate": 2.3874138859299742e-05, + "loss": 1.9646, + "step": 18058000 + }, + { + "epoch": 52.27, + "learning_rate": 2.3873415211652465e-05, + "loss": 1.9566, + "step": 18058500 + }, + { + "epoch": 52.27, + "learning_rate": 2.387269156400519e-05, + "loss": 1.9841, + "step": 18059000 + }, + { + "epoch": 52.27, + "learning_rate": 2.3871969363653206e-05, + "loss": 1.9879, + "step": 18059500 + }, + { + "epoch": 52.28, + "learning_rate": 2.387124571600593e-05, + "loss": 1.9915, + "step": 18060000 + }, + { + "epoch": 52.28, + "learning_rate": 2.3870522068358654e-05, + "loss": 1.9888, + "step": 18060500 + }, + { + "epoch": 52.28, + "learning_rate": 2.3869798420711376e-05, + "loss": 1.997, + "step": 18061000 + }, + { + "epoch": 52.28, + "learning_rate": 2.3869074773064098e-05, + "loss": 1.977, + "step": 18061500 + }, + { + "epoch": 52.28, + "learning_rate": 2.386835112541682e-05, + "loss": 1.9671, + "step": 18062000 + }, + { + "epoch": 52.28, + "learning_rate": 2.3867627477769543e-05, + "loss": 1.9852, + "step": 18062500 + }, + { + "epoch": 52.28, + "learning_rate": 2.3866903830122268e-05, + "loss": 1.9899, + "step": 18063000 + }, + { + "epoch": 52.29, + "learning_rate": 2.3866180182474994e-05, + "loss": 1.9639, + "step": 18063500 + }, + { + "epoch": 52.29, + "learning_rate": 2.3865456534827716e-05, + "loss": 1.982, + "step": 18064000 + }, + { + "epoch": 52.29, + "learning_rate": 2.386473433447573e-05, + "loss": 1.9786, + "step": 18064500 + }, + { + "epoch": 52.29, + "learning_rate": 2.3864010686828457e-05, + "loss": 2.0186, + "step": 18065000 + }, + { + "epoch": 52.29, + "learning_rate": 2.3863288486476473e-05, + "loss": 1.9776, + "step": 18065500 + }, + { + "epoch": 52.29, + "learning_rate": 2.3862564838829195e-05, + "loss": 1.9692, + "step": 18066000 + }, + { + "epoch": 52.3, + "learning_rate": 2.386184119118192e-05, + "loss": 1.999, + "step": 18066500 + }, + { + "epoch": 52.3, + "learning_rate": 2.3861117543534643e-05, + "loss": 1.9687, + "step": 18067000 + }, + { + "epoch": 52.3, + "learning_rate": 2.386039389588737e-05, + "loss": 1.9796, + "step": 18067500 + }, + { + "epoch": 52.3, + "learning_rate": 2.385967024824009e-05, + "loss": 1.9619, + "step": 18068000 + }, + { + "epoch": 52.3, + "learning_rate": 2.3858946600592813e-05, + "loss": 1.985, + "step": 18068500 + }, + { + "epoch": 52.3, + "learning_rate": 2.3858224400240832e-05, + "loss": 1.9723, + "step": 18069000 + }, + { + "epoch": 52.3, + "learning_rate": 2.3857500752593554e-05, + "loss": 1.9798, + "step": 18069500 + }, + { + "epoch": 52.31, + "learning_rate": 2.3856777104946276e-05, + "loss": 1.9675, + "step": 18070000 + }, + { + "epoch": 52.31, + "learning_rate": 2.3856053457299002e-05, + "loss": 2.004, + "step": 18070500 + }, + { + "epoch": 52.31, + "learning_rate": 2.3855329809651724e-05, + "loss": 2.0104, + "step": 18071000 + }, + { + "epoch": 52.31, + "learning_rate": 2.3854606162004446e-05, + "loss": 1.9757, + "step": 18071500 + }, + { + "epoch": 52.31, + "learning_rate": 2.385388251435717e-05, + "loss": 1.982, + "step": 18072000 + }, + { + "epoch": 52.31, + "learning_rate": 2.3853160314005188e-05, + "loss": 1.971, + "step": 18072500 + }, + { + "epoch": 52.31, + "learning_rate": 2.385243666635791e-05, + "loss": 1.963, + "step": 18073000 + }, + { + "epoch": 52.32, + "learning_rate": 2.3851713018710632e-05, + "loss": 1.9797, + "step": 18073500 + }, + { + "epoch": 52.32, + "learning_rate": 2.3850989371063358e-05, + "loss": 1.9998, + "step": 18074000 + }, + { + "epoch": 52.32, + "learning_rate": 2.3850265723416083e-05, + "loss": 1.9903, + "step": 18074500 + }, + { + "epoch": 52.32, + "learning_rate": 2.38495435230641e-05, + "loss": 2.0157, + "step": 18075000 + }, + { + "epoch": 52.32, + "learning_rate": 2.384881987541682e-05, + "loss": 1.9821, + "step": 18075500 + }, + { + "epoch": 52.32, + "learning_rate": 2.3848096227769547e-05, + "loss": 1.9624, + "step": 18076000 + }, + { + "epoch": 52.32, + "learning_rate": 2.384737258012227e-05, + "loss": 1.965, + "step": 18076500 + }, + { + "epoch": 52.33, + "learning_rate": 2.3846650379770285e-05, + "loss": 1.97, + "step": 18077000 + }, + { + "epoch": 52.33, + "learning_rate": 2.3845926732123007e-05, + "loss": 1.9774, + "step": 18077500 + }, + { + "epoch": 52.33, + "learning_rate": 2.3845203084475732e-05, + "loss": 2.0155, + "step": 18078000 + }, + { + "epoch": 52.33, + "learning_rate": 2.3844479436828458e-05, + "loss": 1.9798, + "step": 18078500 + }, + { + "epoch": 52.33, + "learning_rate": 2.384375578918118e-05, + "loss": 1.974, + "step": 18079000 + }, + { + "epoch": 52.33, + "learning_rate": 2.3843032141533902e-05, + "loss": 1.9777, + "step": 18079500 + }, + { + "epoch": 52.33, + "learning_rate": 2.3842308493886625e-05, + "loss": 1.9685, + "step": 18080000 + }, + { + "epoch": 52.34, + "learning_rate": 2.3841584846239347e-05, + "loss": 1.9861, + "step": 18080500 + }, + { + "epoch": 52.34, + "learning_rate": 2.3840861198592073e-05, + "loss": 1.9942, + "step": 18081000 + }, + { + "epoch": 52.34, + "learning_rate": 2.384013899824009e-05, + "loss": 1.9685, + "step": 18081500 + }, + { + "epoch": 52.34, + "learning_rate": 2.383941679788811e-05, + "loss": 1.9631, + "step": 18082000 + }, + { + "epoch": 52.34, + "learning_rate": 2.3838693150240833e-05, + "loss": 1.9576, + "step": 18082500 + }, + { + "epoch": 52.34, + "learning_rate": 2.3837970949888848e-05, + "loss": 1.991, + "step": 18083000 + }, + { + "epoch": 52.34, + "learning_rate": 2.383724730224157e-05, + "loss": 1.9437, + "step": 18083500 + }, + { + "epoch": 52.35, + "learning_rate": 2.3836523654594296e-05, + "loss": 1.9881, + "step": 18084000 + }, + { + "epoch": 52.35, + "learning_rate": 2.383580000694702e-05, + "loss": 1.9571, + "step": 18084500 + }, + { + "epoch": 52.35, + "learning_rate": 2.383507635929974e-05, + "loss": 1.9763, + "step": 18085000 + }, + { + "epoch": 52.35, + "learning_rate": 2.3834352711652466e-05, + "loss": 1.9689, + "step": 18085500 + }, + { + "epoch": 52.35, + "learning_rate": 2.383362906400519e-05, + "loss": 1.9709, + "step": 18086000 + }, + { + "epoch": 52.35, + "learning_rate": 2.383290541635791e-05, + "loss": 2.0078, + "step": 18086500 + }, + { + "epoch": 52.35, + "learning_rate": 2.3832181768710636e-05, + "loss": 1.9897, + "step": 18087000 + }, + { + "epoch": 52.36, + "learning_rate": 2.383145812106336e-05, + "loss": 1.9485, + "step": 18087500 + }, + { + "epoch": 52.36, + "learning_rate": 2.383073447341608e-05, + "loss": 1.9912, + "step": 18088000 + }, + { + "epoch": 52.36, + "learning_rate": 2.3830010825768803e-05, + "loss": 1.9617, + "step": 18088500 + }, + { + "epoch": 52.36, + "learning_rate": 2.382928717812153e-05, + "loss": 1.9687, + "step": 18089000 + }, + { + "epoch": 52.36, + "learning_rate": 2.382856353047425e-05, + "loss": 1.9946, + "step": 18089500 + }, + { + "epoch": 52.36, + "learning_rate": 2.3827842777417563e-05, + "loss": 1.9755, + "step": 18090000 + }, + { + "epoch": 52.36, + "learning_rate": 2.3827120577065582e-05, + "loss": 1.9881, + "step": 18090500 + }, + { + "epoch": 52.37, + "learning_rate": 2.3826396929418304e-05, + "loss": 1.9606, + "step": 18091000 + }, + { + "epoch": 52.37, + "learning_rate": 2.3825673281771027e-05, + "loss": 1.976, + "step": 18091500 + }, + { + "epoch": 52.37, + "learning_rate": 2.382494963412375e-05, + "loss": 1.981, + "step": 18092000 + }, + { + "epoch": 52.37, + "learning_rate": 2.3824225986476474e-05, + "loss": 1.9838, + "step": 18092500 + }, + { + "epoch": 52.37, + "learning_rate": 2.3823503786124493e-05, + "loss": 2.0, + "step": 18093000 + }, + { + "epoch": 52.37, + "learning_rate": 2.3822780138477216e-05, + "loss": 1.9737, + "step": 18093500 + }, + { + "epoch": 52.37, + "learning_rate": 2.3822056490829938e-05, + "loss": 1.9739, + "step": 18094000 + }, + { + "epoch": 52.38, + "learning_rate": 2.382133284318266e-05, + "loss": 1.9733, + "step": 18094500 + }, + { + "epoch": 52.38, + "learning_rate": 2.3820609195535386e-05, + "loss": 1.9953, + "step": 18095000 + }, + { + "epoch": 52.38, + "learning_rate": 2.3819885547888108e-05, + "loss": 1.9883, + "step": 18095500 + }, + { + "epoch": 52.38, + "learning_rate": 2.381916190024083e-05, + "loss": 1.9996, + "step": 18096000 + }, + { + "epoch": 52.38, + "learning_rate": 2.3818438252593556e-05, + "loss": 1.9924, + "step": 18096500 + }, + { + "epoch": 52.38, + "learning_rate": 2.3817714604946278e-05, + "loss": 2.0069, + "step": 18097000 + }, + { + "epoch": 52.38, + "learning_rate": 2.3816992404594297e-05, + "loss": 1.9831, + "step": 18097500 + }, + { + "epoch": 52.39, + "learning_rate": 2.381626875694702e-05, + "loss": 1.9977, + "step": 18098000 + }, + { + "epoch": 52.39, + "learning_rate": 2.381554510929974e-05, + "loss": 2.0033, + "step": 18098500 + }, + { + "epoch": 52.39, + "learning_rate": 2.3814821461652464e-05, + "loss": 2.0067, + "step": 18099000 + }, + { + "epoch": 52.39, + "learning_rate": 2.3814097814005186e-05, + "loss": 1.976, + "step": 18099500 + }, + { + "epoch": 52.39, + "learning_rate": 2.3813375613653205e-05, + "loss": 1.9901, + "step": 18100000 + }, + { + "epoch": 52.39, + "learning_rate": 2.381265196600593e-05, + "loss": 1.9907, + "step": 18100500 + }, + { + "epoch": 52.39, + "learning_rate": 2.3811928318358653e-05, + "loss": 1.9774, + "step": 18101000 + }, + { + "epoch": 52.4, + "learning_rate": 2.3811204670711375e-05, + "loss": 1.9582, + "step": 18101500 + }, + { + "epoch": 52.4, + "learning_rate": 2.38104810230641e-05, + "loss": 1.9709, + "step": 18102000 + }, + { + "epoch": 52.4, + "learning_rate": 2.3809757375416823e-05, + "loss": 1.9551, + "step": 18102500 + }, + { + "epoch": 52.4, + "learning_rate": 2.3809033727769545e-05, + "loss": 1.9756, + "step": 18103000 + }, + { + "epoch": 52.4, + "learning_rate": 2.380831152741756e-05, + "loss": 1.9859, + "step": 18103500 + }, + { + "epoch": 52.4, + "learning_rate": 2.3807587879770286e-05, + "loss": 2.0019, + "step": 18104000 + }, + { + "epoch": 52.41, + "learning_rate": 2.3806864232123012e-05, + "loss": 1.9959, + "step": 18104500 + }, + { + "epoch": 52.41, + "learning_rate": 2.3806140584475734e-05, + "loss": 2.0052, + "step": 18105000 + }, + { + "epoch": 52.41, + "learning_rate": 2.380541838412375e-05, + "loss": 1.9965, + "step": 18105500 + }, + { + "epoch": 52.41, + "learning_rate": 2.3804694736476475e-05, + "loss": 1.9705, + "step": 18106000 + }, + { + "epoch": 52.41, + "learning_rate": 2.3803971088829197e-05, + "loss": 1.9775, + "step": 18106500 + }, + { + "epoch": 52.41, + "learning_rate": 2.380324744118192e-05, + "loss": 1.9821, + "step": 18107000 + }, + { + "epoch": 52.41, + "learning_rate": 2.3802523793534642e-05, + "loss": 1.973, + "step": 18107500 + }, + { + "epoch": 52.42, + "learning_rate": 2.3801800145887367e-05, + "loss": 1.9659, + "step": 18108000 + }, + { + "epoch": 52.42, + "learning_rate": 2.380107649824009e-05, + "loss": 1.9809, + "step": 18108500 + }, + { + "epoch": 52.42, + "learning_rate": 2.3800352850592812e-05, + "loss": 1.9954, + "step": 18109000 + }, + { + "epoch": 52.42, + "learning_rate": 2.3799629202945538e-05, + "loss": 1.9856, + "step": 18109500 + }, + { + "epoch": 52.42, + "learning_rate": 2.379890555529826e-05, + "loss": 1.9658, + "step": 18110000 + }, + { + "epoch": 52.42, + "learning_rate": 2.3798183354946275e-05, + "loss": 1.9811, + "step": 18110500 + }, + { + "epoch": 52.42, + "learning_rate": 2.3797459707299e-05, + "loss": 1.9779, + "step": 18111000 + }, + { + "epoch": 52.43, + "learning_rate": 2.3796737506947017e-05, + "loss": 1.9919, + "step": 18111500 + }, + { + "epoch": 52.43, + "learning_rate": 2.3796013859299742e-05, + "loss": 1.9956, + "step": 18112000 + }, + { + "epoch": 52.43, + "learning_rate": 2.379529165894776e-05, + "loss": 1.9872, + "step": 18112500 + }, + { + "epoch": 52.43, + "learning_rate": 2.3794568011300483e-05, + "loss": 1.9893, + "step": 18113000 + }, + { + "epoch": 52.43, + "learning_rate": 2.3793844363653206e-05, + "loss": 2.0038, + "step": 18113500 + }, + { + "epoch": 52.43, + "learning_rate": 2.3793120716005928e-05, + "loss": 1.9872, + "step": 18114000 + }, + { + "epoch": 52.43, + "learning_rate": 2.379239706835865e-05, + "loss": 1.9656, + "step": 18114500 + }, + { + "epoch": 52.44, + "learning_rate": 2.3791673420711376e-05, + "loss": 1.9759, + "step": 18115000 + }, + { + "epoch": 52.44, + "learning_rate": 2.37909497730641e-05, + "loss": 2.0076, + "step": 18115500 + }, + { + "epoch": 52.44, + "learning_rate": 2.3790226125416824e-05, + "loss": 1.99, + "step": 18116000 + }, + { + "epoch": 52.44, + "learning_rate": 2.3789502477769546e-05, + "loss": 1.9873, + "step": 18116500 + }, + { + "epoch": 52.44, + "learning_rate": 2.3788778830122268e-05, + "loss": 1.9654, + "step": 18117000 + }, + { + "epoch": 52.44, + "learning_rate": 2.378805518247499e-05, + "loss": 1.9775, + "step": 18117500 + }, + { + "epoch": 52.44, + "learning_rate": 2.378733298212301e-05, + "loss": 1.9913, + "step": 18118000 + }, + { + "epoch": 52.45, + "learning_rate": 2.378660933447573e-05, + "loss": 1.9865, + "step": 18118500 + }, + { + "epoch": 52.45, + "learning_rate": 2.378588713412375e-05, + "loss": 2.0073, + "step": 18119000 + }, + { + "epoch": 52.45, + "learning_rate": 2.3785163486476476e-05, + "loss": 1.9688, + "step": 18119500 + }, + { + "epoch": 52.45, + "learning_rate": 2.3784439838829198e-05, + "loss": 1.9861, + "step": 18120000 + }, + { + "epoch": 52.45, + "learning_rate": 2.378371619118192e-05, + "loss": 1.9777, + "step": 18120500 + }, + { + "epoch": 52.45, + "learning_rate": 2.3782992543534643e-05, + "loss": 1.9892, + "step": 18121000 + }, + { + "epoch": 52.45, + "learning_rate": 2.3782268895887365e-05, + "loss": 2.0024, + "step": 18121500 + }, + { + "epoch": 52.46, + "learning_rate": 2.378154524824009e-05, + "loss": 1.9682, + "step": 18122000 + }, + { + "epoch": 52.46, + "learning_rate": 2.3780821600592813e-05, + "loss": 2.0042, + "step": 18122500 + }, + { + "epoch": 52.46, + "learning_rate": 2.3780099400240832e-05, + "loss": 1.9976, + "step": 18123000 + }, + { + "epoch": 52.46, + "learning_rate": 2.3779375752593554e-05, + "loss": 1.9853, + "step": 18123500 + }, + { + "epoch": 52.46, + "learning_rate": 2.377865210494628e-05, + "loss": 1.9827, + "step": 18124000 + }, + { + "epoch": 52.46, + "learning_rate": 2.3777928457299002e-05, + "loss": 1.9653, + "step": 18124500 + }, + { + "epoch": 52.46, + "learning_rate": 2.3777206256947017e-05, + "loss": 1.9998, + "step": 18125000 + }, + { + "epoch": 52.47, + "learning_rate": 2.377648260929974e-05, + "loss": 1.9868, + "step": 18125500 + }, + { + "epoch": 52.47, + "learning_rate": 2.3775758961652465e-05, + "loss": 1.9881, + "step": 18126000 + }, + { + "epoch": 52.47, + "learning_rate": 2.377503531400519e-05, + "loss": 1.9896, + "step": 18126500 + }, + { + "epoch": 52.47, + "learning_rate": 2.3774311666357913e-05, + "loss": 2.0116, + "step": 18127000 + }, + { + "epoch": 52.47, + "learning_rate": 2.3773588018710635e-05, + "loss": 1.9975, + "step": 18127500 + }, + { + "epoch": 52.47, + "learning_rate": 2.3772864371063358e-05, + "loss": 1.9742, + "step": 18128000 + }, + { + "epoch": 52.47, + "learning_rate": 2.377214072341608e-05, + "loss": 1.975, + "step": 18128500 + }, + { + "epoch": 52.48, + "learning_rate": 2.3771417075768805e-05, + "loss": 1.9949, + "step": 18129000 + }, + { + "epoch": 52.48, + "learning_rate": 2.3770693428121528e-05, + "loss": 1.9546, + "step": 18129500 + }, + { + "epoch": 52.48, + "learning_rate": 2.3769971227769543e-05, + "loss": 1.9932, + "step": 18130000 + }, + { + "epoch": 52.48, + "learning_rate": 2.376924758012227e-05, + "loss": 1.983, + "step": 18130500 + }, + { + "epoch": 52.48, + "learning_rate": 2.376852393247499e-05, + "loss": 1.9796, + "step": 18131000 + }, + { + "epoch": 52.48, + "learning_rate": 2.3767800284827717e-05, + "loss": 1.9496, + "step": 18131500 + }, + { + "epoch": 52.48, + "learning_rate": 2.376707663718044e-05, + "loss": 2.002, + "step": 18132000 + }, + { + "epoch": 52.49, + "learning_rate": 2.3766354436828454e-05, + "loss": 1.975, + "step": 18132500 + }, + { + "epoch": 52.49, + "learning_rate": 2.376563078918118e-05, + "loss": 2.0127, + "step": 18133000 + }, + { + "epoch": 52.49, + "learning_rate": 2.3764907141533902e-05, + "loss": 1.9646, + "step": 18133500 + }, + { + "epoch": 52.49, + "learning_rate": 2.3764183493886628e-05, + "loss": 2.0027, + "step": 18134000 + }, + { + "epoch": 52.49, + "learning_rate": 2.376345984623935e-05, + "loss": 1.9769, + "step": 18134500 + }, + { + "epoch": 52.49, + "learning_rate": 2.3762739093182662e-05, + "loss": 1.9946, + "step": 18135000 + }, + { + "epoch": 52.49, + "learning_rate": 2.3762015445535385e-05, + "loss": 2.0056, + "step": 18135500 + }, + { + "epoch": 52.5, + "learning_rate": 2.3761291797888107e-05, + "loss": 1.9789, + "step": 18136000 + }, + { + "epoch": 52.5, + "learning_rate": 2.376056815024083e-05, + "loss": 1.9758, + "step": 18136500 + }, + { + "epoch": 52.5, + "learning_rate": 2.3759845949888848e-05, + "loss": 2.0221, + "step": 18137000 + }, + { + "epoch": 52.5, + "learning_rate": 2.375912230224157e-05, + "loss": 1.9856, + "step": 18137500 + }, + { + "epoch": 52.5, + "learning_rate": 2.3758398654594296e-05, + "loss": 1.9954, + "step": 18138000 + }, + { + "epoch": 52.5, + "learning_rate": 2.3757675006947018e-05, + "loss": 1.9747, + "step": 18138500 + }, + { + "epoch": 52.5, + "learning_rate": 2.3756951359299744e-05, + "loss": 1.9795, + "step": 18139000 + }, + { + "epoch": 52.51, + "learning_rate": 2.3756227711652466e-05, + "loss": 2.0, + "step": 18139500 + }, + { + "epoch": 52.51, + "learning_rate": 2.3755504064005188e-05, + "loss": 1.9969, + "step": 18140000 + }, + { + "epoch": 52.51, + "learning_rate": 2.375478041635791e-05, + "loss": 1.959, + "step": 18140500 + }, + { + "epoch": 52.51, + "learning_rate": 2.3754056768710633e-05, + "loss": 1.9748, + "step": 18141000 + }, + { + "epoch": 52.51, + "learning_rate": 2.375333312106336e-05, + "loss": 1.9973, + "step": 18141500 + }, + { + "epoch": 52.51, + "learning_rate": 2.3752610920711377e-05, + "loss": 2.0078, + "step": 18142000 + }, + { + "epoch": 52.52, + "learning_rate": 2.37518872730641e-05, + "loss": 1.9835, + "step": 18142500 + }, + { + "epoch": 52.52, + "learning_rate": 2.3751163625416822e-05, + "loss": 2.001, + "step": 18143000 + }, + { + "epoch": 52.52, + "learning_rate": 2.3750439977769544e-05, + "loss": 1.9884, + "step": 18143500 + }, + { + "epoch": 52.52, + "learning_rate": 2.374971633012227e-05, + "loss": 1.9853, + "step": 18144000 + }, + { + "epoch": 52.52, + "learning_rate": 2.3748992682474992e-05, + "loss": 1.975, + "step": 18144500 + }, + { + "epoch": 52.52, + "learning_rate": 2.3748269034827717e-05, + "loss": 1.9599, + "step": 18145000 + }, + { + "epoch": 52.52, + "learning_rate": 2.374754538718044e-05, + "loss": 1.9782, + "step": 18145500 + }, + { + "epoch": 52.53, + "learning_rate": 2.3746821739533162e-05, + "loss": 1.9505, + "step": 18146000 + }, + { + "epoch": 52.53, + "learning_rate": 2.374609953918118e-05, + "loss": 1.9755, + "step": 18146500 + }, + { + "epoch": 52.53, + "learning_rate": 2.3745375891533903e-05, + "loss": 1.9865, + "step": 18147000 + }, + { + "epoch": 52.53, + "learning_rate": 2.374465369118192e-05, + "loss": 1.9845, + "step": 18147500 + }, + { + "epoch": 52.53, + "learning_rate": 2.3743930043534644e-05, + "loss": 2.0075, + "step": 18148000 + }, + { + "epoch": 52.53, + "learning_rate": 2.3743206395887366e-05, + "loss": 2.0044, + "step": 18148500 + }, + { + "epoch": 52.53, + "learning_rate": 2.3742482748240092e-05, + "loss": 1.9862, + "step": 18149000 + }, + { + "epoch": 52.54, + "learning_rate": 2.3741759100592814e-05, + "loss": 1.9829, + "step": 18149500 + }, + { + "epoch": 52.54, + "learning_rate": 2.3741035452945537e-05, + "loss": 1.9721, + "step": 18150000 + }, + { + "epoch": 52.54, + "learning_rate": 2.374031180529826e-05, + "loss": 1.9807, + "step": 18150500 + }, + { + "epoch": 52.54, + "learning_rate": 2.373958815765098e-05, + "loss": 1.9901, + "step": 18151000 + }, + { + "epoch": 52.54, + "learning_rate": 2.3738864510003707e-05, + "loss": 2.0054, + "step": 18151500 + }, + { + "epoch": 52.54, + "learning_rate": 2.3738142309651722e-05, + "loss": 1.9619, + "step": 18152000 + }, + { + "epoch": 52.54, + "learning_rate": 2.3737418662004444e-05, + "loss": 1.9912, + "step": 18152500 + }, + { + "epoch": 52.55, + "learning_rate": 2.373669501435717e-05, + "loss": 1.9777, + "step": 18153000 + }, + { + "epoch": 52.55, + "learning_rate": 2.373597281400519e-05, + "loss": 1.9983, + "step": 18153500 + }, + { + "epoch": 52.55, + "learning_rate": 2.3735250613653208e-05, + "loss": 1.9925, + "step": 18154000 + }, + { + "epoch": 52.55, + "learning_rate": 2.373452696600593e-05, + "loss": 1.9858, + "step": 18154500 + }, + { + "epoch": 52.55, + "learning_rate": 2.3733803318358652e-05, + "loss": 1.9591, + "step": 18155000 + }, + { + "epoch": 52.55, + "learning_rate": 2.3733079670711375e-05, + "loss": 1.9848, + "step": 18155500 + }, + { + "epoch": 52.55, + "learning_rate": 2.3732357470359394e-05, + "loss": 1.9797, + "step": 18156000 + }, + { + "epoch": 52.56, + "learning_rate": 2.3731633822712116e-05, + "loss": 1.9873, + "step": 18156500 + }, + { + "epoch": 52.56, + "learning_rate": 2.373091017506484e-05, + "loss": 2.0087, + "step": 18157000 + }, + { + "epoch": 52.56, + "learning_rate": 2.3730186527417564e-05, + "loss": 1.9949, + "step": 18157500 + }, + { + "epoch": 52.56, + "learning_rate": 2.3729462879770286e-05, + "loss": 2.0076, + "step": 18158000 + }, + { + "epoch": 52.56, + "learning_rate": 2.3728740679418305e-05, + "loss": 1.983, + "step": 18158500 + }, + { + "epoch": 52.56, + "learning_rate": 2.3728017031771027e-05, + "loss": 1.9864, + "step": 18159000 + }, + { + "epoch": 52.56, + "learning_rate": 2.372729338412375e-05, + "loss": 1.9743, + "step": 18159500 + }, + { + "epoch": 52.57, + "learning_rate": 2.372656973647647e-05, + "loss": 1.9651, + "step": 18160000 + }, + { + "epoch": 52.57, + "learning_rate": 2.3725846088829197e-05, + "loss": 1.9737, + "step": 18160500 + }, + { + "epoch": 52.57, + "learning_rate": 2.3725122441181923e-05, + "loss": 1.9794, + "step": 18161000 + }, + { + "epoch": 52.57, + "learning_rate": 2.3724398793534645e-05, + "loss": 1.9985, + "step": 18161500 + }, + { + "epoch": 52.57, + "learning_rate": 2.3723675145887367e-05, + "loss": 2.0152, + "step": 18162000 + }, + { + "epoch": 52.57, + "learning_rate": 2.372295149824009e-05, + "loss": 1.9824, + "step": 18162500 + }, + { + "epoch": 52.57, + "learning_rate": 2.372222929788811e-05, + "loss": 1.9772, + "step": 18163000 + }, + { + "epoch": 52.58, + "learning_rate": 2.372150565024083e-05, + "loss": 1.986, + "step": 18163500 + }, + { + "epoch": 52.58, + "learning_rate": 2.3720782002593556e-05, + "loss": 1.9662, + "step": 18164000 + }, + { + "epoch": 52.58, + "learning_rate": 2.372005835494628e-05, + "loss": 1.9913, + "step": 18164500 + }, + { + "epoch": 52.58, + "learning_rate": 2.3719334707299e-05, + "loss": 1.9947, + "step": 18165000 + }, + { + "epoch": 52.58, + "learning_rate": 2.3718611059651723e-05, + "loss": 2.0097, + "step": 18165500 + }, + { + "epoch": 52.58, + "learning_rate": 2.371788741200445e-05, + "loss": 2.0117, + "step": 18166000 + }, + { + "epoch": 52.58, + "learning_rate": 2.3717165211652464e-05, + "loss": 1.9698, + "step": 18166500 + }, + { + "epoch": 52.59, + "learning_rate": 2.3716441564005186e-05, + "loss": 1.9988, + "step": 18167000 + }, + { + "epoch": 52.59, + "learning_rate": 2.37157208109485e-05, + "loss": 1.9892, + "step": 18167500 + }, + { + "epoch": 52.59, + "learning_rate": 2.371499716330122e-05, + "loss": 1.9689, + "step": 18168000 + }, + { + "epoch": 52.59, + "learning_rate": 2.3714276410244537e-05, + "loss": 1.9737, + "step": 18168500 + }, + { + "epoch": 52.59, + "learning_rate": 2.371355276259726e-05, + "loss": 1.9523, + "step": 18169000 + }, + { + "epoch": 52.59, + "learning_rate": 2.3712829114949984e-05, + "loss": 1.9994, + "step": 18169500 + }, + { + "epoch": 52.59, + "learning_rate": 2.3712105467302707e-05, + "loss": 1.9846, + "step": 18170000 + }, + { + "epoch": 52.6, + "learning_rate": 2.371138181965543e-05, + "loss": 1.9861, + "step": 18170500 + }, + { + "epoch": 52.6, + "learning_rate": 2.371065817200815e-05, + "loss": 1.9867, + "step": 18171000 + }, + { + "epoch": 52.6, + "learning_rate": 2.3709934524360873e-05, + "loss": 1.978, + "step": 18171500 + }, + { + "epoch": 52.6, + "learning_rate": 2.37092108767136e-05, + "loss": 1.9812, + "step": 18172000 + }, + { + "epoch": 52.6, + "learning_rate": 2.370848722906632e-05, + "loss": 1.9888, + "step": 18172500 + }, + { + "epoch": 52.6, + "learning_rate": 2.3707763581419047e-05, + "loss": 1.9854, + "step": 18173000 + }, + { + "epoch": 52.6, + "learning_rate": 2.370703993377177e-05, + "loss": 2.007, + "step": 18173500 + }, + { + "epoch": 52.61, + "learning_rate": 2.370631628612449e-05, + "loss": 1.9832, + "step": 18174000 + }, + { + "epoch": 52.61, + "learning_rate": 2.3705592638477214e-05, + "loss": 2.0097, + "step": 18174500 + }, + { + "epoch": 52.61, + "learning_rate": 2.3704868990829936e-05, + "loss": 1.9766, + "step": 18175000 + }, + { + "epoch": 52.61, + "learning_rate": 2.370414534318266e-05, + "loss": 1.9771, + "step": 18175500 + }, + { + "epoch": 52.61, + "learning_rate": 2.3703421695535387e-05, + "loss": 1.9813, + "step": 18176000 + }, + { + "epoch": 52.61, + "learning_rate": 2.370269804788811e-05, + "loss": 1.9941, + "step": 18176500 + }, + { + "epoch": 52.61, + "learning_rate": 2.370197440024083e-05, + "loss": 1.9996, + "step": 18177000 + }, + { + "epoch": 52.62, + "learning_rate": 2.3701250752593554e-05, + "loss": 1.9735, + "step": 18177500 + }, + { + "epoch": 52.62, + "learning_rate": 2.3700527104946276e-05, + "loss": 1.9971, + "step": 18178000 + }, + { + "epoch": 52.62, + "learning_rate": 2.3699804904594295e-05, + "loss": 1.9805, + "step": 18178500 + }, + { + "epoch": 52.62, + "learning_rate": 2.3699081256947017e-05, + "loss": 1.9661, + "step": 18179000 + }, + { + "epoch": 52.62, + "learning_rate": 2.3698357609299743e-05, + "loss": 1.9794, + "step": 18179500 + }, + { + "epoch": 52.62, + "learning_rate": 2.3697633961652465e-05, + "loss": 1.9762, + "step": 18180000 + }, + { + "epoch": 52.63, + "learning_rate": 2.3696910314005187e-05, + "loss": 1.9873, + "step": 18180500 + }, + { + "epoch": 52.63, + "learning_rate": 2.3696188113653206e-05, + "loss": 1.9888, + "step": 18181000 + }, + { + "epoch": 52.63, + "learning_rate": 2.3695465913301225e-05, + "loss": 1.9833, + "step": 18181500 + }, + { + "epoch": 52.63, + "learning_rate": 2.3694742265653947e-05, + "loss": 2.005, + "step": 18182000 + }, + { + "epoch": 52.63, + "learning_rate": 2.369401861800667e-05, + "loss": 1.9956, + "step": 18182500 + }, + { + "epoch": 52.63, + "learning_rate": 2.3693294970359395e-05, + "loss": 1.9885, + "step": 18183000 + }, + { + "epoch": 52.63, + "learning_rate": 2.3692571322712117e-05, + "loss": 1.9835, + "step": 18183500 + }, + { + "epoch": 52.64, + "learning_rate": 2.369184767506484e-05, + "loss": 1.9963, + "step": 18184000 + }, + { + "epoch": 52.64, + "learning_rate": 2.3691124027417562e-05, + "loss": 1.9772, + "step": 18184500 + }, + { + "epoch": 52.64, + "learning_rate": 2.3690400379770288e-05, + "loss": 1.9938, + "step": 18185000 + }, + { + "epoch": 52.64, + "learning_rate": 2.368967673212301e-05, + "loss": 1.98, + "step": 18185500 + }, + { + "epoch": 52.64, + "learning_rate": 2.3688953084475732e-05, + "loss": 1.9914, + "step": 18186000 + }, + { + "epoch": 52.64, + "learning_rate": 2.3688229436828458e-05, + "loss": 1.9881, + "step": 18186500 + }, + { + "epoch": 52.64, + "learning_rate": 2.368750578918118e-05, + "loss": 1.9795, + "step": 18187000 + }, + { + "epoch": 52.65, + "learning_rate": 2.3686782141533902e-05, + "loss": 1.9757, + "step": 18187500 + }, + { + "epoch": 52.65, + "learning_rate": 2.3686058493886624e-05, + "loss": 1.9994, + "step": 18188000 + }, + { + "epoch": 52.65, + "learning_rate": 2.3685336293534643e-05, + "loss": 1.9695, + "step": 18188500 + }, + { + "epoch": 52.65, + "learning_rate": 2.3684612645887365e-05, + "loss": 1.991, + "step": 18189000 + }, + { + "epoch": 52.65, + "learning_rate": 2.3683888998240088e-05, + "loss": 2.0035, + "step": 18189500 + }, + { + "epoch": 52.65, + "learning_rate": 2.3683165350592813e-05, + "loss": 2.0033, + "step": 18190000 + }, + { + "epoch": 52.65, + "learning_rate": 2.3682443150240832e-05, + "loss": 2.0015, + "step": 18190500 + }, + { + "epoch": 52.66, + "learning_rate": 2.3681719502593555e-05, + "loss": 1.9803, + "step": 18191000 + }, + { + "epoch": 52.66, + "learning_rate": 2.3680995854946277e-05, + "loss": 1.9924, + "step": 18191500 + }, + { + "epoch": 52.66, + "learning_rate": 2.3680272207299002e-05, + "loss": 1.9948, + "step": 18192000 + }, + { + "epoch": 52.66, + "learning_rate": 2.3679550006947018e-05, + "loss": 1.989, + "step": 18192500 + }, + { + "epoch": 52.66, + "learning_rate": 2.367882635929974e-05, + "loss": 1.9873, + "step": 18193000 + }, + { + "epoch": 52.66, + "learning_rate": 2.3678102711652462e-05, + "loss": 1.9913, + "step": 18193500 + }, + { + "epoch": 52.66, + "learning_rate": 2.3677379064005188e-05, + "loss": 1.9914, + "step": 18194000 + }, + { + "epoch": 52.67, + "learning_rate": 2.3676655416357914e-05, + "loss": 1.972, + "step": 18194500 + }, + { + "epoch": 52.67, + "learning_rate": 2.3675931768710636e-05, + "loss": 1.9465, + "step": 18195000 + }, + { + "epoch": 52.67, + "learning_rate": 2.3675208121063358e-05, + "loss": 1.9882, + "step": 18195500 + }, + { + "epoch": 52.67, + "learning_rate": 2.3674485920711377e-05, + "loss": 1.993, + "step": 18196000 + }, + { + "epoch": 52.67, + "learning_rate": 2.36737622730641e-05, + "loss": 1.9943, + "step": 18196500 + }, + { + "epoch": 52.67, + "learning_rate": 2.367303862541682e-05, + "loss": 1.9831, + "step": 18197000 + }, + { + "epoch": 52.67, + "learning_rate": 2.3672314977769544e-05, + "loss": 1.965, + "step": 18197500 + }, + { + "epoch": 52.68, + "learning_rate": 2.367159133012227e-05, + "loss": 1.977, + "step": 18198000 + }, + { + "epoch": 52.68, + "learning_rate": 2.367086768247499e-05, + "loss": 2.003, + "step": 18198500 + }, + { + "epoch": 52.68, + "learning_rate": 2.3670144034827714e-05, + "loss": 1.992, + "step": 18199000 + }, + { + "epoch": 52.68, + "learning_rate": 2.366942038718044e-05, + "loss": 1.9695, + "step": 18199500 + }, + { + "epoch": 52.68, + "learning_rate": 2.3668698186828455e-05, + "loss": 1.9952, + "step": 18200000 + }, + { + "epoch": 52.68, + "learning_rate": 2.3667975986476474e-05, + "loss": 1.984, + "step": 18200500 + }, + { + "epoch": 52.68, + "learning_rate": 2.3667252338829196e-05, + "loss": 2.0022, + "step": 18201000 + }, + { + "epoch": 52.69, + "learning_rate": 2.3666528691181922e-05, + "loss": 1.9897, + "step": 18201500 + }, + { + "epoch": 52.69, + "learning_rate": 2.3665805043534644e-05, + "loss": 1.9922, + "step": 18202000 + }, + { + "epoch": 52.69, + "learning_rate": 2.3665081395887366e-05, + "loss": 1.9758, + "step": 18202500 + }, + { + "epoch": 52.69, + "learning_rate": 2.3664359195535385e-05, + "loss": 1.9796, + "step": 18203000 + }, + { + "epoch": 52.69, + "learning_rate": 2.3663635547888107e-05, + "loss": 1.9809, + "step": 18203500 + }, + { + "epoch": 52.69, + "learning_rate": 2.366291190024083e-05, + "loss": 1.9912, + "step": 18204000 + }, + { + "epoch": 52.69, + "learning_rate": 2.3662188252593552e-05, + "loss": 1.9762, + "step": 18204500 + }, + { + "epoch": 52.7, + "learning_rate": 2.3661464604946278e-05, + "loss": 1.9723, + "step": 18205000 + }, + { + "epoch": 52.7, + "learning_rate": 2.3660740957299003e-05, + "loss": 1.9849, + "step": 18205500 + }, + { + "epoch": 52.7, + "learning_rate": 2.3660017309651725e-05, + "loss": 1.9894, + "step": 18206000 + }, + { + "epoch": 52.7, + "learning_rate": 2.3659293662004448e-05, + "loss": 1.9849, + "step": 18206500 + }, + { + "epoch": 52.7, + "learning_rate": 2.365857001435717e-05, + "loss": 1.9855, + "step": 18207000 + }, + { + "epoch": 52.7, + "learning_rate": 2.3657846366709892e-05, + "loss": 2.0027, + "step": 18207500 + }, + { + "epoch": 52.7, + "learning_rate": 2.365712416635791e-05, + "loss": 1.9922, + "step": 18208000 + }, + { + "epoch": 52.71, + "learning_rate": 2.3656400518710633e-05, + "loss": 1.977, + "step": 18208500 + }, + { + "epoch": 52.71, + "learning_rate": 2.365567687106336e-05, + "loss": 2.0176, + "step": 18209000 + }, + { + "epoch": 52.71, + "learning_rate": 2.365495322341608e-05, + "loss": 1.9971, + "step": 18209500 + }, + { + "epoch": 52.71, + "learning_rate": 2.3654229575768803e-05, + "loss": 1.9646, + "step": 18210000 + }, + { + "epoch": 52.71, + "learning_rate": 2.365350592812153e-05, + "loss": 1.9931, + "step": 18210500 + }, + { + "epoch": 52.71, + "learning_rate": 2.365278228047425e-05, + "loss": 1.9599, + "step": 18211000 + }, + { + "epoch": 52.71, + "learning_rate": 2.3652060080122267e-05, + "loss": 1.9888, + "step": 18211500 + }, + { + "epoch": 52.72, + "learning_rate": 2.3651336432474992e-05, + "loss": 2.0018, + "step": 18212000 + }, + { + "epoch": 52.72, + "learning_rate": 2.3650612784827718e-05, + "loss": 1.9736, + "step": 18212500 + }, + { + "epoch": 52.72, + "learning_rate": 2.364988913718044e-05, + "loss": 1.979, + "step": 18213000 + }, + { + "epoch": 52.72, + "learning_rate": 2.3649165489533163e-05, + "loss": 1.9911, + "step": 18213500 + }, + { + "epoch": 52.72, + "learning_rate": 2.3648443289181178e-05, + "loss": 1.9808, + "step": 18214000 + }, + { + "epoch": 52.72, + "learning_rate": 2.3647719641533904e-05, + "loss": 1.9882, + "step": 18214500 + }, + { + "epoch": 52.72, + "learning_rate": 2.3646995993886626e-05, + "loss": 1.991, + "step": 18215000 + }, + { + "epoch": 52.73, + "learning_rate": 2.3646272346239348e-05, + "loss": 1.9908, + "step": 18215500 + }, + { + "epoch": 52.73, + "learning_rate": 2.364554869859207e-05, + "loss": 1.9651, + "step": 18216000 + }, + { + "epoch": 52.73, + "learning_rate": 2.3644825050944796e-05, + "loss": 1.9919, + "step": 18216500 + }, + { + "epoch": 52.73, + "learning_rate": 2.3644101403297518e-05, + "loss": 1.9826, + "step": 18217000 + }, + { + "epoch": 52.73, + "learning_rate": 2.3643377755650244e-05, + "loss": 1.9823, + "step": 18217500 + }, + { + "epoch": 52.73, + "learning_rate": 2.3642654108002966e-05, + "loss": 2.0025, + "step": 18218000 + }, + { + "epoch": 52.74, + "learning_rate": 2.364193046035569e-05, + "loss": 1.9856, + "step": 18218500 + }, + { + "epoch": 52.74, + "learning_rate": 2.3641208260003704e-05, + "loss": 2.0056, + "step": 18219000 + }, + { + "epoch": 52.74, + "learning_rate": 2.364048461235643e-05, + "loss": 1.9995, + "step": 18219500 + }, + { + "epoch": 52.74, + "learning_rate": 2.3639760964709155e-05, + "loss": 1.9842, + "step": 18220000 + }, + { + "epoch": 52.74, + "learning_rate": 2.3639037317061877e-05, + "loss": 1.9964, + "step": 18220500 + }, + { + "epoch": 52.74, + "learning_rate": 2.36383136694146e-05, + "loss": 1.9838, + "step": 18221000 + }, + { + "epoch": 52.74, + "learning_rate": 2.3637590021767322e-05, + "loss": 2.0006, + "step": 18221500 + }, + { + "epoch": 52.75, + "learning_rate": 2.3636866374120044e-05, + "loss": 1.9851, + "step": 18222000 + }, + { + "epoch": 52.75, + "learning_rate": 2.363614272647277e-05, + "loss": 1.9995, + "step": 18222500 + }, + { + "epoch": 52.75, + "learning_rate": 2.3635419078825492e-05, + "loss": 1.9743, + "step": 18223000 + }, + { + "epoch": 52.75, + "learning_rate": 2.3634695431178218e-05, + "loss": 1.9903, + "step": 18223500 + }, + { + "epoch": 52.75, + "learning_rate": 2.3633973230826233e-05, + "loss": 1.9792, + "step": 18224000 + }, + { + "epoch": 52.75, + "learning_rate": 2.3633251030474252e-05, + "loss": 1.9752, + "step": 18224500 + }, + { + "epoch": 52.75, + "learning_rate": 2.3632527382826974e-05, + "loss": 1.9994, + "step": 18225000 + }, + { + "epoch": 52.76, + "learning_rate": 2.3631803735179696e-05, + "loss": 1.9949, + "step": 18225500 + }, + { + "epoch": 52.76, + "learning_rate": 2.363108008753242e-05, + "loss": 2.0131, + "step": 18226000 + }, + { + "epoch": 52.76, + "learning_rate": 2.3630357887180438e-05, + "loss": 1.9948, + "step": 18226500 + }, + { + "epoch": 52.76, + "learning_rate": 2.362963423953316e-05, + "loss": 1.9893, + "step": 18227000 + }, + { + "epoch": 52.76, + "learning_rate": 2.3628910591885886e-05, + "loss": 2.0062, + "step": 18227500 + }, + { + "epoch": 52.76, + "learning_rate": 2.3628186944238608e-05, + "loss": 2.0181, + "step": 18228000 + }, + { + "epoch": 52.76, + "learning_rate": 2.3627463296591333e-05, + "loss": 2.0045, + "step": 18228500 + }, + { + "epoch": 52.77, + "learning_rate": 2.3626739648944056e-05, + "loss": 1.9821, + "step": 18229000 + }, + { + "epoch": 52.77, + "learning_rate": 2.3626016001296778e-05, + "loss": 2.0189, + "step": 18229500 + }, + { + "epoch": 52.77, + "learning_rate": 2.3625293800944793e-05, + "loss": 1.9761, + "step": 18230000 + }, + { + "epoch": 52.77, + "learning_rate": 2.362457015329752e-05, + "loss": 2.0094, + "step": 18230500 + }, + { + "epoch": 52.77, + "learning_rate": 2.362384650565024e-05, + "loss": 2.0122, + "step": 18231000 + }, + { + "epoch": 52.77, + "learning_rate": 2.3623122858002967e-05, + "loss": 1.9952, + "step": 18231500 + }, + { + "epoch": 52.77, + "learning_rate": 2.362239921035569e-05, + "loss": 1.9979, + "step": 18232000 + }, + { + "epoch": 52.78, + "learning_rate": 2.362167556270841e-05, + "loss": 2.0159, + "step": 18232500 + }, + { + "epoch": 52.78, + "learning_rate": 2.362095336235643e-05, + "loss": 1.9939, + "step": 18233000 + }, + { + "epoch": 52.78, + "learning_rate": 2.3620229714709153e-05, + "loss": 1.9933, + "step": 18233500 + }, + { + "epoch": 52.78, + "learning_rate": 2.361950751435717e-05, + "loss": 1.9963, + "step": 18234000 + }, + { + "epoch": 52.78, + "learning_rate": 2.3618783866709894e-05, + "loss": 1.9646, + "step": 18234500 + }, + { + "epoch": 52.78, + "learning_rate": 2.361806021906262e-05, + "loss": 2.0081, + "step": 18235000 + }, + { + "epoch": 52.78, + "learning_rate": 2.361733657141534e-05, + "loss": 1.9844, + "step": 18235500 + }, + { + "epoch": 52.79, + "learning_rate": 2.3616612923768064e-05, + "loss": 1.9763, + "step": 18236000 + }, + { + "epoch": 52.79, + "learning_rate": 2.3615889276120786e-05, + "loss": 1.9816, + "step": 18236500 + }, + { + "epoch": 52.79, + "learning_rate": 2.3615165628473508e-05, + "loss": 1.9912, + "step": 18237000 + }, + { + "epoch": 52.79, + "learning_rate": 2.3614441980826234e-05, + "loss": 1.9906, + "step": 18237500 + }, + { + "epoch": 52.79, + "learning_rate": 2.3613718333178956e-05, + "loss": 1.9821, + "step": 18238000 + }, + { + "epoch": 52.79, + "learning_rate": 2.361299613282697e-05, + "loss": 1.9635, + "step": 18238500 + }, + { + "epoch": 52.79, + "learning_rate": 2.3612272485179697e-05, + "loss": 1.9677, + "step": 18239000 + }, + { + "epoch": 52.8, + "learning_rate": 2.3611550284827716e-05, + "loss": 2.0071, + "step": 18239500 + }, + { + "epoch": 52.8, + "learning_rate": 2.361082663718044e-05, + "loss": 1.9959, + "step": 18240000 + }, + { + "epoch": 52.8, + "learning_rate": 2.361010298953316e-05, + "loss": 2.0013, + "step": 18240500 + }, + { + "epoch": 52.8, + "learning_rate": 2.3609379341885883e-05, + "loss": 1.9707, + "step": 18241000 + }, + { + "epoch": 52.8, + "learning_rate": 2.360865569423861e-05, + "loss": 1.9912, + "step": 18241500 + }, + { + "epoch": 52.8, + "learning_rate": 2.3607933493886624e-05, + "loss": 2.0034, + "step": 18242000 + }, + { + "epoch": 52.8, + "learning_rate": 2.360720984623935e-05, + "loss": 2.0121, + "step": 18242500 + }, + { + "epoch": 52.81, + "learning_rate": 2.3606486198592072e-05, + "loss": 1.9741, + "step": 18243000 + }, + { + "epoch": 52.81, + "learning_rate": 2.3605762550944798e-05, + "loss": 2.0113, + "step": 18243500 + }, + { + "epoch": 52.81, + "learning_rate": 2.360503890329752e-05, + "loss": 1.978, + "step": 18244000 + }, + { + "epoch": 52.81, + "learning_rate": 2.3604315255650242e-05, + "loss": 1.9861, + "step": 18244500 + }, + { + "epoch": 52.81, + "learning_rate": 2.3603591608002964e-05, + "loss": 1.9698, + "step": 18245000 + }, + { + "epoch": 52.81, + "learning_rate": 2.3602867960355687e-05, + "loss": 1.9739, + "step": 18245500 + }, + { + "epoch": 52.81, + "learning_rate": 2.3602144312708412e-05, + "loss": 1.9856, + "step": 18246000 + }, + { + "epoch": 52.82, + "learning_rate": 2.3601420665061134e-05, + "loss": 1.9794, + "step": 18246500 + }, + { + "epoch": 52.82, + "learning_rate": 2.360069701741386e-05, + "loss": 2.001, + "step": 18247000 + }, + { + "epoch": 52.82, + "learning_rate": 2.3599973369766582e-05, + "loss": 1.9956, + "step": 18247500 + }, + { + "epoch": 52.82, + "learning_rate": 2.3599249722119304e-05, + "loss": 2.004, + "step": 18248000 + }, + { + "epoch": 52.82, + "learning_rate": 2.3598527521767323e-05, + "loss": 1.9926, + "step": 18248500 + }, + { + "epoch": 52.82, + "learning_rate": 2.359780532141534e-05, + "loss": 1.9767, + "step": 18249000 + }, + { + "epoch": 52.82, + "learning_rate": 2.359708167376806e-05, + "loss": 1.9839, + "step": 18249500 + }, + { + "epoch": 52.83, + "learning_rate": 2.3596358026120787e-05, + "loss": 1.9812, + "step": 18250000 + }, + { + "epoch": 52.83, + "learning_rate": 2.359563437847351e-05, + "loss": 1.9955, + "step": 18250500 + }, + { + "epoch": 52.83, + "learning_rate": 2.3594910730826235e-05, + "loss": 1.9962, + "step": 18251000 + }, + { + "epoch": 52.83, + "learning_rate": 2.3594187083178957e-05, + "loss": 1.9726, + "step": 18251500 + }, + { + "epoch": 52.83, + "learning_rate": 2.359346343553168e-05, + "loss": 1.9905, + "step": 18252000 + }, + { + "epoch": 52.83, + "learning_rate": 2.35927397878844e-05, + "loss": 1.9918, + "step": 18252500 + }, + { + "epoch": 52.83, + "learning_rate": 2.3592016140237124e-05, + "loss": 1.9786, + "step": 18253000 + }, + { + "epoch": 52.84, + "learning_rate": 2.359129249258985e-05, + "loss": 1.9833, + "step": 18253500 + }, + { + "epoch": 52.84, + "learning_rate": 2.3590570292237868e-05, + "loss": 1.9884, + "step": 18254000 + }, + { + "epoch": 52.84, + "learning_rate": 2.3589848091885887e-05, + "loss": 1.9587, + "step": 18254500 + }, + { + "epoch": 52.84, + "learning_rate": 2.358912444423861e-05, + "loss": 1.9892, + "step": 18255000 + }, + { + "epoch": 52.84, + "learning_rate": 2.358840079659133e-05, + "loss": 2.0008, + "step": 18255500 + }, + { + "epoch": 52.84, + "learning_rate": 2.3587677148944054e-05, + "loss": 2.0007, + "step": 18256000 + }, + { + "epoch": 52.85, + "learning_rate": 2.3586953501296776e-05, + "loss": 1.9978, + "step": 18256500 + }, + { + "epoch": 52.85, + "learning_rate": 2.3586231300944795e-05, + "loss": 1.9722, + "step": 18257000 + }, + { + "epoch": 52.85, + "learning_rate": 2.358550765329752e-05, + "loss": 1.9828, + "step": 18257500 + }, + { + "epoch": 52.85, + "learning_rate": 2.3584784005650243e-05, + "loss": 1.9584, + "step": 18258000 + }, + { + "epoch": 52.85, + "learning_rate": 2.3584061805298262e-05, + "loss": 1.985, + "step": 18258500 + }, + { + "epoch": 52.85, + "learning_rate": 2.3583338157650984e-05, + "loss": 1.9892, + "step": 18259000 + }, + { + "epoch": 52.85, + "learning_rate": 2.3582614510003706e-05, + "loss": 1.9924, + "step": 18259500 + }, + { + "epoch": 52.86, + "learning_rate": 2.358189086235643e-05, + "loss": 1.9816, + "step": 18260000 + }, + { + "epoch": 52.86, + "learning_rate": 2.358116721470915e-05, + "loss": 1.9822, + "step": 18260500 + }, + { + "epoch": 52.86, + "learning_rate": 2.3580443567061873e-05, + "loss": 1.9948, + "step": 18261000 + }, + { + "epoch": 52.86, + "learning_rate": 2.3579721366709895e-05, + "loss": 1.9811, + "step": 18261500 + }, + { + "epoch": 52.86, + "learning_rate": 2.3578997719062618e-05, + "loss": 1.9984, + "step": 18262000 + }, + { + "epoch": 52.86, + "learning_rate": 2.357827407141534e-05, + "loss": 1.979, + "step": 18262500 + }, + { + "epoch": 52.86, + "learning_rate": 2.3577550423768062e-05, + "loss": 1.9996, + "step": 18263000 + }, + { + "epoch": 52.87, + "learning_rate": 2.3576826776120788e-05, + "loss": 1.9734, + "step": 18263500 + }, + { + "epoch": 52.87, + "learning_rate": 2.3576104575768803e-05, + "loss": 2.0015, + "step": 18264000 + }, + { + "epoch": 52.87, + "learning_rate": 2.3575380928121525e-05, + "loss": 1.9883, + "step": 18264500 + }, + { + "epoch": 52.87, + "learning_rate": 2.357465728047425e-05, + "loss": 2.0078, + "step": 18265000 + }, + { + "epoch": 52.87, + "learning_rate": 2.3573933632826977e-05, + "loss": 1.9834, + "step": 18265500 + }, + { + "epoch": 52.87, + "learning_rate": 2.35732099851797e-05, + "loss": 1.9849, + "step": 18266000 + }, + { + "epoch": 52.87, + "learning_rate": 2.357248633753242e-05, + "loss": 1.998, + "step": 18266500 + }, + { + "epoch": 52.88, + "learning_rate": 2.3571762689885143e-05, + "loss": 2.0245, + "step": 18267000 + }, + { + "epoch": 52.88, + "learning_rate": 2.3571039042237866e-05, + "loss": 1.9922, + "step": 18267500 + }, + { + "epoch": 52.88, + "learning_rate": 2.3570316841885885e-05, + "loss": 1.9956, + "step": 18268000 + }, + { + "epoch": 52.88, + "learning_rate": 2.35695946415339e-05, + "loss": 1.9774, + "step": 18268500 + }, + { + "epoch": 52.88, + "learning_rate": 2.3568870993886626e-05, + "loss": 1.9705, + "step": 18269000 + }, + { + "epoch": 52.88, + "learning_rate": 2.356814734623935e-05, + "loss": 1.971, + "step": 18269500 + }, + { + "epoch": 52.88, + "learning_rate": 2.3567423698592074e-05, + "loss": 2.001, + "step": 18270000 + }, + { + "epoch": 52.89, + "learning_rate": 2.3566700050944796e-05, + "loss": 1.9887, + "step": 18270500 + }, + { + "epoch": 52.89, + "learning_rate": 2.3565976403297518e-05, + "loss": 1.9915, + "step": 18271000 + }, + { + "epoch": 52.89, + "learning_rate": 2.356525275565024e-05, + "loss": 1.9883, + "step": 18271500 + }, + { + "epoch": 52.89, + "learning_rate": 2.3564529108002962e-05, + "loss": 1.9604, + "step": 18272000 + }, + { + "epoch": 52.89, + "learning_rate": 2.3563805460355688e-05, + "loss": 1.9885, + "step": 18272500 + }, + { + "epoch": 52.89, + "learning_rate": 2.3563083260003707e-05, + "loss": 1.9725, + "step": 18273000 + }, + { + "epoch": 52.89, + "learning_rate": 2.3562361059651726e-05, + "loss": 1.97, + "step": 18273500 + }, + { + "epoch": 52.9, + "learning_rate": 2.3561637412004448e-05, + "loss": 1.964, + "step": 18274000 + }, + { + "epoch": 52.9, + "learning_rate": 2.3560915211652464e-05, + "loss": 1.9852, + "step": 18274500 + }, + { + "epoch": 52.9, + "learning_rate": 2.356019156400519e-05, + "loss": 1.9889, + "step": 18275000 + }, + { + "epoch": 52.9, + "learning_rate": 2.355946791635791e-05, + "loss": 1.9944, + "step": 18275500 + }, + { + "epoch": 52.9, + "learning_rate": 2.3558744268710634e-05, + "loss": 2.0001, + "step": 18276000 + }, + { + "epoch": 52.9, + "learning_rate": 2.355802062106336e-05, + "loss": 1.9889, + "step": 18276500 + }, + { + "epoch": 52.9, + "learning_rate": 2.3557296973416082e-05, + "loss": 1.9936, + "step": 18277000 + }, + { + "epoch": 52.91, + "learning_rate": 2.3556573325768804e-05, + "loss": 1.998, + "step": 18277500 + }, + { + "epoch": 52.91, + "learning_rate": 2.3555849678121526e-05, + "loss": 1.9929, + "step": 18278000 + }, + { + "epoch": 52.91, + "learning_rate": 2.3555126030474252e-05, + "loss": 2.006, + "step": 18278500 + }, + { + "epoch": 52.91, + "learning_rate": 2.3554402382826974e-05, + "loss": 2.0108, + "step": 18279000 + }, + { + "epoch": 52.91, + "learning_rate": 2.3553678735179696e-05, + "loss": 1.9952, + "step": 18279500 + }, + { + "epoch": 52.91, + "learning_rate": 2.3552955087532422e-05, + "loss": 1.9757, + "step": 18280000 + }, + { + "epoch": 52.91, + "learning_rate": 2.3552231439885144e-05, + "loss": 1.9685, + "step": 18280500 + }, + { + "epoch": 52.92, + "learning_rate": 2.3551507792237866e-05, + "loss": 2.0039, + "step": 18281000 + }, + { + "epoch": 52.92, + "learning_rate": 2.355078414459059e-05, + "loss": 1.9669, + "step": 18281500 + }, + { + "epoch": 52.92, + "learning_rate": 2.3550061944238608e-05, + "loss": 1.9867, + "step": 18282000 + }, + { + "epoch": 52.92, + "learning_rate": 2.354933829659133e-05, + "loss": 1.9897, + "step": 18282500 + }, + { + "epoch": 52.92, + "learning_rate": 2.3548614648944052e-05, + "loss": 1.9782, + "step": 18283000 + }, + { + "epoch": 52.92, + "learning_rate": 2.354789244859207e-05, + "loss": 1.9843, + "step": 18283500 + }, + { + "epoch": 52.92, + "learning_rate": 2.3547168800944797e-05, + "loss": 1.9993, + "step": 18284000 + }, + { + "epoch": 52.93, + "learning_rate": 2.354644515329752e-05, + "loss": 1.9871, + "step": 18284500 + }, + { + "epoch": 52.93, + "learning_rate": 2.354572150565024e-05, + "loss": 1.9835, + "step": 18285000 + }, + { + "epoch": 52.93, + "learning_rate": 2.3544997858002967e-05, + "loss": 2.0049, + "step": 18285500 + }, + { + "epoch": 52.93, + "learning_rate": 2.354427421035569e-05, + "loss": 1.9921, + "step": 18286000 + }, + { + "epoch": 52.93, + "learning_rate": 2.354355056270841e-05, + "loss": 2.0152, + "step": 18286500 + }, + { + "epoch": 52.93, + "learning_rate": 2.3542826915061133e-05, + "loss": 1.9995, + "step": 18287000 + }, + { + "epoch": 52.93, + "learning_rate": 2.354210326741386e-05, + "loss": 2.014, + "step": 18287500 + }, + { + "epoch": 52.94, + "learning_rate": 2.354137961976658e-05, + "loss": 1.9721, + "step": 18288000 + }, + { + "epoch": 52.94, + "learning_rate": 2.3540655972119303e-05, + "loss": 1.992, + "step": 18288500 + }, + { + "epoch": 52.94, + "learning_rate": 2.353993232447203e-05, + "loss": 1.9793, + "step": 18289000 + }, + { + "epoch": 52.94, + "learning_rate": 2.353920867682475e-05, + "loss": 1.9817, + "step": 18289500 + }, + { + "epoch": 52.94, + "learning_rate": 2.3538487923768064e-05, + "loss": 2.0064, + "step": 18290000 + }, + { + "epoch": 52.94, + "learning_rate": 2.3537764276120786e-05, + "loss": 1.9911, + "step": 18290500 + }, + { + "epoch": 52.94, + "learning_rate": 2.3537042075768805e-05, + "loss": 1.9856, + "step": 18291000 + }, + { + "epoch": 52.95, + "learning_rate": 2.353631842812153e-05, + "loss": 1.9809, + "step": 18291500 + }, + { + "epoch": 52.95, + "learning_rate": 2.3535594780474253e-05, + "loss": 1.9833, + "step": 18292000 + }, + { + "epoch": 52.95, + "learning_rate": 2.3534871132826975e-05, + "loss": 1.9816, + "step": 18292500 + }, + { + "epoch": 52.95, + "learning_rate": 2.3534147485179697e-05, + "loss": 1.97, + "step": 18293000 + }, + { + "epoch": 52.95, + "learning_rate": 2.353342383753242e-05, + "loss": 1.9985, + "step": 18293500 + }, + { + "epoch": 52.95, + "learning_rate": 2.353270018988514e-05, + "loss": 1.9854, + "step": 18294000 + }, + { + "epoch": 52.96, + "learning_rate": 2.3531976542237867e-05, + "loss": 1.97, + "step": 18294500 + }, + { + "epoch": 52.96, + "learning_rate": 2.3531252894590593e-05, + "loss": 2.0074, + "step": 18295000 + }, + { + "epoch": 52.96, + "learning_rate": 2.353053069423861e-05, + "loss": 1.987, + "step": 18295500 + }, + { + "epoch": 52.96, + "learning_rate": 2.352980704659133e-05, + "loss": 1.9777, + "step": 18296000 + }, + { + "epoch": 52.96, + "learning_rate": 2.3529083398944056e-05, + "loss": 1.9863, + "step": 18296500 + }, + { + "epoch": 52.96, + "learning_rate": 2.352835975129678e-05, + "loss": 1.9882, + "step": 18297000 + }, + { + "epoch": 52.96, + "learning_rate": 2.35276361036495e-05, + "loss": 1.9782, + "step": 18297500 + }, + { + "epoch": 52.97, + "learning_rate": 2.3526913903297516e-05, + "loss": 1.98, + "step": 18298000 + }, + { + "epoch": 52.97, + "learning_rate": 2.3526190255650242e-05, + "loss": 1.9988, + "step": 18298500 + }, + { + "epoch": 52.97, + "learning_rate": 2.352546805529826e-05, + "loss": 1.9843, + "step": 18299000 + }, + { + "epoch": 52.97, + "learning_rate": 2.3524744407650983e-05, + "loss": 1.9878, + "step": 18299500 + }, + { + "epoch": 52.97, + "learning_rate": 2.3524020760003705e-05, + "loss": 1.9845, + "step": 18300000 + }, + { + "epoch": 52.97, + "learning_rate": 2.352329711235643e-05, + "loss": 1.9577, + "step": 18300500 + }, + { + "epoch": 52.97, + "learning_rate": 2.3522573464709153e-05, + "loss": 1.9844, + "step": 18301000 + }, + { + "epoch": 52.98, + "learning_rate": 2.3521849817061875e-05, + "loss": 1.9695, + "step": 18301500 + }, + { + "epoch": 52.98, + "learning_rate": 2.3521126169414598e-05, + "loss": 1.9866, + "step": 18302000 + }, + { + "epoch": 52.98, + "learning_rate": 2.3520402521767323e-05, + "loss": 1.9785, + "step": 18302500 + }, + { + "epoch": 52.98, + "learning_rate": 2.3519678874120045e-05, + "loss": 1.9861, + "step": 18303000 + }, + { + "epoch": 52.98, + "learning_rate": 2.3518955226472768e-05, + "loss": 1.9747, + "step": 18303500 + }, + { + "epoch": 52.98, + "learning_rate": 2.3518231578825493e-05, + "loss": 1.9817, + "step": 18304000 + }, + { + "epoch": 52.98, + "learning_rate": 2.3517507931178216e-05, + "loss": 1.9984, + "step": 18304500 + }, + { + "epoch": 52.99, + "learning_rate": 2.3516784283530938e-05, + "loss": 1.9903, + "step": 18305000 + }, + { + "epoch": 52.99, + "learning_rate": 2.351606353047425e-05, + "loss": 2.0017, + "step": 18305500 + }, + { + "epoch": 52.99, + "learning_rate": 2.3515339882826972e-05, + "loss": 1.9828, + "step": 18306000 + }, + { + "epoch": 52.99, + "learning_rate": 2.3514617682474995e-05, + "loss": 1.9829, + "step": 18306500 + }, + { + "epoch": 52.99, + "learning_rate": 2.3513894034827717e-05, + "loss": 1.9766, + "step": 18307000 + }, + { + "epoch": 52.99, + "learning_rate": 2.351317038718044e-05, + "loss": 1.9841, + "step": 18307500 + }, + { + "epoch": 52.99, + "learning_rate": 2.351244673953316e-05, + "loss": 1.9948, + "step": 18308000 + }, + { + "epoch": 53.0, + "learning_rate": 2.3511723091885884e-05, + "loss": 1.9655, + "step": 18308500 + }, + { + "epoch": 53.0, + "learning_rate": 2.3510999444238606e-05, + "loss": 1.9785, + "step": 18309000 + }, + { + "epoch": 53.0, + "learning_rate": 2.351027579659133e-05, + "loss": 1.9925, + "step": 18309500 + }, + { + "epoch": 53.0, + "learning_rate": 2.3509552148944057e-05, + "loss": 2.0027, + "step": 18310000 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.6746968742115707, + "eval_accuracy_mlm": 0.6409910673456639, + "eval_accuracy_nsp": 0.8555957340519632, + "eval_loss": 2.1695713996887207, + "eval_runtime": 331.4336, + "eval_samples_per_second": 1316.662, + "eval_steps_per_second": 54.862, + "step": 18310016 + }, + { + "epoch": 53.0, + "learning_rate": 2.350883139588737e-05, + "loss": 1.9548, + "step": 18310500 + }, + { + "epoch": 53.0, + "learning_rate": 2.350810774824009e-05, + "loss": 1.9557, + "step": 18311000 + }, + { + "epoch": 53.0, + "learning_rate": 2.3507384100592814e-05, + "loss": 1.9956, + "step": 18311500 + }, + { + "epoch": 53.01, + "learning_rate": 2.3506660452945536e-05, + "loss": 1.9563, + "step": 18312000 + }, + { + "epoch": 53.01, + "learning_rate": 2.3505936805298258e-05, + "loss": 1.9515, + "step": 18312500 + }, + { + "epoch": 53.01, + "learning_rate": 2.3505214604946277e-05, + "loss": 1.9616, + "step": 18313000 + }, + { + "epoch": 53.01, + "learning_rate": 2.3504490957299e-05, + "loss": 1.9612, + "step": 18313500 + }, + { + "epoch": 53.01, + "learning_rate": 2.3503767309651725e-05, + "loss": 1.9614, + "step": 18314000 + }, + { + "epoch": 53.01, + "learning_rate": 2.3503043662004447e-05, + "loss": 1.9658, + "step": 18314500 + }, + { + "epoch": 53.01, + "learning_rate": 2.350232001435717e-05, + "loss": 1.9725, + "step": 18315000 + }, + { + "epoch": 53.02, + "learning_rate": 2.350159781400519e-05, + "loss": 1.9633, + "step": 18315500 + }, + { + "epoch": 53.02, + "learning_rate": 2.350087416635791e-05, + "loss": 1.9664, + "step": 18316000 + }, + { + "epoch": 53.02, + "learning_rate": 2.3500150518710633e-05, + "loss": 1.9499, + "step": 18316500 + }, + { + "epoch": 53.02, + "learning_rate": 2.349942687106336e-05, + "loss": 1.9625, + "step": 18317000 + }, + { + "epoch": 53.02, + "learning_rate": 2.349870322341608e-05, + "loss": 1.9761, + "step": 18317500 + }, + { + "epoch": 53.02, + "learning_rate": 2.3497979575768806e-05, + "loss": 1.9814, + "step": 18318000 + }, + { + "epoch": 53.02, + "learning_rate": 2.3497257375416822e-05, + "loss": 1.9775, + "step": 18318500 + }, + { + "epoch": 53.03, + "learning_rate": 2.3496533727769544e-05, + "loss": 1.9671, + "step": 18319000 + }, + { + "epoch": 53.03, + "learning_rate": 2.349581008012227e-05, + "loss": 1.9823, + "step": 18319500 + }, + { + "epoch": 53.03, + "learning_rate": 2.3495086432474992e-05, + "loss": 1.971, + "step": 18320000 + }, + { + "epoch": 53.03, + "learning_rate": 2.3494362784827714e-05, + "loss": 1.9586, + "step": 18320500 + }, + { + "epoch": 53.03, + "learning_rate": 2.3493640584475733e-05, + "loss": 1.9435, + "step": 18321000 + }, + { + "epoch": 53.03, + "learning_rate": 2.349291693682846e-05, + "loss": 1.9558, + "step": 18321500 + }, + { + "epoch": 53.03, + "learning_rate": 2.349219328918118e-05, + "loss": 1.9561, + "step": 18322000 + }, + { + "epoch": 53.04, + "learning_rate": 2.3491471088829197e-05, + "loss": 1.9759, + "step": 18322500 + }, + { + "epoch": 53.04, + "learning_rate": 2.3490747441181922e-05, + "loss": 1.9912, + "step": 18323000 + }, + { + "epoch": 53.04, + "learning_rate": 2.3490025240829938e-05, + "loss": 1.9873, + "step": 18323500 + }, + { + "epoch": 53.04, + "learning_rate": 2.348930159318266e-05, + "loss": 1.9878, + "step": 18324000 + }, + { + "epoch": 53.04, + "learning_rate": 2.3488577945535382e-05, + "loss": 1.978, + "step": 18324500 + }, + { + "epoch": 53.04, + "learning_rate": 2.3487854297888108e-05, + "loss": 1.9788, + "step": 18325000 + }, + { + "epoch": 53.04, + "learning_rate": 2.3487130650240833e-05, + "loss": 1.965, + "step": 18325500 + }, + { + "epoch": 53.05, + "learning_rate": 2.3486407002593556e-05, + "loss": 1.9709, + "step": 18326000 + }, + { + "epoch": 53.05, + "learning_rate": 2.3485683354946278e-05, + "loss": 1.9794, + "step": 18326500 + }, + { + "epoch": 53.05, + "learning_rate": 2.3484959707299e-05, + "loss": 1.98, + "step": 18327000 + }, + { + "epoch": 53.05, + "learning_rate": 2.3484236059651722e-05, + "loss": 1.9707, + "step": 18327500 + }, + { + "epoch": 53.05, + "learning_rate": 2.348351385929974e-05, + "loss": 1.9835, + "step": 18328000 + }, + { + "epoch": 53.05, + "learning_rate": 2.3482790211652464e-05, + "loss": 1.9757, + "step": 18328500 + }, + { + "epoch": 53.05, + "learning_rate": 2.348206656400519e-05, + "loss": 1.9857, + "step": 18329000 + }, + { + "epoch": 53.06, + "learning_rate": 2.348134291635791e-05, + "loss": 1.992, + "step": 18329500 + }, + { + "epoch": 53.06, + "learning_rate": 2.3480619268710634e-05, + "loss": 1.993, + "step": 18330000 + }, + { + "epoch": 53.06, + "learning_rate": 2.347989562106336e-05, + "loss": 1.9924, + "step": 18330500 + }, + { + "epoch": 53.06, + "learning_rate": 2.347917197341608e-05, + "loss": 1.9681, + "step": 18331000 + }, + { + "epoch": 53.06, + "learning_rate": 2.3478448325768804e-05, + "loss": 1.994, + "step": 18331500 + }, + { + "epoch": 53.06, + "learning_rate": 2.3477724678121526e-05, + "loss": 1.9921, + "step": 18332000 + }, + { + "epoch": 53.07, + "learning_rate": 2.347700103047425e-05, + "loss": 2.0165, + "step": 18332500 + }, + { + "epoch": 53.07, + "learning_rate": 2.3476277382826974e-05, + "loss": 1.9823, + "step": 18333000 + }, + { + "epoch": 53.07, + "learning_rate": 2.34755537351797e-05, + "loss": 1.99, + "step": 18333500 + }, + { + "epoch": 53.07, + "learning_rate": 2.3474830087532422e-05, + "loss": 1.9676, + "step": 18334000 + }, + { + "epoch": 53.07, + "learning_rate": 2.3474106439885144e-05, + "loss": 1.9527, + "step": 18334500 + }, + { + "epoch": 53.07, + "learning_rate": 2.3473382792237866e-05, + "loss": 1.9565, + "step": 18335000 + }, + { + "epoch": 53.07, + "learning_rate": 2.347265914459059e-05, + "loss": 1.9731, + "step": 18335500 + }, + { + "epoch": 53.08, + "learning_rate": 2.3471935496943314e-05, + "loss": 1.9716, + "step": 18336000 + }, + { + "epoch": 53.08, + "learning_rate": 2.3471211849296036e-05, + "loss": 1.9778, + "step": 18336500 + }, + { + "epoch": 53.08, + "learning_rate": 2.3470488201648762e-05, + "loss": 1.9644, + "step": 18337000 + }, + { + "epoch": 53.08, + "learning_rate": 2.3469764554001484e-05, + "loss": 1.9678, + "step": 18337500 + }, + { + "epoch": 53.08, + "learning_rate": 2.3469040906354206e-05, + "loss": 1.9846, + "step": 18338000 + }, + { + "epoch": 53.08, + "learning_rate": 2.346831725870693e-05, + "loss": 1.9898, + "step": 18338500 + }, + { + "epoch": 53.08, + "learning_rate": 2.3467595058354948e-05, + "loss": 1.9637, + "step": 18339000 + }, + { + "epoch": 53.09, + "learning_rate": 2.3466872858002963e-05, + "loss": 1.9881, + "step": 18339500 + }, + { + "epoch": 53.09, + "learning_rate": 2.346614921035569e-05, + "loss": 1.9952, + "step": 18340000 + }, + { + "epoch": 53.09, + "learning_rate": 2.3465427010003708e-05, + "loss": 1.9858, + "step": 18340500 + }, + { + "epoch": 53.09, + "learning_rate": 2.346470336235643e-05, + "loss": 1.9811, + "step": 18341000 + }, + { + "epoch": 53.09, + "learning_rate": 2.3463979714709152e-05, + "loss": 1.9666, + "step": 18341500 + }, + { + "epoch": 53.09, + "learning_rate": 2.3463256067061874e-05, + "loss": 1.978, + "step": 18342000 + }, + { + "epoch": 53.09, + "learning_rate": 2.34625324194146e-05, + "loss": 1.9883, + "step": 18342500 + }, + { + "epoch": 53.1, + "learning_rate": 2.3461808771767322e-05, + "loss": 1.9784, + "step": 18343000 + }, + { + "epoch": 53.1, + "learning_rate": 2.3461085124120048e-05, + "loss": 1.9491, + "step": 18343500 + }, + { + "epoch": 53.1, + "learning_rate": 2.346036147647277e-05, + "loss": 1.978, + "step": 18344000 + }, + { + "epoch": 53.1, + "learning_rate": 2.3459637828825492e-05, + "loss": 1.9529, + "step": 18344500 + }, + { + "epoch": 53.1, + "learning_rate": 2.3458914181178215e-05, + "loss": 1.9526, + "step": 18345000 + }, + { + "epoch": 53.1, + "learning_rate": 2.3458191980826233e-05, + "loss": 1.9672, + "step": 18345500 + }, + { + "epoch": 53.1, + "learning_rate": 2.3457468333178956e-05, + "loss": 1.9791, + "step": 18346000 + }, + { + "epoch": 53.11, + "learning_rate": 2.3456744685531678e-05, + "loss": 1.9821, + "step": 18346500 + }, + { + "epoch": 53.11, + "learning_rate": 2.34560210378844e-05, + "loss": 1.9907, + "step": 18347000 + }, + { + "epoch": 53.11, + "learning_rate": 2.3455298837532423e-05, + "loss": 1.9697, + "step": 18347500 + }, + { + "epoch": 53.11, + "learning_rate": 2.3454575189885145e-05, + "loss": 1.9728, + "step": 18348000 + }, + { + "epoch": 53.11, + "learning_rate": 2.3453852989533164e-05, + "loss": 1.9698, + "step": 18348500 + }, + { + "epoch": 53.11, + "learning_rate": 2.3453129341885886e-05, + "loss": 1.9711, + "step": 18349000 + }, + { + "epoch": 53.11, + "learning_rate": 2.3452405694238608e-05, + "loss": 1.968, + "step": 18349500 + }, + { + "epoch": 53.12, + "learning_rate": 2.345168204659133e-05, + "loss": 1.9703, + "step": 18350000 + }, + { + "epoch": 53.12, + "learning_rate": 2.3450958398944053e-05, + "loss": 1.9864, + "step": 18350500 + }, + { + "epoch": 53.12, + "learning_rate": 2.3450234751296778e-05, + "loss": 1.9726, + "step": 18351000 + }, + { + "epoch": 53.12, + "learning_rate": 2.34495111036495e-05, + "loss": 1.9773, + "step": 18351500 + }, + { + "epoch": 53.12, + "learning_rate": 2.3448787456002226e-05, + "loss": 1.9539, + "step": 18352000 + }, + { + "epoch": 53.12, + "learning_rate": 2.344806525565024e-05, + "loss": 1.976, + "step": 18352500 + }, + { + "epoch": 53.12, + "learning_rate": 2.3447341608002964e-05, + "loss": 1.9656, + "step": 18353000 + }, + { + "epoch": 53.13, + "learning_rate": 2.344661796035569e-05, + "loss": 1.9687, + "step": 18353500 + }, + { + "epoch": 53.13, + "learning_rate": 2.3445894312708412e-05, + "loss": 1.9729, + "step": 18354000 + }, + { + "epoch": 53.13, + "learning_rate": 2.3445172112356427e-05, + "loss": 1.9917, + "step": 18354500 + }, + { + "epoch": 53.13, + "learning_rate": 2.3444448464709153e-05, + "loss": 1.9772, + "step": 18355000 + }, + { + "epoch": 53.13, + "learning_rate": 2.3443724817061875e-05, + "loss": 1.9611, + "step": 18355500 + }, + { + "epoch": 53.13, + "learning_rate": 2.34430011694146e-05, + "loss": 1.9705, + "step": 18356000 + }, + { + "epoch": 53.13, + "learning_rate": 2.3442277521767323e-05, + "loss": 1.9705, + "step": 18356500 + }, + { + "epoch": 53.14, + "learning_rate": 2.3441553874120045e-05, + "loss": 1.9749, + "step": 18357000 + }, + { + "epoch": 53.14, + "learning_rate": 2.3440831673768064e-05, + "loss": 1.9827, + "step": 18357500 + }, + { + "epoch": 53.14, + "learning_rate": 2.3440108026120786e-05, + "loss": 1.9769, + "step": 18358000 + }, + { + "epoch": 53.14, + "learning_rate": 2.3439384378473512e-05, + "loss": 1.9698, + "step": 18358500 + }, + { + "epoch": 53.14, + "learning_rate": 2.3438662178121528e-05, + "loss": 1.9826, + "step": 18359000 + }, + { + "epoch": 53.14, + "learning_rate": 2.3437938530474253e-05, + "loss": 1.9752, + "step": 18359500 + }, + { + "epoch": 53.14, + "learning_rate": 2.3437214882826975e-05, + "loss": 1.9703, + "step": 18360000 + }, + { + "epoch": 53.15, + "learning_rate": 2.3436491235179698e-05, + "loss": 1.9934, + "step": 18360500 + }, + { + "epoch": 53.15, + "learning_rate": 2.343576758753242e-05, + "loss": 1.9745, + "step": 18361000 + }, + { + "epoch": 53.15, + "learning_rate": 2.3435043939885142e-05, + "loss": 1.9748, + "step": 18361500 + }, + { + "epoch": 53.15, + "learning_rate": 2.3434320292237864e-05, + "loss": 1.983, + "step": 18362000 + }, + { + "epoch": 53.15, + "learning_rate": 2.343359664459059e-05, + "loss": 1.9575, + "step": 18362500 + }, + { + "epoch": 53.15, + "learning_rate": 2.3432872996943316e-05, + "loss": 1.9709, + "step": 18363000 + }, + { + "epoch": 53.15, + "learning_rate": 2.3432149349296038e-05, + "loss": 1.9815, + "step": 18363500 + }, + { + "epoch": 53.16, + "learning_rate": 2.343142570164876e-05, + "loss": 1.9796, + "step": 18364000 + }, + { + "epoch": 53.16, + "learning_rate": 2.343070350129678e-05, + "loss": 1.9495, + "step": 18364500 + }, + { + "epoch": 53.16, + "learning_rate": 2.34299798536495e-05, + "loss": 1.9663, + "step": 18365000 + }, + { + "epoch": 53.16, + "learning_rate": 2.3429257653297517e-05, + "loss": 1.9747, + "step": 18365500 + }, + { + "epoch": 53.16, + "learning_rate": 2.342853400565024e-05, + "loss": 1.9441, + "step": 18366000 + }, + { + "epoch": 53.16, + "learning_rate": 2.3427810358002965e-05, + "loss": 1.9691, + "step": 18366500 + }, + { + "epoch": 53.16, + "learning_rate": 2.342708671035569e-05, + "loss": 1.9848, + "step": 18367000 + }, + { + "epoch": 53.17, + "learning_rate": 2.3426363062708413e-05, + "loss": 1.9609, + "step": 18367500 + }, + { + "epoch": 53.17, + "learning_rate": 2.3425639415061135e-05, + "loss": 1.9566, + "step": 18368000 + }, + { + "epoch": 53.17, + "learning_rate": 2.3424915767413857e-05, + "loss": 1.971, + "step": 18368500 + }, + { + "epoch": 53.17, + "learning_rate": 2.3424193567061876e-05, + "loss": 1.9779, + "step": 18369000 + }, + { + "epoch": 53.17, + "learning_rate": 2.3423469919414598e-05, + "loss": 1.9782, + "step": 18369500 + }, + { + "epoch": 53.17, + "learning_rate": 2.3422746271767324e-05, + "loss": 1.971, + "step": 18370000 + }, + { + "epoch": 53.18, + "learning_rate": 2.3422022624120046e-05, + "loss": 1.9711, + "step": 18370500 + }, + { + "epoch": 53.18, + "learning_rate": 2.3421298976472768e-05, + "loss": 1.9585, + "step": 18371000 + }, + { + "epoch": 53.18, + "learning_rate": 2.342057532882549e-05, + "loss": 1.9929, + "step": 18371500 + }, + { + "epoch": 53.18, + "learning_rate": 2.3419851681178216e-05, + "loss": 1.9728, + "step": 18372000 + }, + { + "epoch": 53.18, + "learning_rate": 2.341912803353094e-05, + "loss": 1.9831, + "step": 18372500 + }, + { + "epoch": 53.18, + "learning_rate": 2.3418405833178954e-05, + "loss": 2.0014, + "step": 18373000 + }, + { + "epoch": 53.18, + "learning_rate": 2.341768218553168e-05, + "loss": 1.9673, + "step": 18373500 + }, + { + "epoch": 53.19, + "learning_rate": 2.3416958537884405e-05, + "loss": 1.9837, + "step": 18374000 + }, + { + "epoch": 53.19, + "learning_rate": 2.3416234890237127e-05, + "loss": 1.9887, + "step": 18374500 + }, + { + "epoch": 53.19, + "learning_rate": 2.341551124258985e-05, + "loss": 1.979, + "step": 18375000 + }, + { + "epoch": 53.19, + "learning_rate": 2.3414789042237865e-05, + "loss": 1.9924, + "step": 18375500 + }, + { + "epoch": 53.19, + "learning_rate": 2.341406539459059e-05, + "loss": 1.9692, + "step": 18376000 + }, + { + "epoch": 53.19, + "learning_rate": 2.3413341746943313e-05, + "loss": 1.9963, + "step": 18376500 + }, + { + "epoch": 53.19, + "learning_rate": 2.3412618099296035e-05, + "loss": 1.9979, + "step": 18377000 + }, + { + "epoch": 53.2, + "learning_rate": 2.341189445164876e-05, + "loss": 1.9719, + "step": 18377500 + }, + { + "epoch": 53.2, + "learning_rate": 2.3411170804001483e-05, + "loss": 1.9927, + "step": 18378000 + }, + { + "epoch": 53.2, + "learning_rate": 2.3410447156354205e-05, + "loss": 1.9947, + "step": 18378500 + }, + { + "epoch": 53.2, + "learning_rate": 2.340972350870693e-05, + "loss": 1.9782, + "step": 18379000 + }, + { + "epoch": 53.2, + "learning_rate": 2.3408999861059653e-05, + "loss": 1.9762, + "step": 18379500 + }, + { + "epoch": 53.2, + "learning_rate": 2.3408276213412375e-05, + "loss": 2.0053, + "step": 18380000 + }, + { + "epoch": 53.2, + "learning_rate": 2.3407552565765098e-05, + "loss": 1.9753, + "step": 18380500 + }, + { + "epoch": 53.21, + "learning_rate": 2.3406828918117823e-05, + "loss": 1.9823, + "step": 18381000 + }, + { + "epoch": 53.21, + "learning_rate": 2.3406105270470546e-05, + "loss": 1.952, + "step": 18381500 + }, + { + "epoch": 53.21, + "learning_rate": 2.3405383070118564e-05, + "loss": 1.9826, + "step": 18382000 + }, + { + "epoch": 53.21, + "learning_rate": 2.3404659422471287e-05, + "loss": 1.9764, + "step": 18382500 + }, + { + "epoch": 53.21, + "learning_rate": 2.3403937222119306e-05, + "loss": 1.9394, + "step": 18383000 + }, + { + "epoch": 53.21, + "learning_rate": 2.3403213574472028e-05, + "loss": 1.9628, + "step": 18383500 + }, + { + "epoch": 53.21, + "learning_rate": 2.340248992682475e-05, + "loss": 1.9629, + "step": 18384000 + }, + { + "epoch": 53.22, + "learning_rate": 2.3401766279177476e-05, + "loss": 1.9631, + "step": 18384500 + }, + { + "epoch": 53.22, + "learning_rate": 2.3401042631530198e-05, + "loss": 1.9983, + "step": 18385000 + }, + { + "epoch": 53.22, + "learning_rate": 2.340031898388292e-05, + "loss": 1.9672, + "step": 18385500 + }, + { + "epoch": 53.22, + "learning_rate": 2.339959678353094e-05, + "loss": 1.9705, + "step": 18386000 + }, + { + "epoch": 53.22, + "learning_rate": 2.339887313588366e-05, + "loss": 1.9753, + "step": 18386500 + }, + { + "epoch": 53.22, + "learning_rate": 2.339815093553168e-05, + "loss": 1.9699, + "step": 18387000 + }, + { + "epoch": 53.22, + "learning_rate": 2.3397428735179696e-05, + "loss": 1.9934, + "step": 18387500 + }, + { + "epoch": 53.23, + "learning_rate": 2.3396705087532418e-05, + "loss": 1.9805, + "step": 18388000 + }, + { + "epoch": 53.23, + "learning_rate": 2.3395981439885144e-05, + "loss": 1.9694, + "step": 18388500 + }, + { + "epoch": 53.23, + "learning_rate": 2.339525779223787e-05, + "loss": 1.9764, + "step": 18389000 + }, + { + "epoch": 53.23, + "learning_rate": 2.3394535591885885e-05, + "loss": 1.9851, + "step": 18389500 + }, + { + "epoch": 53.23, + "learning_rate": 2.3393811944238607e-05, + "loss": 1.9718, + "step": 18390000 + }, + { + "epoch": 53.23, + "learning_rate": 2.3393088296591333e-05, + "loss": 1.9777, + "step": 18390500 + }, + { + "epoch": 53.23, + "learning_rate": 2.3392364648944055e-05, + "loss": 1.9766, + "step": 18391000 + }, + { + "epoch": 53.24, + "learning_rate": 2.3391641001296777e-05, + "loss": 1.9806, + "step": 18391500 + }, + { + "epoch": 53.24, + "learning_rate": 2.33909173536495e-05, + "loss": 1.9801, + "step": 18392000 + }, + { + "epoch": 53.24, + "learning_rate": 2.339019515329752e-05, + "loss": 1.9833, + "step": 18392500 + }, + { + "epoch": 53.24, + "learning_rate": 2.3389471505650244e-05, + "loss": 1.989, + "step": 18393000 + }, + { + "epoch": 53.24, + "learning_rate": 2.3388747858002966e-05, + "loss": 1.9707, + "step": 18393500 + }, + { + "epoch": 53.24, + "learning_rate": 2.338802421035569e-05, + "loss": 1.9727, + "step": 18394000 + }, + { + "epoch": 53.24, + "learning_rate": 2.338730056270841e-05, + "loss": 1.9914, + "step": 18394500 + }, + { + "epoch": 53.25, + "learning_rate": 2.3386576915061133e-05, + "loss": 1.9756, + "step": 18395000 + }, + { + "epoch": 53.25, + "learning_rate": 2.338585326741386e-05, + "loss": 1.9794, + "step": 18395500 + }, + { + "epoch": 53.25, + "learning_rate": 2.3385129619766584e-05, + "loss": 1.9773, + "step": 18396000 + }, + { + "epoch": 53.25, + "learning_rate": 2.3384405972119306e-05, + "loss": 1.9603, + "step": 18396500 + }, + { + "epoch": 53.25, + "learning_rate": 2.338368232447203e-05, + "loss": 1.9892, + "step": 18397000 + }, + { + "epoch": 53.25, + "learning_rate": 2.338295867682475e-05, + "loss": 1.9733, + "step": 18397500 + }, + { + "epoch": 53.25, + "learning_rate": 2.3382235029177473e-05, + "loss": 1.9859, + "step": 18398000 + }, + { + "epoch": 53.26, + "learning_rate": 2.3381511381530195e-05, + "loss": 1.9926, + "step": 18398500 + }, + { + "epoch": 53.26, + "learning_rate": 2.338078773388292e-05, + "loss": 1.9821, + "step": 18399000 + }, + { + "epoch": 53.26, + "learning_rate": 2.3380064086235647e-05, + "loss": 1.9747, + "step": 18399500 + }, + { + "epoch": 53.26, + "learning_rate": 2.3379341885883662e-05, + "loss": 2.0066, + "step": 18400000 + }, + { + "epoch": 53.26, + "learning_rate": 2.3378618238236384e-05, + "loss": 1.9795, + "step": 18400500 + }, + { + "epoch": 53.26, + "learning_rate": 2.337789459058911e-05, + "loss": 1.9719, + "step": 18401000 + }, + { + "epoch": 53.26, + "learning_rate": 2.3377172390237126e-05, + "loss": 1.9516, + "step": 18401500 + }, + { + "epoch": 53.27, + "learning_rate": 2.3376448742589848e-05, + "loss": 1.9491, + "step": 18402000 + }, + { + "epoch": 53.27, + "learning_rate": 2.337572509494257e-05, + "loss": 1.9756, + "step": 18402500 + }, + { + "epoch": 53.27, + "learning_rate": 2.3375001447295296e-05, + "loss": 1.9819, + "step": 18403000 + }, + { + "epoch": 53.27, + "learning_rate": 2.3374279246943315e-05, + "loss": 1.9875, + "step": 18403500 + }, + { + "epoch": 53.27, + "learning_rate": 2.3373555599296037e-05, + "loss": 1.9439, + "step": 18404000 + }, + { + "epoch": 53.27, + "learning_rate": 2.3372833398944056e-05, + "loss": 1.9757, + "step": 18404500 + }, + { + "epoch": 53.27, + "learning_rate": 2.3372109751296778e-05, + "loss": 1.9757, + "step": 18405000 + }, + { + "epoch": 53.28, + "learning_rate": 2.33713861036495e-05, + "loss": 1.9674, + "step": 18405500 + }, + { + "epoch": 53.28, + "learning_rate": 2.3370662456002223e-05, + "loss": 1.9897, + "step": 18406000 + }, + { + "epoch": 53.28, + "learning_rate": 2.3369938808354945e-05, + "loss": 1.9969, + "step": 18406500 + }, + { + "epoch": 53.28, + "learning_rate": 2.336921516070767e-05, + "loss": 1.9783, + "step": 18407000 + }, + { + "epoch": 53.28, + "learning_rate": 2.3368491513060396e-05, + "loss": 1.9876, + "step": 18407500 + }, + { + "epoch": 53.28, + "learning_rate": 2.3367767865413118e-05, + "loss": 1.9844, + "step": 18408000 + }, + { + "epoch": 53.29, + "learning_rate": 2.336704421776584e-05, + "loss": 1.9819, + "step": 18408500 + }, + { + "epoch": 53.29, + "learning_rate": 2.3366320570118563e-05, + "loss": 1.9838, + "step": 18409000 + }, + { + "epoch": 53.29, + "learning_rate": 2.3365596922471285e-05, + "loss": 1.9757, + "step": 18409500 + }, + { + "epoch": 53.29, + "learning_rate": 2.336487327482401e-05, + "loss": 1.9818, + "step": 18410000 + }, + { + "epoch": 53.29, + "learning_rate": 2.3364149627176736e-05, + "loss": 1.9932, + "step": 18410500 + }, + { + "epoch": 53.29, + "learning_rate": 2.336342597952946e-05, + "loss": 1.97, + "step": 18411000 + }, + { + "epoch": 53.29, + "learning_rate": 2.3362706673768064e-05, + "loss": 1.9723, + "step": 18411500 + }, + { + "epoch": 53.3, + "learning_rate": 2.3361984473416083e-05, + "loss": 1.9707, + "step": 18412000 + }, + { + "epoch": 53.3, + "learning_rate": 2.3361260825768805e-05, + "loss": 2.0069, + "step": 18412500 + }, + { + "epoch": 53.3, + "learning_rate": 2.3360537178121527e-05, + "loss": 1.9682, + "step": 18413000 + }, + { + "epoch": 53.3, + "learning_rate": 2.335981353047425e-05, + "loss": 1.9825, + "step": 18413500 + }, + { + "epoch": 53.3, + "learning_rate": 2.335909133012227e-05, + "loss": 1.9934, + "step": 18414000 + }, + { + "epoch": 53.3, + "learning_rate": 2.335836768247499e-05, + "loss": 1.9847, + "step": 18414500 + }, + { + "epoch": 53.3, + "learning_rate": 2.3357644034827716e-05, + "loss": 1.9581, + "step": 18415000 + }, + { + "epoch": 53.31, + "learning_rate": 2.335692038718044e-05, + "loss": 2.0025, + "step": 18415500 + }, + { + "epoch": 53.31, + "learning_rate": 2.335619673953316e-05, + "loss": 1.9699, + "step": 18416000 + }, + { + "epoch": 53.31, + "learning_rate": 2.3355473091885887e-05, + "loss": 1.9908, + "step": 18416500 + }, + { + "epoch": 53.31, + "learning_rate": 2.335474944423861e-05, + "loss": 1.9383, + "step": 18417000 + }, + { + "epoch": 53.31, + "learning_rate": 2.335402579659133e-05, + "loss": 1.978, + "step": 18417500 + }, + { + "epoch": 53.31, + "learning_rate": 2.3353302148944053e-05, + "loss": 1.982, + "step": 18418000 + }, + { + "epoch": 53.31, + "learning_rate": 2.335257850129678e-05, + "loss": 1.9866, + "step": 18418500 + }, + { + "epoch": 53.32, + "learning_rate": 2.33518548536495e-05, + "loss": 1.9607, + "step": 18419000 + }, + { + "epoch": 53.32, + "learning_rate": 2.3351131206002223e-05, + "loss": 1.9845, + "step": 18419500 + }, + { + "epoch": 53.32, + "learning_rate": 2.335040755835495e-05, + "loss": 1.9874, + "step": 18420000 + }, + { + "epoch": 53.32, + "learning_rate": 2.334968391070767e-05, + "loss": 1.9703, + "step": 18420500 + }, + { + "epoch": 53.32, + "learning_rate": 2.3348960263060393e-05, + "loss": 1.9927, + "step": 18421000 + }, + { + "epoch": 53.32, + "learning_rate": 2.3348236615413116e-05, + "loss": 1.9578, + "step": 18421500 + }, + { + "epoch": 53.32, + "learning_rate": 2.334751296776584e-05, + "loss": 1.9611, + "step": 18422000 + }, + { + "epoch": 53.33, + "learning_rate": 2.3346789320118563e-05, + "loss": 1.9647, + "step": 18422500 + }, + { + "epoch": 53.33, + "learning_rate": 2.3346065672471286e-05, + "loss": 1.9888, + "step": 18423000 + }, + { + "epoch": 53.33, + "learning_rate": 2.3345343472119305e-05, + "loss": 1.9642, + "step": 18423500 + }, + { + "epoch": 53.33, + "learning_rate": 2.3344619824472027e-05, + "loss": 1.9623, + "step": 18424000 + }, + { + "epoch": 53.33, + "learning_rate": 2.334389617682475e-05, + "loss": 1.9927, + "step": 18424500 + }, + { + "epoch": 53.33, + "learning_rate": 2.3343173976472768e-05, + "loss": 1.9782, + "step": 18425000 + }, + { + "epoch": 53.33, + "learning_rate": 2.334245032882549e-05, + "loss": 1.9881, + "step": 18425500 + }, + { + "epoch": 53.34, + "learning_rate": 2.3341726681178216e-05, + "loss": 1.9551, + "step": 18426000 + }, + { + "epoch": 53.34, + "learning_rate": 2.3341003033530938e-05, + "loss": 2.0059, + "step": 18426500 + }, + { + "epoch": 53.34, + "learning_rate": 2.3340279385883664e-05, + "loss": 1.9952, + "step": 18427000 + }, + { + "epoch": 53.34, + "learning_rate": 2.3339555738236386e-05, + "loss": 2.0009, + "step": 18427500 + }, + { + "epoch": 53.34, + "learning_rate": 2.3338832090589108e-05, + "loss": 1.9534, + "step": 18428000 + }, + { + "epoch": 53.34, + "learning_rate": 2.333810844294183e-05, + "loss": 1.9939, + "step": 18428500 + }, + { + "epoch": 53.34, + "learning_rate": 2.3337384795294553e-05, + "loss": 1.9976, + "step": 18429000 + }, + { + "epoch": 53.35, + "learning_rate": 2.333666114764728e-05, + "loss": 1.9733, + "step": 18429500 + }, + { + "epoch": 53.35, + "learning_rate": 2.33359375e-05, + "loss": 1.9653, + "step": 18430000 + }, + { + "epoch": 53.35, + "learning_rate": 2.3335213852352726e-05, + "loss": 2.0065, + "step": 18430500 + }, + { + "epoch": 53.35, + "learning_rate": 2.333449020470545e-05, + "loss": 1.9642, + "step": 18431000 + }, + { + "epoch": 53.35, + "learning_rate": 2.3333768004353464e-05, + "loss": 1.983, + "step": 18431500 + }, + { + "epoch": 53.35, + "learning_rate": 2.333304435670619e-05, + "loss": 1.9783, + "step": 18432000 + }, + { + "epoch": 53.35, + "learning_rate": 2.3332322156354205e-05, + "loss": 1.9638, + "step": 18432500 + }, + { + "epoch": 53.36, + "learning_rate": 2.3331599956002224e-05, + "loss": 1.9849, + "step": 18433000 + }, + { + "epoch": 53.36, + "learning_rate": 2.333087630835495e-05, + "loss": 1.9796, + "step": 18433500 + }, + { + "epoch": 53.36, + "learning_rate": 2.3330152660707672e-05, + "loss": 1.9485, + "step": 18434000 + }, + { + "epoch": 53.36, + "learning_rate": 2.3329429013060394e-05, + "loss": 1.9978, + "step": 18434500 + }, + { + "epoch": 53.36, + "learning_rate": 2.3328705365413116e-05, + "loss": 1.975, + "step": 18435000 + }, + { + "epoch": 53.36, + "learning_rate": 2.332798171776584e-05, + "loss": 1.9611, + "step": 18435500 + }, + { + "epoch": 53.36, + "learning_rate": 2.3327258070118564e-05, + "loss": 1.985, + "step": 18436000 + }, + { + "epoch": 53.37, + "learning_rate": 2.3326534422471287e-05, + "loss": 1.9762, + "step": 18436500 + }, + { + "epoch": 53.37, + "learning_rate": 2.3325810774824012e-05, + "loss": 1.9919, + "step": 18437000 + }, + { + "epoch": 53.37, + "learning_rate": 2.3325087127176734e-05, + "loss": 1.9502, + "step": 18437500 + }, + { + "epoch": 53.37, + "learning_rate": 2.3324363479529457e-05, + "loss": 1.9571, + "step": 18438000 + }, + { + "epoch": 53.37, + "learning_rate": 2.332363983188218e-05, + "loss": 1.9962, + "step": 18438500 + }, + { + "epoch": 53.37, + "learning_rate": 2.33229161842349e-05, + "loss": 1.9914, + "step": 18439000 + }, + { + "epoch": 53.37, + "learning_rate": 2.332219398388292e-05, + "loss": 1.9753, + "step": 18439500 + }, + { + "epoch": 53.38, + "learning_rate": 2.3321470336235642e-05, + "loss": 1.9779, + "step": 18440000 + }, + { + "epoch": 53.38, + "learning_rate": 2.3320746688588368e-05, + "loss": 1.9748, + "step": 18440500 + }, + { + "epoch": 53.38, + "learning_rate": 2.332002304094109e-05, + "loss": 1.9978, + "step": 18441000 + }, + { + "epoch": 53.38, + "learning_rate": 2.3319299393293816e-05, + "loss": 1.9798, + "step": 18441500 + }, + { + "epoch": 53.38, + "learning_rate": 2.3318575745646538e-05, + "loss": 1.9471, + "step": 18442000 + }, + { + "epoch": 53.38, + "learning_rate": 2.3317853545294554e-05, + "loss": 1.987, + "step": 18442500 + }, + { + "epoch": 53.38, + "learning_rate": 2.3317129897647276e-05, + "loss": 1.9823, + "step": 18443000 + }, + { + "epoch": 53.39, + "learning_rate": 2.3316407697295295e-05, + "loss": 1.9729, + "step": 18443500 + }, + { + "epoch": 53.39, + "learning_rate": 2.3315684049648017e-05, + "loss": 2.0337, + "step": 18444000 + }, + { + "epoch": 53.39, + "learning_rate": 2.3314960402000743e-05, + "loss": 1.9763, + "step": 18444500 + }, + { + "epoch": 53.39, + "learning_rate": 2.3314236754353465e-05, + "loss": 1.9969, + "step": 18445000 + }, + { + "epoch": 53.39, + "learning_rate": 2.331351310670619e-05, + "loss": 1.9718, + "step": 18445500 + }, + { + "epoch": 53.39, + "learning_rate": 2.3312789459058913e-05, + "loss": 1.9568, + "step": 18446000 + }, + { + "epoch": 53.4, + "learning_rate": 2.3312065811411635e-05, + "loss": 1.9875, + "step": 18446500 + }, + { + "epoch": 53.4, + "learning_rate": 2.3311343611059654e-05, + "loss": 1.9727, + "step": 18447000 + }, + { + "epoch": 53.4, + "learning_rate": 2.3310619963412376e-05, + "loss": 1.9693, + "step": 18447500 + }, + { + "epoch": 53.4, + "learning_rate": 2.3309896315765098e-05, + "loss": 1.9814, + "step": 18448000 + }, + { + "epoch": 53.4, + "learning_rate": 2.3309172668117824e-05, + "loss": 1.9998, + "step": 18448500 + }, + { + "epoch": 53.4, + "learning_rate": 2.3308449020470546e-05, + "loss": 1.9844, + "step": 18449000 + }, + { + "epoch": 53.4, + "learning_rate": 2.330772537282327e-05, + "loss": 1.9677, + "step": 18449500 + }, + { + "epoch": 53.41, + "learning_rate": 2.330700172517599e-05, + "loss": 1.9794, + "step": 18450000 + }, + { + "epoch": 53.41, + "learning_rate": 2.3306278077528716e-05, + "loss": 1.9908, + "step": 18450500 + }, + { + "epoch": 53.41, + "learning_rate": 2.330555442988144e-05, + "loss": 1.9769, + "step": 18451000 + }, + { + "epoch": 53.41, + "learning_rate": 2.3304830782234164e-05, + "loss": 1.9681, + "step": 18451500 + }, + { + "epoch": 53.41, + "learning_rate": 2.330410858188218e-05, + "loss": 1.9856, + "step": 18452000 + }, + { + "epoch": 53.41, + "learning_rate": 2.3303384934234905e-05, + "loss": 1.9811, + "step": 18452500 + }, + { + "epoch": 53.41, + "learning_rate": 2.330266273388292e-05, + "loss": 1.9631, + "step": 18453000 + }, + { + "epoch": 53.42, + "learning_rate": 2.3301939086235643e-05, + "loss": 1.966, + "step": 18453500 + }, + { + "epoch": 53.42, + "learning_rate": 2.3301215438588365e-05, + "loss": 1.9759, + "step": 18454000 + }, + { + "epoch": 53.42, + "learning_rate": 2.330049179094109e-05, + "loss": 1.9687, + "step": 18454500 + }, + { + "epoch": 53.42, + "learning_rate": 2.3299768143293813e-05, + "loss": 1.9942, + "step": 18455000 + }, + { + "epoch": 53.42, + "learning_rate": 2.329904449564654e-05, + "loss": 1.9725, + "step": 18455500 + }, + { + "epoch": 53.42, + "learning_rate": 2.329832084799926e-05, + "loss": 1.9803, + "step": 18456000 + }, + { + "epoch": 53.42, + "learning_rate": 2.329759864764728e-05, + "loss": 1.9791, + "step": 18456500 + }, + { + "epoch": 53.43, + "learning_rate": 2.3296875000000002e-05, + "loss": 2.0035, + "step": 18457000 + }, + { + "epoch": 53.43, + "learning_rate": 2.3296151352352724e-05, + "loss": 1.982, + "step": 18457500 + }, + { + "epoch": 53.43, + "learning_rate": 2.3295427704705447e-05, + "loss": 1.9957, + "step": 18458000 + }, + { + "epoch": 53.43, + "learning_rate": 2.329470405705817e-05, + "loss": 1.9552, + "step": 18458500 + }, + { + "epoch": 53.43, + "learning_rate": 2.3293981856706188e-05, + "loss": 1.9938, + "step": 18459000 + }, + { + "epoch": 53.43, + "learning_rate": 2.3293258209058913e-05, + "loss": 1.9578, + "step": 18459500 + }, + { + "epoch": 53.43, + "learning_rate": 2.3292534561411636e-05, + "loss": 1.9722, + "step": 18460000 + }, + { + "epoch": 53.44, + "learning_rate": 2.3291810913764358e-05, + "loss": 1.9793, + "step": 18460500 + }, + { + "epoch": 53.44, + "learning_rate": 2.329109016070767e-05, + "loss": 1.9929, + "step": 18461000 + }, + { + "epoch": 53.44, + "learning_rate": 2.3290366513060392e-05, + "loss": 1.9878, + "step": 18461500 + }, + { + "epoch": 53.44, + "learning_rate": 2.3289642865413118e-05, + "loss": 1.9858, + "step": 18462000 + }, + { + "epoch": 53.44, + "learning_rate": 2.328891921776584e-05, + "loss": 1.9877, + "step": 18462500 + }, + { + "epoch": 53.44, + "learning_rate": 2.3288195570118562e-05, + "loss": 2.0034, + "step": 18463000 + }, + { + "epoch": 53.44, + "learning_rate": 2.3287471922471288e-05, + "loss": 1.9943, + "step": 18463500 + }, + { + "epoch": 53.45, + "learning_rate": 2.328674827482401e-05, + "loss": 1.9697, + "step": 18464000 + }, + { + "epoch": 53.45, + "learning_rate": 2.3286024627176733e-05, + "loss": 1.951, + "step": 18464500 + }, + { + "epoch": 53.45, + "learning_rate": 2.3285300979529455e-05, + "loss": 1.9854, + "step": 18465000 + }, + { + "epoch": 53.45, + "learning_rate": 2.328457733188218e-05, + "loss": 1.975, + "step": 18465500 + }, + { + "epoch": 53.45, + "learning_rate": 2.3283853684234903e-05, + "loss": 1.9879, + "step": 18466000 + }, + { + "epoch": 53.45, + "learning_rate": 2.3283130036587625e-05, + "loss": 1.9848, + "step": 18466500 + }, + { + "epoch": 53.45, + "learning_rate": 2.328240638894035e-05, + "loss": 1.9851, + "step": 18467000 + }, + { + "epoch": 53.46, + "learning_rate": 2.3281682741293073e-05, + "loss": 1.9775, + "step": 18467500 + }, + { + "epoch": 53.46, + "learning_rate": 2.3280959093645795e-05, + "loss": 1.9885, + "step": 18468000 + }, + { + "epoch": 53.46, + "learning_rate": 2.328023544599852e-05, + "loss": 1.9779, + "step": 18468500 + }, + { + "epoch": 53.46, + "learning_rate": 2.3279513245646536e-05, + "loss": 1.9786, + "step": 18469000 + }, + { + "epoch": 53.46, + "learning_rate": 2.327878959799926e-05, + "loss": 1.9662, + "step": 18469500 + }, + { + "epoch": 53.46, + "learning_rate": 2.327806595035198e-05, + "loss": 1.965, + "step": 18470000 + }, + { + "epoch": 53.46, + "learning_rate": 2.3277343750000003e-05, + "loss": 1.9621, + "step": 18470500 + }, + { + "epoch": 53.47, + "learning_rate": 2.3276620102352725e-05, + "loss": 1.9629, + "step": 18471000 + }, + { + "epoch": 53.47, + "learning_rate": 2.3275897902000744e-05, + "loss": 1.9814, + "step": 18471500 + }, + { + "epoch": 53.47, + "learning_rate": 2.3275174254353466e-05, + "loss": 1.9877, + "step": 18472000 + }, + { + "epoch": 53.47, + "learning_rate": 2.327445060670619e-05, + "loss": 1.96, + "step": 18472500 + }, + { + "epoch": 53.47, + "learning_rate": 2.3273728406354208e-05, + "loss": 1.9924, + "step": 18473000 + }, + { + "epoch": 53.47, + "learning_rate": 2.327300475870693e-05, + "loss": 1.957, + "step": 18473500 + }, + { + "epoch": 53.47, + "learning_rate": 2.3272281111059652e-05, + "loss": 1.968, + "step": 18474000 + }, + { + "epoch": 53.48, + "learning_rate": 2.3271557463412378e-05, + "loss": 1.9908, + "step": 18474500 + }, + { + "epoch": 53.48, + "learning_rate": 2.32708338157651e-05, + "loss": 1.9816, + "step": 18475000 + }, + { + "epoch": 53.48, + "learning_rate": 2.3270110168117822e-05, + "loss": 2.0062, + "step": 18475500 + }, + { + "epoch": 53.48, + "learning_rate": 2.326938796776584e-05, + "loss": 1.9813, + "step": 18476000 + }, + { + "epoch": 53.48, + "learning_rate": 2.3268664320118563e-05, + "loss": 1.981, + "step": 18476500 + }, + { + "epoch": 53.48, + "learning_rate": 2.3267940672471286e-05, + "loss": 1.982, + "step": 18477000 + }, + { + "epoch": 53.48, + "learning_rate": 2.3267218472119304e-05, + "loss": 2.0051, + "step": 18477500 + }, + { + "epoch": 53.49, + "learning_rate": 2.3266494824472027e-05, + "loss": 1.9916, + "step": 18478000 + }, + { + "epoch": 53.49, + "learning_rate": 2.3265771176824752e-05, + "loss": 1.9678, + "step": 18478500 + }, + { + "epoch": 53.49, + "learning_rate": 2.3265047529177475e-05, + "loss": 1.9993, + "step": 18479000 + }, + { + "epoch": 53.49, + "learning_rate": 2.3264323881530197e-05, + "loss": 1.9872, + "step": 18479500 + }, + { + "epoch": 53.49, + "learning_rate": 2.326360023388292e-05, + "loss": 1.9807, + "step": 18480000 + }, + { + "epoch": 53.49, + "learning_rate": 2.3262876586235645e-05, + "loss": 1.9553, + "step": 18480500 + }, + { + "epoch": 53.49, + "learning_rate": 2.3262152938588367e-05, + "loss": 2.0118, + "step": 18481000 + }, + { + "epoch": 53.5, + "learning_rate": 2.326142929094109e-05, + "loss": 1.986, + "step": 18481500 + }, + { + "epoch": 53.5, + "learning_rate": 2.3260705643293815e-05, + "loss": 1.9413, + "step": 18482000 + }, + { + "epoch": 53.5, + "learning_rate": 2.3259981995646537e-05, + "loss": 1.9627, + "step": 18482500 + }, + { + "epoch": 53.5, + "learning_rate": 2.325925834799926e-05, + "loss": 1.9992, + "step": 18483000 + }, + { + "epoch": 53.5, + "learning_rate": 2.3258534700351985e-05, + "loss": 1.9868, + "step": 18483500 + }, + { + "epoch": 53.5, + "learning_rate": 2.3257811052704707e-05, + "loss": 1.9832, + "step": 18484000 + }, + { + "epoch": 53.51, + "learning_rate": 2.325708740505743e-05, + "loss": 1.9902, + "step": 18484500 + }, + { + "epoch": 53.51, + "learning_rate": 2.325636375741015e-05, + "loss": 2.0056, + "step": 18485000 + }, + { + "epoch": 53.51, + "learning_rate": 2.3255640109762877e-05, + "loss": 1.988, + "step": 18485500 + }, + { + "epoch": 53.51, + "learning_rate": 2.32549164621156e-05, + "loss": 1.9642, + "step": 18486000 + }, + { + "epoch": 53.51, + "learning_rate": 2.3254194261763618e-05, + "loss": 1.9864, + "step": 18486500 + }, + { + "epoch": 53.51, + "learning_rate": 2.325347061411634e-05, + "loss": 1.9899, + "step": 18487000 + }, + { + "epoch": 53.51, + "learning_rate": 2.3252746966469063e-05, + "loss": 1.9892, + "step": 18487500 + }, + { + "epoch": 53.52, + "learning_rate": 2.3252023318821785e-05, + "loss": 1.9697, + "step": 18488000 + }, + { + "epoch": 53.52, + "learning_rate": 2.3251301118469804e-05, + "loss": 1.9875, + "step": 18488500 + }, + { + "epoch": 53.52, + "learning_rate": 2.325057747082253e-05, + "loss": 1.9764, + "step": 18489000 + }, + { + "epoch": 53.52, + "learning_rate": 2.3249853823175252e-05, + "loss": 1.9739, + "step": 18489500 + }, + { + "epoch": 53.52, + "learning_rate": 2.3249130175527974e-05, + "loss": 1.9968, + "step": 18490000 + }, + { + "epoch": 53.52, + "learning_rate": 2.3248406527880696e-05, + "loss": 1.9841, + "step": 18490500 + }, + { + "epoch": 53.52, + "learning_rate": 2.3247682880233422e-05, + "loss": 1.9549, + "step": 18491000 + }, + { + "epoch": 53.53, + "learning_rate": 2.3246959232586144e-05, + "loss": 1.987, + "step": 18491500 + }, + { + "epoch": 53.53, + "learning_rate": 2.3246235584938866e-05, + "loss": 1.9805, + "step": 18492000 + }, + { + "epoch": 53.53, + "learning_rate": 2.3245511937291592e-05, + "loss": 1.9845, + "step": 18492500 + }, + { + "epoch": 53.53, + "learning_rate": 2.3244788289644314e-05, + "loss": 1.9691, + "step": 18493000 + }, + { + "epoch": 53.53, + "learning_rate": 2.3244064641997036e-05, + "loss": 1.9839, + "step": 18493500 + }, + { + "epoch": 53.53, + "learning_rate": 2.3243340994349762e-05, + "loss": 1.9792, + "step": 18494000 + }, + { + "epoch": 53.53, + "learning_rate": 2.3242617346702484e-05, + "loss": 1.9867, + "step": 18494500 + }, + { + "epoch": 53.54, + "learning_rate": 2.3241893699055207e-05, + "loss": 1.9947, + "step": 18495000 + }, + { + "epoch": 53.54, + "learning_rate": 2.324117294599852e-05, + "loss": 1.9906, + "step": 18495500 + }, + { + "epoch": 53.54, + "learning_rate": 2.3240450745646534e-05, + "loss": 1.9823, + "step": 18496000 + }, + { + "epoch": 53.54, + "learning_rate": 2.323972709799926e-05, + "loss": 1.9686, + "step": 18496500 + }, + { + "epoch": 53.54, + "learning_rate": 2.3239003450351986e-05, + "loss": 1.996, + "step": 18497000 + }, + { + "epoch": 53.54, + "learning_rate": 2.3238279802704708e-05, + "loss": 1.9769, + "step": 18497500 + }, + { + "epoch": 53.54, + "learning_rate": 2.323755615505743e-05, + "loss": 1.9934, + "step": 18498000 + }, + { + "epoch": 53.55, + "learning_rate": 2.3236832507410152e-05, + "loss": 1.9702, + "step": 18498500 + }, + { + "epoch": 53.55, + "learning_rate": 2.3236108859762875e-05, + "loss": 2.0041, + "step": 18499000 + }, + { + "epoch": 53.55, + "learning_rate": 2.3235386659410893e-05, + "loss": 1.9955, + "step": 18499500 + }, + { + "epoch": 53.55, + "learning_rate": 2.3234663011763616e-05, + "loss": 1.9684, + "step": 18500000 + }, + { + "epoch": 53.55, + "learning_rate": 2.323393936411634e-05, + "loss": 1.9927, + "step": 18500500 + }, + { + "epoch": 53.55, + "learning_rate": 2.3233215716469064e-05, + "loss": 1.9685, + "step": 18501000 + }, + { + "epoch": 53.55, + "learning_rate": 2.3232492068821786e-05, + "loss": 1.9769, + "step": 18501500 + }, + { + "epoch": 53.56, + "learning_rate": 2.323176842117451e-05, + "loss": 1.9935, + "step": 18502000 + }, + { + "epoch": 53.56, + "learning_rate": 2.3231044773527234e-05, + "loss": 1.9984, + "step": 18502500 + }, + { + "epoch": 53.56, + "learning_rate": 2.3230321125879956e-05, + "loss": 1.98, + "step": 18503000 + }, + { + "epoch": 53.56, + "learning_rate": 2.3229600372823268e-05, + "loss": 1.9898, + "step": 18503500 + }, + { + "epoch": 53.56, + "learning_rate": 2.322887672517599e-05, + "loss": 1.9866, + "step": 18504000 + }, + { + "epoch": 53.56, + "learning_rate": 2.3228153077528716e-05, + "loss": 1.9736, + "step": 18504500 + }, + { + "epoch": 53.56, + "learning_rate": 2.3227429429881438e-05, + "loss": 2.0032, + "step": 18505000 + }, + { + "epoch": 53.57, + "learning_rate": 2.322670578223416e-05, + "loss": 1.9548, + "step": 18505500 + }, + { + "epoch": 53.57, + "learning_rate": 2.3225982134586886e-05, + "loss": 1.9731, + "step": 18506000 + }, + { + "epoch": 53.57, + "learning_rate": 2.322525848693961e-05, + "loss": 1.9914, + "step": 18506500 + }, + { + "epoch": 53.57, + "learning_rate": 2.322453483929233e-05, + "loss": 1.9964, + "step": 18507000 + }, + { + "epoch": 53.57, + "learning_rate": 2.3223811191645053e-05, + "loss": 1.9671, + "step": 18507500 + }, + { + "epoch": 53.57, + "learning_rate": 2.322308754399778e-05, + "loss": 1.9707, + "step": 18508000 + }, + { + "epoch": 53.57, + "learning_rate": 2.32223638963505e-05, + "loss": 1.987, + "step": 18508500 + }, + { + "epoch": 53.58, + "learning_rate": 2.3221640248703226e-05, + "loss": 1.9718, + "step": 18509000 + }, + { + "epoch": 53.58, + "learning_rate": 2.322091660105595e-05, + "loss": 1.9906, + "step": 18509500 + }, + { + "epoch": 53.58, + "learning_rate": 2.3220194400703964e-05, + "loss": 1.9867, + "step": 18510000 + }, + { + "epoch": 53.58, + "learning_rate": 2.3219470753056686e-05, + "loss": 1.9614, + "step": 18510500 + }, + { + "epoch": 53.58, + "learning_rate": 2.3218747105409412e-05, + "loss": 1.9701, + "step": 18511000 + }, + { + "epoch": 53.58, + "learning_rate": 2.3218023457762138e-05, + "loss": 1.9888, + "step": 18511500 + }, + { + "epoch": 53.58, + "learning_rate": 2.321729981011486e-05, + "loss": 1.9751, + "step": 18512000 + }, + { + "epoch": 53.59, + "learning_rate": 2.3216576162467582e-05, + "loss": 2.0022, + "step": 18512500 + }, + { + "epoch": 53.59, + "learning_rate": 2.3215852514820304e-05, + "loss": 1.9527, + "step": 18513000 + }, + { + "epoch": 53.59, + "learning_rate": 2.3215128867173026e-05, + "loss": 1.9689, + "step": 18513500 + }, + { + "epoch": 53.59, + "learning_rate": 2.3214405219525752e-05, + "loss": 1.9839, + "step": 18514000 + }, + { + "epoch": 53.59, + "learning_rate": 2.3213683019173768e-05, + "loss": 1.9843, + "step": 18514500 + }, + { + "epoch": 53.59, + "learning_rate": 2.3212959371526493e-05, + "loss": 1.9995, + "step": 18515000 + }, + { + "epoch": 53.59, + "learning_rate": 2.3212235723879216e-05, + "loss": 1.9718, + "step": 18515500 + }, + { + "epoch": 53.6, + "learning_rate": 2.3211512076231938e-05, + "loss": 1.9767, + "step": 18516000 + }, + { + "epoch": 53.6, + "learning_rate": 2.3210788428584663e-05, + "loss": 1.9747, + "step": 18516500 + }, + { + "epoch": 53.6, + "learning_rate": 2.3210064780937386e-05, + "loss": 1.9977, + "step": 18517000 + }, + { + "epoch": 53.6, + "learning_rate": 2.3209341133290108e-05, + "loss": 1.9844, + "step": 18517500 + }, + { + "epoch": 53.6, + "learning_rate": 2.320861748564283e-05, + "loss": 1.9881, + "step": 18518000 + }, + { + "epoch": 53.6, + "learning_rate": 2.3207893837995556e-05, + "loss": 1.9758, + "step": 18518500 + }, + { + "epoch": 53.6, + "learning_rate": 2.3207170190348278e-05, + "loss": 1.9508, + "step": 18519000 + }, + { + "epoch": 53.61, + "learning_rate": 2.3206446542701004e-05, + "loss": 1.9746, + "step": 18519500 + }, + { + "epoch": 53.61, + "learning_rate": 2.320572434234902e-05, + "loss": 1.9793, + "step": 18520000 + }, + { + "epoch": 53.61, + "learning_rate": 2.320500069470174e-05, + "loss": 1.9919, + "step": 18520500 + }, + { + "epoch": 53.61, + "learning_rate": 2.320427849434976e-05, + "loss": 1.9814, + "step": 18521000 + }, + { + "epoch": 53.61, + "learning_rate": 2.3203554846702483e-05, + "loss": 1.963, + "step": 18521500 + }, + { + "epoch": 53.61, + "learning_rate": 2.3202831199055205e-05, + "loss": 1.9746, + "step": 18522000 + }, + { + "epoch": 53.62, + "learning_rate": 2.320210755140793e-05, + "loss": 1.9763, + "step": 18522500 + }, + { + "epoch": 53.62, + "learning_rate": 2.3201383903760653e-05, + "loss": 2.0192, + "step": 18523000 + }, + { + "epoch": 53.62, + "learning_rate": 2.3200660256113378e-05, + "loss": 1.9946, + "step": 18523500 + }, + { + "epoch": 53.62, + "learning_rate": 2.3199938055761394e-05, + "loss": 1.9686, + "step": 18524000 + }, + { + "epoch": 53.62, + "learning_rate": 2.3199214408114116e-05, + "loss": 1.9759, + "step": 18524500 + }, + { + "epoch": 53.62, + "learning_rate": 2.319849076046684e-05, + "loss": 1.9838, + "step": 18525000 + }, + { + "epoch": 53.62, + "learning_rate": 2.3197767112819564e-05, + "loss": 1.9859, + "step": 18525500 + }, + { + "epoch": 53.63, + "learning_rate": 2.319704346517229e-05, + "loss": 1.9734, + "step": 18526000 + }, + { + "epoch": 53.63, + "learning_rate": 2.3196321264820305e-05, + "loss": 1.982, + "step": 18526500 + }, + { + "epoch": 53.63, + "learning_rate": 2.3195597617173027e-05, + "loss": 1.9903, + "step": 18527000 + }, + { + "epoch": 53.63, + "learning_rate": 2.3194873969525753e-05, + "loss": 1.9936, + "step": 18527500 + }, + { + "epoch": 53.63, + "learning_rate": 2.3194150321878475e-05, + "loss": 1.9803, + "step": 18528000 + }, + { + "epoch": 53.63, + "learning_rate": 2.3193426674231197e-05, + "loss": 1.9865, + "step": 18528500 + }, + { + "epoch": 53.63, + "learning_rate": 2.319270302658392e-05, + "loss": 2.0077, + "step": 18529000 + }, + { + "epoch": 53.64, + "learning_rate": 2.3191979378936642e-05, + "loss": 2.0014, + "step": 18529500 + }, + { + "epoch": 53.64, + "learning_rate": 2.3191255731289367e-05, + "loss": 1.9753, + "step": 18530000 + }, + { + "epoch": 53.64, + "learning_rate": 2.3190532083642093e-05, + "loss": 1.9789, + "step": 18530500 + }, + { + "epoch": 53.64, + "learning_rate": 2.318980988329011e-05, + "loss": 1.9624, + "step": 18531000 + }, + { + "epoch": 53.64, + "learning_rate": 2.3189090577528718e-05, + "loss": 2.0131, + "step": 18531500 + }, + { + "epoch": 53.64, + "learning_rate": 2.318836692988144e-05, + "loss": 1.9562, + "step": 18532000 + }, + { + "epoch": 53.64, + "learning_rate": 2.3187643282234162e-05, + "loss": 1.9951, + "step": 18532500 + }, + { + "epoch": 53.65, + "learning_rate": 2.3186919634586884e-05, + "loss": 1.9821, + "step": 18533000 + }, + { + "epoch": 53.65, + "learning_rate": 2.3186195986939607e-05, + "loss": 1.9783, + "step": 18533500 + }, + { + "epoch": 53.65, + "learning_rate": 2.3185472339292332e-05, + "loss": 1.9695, + "step": 18534000 + }, + { + "epoch": 53.65, + "learning_rate": 2.3184748691645054e-05, + "loss": 1.9832, + "step": 18534500 + }, + { + "epoch": 53.65, + "learning_rate": 2.318402504399778e-05, + "loss": 1.9784, + "step": 18535000 + }, + { + "epoch": 53.65, + "learning_rate": 2.3183301396350502e-05, + "loss": 1.9656, + "step": 18535500 + }, + { + "epoch": 53.65, + "learning_rate": 2.3182577748703224e-05, + "loss": 1.9775, + "step": 18536000 + }, + { + "epoch": 53.66, + "learning_rate": 2.3181854101055947e-05, + "loss": 1.9756, + "step": 18536500 + }, + { + "epoch": 53.66, + "learning_rate": 2.318113045340867e-05, + "loss": 1.9804, + "step": 18537000 + }, + { + "epoch": 53.66, + "learning_rate": 2.3180406805761395e-05, + "loss": 1.9846, + "step": 18537500 + }, + { + "epoch": 53.66, + "learning_rate": 2.3179684605409414e-05, + "loss": 1.9858, + "step": 18538000 + }, + { + "epoch": 53.66, + "learning_rate": 2.3178960957762136e-05, + "loss": 1.9643, + "step": 18538500 + }, + { + "epoch": 53.66, + "learning_rate": 2.3178237310114858e-05, + "loss": 1.9848, + "step": 18539000 + }, + { + "epoch": 53.66, + "learning_rate": 2.317751366246758e-05, + "loss": 1.9886, + "step": 18539500 + }, + { + "epoch": 53.67, + "learning_rate": 2.31767914621156e-05, + "loss": 1.9631, + "step": 18540000 + }, + { + "epoch": 53.67, + "learning_rate": 2.3176069261763618e-05, + "loss": 1.995, + "step": 18540500 + }, + { + "epoch": 53.67, + "learning_rate": 2.317534561411634e-05, + "loss": 1.9861, + "step": 18541000 + }, + { + "epoch": 53.67, + "learning_rate": 2.3174621966469066e-05, + "loss": 2.0095, + "step": 18541500 + }, + { + "epoch": 53.67, + "learning_rate": 2.3173898318821788e-05, + "loss": 1.9944, + "step": 18542000 + }, + { + "epoch": 53.67, + "learning_rate": 2.317317467117451e-05, + "loss": 1.9909, + "step": 18542500 + }, + { + "epoch": 53.67, + "learning_rate": 2.317245247082253e-05, + "loss": 1.9692, + "step": 18543000 + }, + { + "epoch": 53.68, + "learning_rate": 2.317172882317525e-05, + "loss": 1.9828, + "step": 18543500 + }, + { + "epoch": 53.68, + "learning_rate": 2.3171005175527974e-05, + "loss": 1.9616, + "step": 18544000 + }, + { + "epoch": 53.68, + "learning_rate": 2.3170281527880696e-05, + "loss": 1.9805, + "step": 18544500 + }, + { + "epoch": 53.68, + "learning_rate": 2.3169557880233418e-05, + "loss": 1.9968, + "step": 18545000 + }, + { + "epoch": 53.68, + "learning_rate": 2.3168834232586144e-05, + "loss": 1.9827, + "step": 18545500 + }, + { + "epoch": 53.68, + "learning_rate": 2.316811058493887e-05, + "loss": 1.9793, + "step": 18546000 + }, + { + "epoch": 53.68, + "learning_rate": 2.3167386937291592e-05, + "loss": 1.98, + "step": 18546500 + }, + { + "epoch": 53.69, + "learning_rate": 2.3166663289644314e-05, + "loss": 1.981, + "step": 18547000 + }, + { + "epoch": 53.69, + "learning_rate": 2.3165939641997036e-05, + "loss": 2.0054, + "step": 18547500 + }, + { + "epoch": 53.69, + "learning_rate": 2.3165217441645055e-05, + "loss": 2.0095, + "step": 18548000 + }, + { + "epoch": 53.69, + "learning_rate": 2.3164493793997777e-05, + "loss": 1.9885, + "step": 18548500 + }, + { + "epoch": 53.69, + "learning_rate": 2.3163770146350503e-05, + "loss": 1.9925, + "step": 18549000 + }, + { + "epoch": 53.69, + "learning_rate": 2.316304794599852e-05, + "loss": 1.9851, + "step": 18549500 + }, + { + "epoch": 53.69, + "learning_rate": 2.3162324298351244e-05, + "loss": 1.9664, + "step": 18550000 + }, + { + "epoch": 53.7, + "learning_rate": 2.3161600650703966e-05, + "loss": 1.9891, + "step": 18550500 + }, + { + "epoch": 53.7, + "learning_rate": 2.316087700305669e-05, + "loss": 1.9781, + "step": 18551000 + }, + { + "epoch": 53.7, + "learning_rate": 2.316015335540941e-05, + "loss": 1.9658, + "step": 18551500 + }, + { + "epoch": 53.7, + "learning_rate": 2.3159429707762133e-05, + "loss": 1.9536, + "step": 18552000 + }, + { + "epoch": 53.7, + "learning_rate": 2.315870606011486e-05, + "loss": 1.9932, + "step": 18552500 + }, + { + "epoch": 53.7, + "learning_rate": 2.315798241246758e-05, + "loss": 2.0032, + "step": 18553000 + }, + { + "epoch": 53.7, + "learning_rate": 2.31572602121156e-05, + "loss": 1.9765, + "step": 18553500 + }, + { + "epoch": 53.71, + "learning_rate": 2.3156536564468322e-05, + "loss": 1.9893, + "step": 18554000 + }, + { + "epoch": 53.71, + "learning_rate": 2.3155812916821044e-05, + "loss": 1.9852, + "step": 18554500 + }, + { + "epoch": 53.71, + "learning_rate": 2.315508926917377e-05, + "loss": 1.9894, + "step": 18555000 + }, + { + "epoch": 53.71, + "learning_rate": 2.3154365621526492e-05, + "loss": 1.9962, + "step": 18555500 + }, + { + "epoch": 53.71, + "learning_rate": 2.3153641973879215e-05, + "loss": 1.9938, + "step": 18556000 + }, + { + "epoch": 53.71, + "learning_rate": 2.315291832623194e-05, + "loss": 1.974, + "step": 18556500 + }, + { + "epoch": 53.71, + "learning_rate": 2.315219612587996e-05, + "loss": 1.9922, + "step": 18557000 + }, + { + "epoch": 53.72, + "learning_rate": 2.3151473925527975e-05, + "loss": 1.9615, + "step": 18557500 + }, + { + "epoch": 53.72, + "learning_rate": 2.3150750277880697e-05, + "loss": 1.9832, + "step": 18558000 + }, + { + "epoch": 53.72, + "learning_rate": 2.315002663023342e-05, + "loss": 1.9815, + "step": 18558500 + }, + { + "epoch": 53.72, + "learning_rate": 2.3149302982586145e-05, + "loss": 1.9772, + "step": 18559000 + }, + { + "epoch": 53.72, + "learning_rate": 2.3148579334938867e-05, + "loss": 1.9732, + "step": 18559500 + }, + { + "epoch": 53.72, + "learning_rate": 2.3147855687291593e-05, + "loss": 1.9781, + "step": 18560000 + }, + { + "epoch": 53.73, + "learning_rate": 2.3147132039644315e-05, + "loss": 1.9769, + "step": 18560500 + }, + { + "epoch": 53.73, + "learning_rate": 2.3146408391997037e-05, + "loss": 1.9796, + "step": 18561000 + }, + { + "epoch": 53.73, + "learning_rate": 2.314568474434976e-05, + "loss": 1.9901, + "step": 18561500 + }, + { + "epoch": 53.73, + "learning_rate": 2.3144961096702485e-05, + "loss": 1.9933, + "step": 18562000 + }, + { + "epoch": 53.73, + "learning_rate": 2.3144237449055207e-05, + "loss": 2.0076, + "step": 18562500 + }, + { + "epoch": 53.73, + "learning_rate": 2.314351380140793e-05, + "loss": 1.9545, + "step": 18563000 + }, + { + "epoch": 53.73, + "learning_rate": 2.3142790153760655e-05, + "loss": 1.9834, + "step": 18563500 + }, + { + "epoch": 53.74, + "learning_rate": 2.314206795340867e-05, + "loss": 1.9618, + "step": 18564000 + }, + { + "epoch": 53.74, + "learning_rate": 2.3141344305761396e-05, + "loss": 1.9764, + "step": 18564500 + }, + { + "epoch": 53.74, + "learning_rate": 2.314062065811412e-05, + "loss": 1.9795, + "step": 18565000 + }, + { + "epoch": 53.74, + "learning_rate": 2.313989701046684e-05, + "loss": 1.9741, + "step": 18565500 + }, + { + "epoch": 53.74, + "learning_rate": 2.313917481011486e-05, + "loss": 1.9776, + "step": 18566000 + }, + { + "epoch": 53.74, + "learning_rate": 2.3138451162467582e-05, + "loss": 1.9981, + "step": 18566500 + }, + { + "epoch": 53.74, + "learning_rate": 2.3137727514820304e-05, + "loss": 1.9971, + "step": 18567000 + }, + { + "epoch": 53.75, + "learning_rate": 2.313700386717303e-05, + "loss": 1.9761, + "step": 18567500 + }, + { + "epoch": 53.75, + "learning_rate": 2.3136280219525752e-05, + "loss": 1.9973, + "step": 18568000 + }, + { + "epoch": 53.75, + "learning_rate": 2.313555801917377e-05, + "loss": 1.9885, + "step": 18568500 + }, + { + "epoch": 53.75, + "learning_rate": 2.3134834371526493e-05, + "loss": 1.9616, + "step": 18569000 + }, + { + "epoch": 53.75, + "learning_rate": 2.3134110723879215e-05, + "loss": 1.9812, + "step": 18569500 + }, + { + "epoch": 53.75, + "learning_rate": 2.3133387076231938e-05, + "loss": 1.9851, + "step": 18570000 + }, + { + "epoch": 53.75, + "learning_rate": 2.313266342858466e-05, + "loss": 1.9853, + "step": 18570500 + }, + { + "epoch": 53.76, + "learning_rate": 2.3131939780937385e-05, + "loss": 1.9838, + "step": 18571000 + }, + { + "epoch": 53.76, + "learning_rate": 2.313121613329011e-05, + "loss": 2.0016, + "step": 18571500 + }, + { + "epoch": 53.76, + "learning_rate": 2.3130492485642833e-05, + "loss": 1.9983, + "step": 18572000 + }, + { + "epoch": 53.76, + "learning_rate": 2.3129768837995555e-05, + "loss": 1.9862, + "step": 18572500 + }, + { + "epoch": 53.76, + "learning_rate": 2.3129048084938868e-05, + "loss": 1.9626, + "step": 18573000 + }, + { + "epoch": 53.76, + "learning_rate": 2.312832443729159e-05, + "loss": 1.9602, + "step": 18573500 + }, + { + "epoch": 53.76, + "learning_rate": 2.312760223693961e-05, + "loss": 1.9766, + "step": 18574000 + }, + { + "epoch": 53.77, + "learning_rate": 2.312687858929233e-05, + "loss": 1.9783, + "step": 18574500 + }, + { + "epoch": 53.77, + "learning_rate": 2.3126154941645053e-05, + "loss": 1.9648, + "step": 18575000 + }, + { + "epoch": 53.77, + "learning_rate": 2.312543129399778e-05, + "loss": 1.9704, + "step": 18575500 + }, + { + "epoch": 53.77, + "learning_rate": 2.31247076463505e-05, + "loss": 1.9966, + "step": 18576000 + }, + { + "epoch": 53.77, + "learning_rate": 2.3123983998703223e-05, + "loss": 2.0068, + "step": 18576500 + }, + { + "epoch": 53.77, + "learning_rate": 2.312326035105595e-05, + "loss": 1.9816, + "step": 18577000 + }, + { + "epoch": 53.77, + "learning_rate": 2.312253670340867e-05, + "loss": 1.9781, + "step": 18577500 + }, + { + "epoch": 53.78, + "learning_rate": 2.3121813055761394e-05, + "loss": 1.9711, + "step": 18578000 + }, + { + "epoch": 53.78, + "learning_rate": 2.312108940811412e-05, + "loss": 1.9773, + "step": 18578500 + }, + { + "epoch": 53.78, + "learning_rate": 2.3120367207762135e-05, + "loss": 1.9914, + "step": 18579000 + }, + { + "epoch": 53.78, + "learning_rate": 2.3119645007410154e-05, + "loss": 1.9976, + "step": 18579500 + }, + { + "epoch": 53.78, + "learning_rate": 2.3118921359762876e-05, + "loss": 1.9868, + "step": 18580000 + }, + { + "epoch": 53.78, + "learning_rate": 2.3118197712115598e-05, + "loss": 1.9853, + "step": 18580500 + }, + { + "epoch": 53.78, + "learning_rate": 2.3117474064468324e-05, + "loss": 1.9839, + "step": 18581000 + }, + { + "epoch": 53.79, + "learning_rate": 2.3116750416821046e-05, + "loss": 1.9685, + "step": 18581500 + }, + { + "epoch": 53.79, + "learning_rate": 2.3116026769173768e-05, + "loss": 1.9671, + "step": 18582000 + }, + { + "epoch": 53.79, + "learning_rate": 2.3115303121526494e-05, + "loss": 1.9713, + "step": 18582500 + }, + { + "epoch": 53.79, + "learning_rate": 2.3114580921174513e-05, + "loss": 2.0246, + "step": 18583000 + }, + { + "epoch": 53.79, + "learning_rate": 2.3113857273527235e-05, + "loss": 1.9872, + "step": 18583500 + }, + { + "epoch": 53.79, + "learning_rate": 2.3113133625879957e-05, + "loss": 1.9721, + "step": 18584000 + }, + { + "epoch": 53.79, + "learning_rate": 2.311240997823268e-05, + "loss": 1.9725, + "step": 18584500 + }, + { + "epoch": 53.8, + "learning_rate": 2.3111686330585402e-05, + "loss": 1.9603, + "step": 18585000 + }, + { + "epoch": 53.8, + "learning_rate": 2.3110962682938124e-05, + "loss": 1.9736, + "step": 18585500 + }, + { + "epoch": 53.8, + "learning_rate": 2.311023903529085e-05, + "loss": 1.9872, + "step": 18586000 + }, + { + "epoch": 53.8, + "learning_rate": 2.3109515387643575e-05, + "loss": 1.965, + "step": 18586500 + }, + { + "epoch": 53.8, + "learning_rate": 2.3108791739996297e-05, + "loss": 1.9736, + "step": 18587000 + }, + { + "epoch": 53.8, + "learning_rate": 2.3108069539644313e-05, + "loss": 1.9737, + "step": 18587500 + }, + { + "epoch": 53.8, + "learning_rate": 2.310734589199704e-05, + "loss": 1.9819, + "step": 18588000 + }, + { + "epoch": 53.81, + "learning_rate": 2.310662224434976e-05, + "loss": 1.9605, + "step": 18588500 + }, + { + "epoch": 53.81, + "learning_rate": 2.3105900043997776e-05, + "loss": 1.9803, + "step": 18589000 + }, + { + "epoch": 53.81, + "learning_rate": 2.31051763963505e-05, + "loss": 1.9787, + "step": 18589500 + }, + { + "epoch": 53.81, + "learning_rate": 2.3104452748703224e-05, + "loss": 1.9768, + "step": 18590000 + }, + { + "epoch": 53.81, + "learning_rate": 2.310372910105595e-05, + "loss": 1.9723, + "step": 18590500 + }, + { + "epoch": 53.81, + "learning_rate": 2.3103005453408672e-05, + "loss": 1.9877, + "step": 18591000 + }, + { + "epoch": 53.81, + "learning_rate": 2.3102281805761394e-05, + "loss": 1.982, + "step": 18591500 + }, + { + "epoch": 53.82, + "learning_rate": 2.3101558158114117e-05, + "loss": 1.9823, + "step": 18592000 + }, + { + "epoch": 53.82, + "learning_rate": 2.310083451046684e-05, + "loss": 1.9687, + "step": 18592500 + }, + { + "epoch": 53.82, + "learning_rate": 2.3100110862819564e-05, + "loss": 1.9737, + "step": 18593000 + }, + { + "epoch": 53.82, + "learning_rate": 2.309938721517229e-05, + "loss": 1.974, + "step": 18593500 + }, + { + "epoch": 53.82, + "learning_rate": 2.3098663567525012e-05, + "loss": 1.9682, + "step": 18594000 + }, + { + "epoch": 53.82, + "learning_rate": 2.3097939919877735e-05, + "loss": 1.9722, + "step": 18594500 + }, + { + "epoch": 53.82, + "learning_rate": 2.309721771952575e-05, + "loss": 2.0019, + "step": 18595000 + }, + { + "epoch": 53.83, + "learning_rate": 2.3096494071878476e-05, + "loss": 1.9915, + "step": 18595500 + }, + { + "epoch": 53.83, + "learning_rate": 2.3095770424231198e-05, + "loss": 1.9927, + "step": 18596000 + }, + { + "epoch": 53.83, + "learning_rate": 2.309504677658392e-05, + "loss": 1.9811, + "step": 18596500 + }, + { + "epoch": 53.83, + "learning_rate": 2.3094323128936642e-05, + "loss": 1.9671, + "step": 18597000 + }, + { + "epoch": 53.83, + "learning_rate": 2.3093599481289368e-05, + "loss": 1.9942, + "step": 18597500 + }, + { + "epoch": 53.83, + "learning_rate": 2.309287583364209e-05, + "loss": 1.9792, + "step": 18598000 + }, + { + "epoch": 53.84, + "learning_rate": 2.309215363329011e-05, + "loss": 1.993, + "step": 18598500 + }, + { + "epoch": 53.84, + "learning_rate": 2.3091431432938128e-05, + "loss": 1.9648, + "step": 18599000 + }, + { + "epoch": 53.84, + "learning_rate": 2.309070778529085e-05, + "loss": 1.9998, + "step": 18599500 + }, + { + "epoch": 53.84, + "learning_rate": 2.3089984137643573e-05, + "loss": 1.9957, + "step": 18600000 + }, + { + "epoch": 53.84, + "learning_rate": 2.3089260489996295e-05, + "loss": 1.9859, + "step": 18600500 + }, + { + "epoch": 53.84, + "learning_rate": 2.308853684234902e-05, + "loss": 1.9957, + "step": 18601000 + }, + { + "epoch": 53.84, + "learning_rate": 2.3087813194701743e-05, + "loss": 1.9848, + "step": 18601500 + }, + { + "epoch": 53.85, + "learning_rate": 2.3087089547054465e-05, + "loss": 1.9688, + "step": 18602000 + }, + { + "epoch": 53.85, + "learning_rate": 2.308636589940719e-05, + "loss": 1.9789, + "step": 18602500 + }, + { + "epoch": 53.85, + "learning_rate": 2.3085642251759913e-05, + "loss": 2.0074, + "step": 18603000 + }, + { + "epoch": 53.85, + "learning_rate": 2.3084918604112635e-05, + "loss": 1.9633, + "step": 18603500 + }, + { + "epoch": 53.85, + "learning_rate": 2.3084194956465357e-05, + "loss": 1.9842, + "step": 18604000 + }, + { + "epoch": 53.85, + "learning_rate": 2.3083471308818083e-05, + "loss": 1.9528, + "step": 18604500 + }, + { + "epoch": 53.85, + "learning_rate": 2.3082747661170805e-05, + "loss": 1.9576, + "step": 18605000 + }, + { + "epoch": 53.86, + "learning_rate": 2.3082025460818824e-05, + "loss": 1.9821, + "step": 18605500 + }, + { + "epoch": 53.86, + "learning_rate": 2.308130326046684e-05, + "loss": 1.9856, + "step": 18606000 + }, + { + "epoch": 53.86, + "learning_rate": 2.308058106011486e-05, + "loss": 1.9824, + "step": 18606500 + }, + { + "epoch": 53.86, + "learning_rate": 2.307985741246758e-05, + "loss": 1.9942, + "step": 18607000 + }, + { + "epoch": 53.86, + "learning_rate": 2.3079133764820303e-05, + "loss": 1.9804, + "step": 18607500 + }, + { + "epoch": 53.86, + "learning_rate": 2.307841011717303e-05, + "loss": 1.9873, + "step": 18608000 + }, + { + "epoch": 53.86, + "learning_rate": 2.3077686469525754e-05, + "loss": 1.9831, + "step": 18608500 + }, + { + "epoch": 53.87, + "learning_rate": 2.3076962821878477e-05, + "loss": 1.9942, + "step": 18609000 + }, + { + "epoch": 53.87, + "learning_rate": 2.30762391742312e-05, + "loss": 1.9978, + "step": 18609500 + }, + { + "epoch": 53.87, + "learning_rate": 2.307551552658392e-05, + "loss": 1.9988, + "step": 18610000 + }, + { + "epoch": 53.87, + "learning_rate": 2.3074794773527233e-05, + "loss": 1.9962, + "step": 18610500 + }, + { + "epoch": 53.87, + "learning_rate": 2.3074071125879955e-05, + "loss": 1.9646, + "step": 18611000 + }, + { + "epoch": 53.87, + "learning_rate": 2.3073347478232678e-05, + "loss": 1.9864, + "step": 18611500 + }, + { + "epoch": 53.87, + "learning_rate": 2.3072623830585403e-05, + "loss": 2.0023, + "step": 18612000 + }, + { + "epoch": 53.88, + "learning_rate": 2.307190018293813e-05, + "loss": 1.9507, + "step": 18612500 + }, + { + "epoch": 53.88, + "learning_rate": 2.307117653529085e-05, + "loss": 2.0081, + "step": 18613000 + }, + { + "epoch": 53.88, + "learning_rate": 2.3070454334938867e-05, + "loss": 1.969, + "step": 18613500 + }, + { + "epoch": 53.88, + "learning_rate": 2.3069730687291592e-05, + "loss": 1.9804, + "step": 18614000 + }, + { + "epoch": 53.88, + "learning_rate": 2.3069007039644315e-05, + "loss": 1.999, + "step": 18614500 + }, + { + "epoch": 53.88, + "learning_rate": 2.3068283391997037e-05, + "loss": 1.9796, + "step": 18615000 + }, + { + "epoch": 53.88, + "learning_rate": 2.306755974434976e-05, + "loss": 1.9745, + "step": 18615500 + }, + { + "epoch": 53.89, + "learning_rate": 2.306683609670248e-05, + "loss": 1.97, + "step": 18616000 + }, + { + "epoch": 53.89, + "learning_rate": 2.3066112449055207e-05, + "loss": 1.9902, + "step": 18616500 + }, + { + "epoch": 53.89, + "learning_rate": 2.306538880140793e-05, + "loss": 1.985, + "step": 18617000 + }, + { + "epoch": 53.89, + "learning_rate": 2.3064665153760655e-05, + "loss": 1.9857, + "step": 18617500 + }, + { + "epoch": 53.89, + "learning_rate": 2.3063941506113377e-05, + "loss": 1.9919, + "step": 18618000 + }, + { + "epoch": 53.89, + "learning_rate": 2.3063219305761393e-05, + "loss": 2.0152, + "step": 18618500 + }, + { + "epoch": 53.89, + "learning_rate": 2.306249710540941e-05, + "loss": 1.9883, + "step": 18619000 + }, + { + "epoch": 53.9, + "learning_rate": 2.306177490505743e-05, + "loss": 1.9685, + "step": 18619500 + }, + { + "epoch": 53.9, + "learning_rate": 2.3061051257410153e-05, + "loss": 1.9622, + "step": 18620000 + }, + { + "epoch": 53.9, + "learning_rate": 2.3060327609762878e-05, + "loss": 1.99, + "step": 18620500 + }, + { + "epoch": 53.9, + "learning_rate": 2.30596039621156e-05, + "loss": 1.9843, + "step": 18621000 + }, + { + "epoch": 53.9, + "learning_rate": 2.3058880314468323e-05, + "loss": 1.9861, + "step": 18621500 + }, + { + "epoch": 53.9, + "learning_rate": 2.3058156666821045e-05, + "loss": 1.9717, + "step": 18622000 + }, + { + "epoch": 53.9, + "learning_rate": 2.3057433019173767e-05, + "loss": 1.9743, + "step": 18622500 + }, + { + "epoch": 53.91, + "learning_rate": 2.3056709371526493e-05, + "loss": 1.9953, + "step": 18623000 + }, + { + "epoch": 53.91, + "learning_rate": 2.3055985723879215e-05, + "loss": 1.9987, + "step": 18623500 + }, + { + "epoch": 53.91, + "learning_rate": 2.3055263523527234e-05, + "loss": 1.9885, + "step": 18624000 + }, + { + "epoch": 53.91, + "learning_rate": 2.3054541323175253e-05, + "loss": 1.9826, + "step": 18624500 + }, + { + "epoch": 53.91, + "learning_rate": 2.3053817675527975e-05, + "loss": 2.0006, + "step": 18625000 + }, + { + "epoch": 53.91, + "learning_rate": 2.3053094027880697e-05, + "loss": 1.9751, + "step": 18625500 + }, + { + "epoch": 53.91, + "learning_rate": 2.305237038023342e-05, + "loss": 1.9808, + "step": 18626000 + }, + { + "epoch": 53.92, + "learning_rate": 2.3051646732586142e-05, + "loss": 1.9922, + "step": 18626500 + }, + { + "epoch": 53.92, + "learning_rate": 2.3050923084938868e-05, + "loss": 1.9823, + "step": 18627000 + }, + { + "epoch": 53.92, + "learning_rate": 2.3050200884586883e-05, + "loss": 1.9734, + "step": 18627500 + }, + { + "epoch": 53.92, + "learning_rate": 2.304947723693961e-05, + "loss": 1.9958, + "step": 18628000 + }, + { + "epoch": 53.92, + "learning_rate": 2.304875358929233e-05, + "loss": 1.9657, + "step": 18628500 + }, + { + "epoch": 53.92, + "learning_rate": 2.3048029941645057e-05, + "loss": 1.9613, + "step": 18629000 + }, + { + "epoch": 53.92, + "learning_rate": 2.304730629399778e-05, + "loss": 2.0197, + "step": 18629500 + }, + { + "epoch": 53.93, + "learning_rate": 2.30465826463505e-05, + "loss": 1.9939, + "step": 18630000 + }, + { + "epoch": 53.93, + "learning_rate": 2.3045858998703223e-05, + "loss": 1.9585, + "step": 18630500 + }, + { + "epoch": 53.93, + "learning_rate": 2.3045135351055946e-05, + "loss": 1.9737, + "step": 18631000 + }, + { + "epoch": 53.93, + "learning_rate": 2.304441170340867e-05, + "loss": 1.9689, + "step": 18631500 + }, + { + "epoch": 53.93, + "learning_rate": 2.3043688055761393e-05, + "loss": 1.972, + "step": 18632000 + }, + { + "epoch": 53.93, + "learning_rate": 2.304296440811412e-05, + "loss": 1.9721, + "step": 18632500 + }, + { + "epoch": 53.93, + "learning_rate": 2.304224076046684e-05, + "loss": 1.9969, + "step": 18633000 + }, + { + "epoch": 53.94, + "learning_rate": 2.3041518560114857e-05, + "loss": 2.0207, + "step": 18633500 + }, + { + "epoch": 53.94, + "learning_rate": 2.3040794912467582e-05, + "loss": 1.9884, + "step": 18634000 + }, + { + "epoch": 53.94, + "learning_rate": 2.3040071264820305e-05, + "loss": 1.9912, + "step": 18634500 + }, + { + "epoch": 53.94, + "learning_rate": 2.3039349064468324e-05, + "loss": 2.0088, + "step": 18635000 + }, + { + "epoch": 53.94, + "learning_rate": 2.3038625416821046e-05, + "loss": 1.972, + "step": 18635500 + }, + { + "epoch": 53.94, + "learning_rate": 2.303790176917377e-05, + "loss": 1.9748, + "step": 18636000 + }, + { + "epoch": 53.95, + "learning_rate": 2.3037179568821787e-05, + "loss": 1.9882, + "step": 18636500 + }, + { + "epoch": 53.95, + "learning_rate": 2.303645592117451e-05, + "loss": 1.9838, + "step": 18637000 + }, + { + "epoch": 53.95, + "learning_rate": 2.303573227352723e-05, + "loss": 2.0027, + "step": 18637500 + }, + { + "epoch": 53.95, + "learning_rate": 2.3035008625879957e-05, + "loss": 1.9771, + "step": 18638000 + }, + { + "epoch": 53.95, + "learning_rate": 2.303428497823268e-05, + "loss": 1.9936, + "step": 18638500 + }, + { + "epoch": 53.95, + "learning_rate": 2.3033561330585405e-05, + "loss": 1.9601, + "step": 18639000 + }, + { + "epoch": 53.95, + "learning_rate": 2.3032837682938127e-05, + "loss": 1.975, + "step": 18639500 + }, + { + "epoch": 53.96, + "learning_rate": 2.303211403529085e-05, + "loss": 2.005, + "step": 18640000 + }, + { + "epoch": 53.96, + "learning_rate": 2.303139183493887e-05, + "loss": 1.9746, + "step": 18640500 + }, + { + "epoch": 53.96, + "learning_rate": 2.303066818729159e-05, + "loss": 1.9831, + "step": 18641000 + }, + { + "epoch": 53.96, + "learning_rate": 2.3029944539644313e-05, + "loss": 2.0014, + "step": 18641500 + }, + { + "epoch": 53.96, + "learning_rate": 2.3029220891997035e-05, + "loss": 1.9682, + "step": 18642000 + }, + { + "epoch": 53.96, + "learning_rate": 2.302849724434976e-05, + "loss": 1.9895, + "step": 18642500 + }, + { + "epoch": 53.96, + "learning_rate": 2.302777504399778e-05, + "loss": 1.9823, + "step": 18643000 + }, + { + "epoch": 53.97, + "learning_rate": 2.3027051396350502e-05, + "loss": 1.9855, + "step": 18643500 + }, + { + "epoch": 53.97, + "learning_rate": 2.3026327748703224e-05, + "loss": 1.9806, + "step": 18644000 + }, + { + "epoch": 53.97, + "learning_rate": 2.3025604101055946e-05, + "loss": 2.0, + "step": 18644500 + }, + { + "epoch": 53.97, + "learning_rate": 2.3024880453408672e-05, + "loss": 1.9613, + "step": 18645000 + }, + { + "epoch": 53.97, + "learning_rate": 2.3024156805761394e-05, + "loss": 1.9813, + "step": 18645500 + }, + { + "epoch": 53.97, + "learning_rate": 2.302343315811412e-05, + "loss": 1.9873, + "step": 18646000 + }, + { + "epoch": 53.97, + "learning_rate": 2.3022709510466842e-05, + "loss": 1.9753, + "step": 18646500 + }, + { + "epoch": 53.98, + "learning_rate": 2.3021987310114858e-05, + "loss": 1.9924, + "step": 18647000 + }, + { + "epoch": 53.98, + "learning_rate": 2.3021263662467583e-05, + "loss": 1.9912, + "step": 18647500 + }, + { + "epoch": 53.98, + "learning_rate": 2.3020540014820305e-05, + "loss": 1.9881, + "step": 18648000 + }, + { + "epoch": 53.98, + "learning_rate": 2.3019816367173028e-05, + "loss": 2.0051, + "step": 18648500 + }, + { + "epoch": 53.98, + "learning_rate": 2.301909271952575e-05, + "loss": 1.9798, + "step": 18649000 + }, + { + "epoch": 53.98, + "learning_rate": 2.3018369071878472e-05, + "loss": 1.9722, + "step": 18649500 + }, + { + "epoch": 53.98, + "learning_rate": 2.3017645424231198e-05, + "loss": 1.9945, + "step": 18650000 + }, + { + "epoch": 53.99, + "learning_rate": 2.3016921776583923e-05, + "loss": 1.9944, + "step": 18650500 + }, + { + "epoch": 53.99, + "learning_rate": 2.3016198128936646e-05, + "loss": 1.9719, + "step": 18651000 + }, + { + "epoch": 53.99, + "learning_rate": 2.3015474481289368e-05, + "loss": 2.0136, + "step": 18651500 + }, + { + "epoch": 53.99, + "learning_rate": 2.301475083364209e-05, + "loss": 1.9766, + "step": 18652000 + }, + { + "epoch": 53.99, + "learning_rate": 2.3014027185994812e-05, + "loss": 1.9841, + "step": 18652500 + }, + { + "epoch": 53.99, + "learning_rate": 2.3013303538347535e-05, + "loss": 1.9878, + "step": 18653000 + }, + { + "epoch": 53.99, + "learning_rate": 2.3012581337995557e-05, + "loss": 1.9761, + "step": 18653500 + }, + { + "epoch": 54.0, + "learning_rate": 2.301185769034828e-05, + "loss": 1.9663, + "step": 18654000 + }, + { + "epoch": 54.0, + "learning_rate": 2.3011134042701e-05, + "loss": 1.9654, + "step": 18654500 + }, + { + "epoch": 54.0, + "learning_rate": 2.3010410395053724e-05, + "loss": 1.99, + "step": 18655000 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.6748646316641627, + "eval_accuracy_mlm": 0.6411324675857366, + "eval_accuracy_nsp": 0.8557813495391694, + "eval_loss": 2.1620771884918213, + "eval_runtime": 331.6379, + "eval_samples_per_second": 1315.851, + "eval_steps_per_second": 54.828, + "step": 18655488 + }, + { + "epoch": 54.0, + "learning_rate": 2.300968674740645e-05, + "loss": 1.9749, + "step": 18655500 + }, + { + "epoch": 54.0, + "learning_rate": 2.3008964547054465e-05, + "loss": 1.989, + "step": 18656000 + }, + { + "epoch": 54.0, + "learning_rate": 2.3008240899407187e-05, + "loss": 1.9611, + "step": 18656500 + }, + { + "epoch": 54.0, + "learning_rate": 2.300751725175991e-05, + "loss": 1.9425, + "step": 18657000 + }, + { + "epoch": 54.01, + "learning_rate": 2.3006793604112635e-05, + "loss": 1.9418, + "step": 18657500 + }, + { + "epoch": 54.01, + "learning_rate": 2.300606995646536e-05, + "loss": 1.974, + "step": 18658000 + }, + { + "epoch": 54.01, + "learning_rate": 2.3005346308818083e-05, + "loss": 1.96, + "step": 18658500 + }, + { + "epoch": 54.01, + "learning_rate": 2.3004624108466098e-05, + "loss": 1.9959, + "step": 18659000 + }, + { + "epoch": 54.01, + "learning_rate": 2.3003900460818824e-05, + "loss": 1.9524, + "step": 18659500 + }, + { + "epoch": 54.01, + "learning_rate": 2.300317826046684e-05, + "loss": 1.9542, + "step": 18660000 + }, + { + "epoch": 54.01, + "learning_rate": 2.300245461281956e-05, + "loss": 1.9436, + "step": 18660500 + }, + { + "epoch": 54.02, + "learning_rate": 2.3001730965172287e-05, + "loss": 1.937, + "step": 18661000 + }, + { + "epoch": 54.02, + "learning_rate": 2.3001007317525013e-05, + "loss": 1.978, + "step": 18661500 + }, + { + "epoch": 54.02, + "learning_rate": 2.3000283669877735e-05, + "loss": 1.9311, + "step": 18662000 + }, + { + "epoch": 54.02, + "learning_rate": 2.2999560022230457e-05, + "loss": 1.961, + "step": 18662500 + }, + { + "epoch": 54.02, + "learning_rate": 2.2998837821878473e-05, + "loss": 1.9807, + "step": 18663000 + }, + { + "epoch": 54.02, + "learning_rate": 2.29981141742312e-05, + "loss": 1.9532, + "step": 18663500 + }, + { + "epoch": 54.02, + "learning_rate": 2.299739052658392e-05, + "loss": 1.9303, + "step": 18664000 + }, + { + "epoch": 54.03, + "learning_rate": 2.2996666878936643e-05, + "loss": 1.9717, + "step": 18664500 + }, + { + "epoch": 54.03, + "learning_rate": 2.299594323128937e-05, + "loss": 1.9747, + "step": 18665000 + }, + { + "epoch": 54.03, + "learning_rate": 2.2995221030937388e-05, + "loss": 1.9615, + "step": 18665500 + }, + { + "epoch": 54.03, + "learning_rate": 2.299449738329011e-05, + "loss": 1.9499, + "step": 18666000 + }, + { + "epoch": 54.03, + "learning_rate": 2.2993775182938125e-05, + "loss": 1.9542, + "step": 18666500 + }, + { + "epoch": 54.03, + "learning_rate": 2.299305153529085e-05, + "loss": 1.9658, + "step": 18667000 + }, + { + "epoch": 54.03, + "learning_rate": 2.2992327887643573e-05, + "loss": 1.9685, + "step": 18667500 + }, + { + "epoch": 54.04, + "learning_rate": 2.2991604239996295e-05, + "loss": 1.959, + "step": 18668000 + }, + { + "epoch": 54.04, + "learning_rate": 2.299088059234902e-05, + "loss": 1.9664, + "step": 18668500 + }, + { + "epoch": 54.04, + "learning_rate": 2.2990156944701743e-05, + "loss": 1.9978, + "step": 18669000 + }, + { + "epoch": 54.04, + "learning_rate": 2.2989433297054466e-05, + "loss": 1.9838, + "step": 18669500 + }, + { + "epoch": 54.04, + "learning_rate": 2.2988711096702484e-05, + "loss": 1.9701, + "step": 18670000 + }, + { + "epoch": 54.04, + "learning_rate": 2.2987987449055207e-05, + "loss": 1.9517, + "step": 18670500 + }, + { + "epoch": 54.04, + "learning_rate": 2.298726380140793e-05, + "loss": 1.9762, + "step": 18671000 + }, + { + "epoch": 54.05, + "learning_rate": 2.298654015376065e-05, + "loss": 1.9523, + "step": 18671500 + }, + { + "epoch": 54.05, + "learning_rate": 2.2985816506113377e-05, + "loss": 1.9649, + "step": 18672000 + }, + { + "epoch": 54.05, + "learning_rate": 2.29850928584661e-05, + "loss": 1.9701, + "step": 18672500 + }, + { + "epoch": 54.05, + "learning_rate": 2.2984369210818825e-05, + "loss": 1.9505, + "step": 18673000 + }, + { + "epoch": 54.05, + "learning_rate": 2.2983645563171547e-05, + "loss": 1.9537, + "step": 18673500 + }, + { + "epoch": 54.05, + "learning_rate": 2.298292191552427e-05, + "loss": 1.9605, + "step": 18674000 + }, + { + "epoch": 54.06, + "learning_rate": 2.298219826787699e-05, + "loss": 1.9759, + "step": 18674500 + }, + { + "epoch": 54.06, + "learning_rate": 2.2981474620229714e-05, + "loss": 1.9464, + "step": 18675000 + }, + { + "epoch": 54.06, + "learning_rate": 2.2980752419877733e-05, + "loss": 1.9613, + "step": 18675500 + }, + { + "epoch": 54.06, + "learning_rate": 2.2980028772230458e-05, + "loss": 1.9844, + "step": 18676000 + }, + { + "epoch": 54.06, + "learning_rate": 2.297930512458318e-05, + "loss": 1.9785, + "step": 18676500 + }, + { + "epoch": 54.06, + "learning_rate": 2.2978581476935903e-05, + "loss": 1.9745, + "step": 18677000 + }, + { + "epoch": 54.06, + "learning_rate": 2.2977857829288625e-05, + "loss": 1.977, + "step": 18677500 + }, + { + "epoch": 54.07, + "learning_rate": 2.297713418164135e-05, + "loss": 1.9492, + "step": 18678000 + }, + { + "epoch": 54.07, + "learning_rate": 2.2976411981289366e-05, + "loss": 1.9386, + "step": 18678500 + }, + { + "epoch": 54.07, + "learning_rate": 2.2975689780937385e-05, + "loss": 1.9818, + "step": 18679000 + }, + { + "epoch": 54.07, + "learning_rate": 2.2974966133290107e-05, + "loss": 1.9845, + "step": 18679500 + }, + { + "epoch": 54.07, + "learning_rate": 2.2974242485642833e-05, + "loss": 1.9707, + "step": 18680000 + }, + { + "epoch": 54.07, + "learning_rate": 2.2973518837995555e-05, + "loss": 1.9861, + "step": 18680500 + }, + { + "epoch": 54.07, + "learning_rate": 2.2972795190348277e-05, + "loss": 1.972, + "step": 18681000 + }, + { + "epoch": 54.08, + "learning_rate": 2.2972071542701003e-05, + "loss": 1.9857, + "step": 18681500 + }, + { + "epoch": 54.08, + "learning_rate": 2.2971347895053725e-05, + "loss": 1.997, + "step": 18682000 + }, + { + "epoch": 54.08, + "learning_rate": 2.2970624247406447e-05, + "loss": 1.9697, + "step": 18682500 + }, + { + "epoch": 54.08, + "learning_rate": 2.296990059975917e-05, + "loss": 1.9562, + "step": 18683000 + }, + { + "epoch": 54.08, + "learning_rate": 2.2969176952111895e-05, + "loss": 1.9566, + "step": 18683500 + }, + { + "epoch": 54.08, + "learning_rate": 2.2968454751759914e-05, + "loss": 1.9511, + "step": 18684000 + }, + { + "epoch": 54.08, + "learning_rate": 2.2967731104112636e-05, + "loss": 1.9855, + "step": 18684500 + }, + { + "epoch": 54.09, + "learning_rate": 2.296700745646536e-05, + "loss": 1.9849, + "step": 18685000 + }, + { + "epoch": 54.09, + "learning_rate": 2.296628380881808e-05, + "loss": 1.9792, + "step": 18685500 + }, + { + "epoch": 54.09, + "learning_rate": 2.2965560161170803e-05, + "loss": 1.9504, + "step": 18686000 + }, + { + "epoch": 54.09, + "learning_rate": 2.296483651352353e-05, + "loss": 1.9719, + "step": 18686500 + }, + { + "epoch": 54.09, + "learning_rate": 2.2964112865876254e-05, + "loss": 1.9783, + "step": 18687000 + }, + { + "epoch": 54.09, + "learning_rate": 2.2963389218228977e-05, + "loss": 1.9833, + "step": 18687500 + }, + { + "epoch": 54.09, + "learning_rate": 2.29626655705817e-05, + "loss": 1.9791, + "step": 18688000 + }, + { + "epoch": 54.1, + "learning_rate": 2.2961943370229714e-05, + "loss": 1.9724, + "step": 18688500 + }, + { + "epoch": 54.1, + "learning_rate": 2.296121972258244e-05, + "loss": 1.9606, + "step": 18689000 + }, + { + "epoch": 54.1, + "learning_rate": 2.2960497522230456e-05, + "loss": 1.95, + "step": 18689500 + }, + { + "epoch": 54.1, + "learning_rate": 2.2959773874583178e-05, + "loss": 1.9931, + "step": 18690000 + }, + { + "epoch": 54.1, + "learning_rate": 2.2959050226935903e-05, + "loss": 1.9732, + "step": 18690500 + }, + { + "epoch": 54.1, + "learning_rate": 2.295832657928863e-05, + "loss": 1.9623, + "step": 18691000 + }, + { + "epoch": 54.1, + "learning_rate": 2.295760293164135e-05, + "loss": 1.9422, + "step": 18691500 + }, + { + "epoch": 54.11, + "learning_rate": 2.2956880731289367e-05, + "loss": 1.9926, + "step": 18692000 + }, + { + "epoch": 54.11, + "learning_rate": 2.2956157083642092e-05, + "loss": 1.9801, + "step": 18692500 + }, + { + "epoch": 54.11, + "learning_rate": 2.2955433435994815e-05, + "loss": 1.9738, + "step": 18693000 + }, + { + "epoch": 54.11, + "learning_rate": 2.2954709788347537e-05, + "loss": 1.9774, + "step": 18693500 + }, + { + "epoch": 54.11, + "learning_rate": 2.2953987587995552e-05, + "loss": 1.971, + "step": 18694000 + }, + { + "epoch": 54.11, + "learning_rate": 2.2953263940348278e-05, + "loss": 1.9697, + "step": 18694500 + }, + { + "epoch": 54.11, + "learning_rate": 2.2952540292701004e-05, + "loss": 1.9626, + "step": 18695000 + }, + { + "epoch": 54.12, + "learning_rate": 2.2951816645053726e-05, + "loss": 1.9507, + "step": 18695500 + }, + { + "epoch": 54.12, + "learning_rate": 2.2951092997406448e-05, + "loss": 1.9681, + "step": 18696000 + }, + { + "epoch": 54.12, + "learning_rate": 2.295036934975917e-05, + "loss": 1.9865, + "step": 18696500 + }, + { + "epoch": 54.12, + "learning_rate": 2.2949645702111893e-05, + "loss": 1.9749, + "step": 18697000 + }, + { + "epoch": 54.12, + "learning_rate": 2.2948922054464618e-05, + "loss": 1.9684, + "step": 18697500 + }, + { + "epoch": 54.12, + "learning_rate": 2.2948198406817344e-05, + "loss": 1.9572, + "step": 18698000 + }, + { + "epoch": 54.12, + "learning_rate": 2.2947474759170066e-05, + "loss": 1.9998, + "step": 18698500 + }, + { + "epoch": 54.13, + "learning_rate": 2.294675111152279e-05, + "loss": 1.9766, + "step": 18699000 + }, + { + "epoch": 54.13, + "learning_rate": 2.294602746387551e-05, + "loss": 1.9736, + "step": 18699500 + }, + { + "epoch": 54.13, + "learning_rate": 2.2945303816228233e-05, + "loss": 1.9768, + "step": 18700000 + }, + { + "epoch": 54.13, + "learning_rate": 2.2944581615876252e-05, + "loss": 1.9658, + "step": 18700500 + }, + { + "epoch": 54.13, + "learning_rate": 2.2943857968228974e-05, + "loss": 1.9635, + "step": 18701000 + }, + { + "epoch": 54.13, + "learning_rate": 2.2943135767876993e-05, + "loss": 1.9801, + "step": 18701500 + }, + { + "epoch": 54.13, + "learning_rate": 2.294241212022972e-05, + "loss": 1.9434, + "step": 18702000 + }, + { + "epoch": 54.14, + "learning_rate": 2.294168847258244e-05, + "loss": 1.965, + "step": 18702500 + }, + { + "epoch": 54.14, + "learning_rate": 2.2940964824935163e-05, + "loss": 1.9571, + "step": 18703000 + }, + { + "epoch": 54.14, + "learning_rate": 2.2940241177287885e-05, + "loss": 1.9681, + "step": 18703500 + }, + { + "epoch": 54.14, + "learning_rate": 2.2939518976935904e-05, + "loss": 1.9707, + "step": 18704000 + }, + { + "epoch": 54.14, + "learning_rate": 2.293879677658392e-05, + "loss": 1.9979, + "step": 18704500 + }, + { + "epoch": 54.14, + "learning_rate": 2.2938073128936642e-05, + "loss": 1.9697, + "step": 18705000 + }, + { + "epoch": 54.14, + "learning_rate": 2.2937349481289368e-05, + "loss": 1.9726, + "step": 18705500 + }, + { + "epoch": 54.15, + "learning_rate": 2.2936625833642093e-05, + "loss": 1.982, + "step": 18706000 + }, + { + "epoch": 54.15, + "learning_rate": 2.2935902185994815e-05, + "loss": 1.9816, + "step": 18706500 + }, + { + "epoch": 54.15, + "learning_rate": 2.2935178538347538e-05, + "loss": 1.9581, + "step": 18707000 + }, + { + "epoch": 54.15, + "learning_rate": 2.293445489070026e-05, + "loss": 1.9768, + "step": 18707500 + }, + { + "epoch": 54.15, + "learning_rate": 2.2933731243052982e-05, + "loss": 1.9745, + "step": 18708000 + }, + { + "epoch": 54.15, + "learning_rate": 2.2933007595405704e-05, + "loss": 1.9767, + "step": 18708500 + }, + { + "epoch": 54.15, + "learning_rate": 2.293228394775843e-05, + "loss": 1.9503, + "step": 18709000 + }, + { + "epoch": 54.16, + "learning_rate": 2.2931560300111156e-05, + "loss": 1.953, + "step": 18709500 + }, + { + "epoch": 54.16, + "learning_rate": 2.2930836652463878e-05, + "loss": 1.9805, + "step": 18710000 + }, + { + "epoch": 54.16, + "learning_rate": 2.29301130048166e-05, + "loss": 1.9731, + "step": 18710500 + }, + { + "epoch": 54.16, + "learning_rate": 2.2929389357169322e-05, + "loss": 1.9517, + "step": 18711000 + }, + { + "epoch": 54.16, + "learning_rate": 2.292866715681734e-05, + "loss": 1.9836, + "step": 18711500 + }, + { + "epoch": 54.16, + "learning_rate": 2.2927943509170064e-05, + "loss": 1.9705, + "step": 18712000 + }, + { + "epoch": 54.17, + "learning_rate": 2.2927219861522786e-05, + "loss": 1.9788, + "step": 18712500 + }, + { + "epoch": 54.17, + "learning_rate": 2.292649621387551e-05, + "loss": 1.9633, + "step": 18713000 + }, + { + "epoch": 54.17, + "learning_rate": 2.292577401352353e-05, + "loss": 1.9693, + "step": 18713500 + }, + { + "epoch": 54.17, + "learning_rate": 2.2925050365876253e-05, + "loss": 1.9745, + "step": 18714000 + }, + { + "epoch": 54.17, + "learning_rate": 2.2924326718228975e-05, + "loss": 1.9628, + "step": 18714500 + }, + { + "epoch": 54.17, + "learning_rate": 2.2923603070581697e-05, + "loss": 1.9419, + "step": 18715000 + }, + { + "epoch": 54.17, + "learning_rate": 2.292288231752501e-05, + "loss": 1.9785, + "step": 18715500 + }, + { + "epoch": 54.18, + "learning_rate": 2.292215866987773e-05, + "loss": 1.9875, + "step": 18716000 + }, + { + "epoch": 54.18, + "learning_rate": 2.2921435022230457e-05, + "loss": 1.971, + "step": 18716500 + }, + { + "epoch": 54.18, + "learning_rate": 2.2920711374583183e-05, + "loss": 1.9516, + "step": 18717000 + }, + { + "epoch": 54.18, + "learning_rate": 2.2919987726935905e-05, + "loss": 1.9766, + "step": 18717500 + }, + { + "epoch": 54.18, + "learning_rate": 2.291926552658392e-05, + "loss": 1.9938, + "step": 18718000 + }, + { + "epoch": 54.18, + "learning_rate": 2.291854332623194e-05, + "loss": 1.9883, + "step": 18718500 + }, + { + "epoch": 54.18, + "learning_rate": 2.2917819678584662e-05, + "loss": 1.9766, + "step": 18719000 + }, + { + "epoch": 54.19, + "learning_rate": 2.2917096030937384e-05, + "loss": 1.9667, + "step": 18719500 + }, + { + "epoch": 54.19, + "learning_rate": 2.2916372383290106e-05, + "loss": 1.9927, + "step": 18720000 + }, + { + "epoch": 54.19, + "learning_rate": 2.2915648735642832e-05, + "loss": 1.9991, + "step": 18720500 + }, + { + "epoch": 54.19, + "learning_rate": 2.2914925087995557e-05, + "loss": 1.98, + "step": 18721000 + }, + { + "epoch": 54.19, + "learning_rate": 2.291420144034828e-05, + "loss": 1.9589, + "step": 18721500 + }, + { + "epoch": 54.19, + "learning_rate": 2.2913477792701002e-05, + "loss": 1.9911, + "step": 18722000 + }, + { + "epoch": 54.19, + "learning_rate": 2.2912754145053724e-05, + "loss": 1.9764, + "step": 18722500 + }, + { + "epoch": 54.2, + "learning_rate": 2.2912030497406446e-05, + "loss": 1.9558, + "step": 18723000 + }, + { + "epoch": 54.2, + "learning_rate": 2.2911308297054465e-05, + "loss": 1.9424, + "step": 18723500 + }, + { + "epoch": 54.2, + "learning_rate": 2.2910584649407188e-05, + "loss": 1.9339, + "step": 18724000 + }, + { + "epoch": 54.2, + "learning_rate": 2.2909862449055207e-05, + "loss": 1.9473, + "step": 18724500 + }, + { + "epoch": 54.2, + "learning_rate": 2.2909138801407932e-05, + "loss": 1.9723, + "step": 18725000 + }, + { + "epoch": 54.2, + "learning_rate": 2.2908416601055948e-05, + "loss": 1.9499, + "step": 18725500 + }, + { + "epoch": 54.2, + "learning_rate": 2.290769295340867e-05, + "loss": 1.9573, + "step": 18726000 + }, + { + "epoch": 54.21, + "learning_rate": 2.2906969305761396e-05, + "loss": 1.9441, + "step": 18726500 + }, + { + "epoch": 54.21, + "learning_rate": 2.2906245658114118e-05, + "loss": 1.9648, + "step": 18727000 + }, + { + "epoch": 54.21, + "learning_rate": 2.290552201046684e-05, + "loss": 1.9646, + "step": 18727500 + }, + { + "epoch": 54.21, + "learning_rate": 2.2904798362819562e-05, + "loss": 1.9873, + "step": 18728000 + }, + { + "epoch": 54.21, + "learning_rate": 2.2904074715172288e-05, + "loss": 1.9569, + "step": 18728500 + }, + { + "epoch": 54.21, + "learning_rate": 2.290335106752501e-05, + "loss": 1.9871, + "step": 18729000 + }, + { + "epoch": 54.21, + "learning_rate": 2.2902627419877736e-05, + "loss": 1.9619, + "step": 18729500 + }, + { + "epoch": 54.22, + "learning_rate": 2.290190521952575e-05, + "loss": 1.9702, + "step": 18730000 + }, + { + "epoch": 54.22, + "learning_rate": 2.2901181571878474e-05, + "loss": 1.9613, + "step": 18730500 + }, + { + "epoch": 54.22, + "learning_rate": 2.2900457924231196e-05, + "loss": 1.9746, + "step": 18731000 + }, + { + "epoch": 54.22, + "learning_rate": 2.289973427658392e-05, + "loss": 1.9629, + "step": 18731500 + }, + { + "epoch": 54.22, + "learning_rate": 2.2899010628936644e-05, + "loss": 1.9776, + "step": 18732000 + }, + { + "epoch": 54.22, + "learning_rate": 2.289828698128937e-05, + "loss": 1.9721, + "step": 18732500 + }, + { + "epoch": 54.22, + "learning_rate": 2.289756333364209e-05, + "loss": 1.9817, + "step": 18733000 + }, + { + "epoch": 54.23, + "learning_rate": 2.2896839685994814e-05, + "loss": 1.9654, + "step": 18733500 + }, + { + "epoch": 54.23, + "learning_rate": 2.2896116038347536e-05, + "loss": 1.9739, + "step": 18734000 + }, + { + "epoch": 54.23, + "learning_rate": 2.2895393837995555e-05, + "loss": 1.9686, + "step": 18734500 + }, + { + "epoch": 54.23, + "learning_rate": 2.2894670190348277e-05, + "loss": 1.9627, + "step": 18735000 + }, + { + "epoch": 54.23, + "learning_rate": 2.2893946542701e-05, + "loss": 1.9513, + "step": 18735500 + }, + { + "epoch": 54.23, + "learning_rate": 2.289322434234902e-05, + "loss": 1.952, + "step": 18736000 + }, + { + "epoch": 54.23, + "learning_rate": 2.2892500694701744e-05, + "loss": 1.965, + "step": 18736500 + }, + { + "epoch": 54.24, + "learning_rate": 2.2891777047054466e-05, + "loss": 1.9408, + "step": 18737000 + }, + { + "epoch": 54.24, + "learning_rate": 2.289105339940719e-05, + "loss": 1.97, + "step": 18737500 + }, + { + "epoch": 54.24, + "learning_rate": 2.289032975175991e-05, + "loss": 1.9741, + "step": 18738000 + }, + { + "epoch": 54.24, + "learning_rate": 2.288960755140793e-05, + "loss": 1.9617, + "step": 18738500 + }, + { + "epoch": 54.24, + "learning_rate": 2.2888883903760652e-05, + "loss": 1.993, + "step": 18739000 + }, + { + "epoch": 54.24, + "learning_rate": 2.2888160256113374e-05, + "loss": 1.99, + "step": 18739500 + }, + { + "epoch": 54.24, + "learning_rate": 2.28874366084661e-05, + "loss": 1.9614, + "step": 18740000 + }, + { + "epoch": 54.25, + "learning_rate": 2.2886712960818822e-05, + "loss": 1.9825, + "step": 18740500 + }, + { + "epoch": 54.25, + "learning_rate": 2.2885989313171547e-05, + "loss": 1.9864, + "step": 18741000 + }, + { + "epoch": 54.25, + "learning_rate": 2.288526566552427e-05, + "loss": 1.9846, + "step": 18741500 + }, + { + "epoch": 54.25, + "learning_rate": 2.2884542017876992e-05, + "loss": 1.9706, + "step": 18742000 + }, + { + "epoch": 54.25, + "learning_rate": 2.288381981752501e-05, + "loss": 1.9504, + "step": 18742500 + }, + { + "epoch": 54.25, + "learning_rate": 2.2883096169877733e-05, + "loss": 1.9685, + "step": 18743000 + }, + { + "epoch": 54.25, + "learning_rate": 2.288237252223046e-05, + "loss": 1.9769, + "step": 18743500 + }, + { + "epoch": 54.26, + "learning_rate": 2.288164887458318e-05, + "loss": 1.9631, + "step": 18744000 + }, + { + "epoch": 54.26, + "learning_rate": 2.2880925226935903e-05, + "loss": 1.9563, + "step": 18744500 + }, + { + "epoch": 54.26, + "learning_rate": 2.2880201579288625e-05, + "loss": 1.9756, + "step": 18745000 + }, + { + "epoch": 54.26, + "learning_rate": 2.2879477931641348e-05, + "loss": 1.9615, + "step": 18745500 + }, + { + "epoch": 54.26, + "learning_rate": 2.2878754283994073e-05, + "loss": 1.9712, + "step": 18746000 + }, + { + "epoch": 54.26, + "learning_rate": 2.287803208364209e-05, + "loss": 1.9825, + "step": 18746500 + }, + { + "epoch": 54.26, + "learning_rate": 2.2877308435994814e-05, + "loss": 1.9642, + "step": 18747000 + }, + { + "epoch": 54.27, + "learning_rate": 2.2876584788347537e-05, + "loss": 1.9806, + "step": 18747500 + }, + { + "epoch": 54.27, + "learning_rate": 2.2875862587995556e-05, + "loss": 1.9813, + "step": 18748000 + }, + { + "epoch": 54.27, + "learning_rate": 2.2875138940348278e-05, + "loss": 1.9832, + "step": 18748500 + }, + { + "epoch": 54.27, + "learning_rate": 2.2874415292701e-05, + "loss": 1.9737, + "step": 18749000 + }, + { + "epoch": 54.27, + "learning_rate": 2.2873691645053726e-05, + "loss": 1.9814, + "step": 18749500 + }, + { + "epoch": 54.27, + "learning_rate": 2.2872967997406448e-05, + "loss": 1.9765, + "step": 18750000 + }, + { + "epoch": 54.28, + "learning_rate": 2.287224434975917e-05, + "loss": 2.0037, + "step": 18750500 + }, + { + "epoch": 54.28, + "learning_rate": 2.2871520702111896e-05, + "loss": 1.979, + "step": 18751000 + }, + { + "epoch": 54.28, + "learning_rate": 2.287079850175991e-05, + "loss": 1.9738, + "step": 18751500 + }, + { + "epoch": 54.28, + "learning_rate": 2.2870074854112637e-05, + "loss": 1.9666, + "step": 18752000 + }, + { + "epoch": 54.28, + "learning_rate": 2.286935120646536e-05, + "loss": 1.9738, + "step": 18752500 + }, + { + "epoch": 54.28, + "learning_rate": 2.2868629006113375e-05, + "loss": 1.9733, + "step": 18753000 + }, + { + "epoch": 54.28, + "learning_rate": 2.28679053584661e-05, + "loss": 1.9713, + "step": 18753500 + }, + { + "epoch": 54.29, + "learning_rate": 2.2867181710818823e-05, + "loss": 1.9857, + "step": 18754000 + }, + { + "epoch": 54.29, + "learning_rate": 2.2866458063171548e-05, + "loss": 1.9733, + "step": 18754500 + }, + { + "epoch": 54.29, + "learning_rate": 2.286573441552427e-05, + "loss": 2.0092, + "step": 18755000 + }, + { + "epoch": 54.29, + "learning_rate": 2.286501221517229e-05, + "loss": 1.9798, + "step": 18755500 + }, + { + "epoch": 54.29, + "learning_rate": 2.286428856752501e-05, + "loss": 1.9647, + "step": 18756000 + }, + { + "epoch": 54.29, + "learning_rate": 2.2863564919877734e-05, + "loss": 1.971, + "step": 18756500 + }, + { + "epoch": 54.29, + "learning_rate": 2.2862841272230456e-05, + "loss": 1.9652, + "step": 18757000 + }, + { + "epoch": 54.3, + "learning_rate": 2.286211762458318e-05, + "loss": 1.9859, + "step": 18757500 + }, + { + "epoch": 54.3, + "learning_rate": 2.28613939769359e-05, + "loss": 1.9703, + "step": 18758000 + }, + { + "epoch": 54.3, + "learning_rate": 2.2860671776583923e-05, + "loss": 1.9667, + "step": 18758500 + }, + { + "epoch": 54.3, + "learning_rate": 2.2859948128936645e-05, + "loss": 1.9545, + "step": 18759000 + }, + { + "epoch": 54.3, + "learning_rate": 2.2859224481289367e-05, + "loss": 1.9872, + "step": 18759500 + }, + { + "epoch": 54.3, + "learning_rate": 2.285850083364209e-05, + "loss": 1.9827, + "step": 18760000 + }, + { + "epoch": 54.3, + "learning_rate": 2.285777863329011e-05, + "loss": 1.9698, + "step": 18760500 + }, + { + "epoch": 54.31, + "learning_rate": 2.285705498564283e-05, + "loss": 1.9914, + "step": 18761000 + }, + { + "epoch": 54.31, + "learning_rate": 2.2856331337995553e-05, + "loss": 1.9836, + "step": 18761500 + }, + { + "epoch": 54.31, + "learning_rate": 2.2855607690348275e-05, + "loss": 1.9744, + "step": 18762000 + }, + { + "epoch": 54.31, + "learning_rate": 2.2854884042701e-05, + "loss": 1.9856, + "step": 18762500 + }, + { + "epoch": 54.31, + "learning_rate": 2.285416184234902e-05, + "loss": 1.9496, + "step": 18763000 + }, + { + "epoch": 54.31, + "learning_rate": 2.2853438194701742e-05, + "loss": 2.0061, + "step": 18763500 + }, + { + "epoch": 54.31, + "learning_rate": 2.2852714547054464e-05, + "loss": 1.9877, + "step": 18764000 + }, + { + "epoch": 54.32, + "learning_rate": 2.285199089940719e-05, + "loss": 1.9656, + "step": 18764500 + }, + { + "epoch": 54.32, + "learning_rate": 2.2851267251759912e-05, + "loss": 1.9677, + "step": 18765000 + }, + { + "epoch": 54.32, + "learning_rate": 2.2850543604112634e-05, + "loss": 1.9717, + "step": 18765500 + }, + { + "epoch": 54.32, + "learning_rate": 2.284981995646536e-05, + "loss": 1.9945, + "step": 18766000 + }, + { + "epoch": 54.32, + "learning_rate": 2.2849096308818082e-05, + "loss": 1.9732, + "step": 18766500 + }, + { + "epoch": 54.32, + "learning_rate": 2.2848372661170805e-05, + "loss": 1.9514, + "step": 18767000 + }, + { + "epoch": 54.32, + "learning_rate": 2.2847649013523527e-05, + "loss": 1.9702, + "step": 18767500 + }, + { + "epoch": 54.33, + "learning_rate": 2.2846925365876252e-05, + "loss": 2.014, + "step": 18768000 + }, + { + "epoch": 54.33, + "learning_rate": 2.2846201718228975e-05, + "loss": 1.9648, + "step": 18768500 + }, + { + "epoch": 54.33, + "learning_rate": 2.2845478070581697e-05, + "loss": 1.9746, + "step": 18769000 + }, + { + "epoch": 54.33, + "learning_rate": 2.284475731752501e-05, + "loss": 1.9629, + "step": 18769500 + }, + { + "epoch": 54.33, + "learning_rate": 2.2844033669877735e-05, + "loss": 1.9798, + "step": 18770000 + }, + { + "epoch": 54.33, + "learning_rate": 2.2843310022230457e-05, + "loss": 1.9898, + "step": 18770500 + }, + { + "epoch": 54.33, + "learning_rate": 2.2842587821878476e-05, + "loss": 1.9612, + "step": 18771000 + }, + { + "epoch": 54.34, + "learning_rate": 2.2841864174231198e-05, + "loss": 1.9806, + "step": 18771500 + }, + { + "epoch": 54.34, + "learning_rate": 2.284114052658392e-05, + "loss": 1.9766, + "step": 18772000 + }, + { + "epoch": 54.34, + "learning_rate": 2.2840416878936643e-05, + "loss": 2.0022, + "step": 18772500 + }, + { + "epoch": 54.34, + "learning_rate": 2.2839693231289365e-05, + "loss": 1.9966, + "step": 18773000 + }, + { + "epoch": 54.34, + "learning_rate": 2.283896958364209e-05, + "loss": 1.9809, + "step": 18773500 + }, + { + "epoch": 54.34, + "learning_rate": 2.2838245935994816e-05, + "loss": 1.9697, + "step": 18774000 + }, + { + "epoch": 54.34, + "learning_rate": 2.2837522288347538e-05, + "loss": 1.9675, + "step": 18774500 + }, + { + "epoch": 54.35, + "learning_rate": 2.2836800087995554e-05, + "loss": 1.9566, + "step": 18775000 + }, + { + "epoch": 54.35, + "learning_rate": 2.283607644034828e-05, + "loss": 1.9554, + "step": 18775500 + }, + { + "epoch": 54.35, + "learning_rate": 2.2835352792701002e-05, + "loss": 1.9611, + "step": 18776000 + }, + { + "epoch": 54.35, + "learning_rate": 2.2834629145053724e-05, + "loss": 1.9823, + "step": 18776500 + }, + { + "epoch": 54.35, + "learning_rate": 2.283390549740645e-05, + "loss": 1.9743, + "step": 18777000 + }, + { + "epoch": 54.35, + "learning_rate": 2.2833181849759172e-05, + "loss": 1.9487, + "step": 18777500 + }, + { + "epoch": 54.35, + "learning_rate": 2.2832461096702484e-05, + "loss": 1.9819, + "step": 18778000 + }, + { + "epoch": 54.36, + "learning_rate": 2.2831737449055206e-05, + "loss": 1.9609, + "step": 18778500 + }, + { + "epoch": 54.36, + "learning_rate": 2.283101380140793e-05, + "loss": 1.9679, + "step": 18779000 + }, + { + "epoch": 54.36, + "learning_rate": 2.2830290153760654e-05, + "loss": 1.984, + "step": 18779500 + }, + { + "epoch": 54.36, + "learning_rate": 2.2829566506113376e-05, + "loss": 1.9903, + "step": 18780000 + }, + { + "epoch": 54.36, + "learning_rate": 2.28288428584661e-05, + "loss": 1.962, + "step": 18780500 + }, + { + "epoch": 54.36, + "learning_rate": 2.2828119210818824e-05, + "loss": 1.9751, + "step": 18781000 + }, + { + "epoch": 54.36, + "learning_rate": 2.2827395563171546e-05, + "loss": 1.9589, + "step": 18781500 + }, + { + "epoch": 54.37, + "learning_rate": 2.282667191552427e-05, + "loss": 1.9688, + "step": 18782000 + }, + { + "epoch": 54.37, + "learning_rate": 2.282594826787699e-05, + "loss": 1.9845, + "step": 18782500 + }, + { + "epoch": 54.37, + "learning_rate": 2.2825224620229717e-05, + "loss": 1.9986, + "step": 18783000 + }, + { + "epoch": 54.37, + "learning_rate": 2.282450097258244e-05, + "loss": 1.9815, + "step": 18783500 + }, + { + "epoch": 54.37, + "learning_rate": 2.282377732493516e-05, + "loss": 1.9611, + "step": 18784000 + }, + { + "epoch": 54.37, + "learning_rate": 2.2823053677287887e-05, + "loss": 1.9622, + "step": 18784500 + }, + { + "epoch": 54.37, + "learning_rate": 2.28223329242312e-05, + "loss": 1.9862, + "step": 18785000 + }, + { + "epoch": 54.38, + "learning_rate": 2.282160927658392e-05, + "loss": 1.9727, + "step": 18785500 + }, + { + "epoch": 54.38, + "learning_rate": 2.2820885628936643e-05, + "loss": 1.9855, + "step": 18786000 + }, + { + "epoch": 54.38, + "learning_rate": 2.282016198128937e-05, + "loss": 1.9647, + "step": 18786500 + }, + { + "epoch": 54.38, + "learning_rate": 2.281943833364209e-05, + "loss": 1.981, + "step": 18787000 + }, + { + "epoch": 54.38, + "learning_rate": 2.2818714685994813e-05, + "loss": 1.9714, + "step": 18787500 + }, + { + "epoch": 54.38, + "learning_rate": 2.2817991038347536e-05, + "loss": 1.9717, + "step": 18788000 + }, + { + "epoch": 54.39, + "learning_rate": 2.281726739070026e-05, + "loss": 1.9502, + "step": 18788500 + }, + { + "epoch": 54.39, + "learning_rate": 2.2816543743052984e-05, + "loss": 1.9694, + "step": 18789000 + }, + { + "epoch": 54.39, + "learning_rate": 2.2815820095405706e-05, + "loss": 1.979, + "step": 18789500 + }, + { + "epoch": 54.39, + "learning_rate": 2.281509644775843e-05, + "loss": 1.9813, + "step": 18790000 + }, + { + "epoch": 54.39, + "learning_rate": 2.2814374247406447e-05, + "loss": 1.9739, + "step": 18790500 + }, + { + "epoch": 54.39, + "learning_rate": 2.281365059975917e-05, + "loss": 1.9836, + "step": 18791000 + }, + { + "epoch": 54.39, + "learning_rate": 2.2812926952111895e-05, + "loss": 1.9708, + "step": 18791500 + }, + { + "epoch": 54.4, + "learning_rate": 2.281220330446462e-05, + "loss": 2.0082, + "step": 18792000 + }, + { + "epoch": 54.4, + "learning_rate": 2.2811479656817343e-05, + "loss": 1.9833, + "step": 18792500 + }, + { + "epoch": 54.4, + "learning_rate": 2.2810756009170065e-05, + "loss": 1.9575, + "step": 18793000 + }, + { + "epoch": 54.4, + "learning_rate": 2.2810032361522787e-05, + "loss": 1.9952, + "step": 18793500 + }, + { + "epoch": 54.4, + "learning_rate": 2.280930871387551e-05, + "loss": 1.9716, + "step": 18794000 + }, + { + "epoch": 54.4, + "learning_rate": 2.280858651352353e-05, + "loss": 1.9765, + "step": 18794500 + }, + { + "epoch": 54.4, + "learning_rate": 2.280786286587625e-05, + "loss": 1.9636, + "step": 18795000 + }, + { + "epoch": 54.41, + "learning_rate": 2.2807139218228976e-05, + "loss": 1.9598, + "step": 18795500 + }, + { + "epoch": 54.41, + "learning_rate": 2.28064155705817e-05, + "loss": 1.9703, + "step": 18796000 + }, + { + "epoch": 54.41, + "learning_rate": 2.280569192293442e-05, + "loss": 2.0163, + "step": 18796500 + }, + { + "epoch": 54.41, + "learning_rate": 2.280496972258244e-05, + "loss": 1.989, + "step": 18797000 + }, + { + "epoch": 54.41, + "learning_rate": 2.2804246074935162e-05, + "loss": 1.9651, + "step": 18797500 + }, + { + "epoch": 54.41, + "learning_rate": 2.280352387458318e-05, + "loss": 2.0077, + "step": 18798000 + }, + { + "epoch": 54.41, + "learning_rate": 2.2802800226935903e-05, + "loss": 1.9874, + "step": 18798500 + }, + { + "epoch": 54.42, + "learning_rate": 2.2802076579288625e-05, + "loss": 1.9593, + "step": 18799000 + }, + { + "epoch": 54.42, + "learning_rate": 2.280135293164135e-05, + "loss": 1.9531, + "step": 18799500 + }, + { + "epoch": 54.42, + "learning_rate": 2.2800629283994073e-05, + "loss": 1.9872, + "step": 18800000 + }, + { + "epoch": 54.42, + "learning_rate": 2.2799907083642092e-05, + "loss": 1.9739, + "step": 18800500 + }, + { + "epoch": 54.42, + "learning_rate": 2.2799183435994814e-05, + "loss": 1.99, + "step": 18801000 + }, + { + "epoch": 54.42, + "learning_rate": 2.2798459788347537e-05, + "loss": 1.9718, + "step": 18801500 + }, + { + "epoch": 54.42, + "learning_rate": 2.279773614070026e-05, + "loss": 1.9671, + "step": 18802000 + }, + { + "epoch": 54.43, + "learning_rate": 2.2797012493052984e-05, + "loss": 1.9764, + "step": 18802500 + }, + { + "epoch": 54.43, + "learning_rate": 2.2796288845405707e-05, + "loss": 1.9854, + "step": 18803000 + }, + { + "epoch": 54.43, + "learning_rate": 2.2795565197758432e-05, + "loss": 1.9693, + "step": 18803500 + }, + { + "epoch": 54.43, + "learning_rate": 2.2794841550111154e-05, + "loss": 1.9773, + "step": 18804000 + }, + { + "epoch": 54.43, + "learning_rate": 2.2794117902463877e-05, + "loss": 1.9767, + "step": 18804500 + }, + { + "epoch": 54.43, + "learning_rate": 2.27933942548166e-05, + "loss": 1.9716, + "step": 18805000 + }, + { + "epoch": 54.43, + "learning_rate": 2.279267060716932e-05, + "loss": 1.9722, + "step": 18805500 + }, + { + "epoch": 54.44, + "learning_rate": 2.279194840681734e-05, + "loss": 1.9802, + "step": 18806000 + }, + { + "epoch": 54.44, + "learning_rate": 2.2791224759170062e-05, + "loss": 1.9821, + "step": 18806500 + }, + { + "epoch": 54.44, + "learning_rate": 2.2790501111522788e-05, + "loss": 1.9768, + "step": 18807000 + }, + { + "epoch": 54.44, + "learning_rate": 2.278977746387551e-05, + "loss": 1.9722, + "step": 18807500 + }, + { + "epoch": 54.44, + "learning_rate": 2.2789053816228232e-05, + "loss": 1.9805, + "step": 18808000 + }, + { + "epoch": 54.44, + "learning_rate": 2.2788330168580958e-05, + "loss": 1.9882, + "step": 18808500 + }, + { + "epoch": 54.44, + "learning_rate": 2.2787607968228974e-05, + "loss": 1.982, + "step": 18809000 + }, + { + "epoch": 54.45, + "learning_rate": 2.2786885767876993e-05, + "loss": 1.957, + "step": 18809500 + }, + { + "epoch": 54.45, + "learning_rate": 2.2786162120229715e-05, + "loss": 1.9722, + "step": 18810000 + }, + { + "epoch": 54.45, + "learning_rate": 2.2785438472582437e-05, + "loss": 1.9907, + "step": 18810500 + }, + { + "epoch": 54.45, + "learning_rate": 2.2784714824935163e-05, + "loss": 1.973, + "step": 18811000 + }, + { + "epoch": 54.45, + "learning_rate": 2.2783991177287885e-05, + "loss": 1.9756, + "step": 18811500 + }, + { + "epoch": 54.45, + "learning_rate": 2.278326752964061e-05, + "loss": 1.9753, + "step": 18812000 + }, + { + "epoch": 54.45, + "learning_rate": 2.2782543881993333e-05, + "loss": 1.9572, + "step": 18812500 + }, + { + "epoch": 54.46, + "learning_rate": 2.2781820234346055e-05, + "loss": 1.9593, + "step": 18813000 + }, + { + "epoch": 54.46, + "learning_rate": 2.2781096586698777e-05, + "loss": 1.9895, + "step": 18813500 + }, + { + "epoch": 54.46, + "learning_rate": 2.27803729390515e-05, + "loss": 1.9602, + "step": 18814000 + }, + { + "epoch": 54.46, + "learning_rate": 2.2779650738699522e-05, + "loss": 1.9769, + "step": 18814500 + }, + { + "epoch": 54.46, + "learning_rate": 2.2778927091052244e-05, + "loss": 1.9509, + "step": 18815000 + }, + { + "epoch": 54.46, + "learning_rate": 2.2778203443404966e-05, + "loss": 1.9792, + "step": 18815500 + }, + { + "epoch": 54.46, + "learning_rate": 2.2777481243052985e-05, + "loss": 1.9973, + "step": 18816000 + }, + { + "epoch": 54.47, + "learning_rate": 2.2776757595405707e-05, + "loss": 1.9753, + "step": 18816500 + }, + { + "epoch": 54.47, + "learning_rate": 2.2776035395053723e-05, + "loss": 1.9918, + "step": 18817000 + }, + { + "epoch": 54.47, + "learning_rate": 2.277531174740645e-05, + "loss": 1.9593, + "step": 18817500 + }, + { + "epoch": 54.47, + "learning_rate": 2.277458809975917e-05, + "loss": 1.9867, + "step": 18818000 + }, + { + "epoch": 54.47, + "learning_rate": 2.277386589940719e-05, + "loss": 1.9606, + "step": 18818500 + }, + { + "epoch": 54.47, + "learning_rate": 2.2773142251759912e-05, + "loss": 1.9677, + "step": 18819000 + }, + { + "epoch": 54.47, + "learning_rate": 2.2772418604112634e-05, + "loss": 1.9882, + "step": 18819500 + }, + { + "epoch": 54.48, + "learning_rate": 2.277169495646536e-05, + "loss": 1.9798, + "step": 18820000 + }, + { + "epoch": 54.48, + "learning_rate": 2.2770971308818082e-05, + "loss": 1.9557, + "step": 18820500 + }, + { + "epoch": 54.48, + "learning_rate": 2.2770247661170804e-05, + "loss": 1.9491, + "step": 18821000 + }, + { + "epoch": 54.48, + "learning_rate": 2.2769524013523527e-05, + "loss": 1.9597, + "step": 18821500 + }, + { + "epoch": 54.48, + "learning_rate": 2.276880326046684e-05, + "loss": 1.9551, + "step": 18822000 + }, + { + "epoch": 54.48, + "learning_rate": 2.2768079612819564e-05, + "loss": 1.968, + "step": 18822500 + }, + { + "epoch": 54.48, + "learning_rate": 2.2767355965172287e-05, + "loss": 1.9663, + "step": 18823000 + }, + { + "epoch": 54.49, + "learning_rate": 2.2766632317525012e-05, + "loss": 1.9767, + "step": 18823500 + }, + { + "epoch": 54.49, + "learning_rate": 2.2765908669877735e-05, + "loss": 1.9947, + "step": 18824000 + }, + { + "epoch": 54.49, + "learning_rate": 2.2765185022230457e-05, + "loss": 1.9667, + "step": 18824500 + }, + { + "epoch": 54.49, + "learning_rate": 2.276446137458318e-05, + "loss": 2.0037, + "step": 18825000 + }, + { + "epoch": 54.49, + "learning_rate": 2.27637377269359e-05, + "loss": 1.9692, + "step": 18825500 + }, + { + "epoch": 54.49, + "learning_rate": 2.2763014079288627e-05, + "loss": 2.0036, + "step": 18826000 + }, + { + "epoch": 54.5, + "learning_rate": 2.276229043164135e-05, + "loss": 1.9733, + "step": 18826500 + }, + { + "epoch": 54.5, + "learning_rate": 2.2761566783994075e-05, + "loss": 1.9659, + "step": 18827000 + }, + { + "epoch": 54.5, + "learning_rate": 2.2760843136346797e-05, + "loss": 1.9832, + "step": 18827500 + }, + { + "epoch": 54.5, + "learning_rate": 2.2760120935994812e-05, + "loss": 1.9753, + "step": 18828000 + }, + { + "epoch": 54.5, + "learning_rate": 2.2759397288347538e-05, + "loss": 1.9723, + "step": 18828500 + }, + { + "epoch": 54.5, + "learning_rate": 2.275867364070026e-05, + "loss": 1.9541, + "step": 18829000 + }, + { + "epoch": 54.5, + "learning_rate": 2.2757949993052986e-05, + "loss": 1.9681, + "step": 18829500 + }, + { + "epoch": 54.51, + "learning_rate": 2.2757226345405708e-05, + "loss": 1.9696, + "step": 18830000 + }, + { + "epoch": 54.51, + "learning_rate": 2.275650269775843e-05, + "loss": 1.9721, + "step": 18830500 + }, + { + "epoch": 54.51, + "learning_rate": 2.2755779050111153e-05, + "loss": 1.9879, + "step": 18831000 + }, + { + "epoch": 54.51, + "learning_rate": 2.2755055402463875e-05, + "loss": 1.9813, + "step": 18831500 + }, + { + "epoch": 54.51, + "learning_rate": 2.27543317548166e-05, + "loss": 1.9786, + "step": 18832000 + }, + { + "epoch": 54.51, + "learning_rate": 2.2753609554464616e-05, + "loss": 1.9695, + "step": 18832500 + }, + { + "epoch": 54.51, + "learning_rate": 2.275288590681734e-05, + "loss": 1.9771, + "step": 18833000 + }, + { + "epoch": 54.52, + "learning_rate": 2.2752162259170064e-05, + "loss": 1.9779, + "step": 18833500 + }, + { + "epoch": 54.52, + "learning_rate": 2.275143861152279e-05, + "loss": 1.9705, + "step": 18834000 + }, + { + "epoch": 54.52, + "learning_rate": 2.2750716411170805e-05, + "loss": 1.9772, + "step": 18834500 + }, + { + "epoch": 54.52, + "learning_rate": 2.2749992763523527e-05, + "loss": 2.0091, + "step": 18835000 + }, + { + "epoch": 54.52, + "learning_rate": 2.274926911587625e-05, + "loss": 1.9673, + "step": 18835500 + }, + { + "epoch": 54.52, + "learning_rate": 2.2748545468228975e-05, + "loss": 1.977, + "step": 18836000 + }, + { + "epoch": 54.52, + "learning_rate": 2.2747821820581697e-05, + "loss": 1.9754, + "step": 18836500 + }, + { + "epoch": 54.53, + "learning_rate": 2.2747098172934423e-05, + "loss": 1.9777, + "step": 18837000 + }, + { + "epoch": 54.53, + "learning_rate": 2.2746374525287145e-05, + "loss": 1.9761, + "step": 18837500 + }, + { + "epoch": 54.53, + "learning_rate": 2.2745650877639868e-05, + "loss": 1.9524, + "step": 18838000 + }, + { + "epoch": 54.53, + "learning_rate": 2.274492722999259e-05, + "loss": 1.9863, + "step": 18838500 + }, + { + "epoch": 54.53, + "learning_rate": 2.2744203582345315e-05, + "loss": 1.9753, + "step": 18839000 + }, + { + "epoch": 54.53, + "learning_rate": 2.274348138199333e-05, + "loss": 1.9948, + "step": 18839500 + }, + { + "epoch": 54.53, + "learning_rate": 2.2742757734346053e-05, + "loss": 1.9794, + "step": 18840000 + }, + { + "epoch": 54.54, + "learning_rate": 2.274203408669878e-05, + "loss": 1.9957, + "step": 18840500 + }, + { + "epoch": 54.54, + "learning_rate": 2.27413104390515e-05, + "loss": 1.9792, + "step": 18841000 + }, + { + "epoch": 54.54, + "learning_rate": 2.274058823869952e-05, + "loss": 1.9606, + "step": 18841500 + }, + { + "epoch": 54.54, + "learning_rate": 2.2739864591052242e-05, + "loss": 1.9841, + "step": 18842000 + }, + { + "epoch": 54.54, + "learning_rate": 2.2739140943404964e-05, + "loss": 1.982, + "step": 18842500 + }, + { + "epoch": 54.54, + "learning_rate": 2.273841729575769e-05, + "loss": 1.9773, + "step": 18843000 + }, + { + "epoch": 54.54, + "learning_rate": 2.2737693648110412e-05, + "loss": 1.9969, + "step": 18843500 + }, + { + "epoch": 54.55, + "learning_rate": 2.2736970000463138e-05, + "loss": 1.9938, + "step": 18844000 + }, + { + "epoch": 54.55, + "learning_rate": 2.273624635281586e-05, + "loss": 1.9569, + "step": 18844500 + }, + { + "epoch": 54.55, + "learning_rate": 2.2735522705168582e-05, + "loss": 1.9808, + "step": 18845000 + }, + { + "epoch": 54.55, + "learning_rate": 2.2734799057521305e-05, + "loss": 1.9926, + "step": 18845500 + }, + { + "epoch": 54.55, + "learning_rate": 2.2734075409874027e-05, + "loss": 1.9801, + "step": 18846000 + }, + { + "epoch": 54.55, + "learning_rate": 2.2733351762226752e-05, + "loss": 1.9816, + "step": 18846500 + }, + { + "epoch": 54.55, + "learning_rate": 2.2732628114579475e-05, + "loss": 1.9655, + "step": 18847000 + }, + { + "epoch": 54.56, + "learning_rate": 2.27319044669322e-05, + "loss": 1.9599, + "step": 18847500 + }, + { + "epoch": 54.56, + "learning_rate": 2.2731182266580216e-05, + "loss": 2.0045, + "step": 18848000 + }, + { + "epoch": 54.56, + "learning_rate": 2.273045861893294e-05, + "loss": 1.9767, + "step": 18848500 + }, + { + "epoch": 54.56, + "learning_rate": 2.2729734971285664e-05, + "loss": 1.9851, + "step": 18849000 + }, + { + "epoch": 54.56, + "learning_rate": 2.272901277093368e-05, + "loss": 1.9903, + "step": 18849500 + }, + { + "epoch": 54.56, + "learning_rate": 2.27282891232864e-05, + "loss": 1.9729, + "step": 18850000 + }, + { + "epoch": 54.56, + "learning_rate": 2.2727565475639127e-05, + "loss": 1.9847, + "step": 18850500 + }, + { + "epoch": 54.57, + "learning_rate": 2.272684182799185e-05, + "loss": 1.9739, + "step": 18851000 + }, + { + "epoch": 54.57, + "learning_rate": 2.2726118180344575e-05, + "loss": 1.9667, + "step": 18851500 + }, + { + "epoch": 54.57, + "learning_rate": 2.2725394532697297e-05, + "loss": 1.9966, + "step": 18852000 + }, + { + "epoch": 54.57, + "learning_rate": 2.272467088505002e-05, + "loss": 1.9951, + "step": 18852500 + }, + { + "epoch": 54.57, + "learning_rate": 2.272394723740274e-05, + "loss": 1.9841, + "step": 18853000 + }, + { + "epoch": 54.57, + "learning_rate": 2.2723223589755467e-05, + "loss": 1.9635, + "step": 18853500 + }, + { + "epoch": 54.57, + "learning_rate": 2.272249994210819e-05, + "loss": 1.979, + "step": 18854000 + }, + { + "epoch": 54.58, + "learning_rate": 2.2721776294460912e-05, + "loss": 1.9848, + "step": 18854500 + }, + { + "epoch": 54.58, + "learning_rate": 2.2721054094108927e-05, + "loss": 1.9787, + "step": 18855000 + }, + { + "epoch": 54.58, + "learning_rate": 2.2720330446461653e-05, + "loss": 1.9809, + "step": 18855500 + }, + { + "epoch": 54.58, + "learning_rate": 2.2719608246109672e-05, + "loss": 1.9729, + "step": 18856000 + }, + { + "epoch": 54.58, + "learning_rate": 2.271888604575769e-05, + "loss": 1.9706, + "step": 18856500 + }, + { + "epoch": 54.58, + "learning_rate": 2.2718162398110413e-05, + "loss": 1.9862, + "step": 18857000 + }, + { + "epoch": 54.58, + "learning_rate": 2.2717438750463135e-05, + "loss": 1.9911, + "step": 18857500 + }, + { + "epoch": 54.59, + "learning_rate": 2.2716715102815858e-05, + "loss": 1.9775, + "step": 18858000 + }, + { + "epoch": 54.59, + "learning_rate": 2.271599145516858e-05, + "loss": 1.9816, + "step": 18858500 + }, + { + "epoch": 54.59, + "learning_rate": 2.2715267807521305e-05, + "loss": 1.9758, + "step": 18859000 + }, + { + "epoch": 54.59, + "learning_rate": 2.271454415987403e-05, + "loss": 1.9844, + "step": 18859500 + }, + { + "epoch": 54.59, + "learning_rate": 2.2713820512226753e-05, + "loss": 1.9767, + "step": 18860000 + }, + { + "epoch": 54.59, + "learning_rate": 2.271309831187477e-05, + "loss": 1.9708, + "step": 18860500 + }, + { + "epoch": 54.59, + "learning_rate": 2.271237466422749e-05, + "loss": 1.9753, + "step": 18861000 + }, + { + "epoch": 54.6, + "learning_rate": 2.2711651016580217e-05, + "loss": 1.9807, + "step": 18861500 + }, + { + "epoch": 54.6, + "learning_rate": 2.271092736893294e-05, + "loss": 1.9655, + "step": 18862000 + }, + { + "epoch": 54.6, + "learning_rate": 2.271020372128566e-05, + "loss": 1.968, + "step": 18862500 + }, + { + "epoch": 54.6, + "learning_rate": 2.2709480073638387e-05, + "loss": 2.0054, + "step": 18863000 + }, + { + "epoch": 54.6, + "learning_rate": 2.270875642599111e-05, + "loss": 1.977, + "step": 18863500 + }, + { + "epoch": 54.6, + "learning_rate": 2.270803277834383e-05, + "loss": 1.9667, + "step": 18864000 + }, + { + "epoch": 54.61, + "learning_rate": 2.2707309130696557e-05, + "loss": 1.9847, + "step": 18864500 + }, + { + "epoch": 54.61, + "learning_rate": 2.270658548304928e-05, + "loss": 1.9844, + "step": 18865000 + }, + { + "epoch": 54.61, + "learning_rate": 2.2705861835402e-05, + "loss": 1.9684, + "step": 18865500 + }, + { + "epoch": 54.61, + "learning_rate": 2.2705138187754724e-05, + "loss": 1.9905, + "step": 18866000 + }, + { + "epoch": 54.61, + "learning_rate": 2.270441454010745e-05, + "loss": 2.0043, + "step": 18866500 + }, + { + "epoch": 54.61, + "learning_rate": 2.2703692339755468e-05, + "loss": 1.9819, + "step": 18867000 + }, + { + "epoch": 54.61, + "learning_rate": 2.270296869210819e-05, + "loss": 1.9978, + "step": 18867500 + }, + { + "epoch": 54.62, + "learning_rate": 2.2702245044460913e-05, + "loss": 1.9842, + "step": 18868000 + }, + { + "epoch": 54.62, + "learning_rate": 2.2701521396813635e-05, + "loss": 1.9674, + "step": 18868500 + }, + { + "epoch": 54.62, + "learning_rate": 2.2700797749166357e-05, + "loss": 1.9545, + "step": 18869000 + }, + { + "epoch": 54.62, + "learning_rate": 2.270007699610967e-05, + "loss": 1.9814, + "step": 18869500 + }, + { + "epoch": 54.62, + "learning_rate": 2.2699353348462395e-05, + "loss": 1.9788, + "step": 18870000 + }, + { + "epoch": 54.62, + "learning_rate": 2.269862970081512e-05, + "loss": 1.9726, + "step": 18870500 + }, + { + "epoch": 54.62, + "learning_rate": 2.2697906053167843e-05, + "loss": 1.9757, + "step": 18871000 + }, + { + "epoch": 54.63, + "learning_rate": 2.2697182405520565e-05, + "loss": 1.9896, + "step": 18871500 + }, + { + "epoch": 54.63, + "learning_rate": 2.2696458757873287e-05, + "loss": 1.9852, + "step": 18872000 + }, + { + "epoch": 54.63, + "learning_rate": 2.269573511022601e-05, + "loss": 1.9503, + "step": 18872500 + }, + { + "epoch": 54.63, + "learning_rate": 2.2695011462578732e-05, + "loss": 1.9563, + "step": 18873000 + }, + { + "epoch": 54.63, + "learning_rate": 2.2694287814931457e-05, + "loss": 1.9793, + "step": 18873500 + }, + { + "epoch": 54.63, + "learning_rate": 2.269356706187477e-05, + "loss": 1.983, + "step": 18874000 + }, + { + "epoch": 54.63, + "learning_rate": 2.2692843414227495e-05, + "loss": 1.986, + "step": 18874500 + }, + { + "epoch": 54.64, + "learning_rate": 2.2692119766580217e-05, + "loss": 1.9546, + "step": 18875000 + }, + { + "epoch": 54.64, + "learning_rate": 2.269139611893294e-05, + "loss": 1.9928, + "step": 18875500 + }, + { + "epoch": 54.64, + "learning_rate": 2.2690673918580955e-05, + "loss": 2.0007, + "step": 18876000 + }, + { + "epoch": 54.64, + "learning_rate": 2.268995027093368e-05, + "loss": 1.9879, + "step": 18876500 + }, + { + "epoch": 54.64, + "learning_rate": 2.2689226623286403e-05, + "loss": 2.0036, + "step": 18877000 + }, + { + "epoch": 54.64, + "learning_rate": 2.2688502975639125e-05, + "loss": 1.9953, + "step": 18877500 + }, + { + "epoch": 54.64, + "learning_rate": 2.268777932799185e-05, + "loss": 1.952, + "step": 18878000 + }, + { + "epoch": 54.65, + "learning_rate": 2.2687055680344573e-05, + "loss": 1.9839, + "step": 18878500 + }, + { + "epoch": 54.65, + "learning_rate": 2.2686333479992592e-05, + "loss": 1.9732, + "step": 18879000 + }, + { + "epoch": 54.65, + "learning_rate": 2.2685609832345314e-05, + "loss": 1.9825, + "step": 18879500 + }, + { + "epoch": 54.65, + "learning_rate": 2.2684886184698037e-05, + "loss": 1.9801, + "step": 18880000 + }, + { + "epoch": 54.65, + "learning_rate": 2.268416253705076e-05, + "loss": 1.9784, + "step": 18880500 + }, + { + "epoch": 54.65, + "learning_rate": 2.268343888940348e-05, + "loss": 1.9803, + "step": 18881000 + }, + { + "epoch": 54.65, + "learning_rate": 2.2682715241756207e-05, + "loss": 1.9585, + "step": 18881500 + }, + { + "epoch": 54.66, + "learning_rate": 2.2681993041404226e-05, + "loss": 1.9704, + "step": 18882000 + }, + { + "epoch": 54.66, + "learning_rate": 2.2681269393756948e-05, + "loss": 1.9587, + "step": 18882500 + }, + { + "epoch": 54.66, + "learning_rate": 2.268054574610967e-05, + "loss": 1.9841, + "step": 18883000 + }, + { + "epoch": 54.66, + "learning_rate": 2.2679822098462396e-05, + "loss": 1.9699, + "step": 18883500 + }, + { + "epoch": 54.66, + "learning_rate": 2.267909989811041e-05, + "loss": 1.9629, + "step": 18884000 + }, + { + "epoch": 54.66, + "learning_rate": 2.2678376250463134e-05, + "loss": 2.0018, + "step": 18884500 + }, + { + "epoch": 54.66, + "learning_rate": 2.267765260281586e-05, + "loss": 1.9661, + "step": 18885000 + }, + { + "epoch": 54.67, + "learning_rate": 2.2676930402463878e-05, + "loss": 1.9577, + "step": 18885500 + }, + { + "epoch": 54.67, + "learning_rate": 2.26762067548166e-05, + "loss": 1.9687, + "step": 18886000 + }, + { + "epoch": 54.67, + "learning_rate": 2.2675483107169323e-05, + "loss": 1.9835, + "step": 18886500 + }, + { + "epoch": 54.67, + "learning_rate": 2.2674759459522045e-05, + "loss": 1.9978, + "step": 18887000 + }, + { + "epoch": 54.67, + "learning_rate": 2.267403581187477e-05, + "loss": 1.9689, + "step": 18887500 + }, + { + "epoch": 54.67, + "learning_rate": 2.2673312164227493e-05, + "loss": 1.9651, + "step": 18888000 + }, + { + "epoch": 54.67, + "learning_rate": 2.2672588516580215e-05, + "loss": 1.9781, + "step": 18888500 + }, + { + "epoch": 54.68, + "learning_rate": 2.267186486893294e-05, + "loss": 1.993, + "step": 18889000 + }, + { + "epoch": 54.68, + "learning_rate": 2.2671141221285663e-05, + "loss": 1.9713, + "step": 18889500 + }, + { + "epoch": 54.68, + "learning_rate": 2.267041902093368e-05, + "loss": 1.9956, + "step": 18890000 + }, + { + "epoch": 54.68, + "learning_rate": 2.2669695373286404e-05, + "loss": 1.9646, + "step": 18890500 + }, + { + "epoch": 54.68, + "learning_rate": 2.2668971725639126e-05, + "loss": 1.9546, + "step": 18891000 + }, + { + "epoch": 54.68, + "learning_rate": 2.266824807799185e-05, + "loss": 1.9825, + "step": 18891500 + }, + { + "epoch": 54.68, + "learning_rate": 2.266752443034457e-05, + "loss": 1.9745, + "step": 18892000 + }, + { + "epoch": 54.69, + "learning_rate": 2.2666800782697296e-05, + "loss": 1.9907, + "step": 18892500 + }, + { + "epoch": 54.69, + "learning_rate": 2.2666077135050022e-05, + "loss": 2.0153, + "step": 18893000 + }, + { + "epoch": 54.69, + "learning_rate": 2.2665353487402744e-05, + "loss": 1.9599, + "step": 18893500 + }, + { + "epoch": 54.69, + "learning_rate": 2.2664629839755466e-05, + "loss": 1.9572, + "step": 18894000 + }, + { + "epoch": 54.69, + "learning_rate": 2.266390619210819e-05, + "loss": 1.9818, + "step": 18894500 + }, + { + "epoch": 54.69, + "learning_rate": 2.266318254446091e-05, + "loss": 1.9523, + "step": 18895000 + }, + { + "epoch": 54.69, + "learning_rate": 2.2662458896813636e-05, + "loss": 1.9822, + "step": 18895500 + }, + { + "epoch": 54.7, + "learning_rate": 2.2661736696461652e-05, + "loss": 1.9755, + "step": 18896000 + }, + { + "epoch": 54.7, + "learning_rate": 2.2661013048814378e-05, + "loss": 1.9655, + "step": 18896500 + }, + { + "epoch": 54.7, + "learning_rate": 2.26602894011671e-05, + "loss": 1.9839, + "step": 18897000 + }, + { + "epoch": 54.7, + "learning_rate": 2.2659565753519822e-05, + "loss": 2.0034, + "step": 18897500 + }, + { + "epoch": 54.7, + "learning_rate": 2.2658842105872548e-05, + "loss": 1.9785, + "step": 18898000 + }, + { + "epoch": 54.7, + "learning_rate": 2.265811845822527e-05, + "loss": 1.9865, + "step": 18898500 + }, + { + "epoch": 54.7, + "learning_rate": 2.2657394810577992e-05, + "loss": 1.9756, + "step": 18899000 + }, + { + "epoch": 54.71, + "learning_rate": 2.2656671162930714e-05, + "loss": 1.9504, + "step": 18899500 + }, + { + "epoch": 54.71, + "learning_rate": 2.265594751528344e-05, + "loss": 1.9772, + "step": 18900000 + }, + { + "epoch": 54.71, + "learning_rate": 2.265522531493146e-05, + "loss": 1.9625, + "step": 18900500 + }, + { + "epoch": 54.71, + "learning_rate": 2.2654503114579474e-05, + "loss": 1.9815, + "step": 18901000 + }, + { + "epoch": 54.71, + "learning_rate": 2.26537794669322e-05, + "loss": 1.9765, + "step": 18901500 + }, + { + "epoch": 54.71, + "learning_rate": 2.2653057266580216e-05, + "loss": 1.9778, + "step": 18902000 + }, + { + "epoch": 54.71, + "learning_rate": 2.2652333618932938e-05, + "loss": 1.9899, + "step": 18902500 + }, + { + "epoch": 54.72, + "learning_rate": 2.265160997128566e-05, + "loss": 1.9784, + "step": 18903000 + }, + { + "epoch": 54.72, + "learning_rate": 2.2650886323638386e-05, + "loss": 1.9802, + "step": 18903500 + }, + { + "epoch": 54.72, + "learning_rate": 2.265016267599111e-05, + "loss": 1.9956, + "step": 18904000 + }, + { + "epoch": 54.72, + "learning_rate": 2.2649439028343834e-05, + "loss": 1.9596, + "step": 18904500 + }, + { + "epoch": 54.72, + "learning_rate": 2.2648715380696556e-05, + "loss": 1.9515, + "step": 18905000 + }, + { + "epoch": 54.72, + "learning_rate": 2.2647991733049278e-05, + "loss": 1.9447, + "step": 18905500 + }, + { + "epoch": 54.73, + "learning_rate": 2.2647269532697297e-05, + "loss": 1.9735, + "step": 18906000 + }, + { + "epoch": 54.73, + "learning_rate": 2.264654588505002e-05, + "loss": 1.9792, + "step": 18906500 + }, + { + "epoch": 54.73, + "learning_rate": 2.264582223740274e-05, + "loss": 1.9763, + "step": 18907000 + }, + { + "epoch": 54.73, + "learning_rate": 2.2645098589755467e-05, + "loss": 1.9556, + "step": 18907500 + }, + { + "epoch": 54.73, + "learning_rate": 2.264437494210819e-05, + "loss": 1.9985, + "step": 18908000 + }, + { + "epoch": 54.73, + "learning_rate": 2.264365129446091e-05, + "loss": 1.9762, + "step": 18908500 + }, + { + "epoch": 54.73, + "learning_rate": 2.2642927646813637e-05, + "loss": 1.9732, + "step": 18909000 + }, + { + "epoch": 54.74, + "learning_rate": 2.264220399916636e-05, + "loss": 1.9867, + "step": 18909500 + }, + { + "epoch": 54.74, + "learning_rate": 2.264148035151908e-05, + "loss": 1.9887, + "step": 18910000 + }, + { + "epoch": 54.74, + "learning_rate": 2.2640756703871804e-05, + "loss": 1.9516, + "step": 18910500 + }, + { + "epoch": 54.74, + "learning_rate": 2.2640034503519823e-05, + "loss": 1.9892, + "step": 18911000 + }, + { + "epoch": 54.74, + "learning_rate": 2.263931085587255e-05, + "loss": 1.9857, + "step": 18911500 + }, + { + "epoch": 54.74, + "learning_rate": 2.263858720822527e-05, + "loss": 1.969, + "step": 18912000 + }, + { + "epoch": 54.74, + "learning_rate": 2.2637863560577993e-05, + "loss": 1.9554, + "step": 18912500 + }, + { + "epoch": 54.75, + "learning_rate": 2.2637139912930715e-05, + "loss": 1.9735, + "step": 18913000 + }, + { + "epoch": 54.75, + "learning_rate": 2.2636417712578734e-05, + "loss": 1.9639, + "step": 18913500 + }, + { + "epoch": 54.75, + "learning_rate": 2.263569551222675e-05, + "loss": 1.9598, + "step": 18914000 + }, + { + "epoch": 54.75, + "learning_rate": 2.2634971864579475e-05, + "loss": 1.9813, + "step": 18914500 + }, + { + "epoch": 54.75, + "learning_rate": 2.26342482169322e-05, + "loss": 1.9873, + "step": 18915000 + }, + { + "epoch": 54.75, + "learning_rate": 2.2633524569284923e-05, + "loss": 1.9858, + "step": 18915500 + }, + { + "epoch": 54.75, + "learning_rate": 2.2632800921637645e-05, + "loss": 1.9723, + "step": 18916000 + }, + { + "epoch": 54.76, + "learning_rate": 2.2632078721285664e-05, + "loss": 1.9665, + "step": 18916500 + }, + { + "epoch": 54.76, + "learning_rate": 2.2631355073638387e-05, + "loss": 1.9783, + "step": 18917000 + }, + { + "epoch": 54.76, + "learning_rate": 2.263063142599111e-05, + "loss": 1.9871, + "step": 18917500 + }, + { + "epoch": 54.76, + "learning_rate": 2.262990777834383e-05, + "loss": 1.9891, + "step": 18918000 + }, + { + "epoch": 54.76, + "learning_rate": 2.2629184130696553e-05, + "loss": 1.9803, + "step": 18918500 + }, + { + "epoch": 54.76, + "learning_rate": 2.262846048304928e-05, + "loss": 2.0, + "step": 18919000 + }, + { + "epoch": 54.76, + "learning_rate": 2.2627736835402e-05, + "loss": 1.979, + "step": 18919500 + }, + { + "epoch": 54.77, + "learning_rate": 2.2627013187754727e-05, + "loss": 1.9829, + "step": 18920000 + }, + { + "epoch": 54.77, + "learning_rate": 2.2626290987402742e-05, + "loss": 2.0102, + "step": 18920500 + }, + { + "epoch": 54.77, + "learning_rate": 2.2625567339755465e-05, + "loss": 1.9744, + "step": 18921000 + }, + { + "epoch": 54.77, + "learning_rate": 2.262484369210819e-05, + "loss": 1.9715, + "step": 18921500 + }, + { + "epoch": 54.77, + "learning_rate": 2.2624120044460912e-05, + "loss": 1.9876, + "step": 18922000 + }, + { + "epoch": 54.77, + "learning_rate": 2.2623396396813638e-05, + "loss": 1.9793, + "step": 18922500 + }, + { + "epoch": 54.77, + "learning_rate": 2.2622674196461654e-05, + "loss": 1.9734, + "step": 18923000 + }, + { + "epoch": 54.78, + "learning_rate": 2.2621951996109672e-05, + "loss": 1.9987, + "step": 18923500 + }, + { + "epoch": 54.78, + "learning_rate": 2.2621228348462395e-05, + "loss": 1.9641, + "step": 18924000 + }, + { + "epoch": 54.78, + "learning_rate": 2.2620504700815117e-05, + "loss": 1.9924, + "step": 18924500 + }, + { + "epoch": 54.78, + "learning_rate": 2.261978105316784e-05, + "loss": 1.9418, + "step": 18925000 + }, + { + "epoch": 54.78, + "learning_rate": 2.2619057405520565e-05, + "loss": 1.9773, + "step": 18925500 + }, + { + "epoch": 54.78, + "learning_rate": 2.2618333757873287e-05, + "loss": 1.9881, + "step": 18926000 + }, + { + "epoch": 54.78, + "learning_rate": 2.2617610110226013e-05, + "loss": 1.9953, + "step": 18926500 + }, + { + "epoch": 54.79, + "learning_rate": 2.2616887909874028e-05, + "loss": 1.9799, + "step": 18927000 + }, + { + "epoch": 54.79, + "learning_rate": 2.2616164262226754e-05, + "loss": 1.9595, + "step": 18927500 + }, + { + "epoch": 54.79, + "learning_rate": 2.2615440614579476e-05, + "loss": 1.9764, + "step": 18928000 + }, + { + "epoch": 54.79, + "learning_rate": 2.2614716966932198e-05, + "loss": 1.9922, + "step": 18928500 + }, + { + "epoch": 54.79, + "learning_rate": 2.261399331928492e-05, + "loss": 2.0055, + "step": 18929000 + }, + { + "epoch": 54.79, + "learning_rate": 2.2613269671637643e-05, + "loss": 1.961, + "step": 18929500 + }, + { + "epoch": 54.79, + "learning_rate": 2.261254602399037e-05, + "loss": 1.9854, + "step": 18930000 + }, + { + "epoch": 54.8, + "learning_rate": 2.2611823823638387e-05, + "loss": 1.9589, + "step": 18930500 + }, + { + "epoch": 54.8, + "learning_rate": 2.261110017599111e-05, + "loss": 1.9541, + "step": 18931000 + }, + { + "epoch": 54.8, + "learning_rate": 2.2610376528343832e-05, + "loss": 1.9726, + "step": 18931500 + }, + { + "epoch": 54.8, + "learning_rate": 2.260965432799185e-05, + "loss": 1.985, + "step": 18932000 + }, + { + "epoch": 54.8, + "learning_rate": 2.2608930680344573e-05, + "loss": 1.9919, + "step": 18932500 + }, + { + "epoch": 54.8, + "learning_rate": 2.2608207032697295e-05, + "loss": 1.9767, + "step": 18933000 + }, + { + "epoch": 54.8, + "learning_rate": 2.2607483385050017e-05, + "loss": 1.987, + "step": 18933500 + }, + { + "epoch": 54.81, + "learning_rate": 2.2606759737402743e-05, + "loss": 1.9802, + "step": 18934000 + }, + { + "epoch": 54.81, + "learning_rate": 2.2606036089755465e-05, + "loss": 1.988, + "step": 18934500 + }, + { + "epoch": 54.81, + "learning_rate": 2.260531244210819e-05, + "loss": 1.9636, + "step": 18935000 + }, + { + "epoch": 54.81, + "learning_rate": 2.2604588794460913e-05, + "loss": 1.9779, + "step": 18935500 + }, + { + "epoch": 54.81, + "learning_rate": 2.2603865146813635e-05, + "loss": 1.9671, + "step": 18936000 + }, + { + "epoch": 54.81, + "learning_rate": 2.2603142946461654e-05, + "loss": 1.9855, + "step": 18936500 + }, + { + "epoch": 54.81, + "learning_rate": 2.2602419298814377e-05, + "loss": 1.9661, + "step": 18937000 + }, + { + "epoch": 54.82, + "learning_rate": 2.2601695651167102e-05, + "loss": 1.9661, + "step": 18937500 + }, + { + "epoch": 54.82, + "learning_rate": 2.2600972003519824e-05, + "loss": 1.9707, + "step": 18938000 + }, + { + "epoch": 54.82, + "learning_rate": 2.2600248355872547e-05, + "loss": 1.9668, + "step": 18938500 + }, + { + "epoch": 54.82, + "learning_rate": 2.259952470822527e-05, + "loss": 1.9632, + "step": 18939000 + }, + { + "epoch": 54.82, + "learning_rate": 2.259880106057799e-05, + "loss": 1.975, + "step": 18939500 + }, + { + "epoch": 54.82, + "learning_rate": 2.2598077412930717e-05, + "loss": 1.9678, + "step": 18940000 + }, + { + "epoch": 54.82, + "learning_rate": 2.259735376528344e-05, + "loss": 1.9831, + "step": 18940500 + }, + { + "epoch": 54.83, + "learning_rate": 2.2596631564931455e-05, + "loss": 1.981, + "step": 18941000 + }, + { + "epoch": 54.83, + "learning_rate": 2.259590791728418e-05, + "loss": 1.9578, + "step": 18941500 + }, + { + "epoch": 54.83, + "learning_rate": 2.25951857169322e-05, + "loss": 1.9823, + "step": 18942000 + }, + { + "epoch": 54.83, + "learning_rate": 2.259446206928492e-05, + "loss": 1.97, + "step": 18942500 + }, + { + "epoch": 54.83, + "learning_rate": 2.2593738421637644e-05, + "loss": 1.9995, + "step": 18943000 + }, + { + "epoch": 54.83, + "learning_rate": 2.2593014773990366e-05, + "loss": 1.9942, + "step": 18943500 + }, + { + "epoch": 54.84, + "learning_rate": 2.259229112634309e-05, + "loss": 1.9967, + "step": 18944000 + }, + { + "epoch": 54.84, + "learning_rate": 2.2591567478695814e-05, + "loss": 2.0123, + "step": 18944500 + }, + { + "epoch": 54.84, + "learning_rate": 2.259084383104854e-05, + "loss": 1.9889, + "step": 18945000 + }, + { + "epoch": 54.84, + "learning_rate": 2.259012018340126e-05, + "loss": 1.9731, + "step": 18945500 + }, + { + "epoch": 54.84, + "learning_rate": 2.258939798304928e-05, + "loss": 1.9694, + "step": 18946000 + }, + { + "epoch": 54.84, + "learning_rate": 2.2588674335402003e-05, + "loss": 1.9958, + "step": 18946500 + }, + { + "epoch": 54.84, + "learning_rate": 2.2587950687754725e-05, + "loss": 1.974, + "step": 18947000 + }, + { + "epoch": 54.85, + "learning_rate": 2.2587227040107447e-05, + "loss": 1.9844, + "step": 18947500 + }, + { + "epoch": 54.85, + "learning_rate": 2.258650339246017e-05, + "loss": 1.9764, + "step": 18948000 + }, + { + "epoch": 54.85, + "learning_rate": 2.258578119210819e-05, + "loss": 2.0131, + "step": 18948500 + }, + { + "epoch": 54.85, + "learning_rate": 2.2585057544460914e-05, + "loss": 2.0031, + "step": 18949000 + }, + { + "epoch": 54.85, + "learning_rate": 2.258433534410893e-05, + "loss": 1.9913, + "step": 18949500 + }, + { + "epoch": 54.85, + "learning_rate": 2.2583611696461655e-05, + "loss": 1.9595, + "step": 18950000 + }, + { + "epoch": 54.85, + "learning_rate": 2.2582888048814377e-05, + "loss": 1.9821, + "step": 18950500 + }, + { + "epoch": 54.86, + "learning_rate": 2.25821644011671e-05, + "loss": 1.992, + "step": 18951000 + }, + { + "epoch": 54.86, + "learning_rate": 2.258144220081512e-05, + "loss": 1.9925, + "step": 18951500 + }, + { + "epoch": 54.86, + "learning_rate": 2.258071855316784e-05, + "loss": 1.9709, + "step": 18952000 + }, + { + "epoch": 54.86, + "learning_rate": 2.2579996352815856e-05, + "loss": 1.9742, + "step": 18952500 + }, + { + "epoch": 54.86, + "learning_rate": 2.2579272705168582e-05, + "loss": 1.9736, + "step": 18953000 + }, + { + "epoch": 54.86, + "learning_rate": 2.2578549057521308e-05, + "loss": 1.9907, + "step": 18953500 + }, + { + "epoch": 54.86, + "learning_rate": 2.257782540987403e-05, + "loss": 1.9948, + "step": 18954000 + }, + { + "epoch": 54.87, + "learning_rate": 2.2577101762226752e-05, + "loss": 1.9753, + "step": 18954500 + }, + { + "epoch": 54.87, + "learning_rate": 2.2576378114579474e-05, + "loss": 1.9949, + "step": 18955000 + }, + { + "epoch": 54.87, + "learning_rate": 2.2575654466932197e-05, + "loss": 1.9862, + "step": 18955500 + }, + { + "epoch": 54.87, + "learning_rate": 2.257493081928492e-05, + "loss": 1.9844, + "step": 18956000 + }, + { + "epoch": 54.87, + "learning_rate": 2.2574207171637644e-05, + "loss": 1.9718, + "step": 18956500 + }, + { + "epoch": 54.87, + "learning_rate": 2.257348352399037e-05, + "loss": 1.9728, + "step": 18957000 + }, + { + "epoch": 54.87, + "learning_rate": 2.2572759876343092e-05, + "loss": 1.9761, + "step": 18957500 + }, + { + "epoch": 54.88, + "learning_rate": 2.2572036228695814e-05, + "loss": 1.9553, + "step": 18958000 + }, + { + "epoch": 54.88, + "learning_rate": 2.2571312581048537e-05, + "loss": 1.995, + "step": 18958500 + }, + { + "epoch": 54.88, + "learning_rate": 2.257058893340126e-05, + "loss": 1.9737, + "step": 18959000 + }, + { + "epoch": 54.88, + "learning_rate": 2.256986528575398e-05, + "loss": 1.9735, + "step": 18959500 + }, + { + "epoch": 54.88, + "learning_rate": 2.2569141638106707e-05, + "loss": 1.9794, + "step": 18960000 + }, + { + "epoch": 54.88, + "learning_rate": 2.2568417990459432e-05, + "loss": 1.995, + "step": 18960500 + }, + { + "epoch": 54.88, + "learning_rate": 2.2567694342812155e-05, + "loss": 1.9698, + "step": 18961000 + }, + { + "epoch": 54.89, + "learning_rate": 2.2566970695164877e-05, + "loss": 1.9667, + "step": 18961500 + }, + { + "epoch": 54.89, + "learning_rate": 2.2566248494812896e-05, + "loss": 1.9762, + "step": 18962000 + }, + { + "epoch": 54.89, + "learning_rate": 2.2565524847165618e-05, + "loss": 1.9478, + "step": 18962500 + }, + { + "epoch": 54.89, + "learning_rate": 2.256480119951834e-05, + "loss": 1.9827, + "step": 18963000 + }, + { + "epoch": 54.89, + "learning_rate": 2.256407899916636e-05, + "loss": 1.9781, + "step": 18963500 + }, + { + "epoch": 54.89, + "learning_rate": 2.2563356798814378e-05, + "loss": 1.9633, + "step": 18964000 + }, + { + "epoch": 54.89, + "learning_rate": 2.25626331511671e-05, + "loss": 1.9737, + "step": 18964500 + }, + { + "epoch": 54.9, + "learning_rate": 2.2561909503519823e-05, + "loss": 1.9487, + "step": 18965000 + }, + { + "epoch": 54.9, + "learning_rate": 2.2561185855872545e-05, + "loss": 1.9604, + "step": 18965500 + }, + { + "epoch": 54.9, + "learning_rate": 2.256046220822527e-05, + "loss": 1.9663, + "step": 18966000 + }, + { + "epoch": 54.9, + "learning_rate": 2.2559738560577993e-05, + "loss": 1.9778, + "step": 18966500 + }, + { + "epoch": 54.9, + "learning_rate": 2.2559014912930715e-05, + "loss": 2.0022, + "step": 18967000 + }, + { + "epoch": 54.9, + "learning_rate": 2.255829126528344e-05, + "loss": 1.9485, + "step": 18967500 + }, + { + "epoch": 54.9, + "learning_rate": 2.2557567617636163e-05, + "loss": 2.0108, + "step": 18968000 + }, + { + "epoch": 54.91, + "learning_rate": 2.2556843969988885e-05, + "loss": 1.9713, + "step": 18968500 + }, + { + "epoch": 54.91, + "learning_rate": 2.255612032234161e-05, + "loss": 1.9625, + "step": 18969000 + }, + { + "epoch": 54.91, + "learning_rate": 2.2555396674694333e-05, + "loss": 1.9692, + "step": 18969500 + }, + { + "epoch": 54.91, + "learning_rate": 2.2554673027047055e-05, + "loss": 1.9605, + "step": 18970000 + }, + { + "epoch": 54.91, + "learning_rate": 2.2553949379399777e-05, + "loss": 1.9754, + "step": 18970500 + }, + { + "epoch": 54.91, + "learning_rate": 2.2553227179047796e-05, + "loss": 1.9764, + "step": 18971000 + }, + { + "epoch": 54.91, + "learning_rate": 2.2552503531400522e-05, + "loss": 1.9546, + "step": 18971500 + }, + { + "epoch": 54.92, + "learning_rate": 2.2551779883753244e-05, + "loss": 1.9851, + "step": 18972000 + }, + { + "epoch": 54.92, + "learning_rate": 2.2551056236105966e-05, + "loss": 1.9793, + "step": 18972500 + }, + { + "epoch": 54.92, + "learning_rate": 2.255033258845869e-05, + "loss": 1.9667, + "step": 18973000 + }, + { + "epoch": 54.92, + "learning_rate": 2.2549610388106708e-05, + "loss": 1.9652, + "step": 18973500 + }, + { + "epoch": 54.92, + "learning_rate": 2.2548888187754723e-05, + "loss": 2.0086, + "step": 18974000 + }, + { + "epoch": 54.92, + "learning_rate": 2.254816454010745e-05, + "loss": 1.9601, + "step": 18974500 + }, + { + "epoch": 54.92, + "learning_rate": 2.254744089246017e-05, + "loss": 1.9926, + "step": 18975000 + }, + { + "epoch": 54.93, + "learning_rate": 2.2546717244812897e-05, + "loss": 1.9611, + "step": 18975500 + }, + { + "epoch": 54.93, + "learning_rate": 2.254599359716562e-05, + "loss": 2.0004, + "step": 18976000 + }, + { + "epoch": 54.93, + "learning_rate": 2.2545271396813634e-05, + "loss": 1.9718, + "step": 18976500 + }, + { + "epoch": 54.93, + "learning_rate": 2.2544549196461653e-05, + "loss": 1.9805, + "step": 18977000 + }, + { + "epoch": 54.93, + "learning_rate": 2.2543825548814376e-05, + "loss": 1.9932, + "step": 18977500 + }, + { + "epoch": 54.93, + "learning_rate": 2.2543101901167098e-05, + "loss": 1.9551, + "step": 18978000 + }, + { + "epoch": 54.93, + "learning_rate": 2.2542378253519823e-05, + "loss": 1.9901, + "step": 18978500 + }, + { + "epoch": 54.94, + "learning_rate": 2.254165460587255e-05, + "loss": 1.9553, + "step": 18979000 + }, + { + "epoch": 54.94, + "learning_rate": 2.254093095822527e-05, + "loss": 1.968, + "step": 18979500 + }, + { + "epoch": 54.94, + "learning_rate": 2.2540207310577994e-05, + "loss": 1.9908, + "step": 18980000 + }, + { + "epoch": 54.94, + "learning_rate": 2.2539483662930716e-05, + "loss": 1.9721, + "step": 18980500 + }, + { + "epoch": 54.94, + "learning_rate": 2.2538760015283438e-05, + "loss": 1.9765, + "step": 18981000 + }, + { + "epoch": 54.94, + "learning_rate": 2.253803636763616e-05, + "loss": 1.981, + "step": 18981500 + }, + { + "epoch": 54.95, + "learning_rate": 2.2537315614579472e-05, + "loss": 1.9878, + "step": 18982000 + }, + { + "epoch": 54.95, + "learning_rate": 2.2536591966932198e-05, + "loss": 1.9873, + "step": 18982500 + }, + { + "epoch": 54.95, + "learning_rate": 2.2535868319284924e-05, + "loss": 1.9721, + "step": 18983000 + }, + { + "epoch": 54.95, + "learning_rate": 2.2535144671637646e-05, + "loss": 1.9661, + "step": 18983500 + }, + { + "epoch": 54.95, + "learning_rate": 2.2534421023990368e-05, + "loss": 1.9908, + "step": 18984000 + }, + { + "epoch": 54.95, + "learning_rate": 2.253369737634309e-05, + "loss": 1.9468, + "step": 18984500 + }, + { + "epoch": 54.95, + "learning_rate": 2.2532973728695813e-05, + "loss": 1.9689, + "step": 18985000 + }, + { + "epoch": 54.96, + "learning_rate": 2.2532250081048535e-05, + "loss": 1.9997, + "step": 18985500 + }, + { + "epoch": 54.96, + "learning_rate": 2.253152643340126e-05, + "loss": 1.9755, + "step": 18986000 + }, + { + "epoch": 54.96, + "learning_rate": 2.2530802785753986e-05, + "loss": 1.9962, + "step": 18986500 + }, + { + "epoch": 54.96, + "learning_rate": 2.253007913810671e-05, + "loss": 1.9723, + "step": 18987000 + }, + { + "epoch": 54.96, + "learning_rate": 2.252935549045943e-05, + "loss": 1.9976, + "step": 18987500 + }, + { + "epoch": 54.96, + "learning_rate": 2.2528631842812153e-05, + "loss": 1.9666, + "step": 18988000 + }, + { + "epoch": 54.96, + "learning_rate": 2.2527908195164875e-05, + "loss": 1.9699, + "step": 18988500 + }, + { + "epoch": 54.97, + "learning_rate": 2.25271845475176e-05, + "loss": 1.958, + "step": 18989000 + }, + { + "epoch": 54.97, + "learning_rate": 2.2526460899870326e-05, + "loss": 1.9638, + "step": 18989500 + }, + { + "epoch": 54.97, + "learning_rate": 2.252573725222305e-05, + "loss": 1.9873, + "step": 18990000 + }, + { + "epoch": 54.97, + "learning_rate": 2.2525015051871064e-05, + "loss": 1.9781, + "step": 18990500 + }, + { + "epoch": 54.97, + "learning_rate": 2.2524291404223786e-05, + "loss": 1.9915, + "step": 18991000 + }, + { + "epoch": 54.97, + "learning_rate": 2.2523567756576512e-05, + "loss": 1.9867, + "step": 18991500 + }, + { + "epoch": 54.97, + "learning_rate": 2.2522844108929234e-05, + "loss": 1.9549, + "step": 18992000 + }, + { + "epoch": 54.98, + "learning_rate": 2.2522120461281956e-05, + "loss": 1.9818, + "step": 18992500 + }, + { + "epoch": 54.98, + "learning_rate": 2.2521398260929975e-05, + "loss": 1.9874, + "step": 18993000 + }, + { + "epoch": 54.98, + "learning_rate": 2.25206746132827e-05, + "loss": 1.9825, + "step": 18993500 + }, + { + "epoch": 54.98, + "learning_rate": 2.2519950965635423e-05, + "loss": 1.9738, + "step": 18994000 + }, + { + "epoch": 54.98, + "learning_rate": 2.2519227317988145e-05, + "loss": 1.996, + "step": 18994500 + }, + { + "epoch": 54.98, + "learning_rate": 2.2518503670340868e-05, + "loss": 1.9433, + "step": 18995000 + }, + { + "epoch": 54.98, + "learning_rate": 2.251778002269359e-05, + "loss": 1.9928, + "step": 18995500 + }, + { + "epoch": 54.99, + "learning_rate": 2.251705782234161e-05, + "loss": 1.9601, + "step": 18996000 + }, + { + "epoch": 54.99, + "learning_rate": 2.251633417469433e-05, + "loss": 1.9539, + "step": 18996500 + }, + { + "epoch": 54.99, + "learning_rate": 2.2515610527047057e-05, + "loss": 2.0001, + "step": 18997000 + }, + { + "epoch": 54.99, + "learning_rate": 2.2514888326695076e-05, + "loss": 2.0158, + "step": 18997500 + }, + { + "epoch": 54.99, + "learning_rate": 2.2514164679047798e-05, + "loss": 1.9984, + "step": 18998000 + }, + { + "epoch": 54.99, + "learning_rate": 2.251344103140052e-05, + "loss": 1.9686, + "step": 18998500 + }, + { + "epoch": 54.99, + "learning_rate": 2.2512717383753242e-05, + "loss": 1.9777, + "step": 18999000 + }, + { + "epoch": 55.0, + "learning_rate": 2.2511993736105965e-05, + "loss": 1.973, + "step": 18999500 + }, + { + "epoch": 55.0, + "learning_rate": 2.251127008845869e-05, + "loss": 1.9756, + "step": 19000000 + }, + { + "epoch": 55.0, + "learning_rate": 2.2510546440811412e-05, + "loss": 1.9994, + "step": 19000500 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.6754768735668595, + "eval_accuracy_mlm": 0.6420146190389152, + "eval_accuracy_nsp": 0.8550320129426701, + "eval_loss": 2.161105155944824, + "eval_runtime": 331.5839, + "eval_samples_per_second": 1316.065, + "eval_steps_per_second": 54.837, + "step": 19000960 + }, + { + "epoch": 55.0, + "learning_rate": 2.250982424045943e-05, + "loss": 1.9806, + "step": 19001000 + }, + { + "epoch": 55.0, + "learning_rate": 2.250910204010745e-05, + "loss": 1.9872, + "step": 19001500 + }, + { + "epoch": 55.0, + "learning_rate": 2.2508378392460173e-05, + "loss": 1.9389, + "step": 19002000 + }, + { + "epoch": 55.0, + "learning_rate": 2.2507654744812895e-05, + "loss": 1.967, + "step": 19002500 + }, + { + "epoch": 55.01, + "learning_rate": 2.2506931097165617e-05, + "loss": 1.9646, + "step": 19003000 + }, + { + "epoch": 55.01, + "learning_rate": 2.250620744951834e-05, + "loss": 1.9836, + "step": 19003500 + }, + { + "epoch": 55.01, + "learning_rate": 2.2505483801871065e-05, + "loss": 1.9732, + "step": 19004000 + }, + { + "epoch": 55.01, + "learning_rate": 2.250476015422379e-05, + "loss": 1.9739, + "step": 19004500 + }, + { + "epoch": 55.01, + "learning_rate": 2.2504036506576513e-05, + "loss": 1.9462, + "step": 19005000 + }, + { + "epoch": 55.01, + "learning_rate": 2.2503312858929235e-05, + "loss": 1.9426, + "step": 19005500 + }, + { + "epoch": 55.01, + "learning_rate": 2.2502590658577254e-05, + "loss": 1.96, + "step": 19006000 + }, + { + "epoch": 55.02, + "learning_rate": 2.2501867010929976e-05, + "loss": 1.9779, + "step": 19006500 + }, + { + "epoch": 55.02, + "learning_rate": 2.25011433632827e-05, + "loss": 1.9652, + "step": 19007000 + }, + { + "epoch": 55.02, + "learning_rate": 2.250041971563542e-05, + "loss": 1.969, + "step": 19007500 + }, + { + "epoch": 55.02, + "learning_rate": 2.2499696067988143e-05, + "loss": 1.9475, + "step": 19008000 + }, + { + "epoch": 55.02, + "learning_rate": 2.249897242034087e-05, + "loss": 1.9695, + "step": 19008500 + }, + { + "epoch": 55.02, + "learning_rate": 2.249824877269359e-05, + "loss": 1.9568, + "step": 19009000 + }, + { + "epoch": 55.02, + "learning_rate": 2.2497525125046316e-05, + "loss": 1.9672, + "step": 19009500 + }, + { + "epoch": 55.03, + "learning_rate": 2.249680147739904e-05, + "loss": 1.9606, + "step": 19010000 + }, + { + "epoch": 55.03, + "learning_rate": 2.249607782975176e-05, + "loss": 1.9504, + "step": 19010500 + }, + { + "epoch": 55.03, + "learning_rate": 2.2495355629399776e-05, + "loss": 1.9571, + "step": 19011000 + }, + { + "epoch": 55.03, + "learning_rate": 2.2494631981752502e-05, + "loss": 1.9563, + "step": 19011500 + }, + { + "epoch": 55.03, + "learning_rate": 2.2493909781400518e-05, + "loss": 1.9638, + "step": 19012000 + }, + { + "epoch": 55.03, + "learning_rate": 2.249318758104854e-05, + "loss": 1.9832, + "step": 19012500 + }, + { + "epoch": 55.03, + "learning_rate": 2.2492463933401262e-05, + "loss": 1.9491, + "step": 19013000 + }, + { + "epoch": 55.04, + "learning_rate": 2.2491740285753984e-05, + "loss": 1.9554, + "step": 19013500 + }, + { + "epoch": 55.04, + "learning_rate": 2.2491016638106707e-05, + "loss": 1.9477, + "step": 19014000 + }, + { + "epoch": 55.04, + "learning_rate": 2.2490294437754726e-05, + "loss": 1.9484, + "step": 19014500 + }, + { + "epoch": 55.04, + "learning_rate": 2.2489570790107448e-05, + "loss": 1.9825, + "step": 19015000 + }, + { + "epoch": 55.04, + "learning_rate": 2.248884714246017e-05, + "loss": 1.9757, + "step": 19015500 + }, + { + "epoch": 55.04, + "learning_rate": 2.2488123494812896e-05, + "loss": 1.9589, + "step": 19016000 + }, + { + "epoch": 55.04, + "learning_rate": 2.2487399847165618e-05, + "loss": 1.9615, + "step": 19016500 + }, + { + "epoch": 55.05, + "learning_rate": 2.248667619951834e-05, + "loss": 1.9585, + "step": 19017000 + }, + { + "epoch": 55.05, + "learning_rate": 2.248595399916636e-05, + "loss": 1.9467, + "step": 19017500 + }, + { + "epoch": 55.05, + "learning_rate": 2.2485231798814378e-05, + "loss": 1.9542, + "step": 19018000 + }, + { + "epoch": 55.05, + "learning_rate": 2.24845081511671e-05, + "loss": 1.9399, + "step": 19018500 + }, + { + "epoch": 55.05, + "learning_rate": 2.2483784503519822e-05, + "loss": 1.9588, + "step": 19019000 + }, + { + "epoch": 55.05, + "learning_rate": 2.248306230316784e-05, + "loss": 1.964, + "step": 19019500 + }, + { + "epoch": 55.06, + "learning_rate": 2.2482338655520567e-05, + "loss": 1.9653, + "step": 19020000 + }, + { + "epoch": 55.06, + "learning_rate": 2.248161500787329e-05, + "loss": 1.9701, + "step": 19020500 + }, + { + "epoch": 55.06, + "learning_rate": 2.248089136022601e-05, + "loss": 1.9589, + "step": 19021000 + }, + { + "epoch": 55.06, + "learning_rate": 2.2480167712578734e-05, + "loss": 1.953, + "step": 19021500 + }, + { + "epoch": 55.06, + "learning_rate": 2.2479444064931456e-05, + "loss": 1.9579, + "step": 19022000 + }, + { + "epoch": 55.06, + "learning_rate": 2.2478720417284178e-05, + "loss": 1.9843, + "step": 19022500 + }, + { + "epoch": 55.06, + "learning_rate": 2.2477996769636904e-05, + "loss": 1.9589, + "step": 19023000 + }, + { + "epoch": 55.07, + "learning_rate": 2.247727312198963e-05, + "loss": 1.995, + "step": 19023500 + }, + { + "epoch": 55.07, + "learning_rate": 2.247654947434235e-05, + "loss": 1.966, + "step": 19024000 + }, + { + "epoch": 55.07, + "learning_rate": 2.2475828721285664e-05, + "loss": 1.9648, + "step": 19024500 + }, + { + "epoch": 55.07, + "learning_rate": 2.2475105073638386e-05, + "loss": 1.975, + "step": 19025000 + }, + { + "epoch": 55.07, + "learning_rate": 2.247438142599111e-05, + "loss": 1.9534, + "step": 19025500 + }, + { + "epoch": 55.07, + "learning_rate": 2.247365777834383e-05, + "loss": 1.9678, + "step": 19026000 + }, + { + "epoch": 55.07, + "learning_rate": 2.2472934130696556e-05, + "loss": 1.947, + "step": 19026500 + }, + { + "epoch": 55.08, + "learning_rate": 2.247221048304928e-05, + "loss": 1.9623, + "step": 19027000 + }, + { + "epoch": 55.08, + "learning_rate": 2.2471486835402004e-05, + "loss": 1.9635, + "step": 19027500 + }, + { + "epoch": 55.08, + "learning_rate": 2.2470763187754726e-05, + "loss": 1.9532, + "step": 19028000 + }, + { + "epoch": 55.08, + "learning_rate": 2.247003954010745e-05, + "loss": 1.9695, + "step": 19028500 + }, + { + "epoch": 55.08, + "learning_rate": 2.246931589246017e-05, + "loss": 1.9828, + "step": 19029000 + }, + { + "epoch": 55.08, + "learning_rate": 2.2468592244812893e-05, + "loss": 1.9591, + "step": 19029500 + }, + { + "epoch": 55.08, + "learning_rate": 2.246786859716562e-05, + "loss": 1.951, + "step": 19030000 + }, + { + "epoch": 55.09, + "learning_rate": 2.246714494951834e-05, + "loss": 1.9544, + "step": 19030500 + }, + { + "epoch": 55.09, + "learning_rate": 2.2466421301871066e-05, + "loss": 1.9464, + "step": 19031000 + }, + { + "epoch": 55.09, + "learning_rate": 2.246569765422379e-05, + "loss": 1.9733, + "step": 19031500 + }, + { + "epoch": 55.09, + "learning_rate": 2.2464975453871808e-05, + "loss": 1.9509, + "step": 19032000 + }, + { + "epoch": 55.09, + "learning_rate": 2.246425180622453e-05, + "loss": 1.9658, + "step": 19032500 + }, + { + "epoch": 55.09, + "learning_rate": 2.2463528158577252e-05, + "loss": 1.9487, + "step": 19033000 + }, + { + "epoch": 55.09, + "learning_rate": 2.2462804510929974e-05, + "loss": 1.9707, + "step": 19033500 + }, + { + "epoch": 55.1, + "learning_rate": 2.2462080863282697e-05, + "loss": 1.9896, + "step": 19034000 + }, + { + "epoch": 55.1, + "learning_rate": 2.2461357215635422e-05, + "loss": 1.9576, + "step": 19034500 + }, + { + "epoch": 55.1, + "learning_rate": 2.2460633567988144e-05, + "loss": 1.9836, + "step": 19035000 + }, + { + "epoch": 55.1, + "learning_rate": 2.245990992034087e-05, + "loss": 1.9598, + "step": 19035500 + }, + { + "epoch": 55.1, + "learning_rate": 2.2459186272693592e-05, + "loss": 1.9674, + "step": 19036000 + }, + { + "epoch": 55.1, + "learning_rate": 2.2458462625046315e-05, + "loss": 1.9553, + "step": 19036500 + }, + { + "epoch": 55.1, + "learning_rate": 2.2457738977399037e-05, + "loss": 1.9525, + "step": 19037000 + }, + { + "epoch": 55.11, + "learning_rate": 2.2457016777047056e-05, + "loss": 1.9524, + "step": 19037500 + }, + { + "epoch": 55.11, + "learning_rate": 2.2456293129399778e-05, + "loss": 1.9732, + "step": 19038000 + }, + { + "epoch": 55.11, + "learning_rate": 2.2455569481752504e-05, + "loss": 1.9799, + "step": 19038500 + }, + { + "epoch": 55.11, + "learning_rate": 2.2454845834105226e-05, + "loss": 1.9679, + "step": 19039000 + }, + { + "epoch": 55.11, + "learning_rate": 2.2454123633753245e-05, + "loss": 1.9477, + "step": 19039500 + }, + { + "epoch": 55.11, + "learning_rate": 2.2453399986105967e-05, + "loss": 1.9637, + "step": 19040000 + }, + { + "epoch": 55.11, + "learning_rate": 2.245267633845869e-05, + "loss": 1.9647, + "step": 19040500 + }, + { + "epoch": 55.12, + "learning_rate": 2.245195269081141e-05, + "loss": 1.9747, + "step": 19041000 + }, + { + "epoch": 55.12, + "learning_rate": 2.2451231937754724e-05, + "loss": 1.9668, + "step": 19041500 + }, + { + "epoch": 55.12, + "learning_rate": 2.2450508290107446e-05, + "loss": 1.9817, + "step": 19042000 + }, + { + "epoch": 55.12, + "learning_rate": 2.244978464246017e-05, + "loss": 1.9467, + "step": 19042500 + }, + { + "epoch": 55.12, + "learning_rate": 2.2449060994812894e-05, + "loss": 1.9447, + "step": 19043000 + }, + { + "epoch": 55.12, + "learning_rate": 2.244833734716562e-05, + "loss": 1.9744, + "step": 19043500 + }, + { + "epoch": 55.12, + "learning_rate": 2.244761369951834e-05, + "loss": 1.9751, + "step": 19044000 + }, + { + "epoch": 55.13, + "learning_rate": 2.2446890051871064e-05, + "loss": 1.9461, + "step": 19044500 + }, + { + "epoch": 55.13, + "learning_rate": 2.2446166404223786e-05, + "loss": 1.9908, + "step": 19045000 + }, + { + "epoch": 55.13, + "learning_rate": 2.244544275657651e-05, + "loss": 1.9503, + "step": 19045500 + }, + { + "epoch": 55.13, + "learning_rate": 2.2444719108929234e-05, + "loss": 1.9547, + "step": 19046000 + }, + { + "epoch": 55.13, + "learning_rate": 2.244399546128196e-05, + "loss": 1.9534, + "step": 19046500 + }, + { + "epoch": 55.13, + "learning_rate": 2.2443271813634682e-05, + "loss": 1.9649, + "step": 19047000 + }, + { + "epoch": 55.13, + "learning_rate": 2.2442549613282697e-05, + "loss": 1.962, + "step": 19047500 + }, + { + "epoch": 55.14, + "learning_rate": 2.244182596563542e-05, + "loss": 1.9584, + "step": 19048000 + }, + { + "epoch": 55.14, + "learning_rate": 2.2441102317988145e-05, + "loss": 1.9455, + "step": 19048500 + }, + { + "epoch": 55.14, + "learning_rate": 2.2440378670340867e-05, + "loss": 1.9522, + "step": 19049000 + }, + { + "epoch": 55.14, + "learning_rate": 2.2439655022693593e-05, + "loss": 1.9648, + "step": 19049500 + }, + { + "epoch": 55.14, + "learning_rate": 2.2438931375046315e-05, + "loss": 1.9687, + "step": 19050000 + }, + { + "epoch": 55.14, + "learning_rate": 2.2438209174694334e-05, + "loss": 1.9537, + "step": 19050500 + }, + { + "epoch": 55.14, + "learning_rate": 2.2437485527047057e-05, + "loss": 1.9459, + "step": 19051000 + }, + { + "epoch": 55.15, + "learning_rate": 2.243676187939978e-05, + "loss": 1.9512, + "step": 19051500 + }, + { + "epoch": 55.15, + "learning_rate": 2.24360382317525e-05, + "loss": 1.9842, + "step": 19052000 + }, + { + "epoch": 55.15, + "learning_rate": 2.243531603140052e-05, + "loss": 1.955, + "step": 19052500 + }, + { + "epoch": 55.15, + "learning_rate": 2.2434592383753242e-05, + "loss": 1.9827, + "step": 19053000 + }, + { + "epoch": 55.15, + "learning_rate": 2.2433868736105968e-05, + "loss": 1.9712, + "step": 19053500 + }, + { + "epoch": 55.15, + "learning_rate": 2.243314508845869e-05, + "loss": 1.9563, + "step": 19054000 + }, + { + "epoch": 55.15, + "learning_rate": 2.2432421440811412e-05, + "loss": 1.9502, + "step": 19054500 + }, + { + "epoch": 55.16, + "learning_rate": 2.2431697793164134e-05, + "loss": 1.9708, + "step": 19055000 + }, + { + "epoch": 55.16, + "learning_rate": 2.2430975592812153e-05, + "loss": 1.9648, + "step": 19055500 + }, + { + "epoch": 55.16, + "learning_rate": 2.2430251945164876e-05, + "loss": 1.9667, + "step": 19056000 + }, + { + "epoch": 55.16, + "learning_rate": 2.2429528297517598e-05, + "loss": 1.957, + "step": 19056500 + }, + { + "epoch": 55.16, + "learning_rate": 2.2428804649870324e-05, + "loss": 1.9744, + "step": 19057000 + }, + { + "epoch": 55.16, + "learning_rate": 2.242808100222305e-05, + "loss": 1.9822, + "step": 19057500 + }, + { + "epoch": 55.17, + "learning_rate": 2.242735735457577e-05, + "loss": 1.9728, + "step": 19058000 + }, + { + "epoch": 55.17, + "learning_rate": 2.2426633706928494e-05, + "loss": 1.9505, + "step": 19058500 + }, + { + "epoch": 55.17, + "learning_rate": 2.2425910059281216e-05, + "loss": 1.9778, + "step": 19059000 + }, + { + "epoch": 55.17, + "learning_rate": 2.2425186411633938e-05, + "loss": 1.9612, + "step": 19059500 + }, + { + "epoch": 55.17, + "learning_rate": 2.242446276398666e-05, + "loss": 1.9779, + "step": 19060000 + }, + { + "epoch": 55.17, + "learning_rate": 2.2423739116339386e-05, + "loss": 1.952, + "step": 19060500 + }, + { + "epoch": 55.17, + "learning_rate": 2.2423016915987405e-05, + "loss": 1.9618, + "step": 19061000 + }, + { + "epoch": 55.18, + "learning_rate": 2.2422293268340127e-05, + "loss": 1.9835, + "step": 19061500 + }, + { + "epoch": 55.18, + "learning_rate": 2.242156962069285e-05, + "loss": 1.9266, + "step": 19062000 + }, + { + "epoch": 55.18, + "learning_rate": 2.2420845973045575e-05, + "loss": 1.9409, + "step": 19062500 + }, + { + "epoch": 55.18, + "learning_rate": 2.2420122325398297e-05, + "loss": 1.9348, + "step": 19063000 + }, + { + "epoch": 55.18, + "learning_rate": 2.241939867775102e-05, + "loss": 1.961, + "step": 19063500 + }, + { + "epoch": 55.18, + "learning_rate": 2.241867503010374e-05, + "loss": 1.9643, + "step": 19064000 + }, + { + "epoch": 55.18, + "learning_rate": 2.2417954277047057e-05, + "loss": 1.9744, + "step": 19064500 + }, + { + "epoch": 55.19, + "learning_rate": 2.241723062939978e-05, + "loss": 1.9677, + "step": 19065000 + }, + { + "epoch": 55.19, + "learning_rate": 2.2416506981752502e-05, + "loss": 1.975, + "step": 19065500 + }, + { + "epoch": 55.19, + "learning_rate": 2.241578478140052e-05, + "loss": 1.9648, + "step": 19066000 + }, + { + "epoch": 55.19, + "learning_rate": 2.2415061133753243e-05, + "loss": 1.9618, + "step": 19066500 + }, + { + "epoch": 55.19, + "learning_rate": 2.2414337486105965e-05, + "loss": 1.976, + "step": 19067000 + }, + { + "epoch": 55.19, + "learning_rate": 2.2413613838458687e-05, + "loss": 1.9616, + "step": 19067500 + }, + { + "epoch": 55.19, + "learning_rate": 2.2412890190811413e-05, + "loss": 1.9815, + "step": 19068000 + }, + { + "epoch": 55.2, + "learning_rate": 2.241216654316414e-05, + "loss": 1.9621, + "step": 19068500 + }, + { + "epoch": 55.2, + "learning_rate": 2.241144289551686e-05, + "loss": 1.9482, + "step": 19069000 + }, + { + "epoch": 55.2, + "learning_rate": 2.2410719247869583e-05, + "loss": 1.9917, + "step": 19069500 + }, + { + "epoch": 55.2, + "learning_rate": 2.2409995600222305e-05, + "loss": 1.9904, + "step": 19070000 + }, + { + "epoch": 55.2, + "learning_rate": 2.2409271952575028e-05, + "loss": 1.9615, + "step": 19070500 + }, + { + "epoch": 55.2, + "learning_rate": 2.240854830492775e-05, + "loss": 1.9535, + "step": 19071000 + }, + { + "epoch": 55.2, + "learning_rate": 2.2407824657280475e-05, + "loss": 1.9555, + "step": 19071500 + }, + { + "epoch": 55.21, + "learning_rate": 2.2407102456928494e-05, + "loss": 1.9396, + "step": 19072000 + }, + { + "epoch": 55.21, + "learning_rate": 2.2406378809281217e-05, + "loss": 1.9565, + "step": 19072500 + }, + { + "epoch": 55.21, + "learning_rate": 2.2405656608929236e-05, + "loss": 1.9896, + "step": 19073000 + }, + { + "epoch": 55.21, + "learning_rate": 2.2404932961281958e-05, + "loss": 1.954, + "step": 19073500 + }, + { + "epoch": 55.21, + "learning_rate": 2.240420931363468e-05, + "loss": 1.9671, + "step": 19074000 + }, + { + "epoch": 55.21, + "learning_rate": 2.2403485665987402e-05, + "loss": 1.9521, + "step": 19074500 + }, + { + "epoch": 55.21, + "learning_rate": 2.2402762018340125e-05, + "loss": 1.9579, + "step": 19075000 + }, + { + "epoch": 55.22, + "learning_rate": 2.240203837069285e-05, + "loss": 1.9554, + "step": 19075500 + }, + { + "epoch": 55.22, + "learning_rate": 2.2401314723045576e-05, + "loss": 1.9505, + "step": 19076000 + }, + { + "epoch": 55.22, + "learning_rate": 2.2400591075398298e-05, + "loss": 1.984, + "step": 19076500 + }, + { + "epoch": 55.22, + "learning_rate": 2.2399868875046314e-05, + "loss": 1.9782, + "step": 19077000 + }, + { + "epoch": 55.22, + "learning_rate": 2.239914522739904e-05, + "loss": 1.9474, + "step": 19077500 + }, + { + "epoch": 55.22, + "learning_rate": 2.239842157975176e-05, + "loss": 1.9574, + "step": 19078000 + }, + { + "epoch": 55.22, + "learning_rate": 2.2397697932104484e-05, + "loss": 1.9522, + "step": 19078500 + }, + { + "epoch": 55.23, + "learning_rate": 2.2396974284457206e-05, + "loss": 1.992, + "step": 19079000 + }, + { + "epoch": 55.23, + "learning_rate": 2.239625063680993e-05, + "loss": 1.9965, + "step": 19079500 + }, + { + "epoch": 55.23, + "learning_rate": 2.2395526989162654e-05, + "loss": 1.9663, + "step": 19080000 + }, + { + "epoch": 55.23, + "learning_rate": 2.2394803341515376e-05, + "loss": 1.9925, + "step": 19080500 + }, + { + "epoch": 55.23, + "learning_rate": 2.2394082588458688e-05, + "loss": 1.966, + "step": 19081000 + }, + { + "epoch": 55.23, + "learning_rate": 2.2393358940811414e-05, + "loss": 1.9628, + "step": 19081500 + }, + { + "epoch": 55.23, + "learning_rate": 2.2392635293164136e-05, + "loss": 1.9562, + "step": 19082000 + }, + { + "epoch": 55.24, + "learning_rate": 2.2391911645516858e-05, + "loss": 1.9507, + "step": 19082500 + }, + { + "epoch": 55.24, + "learning_rate": 2.2391187997869584e-05, + "loss": 1.9614, + "step": 19083000 + }, + { + "epoch": 55.24, + "learning_rate": 2.2390464350222306e-05, + "loss": 2.0017, + "step": 19083500 + }, + { + "epoch": 55.24, + "learning_rate": 2.2389742149870325e-05, + "loss": 1.9414, + "step": 19084000 + }, + { + "epoch": 55.24, + "learning_rate": 2.2389018502223047e-05, + "loss": 1.9752, + "step": 19084500 + }, + { + "epoch": 55.24, + "learning_rate": 2.238829485457577e-05, + "loss": 1.9895, + "step": 19085000 + }, + { + "epoch": 55.24, + "learning_rate": 2.2387571206928492e-05, + "loss": 1.9748, + "step": 19085500 + }, + { + "epoch": 55.25, + "learning_rate": 2.2386847559281214e-05, + "loss": 1.945, + "step": 19086000 + }, + { + "epoch": 55.25, + "learning_rate": 2.238612391163394e-05, + "loss": 1.9707, + "step": 19086500 + }, + { + "epoch": 55.25, + "learning_rate": 2.2385400263986665e-05, + "loss": 1.9716, + "step": 19087000 + }, + { + "epoch": 55.25, + "learning_rate": 2.2384676616339388e-05, + "loss": 1.9921, + "step": 19087500 + }, + { + "epoch": 55.25, + "learning_rate": 2.238395296869211e-05, + "loss": 1.9445, + "step": 19088000 + }, + { + "epoch": 55.25, + "learning_rate": 2.238323076834013e-05, + "loss": 1.9601, + "step": 19088500 + }, + { + "epoch": 55.25, + "learning_rate": 2.2382508567988144e-05, + "loss": 1.9405, + "step": 19089000 + }, + { + "epoch": 55.26, + "learning_rate": 2.2381784920340866e-05, + "loss": 1.981, + "step": 19089500 + }, + { + "epoch": 55.26, + "learning_rate": 2.238106127269359e-05, + "loss": 1.9534, + "step": 19090000 + }, + { + "epoch": 55.26, + "learning_rate": 2.2380337625046314e-05, + "loss": 1.9784, + "step": 19090500 + }, + { + "epoch": 55.26, + "learning_rate": 2.237961397739904e-05, + "loss": 1.9714, + "step": 19091000 + }, + { + "epoch": 55.26, + "learning_rate": 2.2378890329751762e-05, + "loss": 1.9903, + "step": 19091500 + }, + { + "epoch": 55.26, + "learning_rate": 2.2378166682104484e-05, + "loss": 1.9657, + "step": 19092000 + }, + { + "epoch": 55.26, + "learning_rate": 2.2377443034457207e-05, + "loss": 1.9694, + "step": 19092500 + }, + { + "epoch": 55.27, + "learning_rate": 2.2376720834105226e-05, + "loss": 1.97, + "step": 19093000 + }, + { + "epoch": 55.27, + "learning_rate": 2.237599863375324e-05, + "loss": 1.9678, + "step": 19093500 + }, + { + "epoch": 55.27, + "learning_rate": 2.237527643340126e-05, + "loss": 1.9531, + "step": 19094000 + }, + { + "epoch": 55.27, + "learning_rate": 2.2374552785753982e-05, + "loss": 1.9856, + "step": 19094500 + }, + { + "epoch": 55.27, + "learning_rate": 2.2373829138106708e-05, + "loss": 1.9798, + "step": 19095000 + }, + { + "epoch": 55.27, + "learning_rate": 2.237310549045943e-05, + "loss": 1.9827, + "step": 19095500 + }, + { + "epoch": 55.28, + "learning_rate": 2.2372381842812152e-05, + "loss": 1.9654, + "step": 19096000 + }, + { + "epoch": 55.28, + "learning_rate": 2.2371658195164878e-05, + "loss": 1.9438, + "step": 19096500 + }, + { + "epoch": 55.28, + "learning_rate": 2.2370935994812894e-05, + "loss": 1.9653, + "step": 19097000 + }, + { + "epoch": 55.28, + "learning_rate": 2.2370212347165616e-05, + "loss": 1.9811, + "step": 19097500 + }, + { + "epoch": 55.28, + "learning_rate": 2.236948869951834e-05, + "loss": 1.9664, + "step": 19098000 + }, + { + "epoch": 55.28, + "learning_rate": 2.2368765051871067e-05, + "loss": 1.9678, + "step": 19098500 + }, + { + "epoch": 55.28, + "learning_rate": 2.236804140422379e-05, + "loss": 1.9659, + "step": 19099000 + }, + { + "epoch": 55.29, + "learning_rate": 2.236731775657651e-05, + "loss": 1.9693, + "step": 19099500 + }, + { + "epoch": 55.29, + "learning_rate": 2.2366594108929234e-05, + "loss": 1.9692, + "step": 19100000 + }, + { + "epoch": 55.29, + "learning_rate": 2.2365871908577253e-05, + "loss": 1.9882, + "step": 19100500 + }, + { + "epoch": 55.29, + "learning_rate": 2.2365148260929975e-05, + "loss": 1.9691, + "step": 19101000 + }, + { + "epoch": 55.29, + "learning_rate": 2.2364424613282697e-05, + "loss": 1.9966, + "step": 19101500 + }, + { + "epoch": 55.29, + "learning_rate": 2.2363700965635423e-05, + "loss": 1.9714, + "step": 19102000 + }, + { + "epoch": 55.29, + "learning_rate": 2.2362977317988145e-05, + "loss": 1.963, + "step": 19102500 + }, + { + "epoch": 55.3, + "learning_rate": 2.2362253670340867e-05, + "loss": 1.9778, + "step": 19103000 + }, + { + "epoch": 55.3, + "learning_rate": 2.2361530022693593e-05, + "loss": 1.9709, + "step": 19103500 + }, + { + "epoch": 55.3, + "learning_rate": 2.2360806375046315e-05, + "loss": 1.9737, + "step": 19104000 + }, + { + "epoch": 55.3, + "learning_rate": 2.236008417469433e-05, + "loss": 1.963, + "step": 19104500 + }, + { + "epoch": 55.3, + "learning_rate": 2.235936197434235e-05, + "loss": 1.9613, + "step": 19105000 + }, + { + "epoch": 55.3, + "learning_rate": 2.2358638326695072e-05, + "loss": 1.9469, + "step": 19105500 + }, + { + "epoch": 55.3, + "learning_rate": 2.2357914679047797e-05, + "loss": 1.9806, + "step": 19106000 + }, + { + "epoch": 55.31, + "learning_rate": 2.235719103140052e-05, + "loss": 1.9517, + "step": 19106500 + }, + { + "epoch": 55.31, + "learning_rate": 2.235646883104854e-05, + "loss": 1.9829, + "step": 19107000 + }, + { + "epoch": 55.31, + "learning_rate": 2.235574518340126e-05, + "loss": 1.9848, + "step": 19107500 + }, + { + "epoch": 55.31, + "learning_rate": 2.2355021535753983e-05, + "loss": 1.9532, + "step": 19108000 + }, + { + "epoch": 55.31, + "learning_rate": 2.2354297888106705e-05, + "loss": 1.9775, + "step": 19108500 + }, + { + "epoch": 55.31, + "learning_rate": 2.235357424045943e-05, + "loss": 1.9629, + "step": 19109000 + }, + { + "epoch": 55.31, + "learning_rate": 2.2352850592812157e-05, + "loss": 1.9729, + "step": 19109500 + }, + { + "epoch": 55.32, + "learning_rate": 2.235212694516488e-05, + "loss": 1.9542, + "step": 19110000 + }, + { + "epoch": 55.32, + "learning_rate": 2.2351404744812894e-05, + "loss": 1.9674, + "step": 19110500 + }, + { + "epoch": 55.32, + "learning_rate": 2.2350681097165617e-05, + "loss": 1.9627, + "step": 19111000 + }, + { + "epoch": 55.32, + "learning_rate": 2.2349957449518342e-05, + "loss": 2.0051, + "step": 19111500 + }, + { + "epoch": 55.32, + "learning_rate": 2.2349233801871064e-05, + "loss": 1.9904, + "step": 19112000 + }, + { + "epoch": 55.32, + "learning_rate": 2.2348510154223787e-05, + "loss": 1.9515, + "step": 19112500 + }, + { + "epoch": 55.32, + "learning_rate": 2.23477894011671e-05, + "loss": 1.951, + "step": 19113000 + }, + { + "epoch": 55.33, + "learning_rate": 2.234706575351982e-05, + "loss": 1.9725, + "step": 19113500 + }, + { + "epoch": 55.33, + "learning_rate": 2.2346342105872547e-05, + "loss": 1.9871, + "step": 19114000 + }, + { + "epoch": 55.33, + "learning_rate": 2.234561845822527e-05, + "loss": 1.9456, + "step": 19114500 + }, + { + "epoch": 55.33, + "learning_rate": 2.2344894810577995e-05, + "loss": 1.9585, + "step": 19115000 + }, + { + "epoch": 55.33, + "learning_rate": 2.2344171162930717e-05, + "loss": 1.9569, + "step": 19115500 + }, + { + "epoch": 55.33, + "learning_rate": 2.234344751528344e-05, + "loss": 1.9788, + "step": 19116000 + }, + { + "epoch": 55.33, + "learning_rate": 2.234272386763616e-05, + "loss": 1.9691, + "step": 19116500 + }, + { + "epoch": 55.34, + "learning_rate": 2.2342000219988884e-05, + "loss": 1.9483, + "step": 19117000 + }, + { + "epoch": 55.34, + "learning_rate": 2.234127657234161e-05, + "loss": 1.9686, + "step": 19117500 + }, + { + "epoch": 55.34, + "learning_rate": 2.234055292469433e-05, + "loss": 1.9753, + "step": 19118000 + }, + { + "epoch": 55.34, + "learning_rate": 2.2339829277047057e-05, + "loss": 1.9617, + "step": 19118500 + }, + { + "epoch": 55.34, + "learning_rate": 2.233910562939978e-05, + "loss": 1.97, + "step": 19119000 + }, + { + "epoch": 55.34, + "learning_rate": 2.2338383429047795e-05, + "loss": 1.9826, + "step": 19119500 + }, + { + "epoch": 55.34, + "learning_rate": 2.2337661228695814e-05, + "loss": 1.955, + "step": 19120000 + }, + { + "epoch": 55.35, + "learning_rate": 2.2336937581048536e-05, + "loss": 1.9608, + "step": 19120500 + }, + { + "epoch": 55.35, + "learning_rate": 2.233621393340126e-05, + "loss": 1.9763, + "step": 19121000 + }, + { + "epoch": 55.35, + "learning_rate": 2.2335490285753984e-05, + "loss": 1.9254, + "step": 19121500 + }, + { + "epoch": 55.35, + "learning_rate": 2.2334768085402003e-05, + "loss": 1.9796, + "step": 19122000 + }, + { + "epoch": 55.35, + "learning_rate": 2.2334044437754725e-05, + "loss": 1.999, + "step": 19122500 + }, + { + "epoch": 55.35, + "learning_rate": 2.2333320790107447e-05, + "loss": 1.9921, + "step": 19123000 + }, + { + "epoch": 55.35, + "learning_rate": 2.233259714246017e-05, + "loss": 1.9694, + "step": 19123500 + }, + { + "epoch": 55.36, + "learning_rate": 2.2331873494812895e-05, + "loss": 1.9533, + "step": 19124000 + }, + { + "epoch": 55.36, + "learning_rate": 2.2331149847165617e-05, + "loss": 1.9675, + "step": 19124500 + }, + { + "epoch": 55.36, + "learning_rate": 2.2330426199518343e-05, + "loss": 1.9748, + "step": 19125000 + }, + { + "epoch": 55.36, + "learning_rate": 2.2329702551871065e-05, + "loss": 1.9685, + "step": 19125500 + }, + { + "epoch": 55.36, + "learning_rate": 2.2328980351519084e-05, + "loss": 1.9715, + "step": 19126000 + }, + { + "epoch": 55.36, + "learning_rate": 2.2328256703871806e-05, + "loss": 1.9981, + "step": 19126500 + }, + { + "epoch": 55.36, + "learning_rate": 2.232753305622453e-05, + "loss": 1.9877, + "step": 19127000 + }, + { + "epoch": 55.37, + "learning_rate": 2.232680940857725e-05, + "loss": 1.9674, + "step": 19127500 + }, + { + "epoch": 55.37, + "learning_rate": 2.2326085760929973e-05, + "loss": 1.9577, + "step": 19128000 + }, + { + "epoch": 55.37, + "learning_rate": 2.23253621132827e-05, + "loss": 2.005, + "step": 19128500 + }, + { + "epoch": 55.37, + "learning_rate": 2.2324639912930718e-05, + "loss": 1.9778, + "step": 19129000 + }, + { + "epoch": 55.37, + "learning_rate": 2.232391626528344e-05, + "loss": 1.9715, + "step": 19129500 + }, + { + "epoch": 55.37, + "learning_rate": 2.2323192617636162e-05, + "loss": 1.9619, + "step": 19130000 + }, + { + "epoch": 55.37, + "learning_rate": 2.2322468969988884e-05, + "loss": 1.9745, + "step": 19130500 + }, + { + "epoch": 55.38, + "learning_rate": 2.232174532234161e-05, + "loss": 1.9737, + "step": 19131000 + }, + { + "epoch": 55.38, + "learning_rate": 2.2321021674694332e-05, + "loss": 1.9717, + "step": 19131500 + }, + { + "epoch": 55.38, + "learning_rate": 2.2320298027047058e-05, + "loss": 1.9764, + "step": 19132000 + }, + { + "epoch": 55.38, + "learning_rate": 2.231957437939978e-05, + "loss": 1.9552, + "step": 19132500 + }, + { + "epoch": 55.38, + "learning_rate": 2.2318850731752502e-05, + "loss": 1.9635, + "step": 19133000 + }, + { + "epoch": 55.38, + "learning_rate": 2.2318127084105225e-05, + "loss": 1.9531, + "step": 19133500 + }, + { + "epoch": 55.39, + "learning_rate": 2.2317403436457947e-05, + "loss": 1.9886, + "step": 19134000 + }, + { + "epoch": 55.39, + "learning_rate": 2.2316679788810672e-05, + "loss": 1.9496, + "step": 19134500 + }, + { + "epoch": 55.39, + "learning_rate": 2.2315957588458688e-05, + "loss": 1.9717, + "step": 19135000 + }, + { + "epoch": 55.39, + "learning_rate": 2.231523394081141e-05, + "loss": 1.9658, + "step": 19135500 + }, + { + "epoch": 55.39, + "learning_rate": 2.2314510293164136e-05, + "loss": 1.966, + "step": 19136000 + }, + { + "epoch": 55.39, + "learning_rate": 2.231378664551686e-05, + "loss": 2.011, + "step": 19136500 + }, + { + "epoch": 55.39, + "learning_rate": 2.2313062997869584e-05, + "loss": 1.9665, + "step": 19137000 + }, + { + "epoch": 55.4, + "learning_rate": 2.2312339350222306e-05, + "loss": 1.9842, + "step": 19137500 + }, + { + "epoch": 55.4, + "learning_rate": 2.2311618597165618e-05, + "loss": 1.9702, + "step": 19138000 + }, + { + "epoch": 55.4, + "learning_rate": 2.2310896396813634e-05, + "loss": 1.9519, + "step": 19138500 + }, + { + "epoch": 55.4, + "learning_rate": 2.231017274916636e-05, + "loss": 1.9801, + "step": 19139000 + }, + { + "epoch": 55.4, + "learning_rate": 2.230944910151908e-05, + "loss": 1.9716, + "step": 19139500 + }, + { + "epoch": 55.4, + "learning_rate": 2.2308725453871807e-05, + "loss": 1.9662, + "step": 19140000 + }, + { + "epoch": 55.4, + "learning_rate": 2.2308003253519823e-05, + "loss": 1.9525, + "step": 19140500 + }, + { + "epoch": 55.41, + "learning_rate": 2.230727960587255e-05, + "loss": 1.9328, + "step": 19141000 + }, + { + "epoch": 55.41, + "learning_rate": 2.230655595822527e-05, + "loss": 1.971, + "step": 19141500 + }, + { + "epoch": 55.41, + "learning_rate": 2.2305832310577993e-05, + "loss": 1.9589, + "step": 19142000 + }, + { + "epoch": 55.41, + "learning_rate": 2.2305108662930715e-05, + "loss": 1.9762, + "step": 19142500 + }, + { + "epoch": 55.41, + "learning_rate": 2.2304385015283437e-05, + "loss": 1.9798, + "step": 19143000 + }, + { + "epoch": 55.41, + "learning_rate": 2.2303662814931456e-05, + "loss": 1.9882, + "step": 19143500 + }, + { + "epoch": 55.41, + "learning_rate": 2.2302939167284182e-05, + "loss": 1.9632, + "step": 19144000 + }, + { + "epoch": 55.42, + "learning_rate": 2.2302215519636904e-05, + "loss": 1.9577, + "step": 19144500 + }, + { + "epoch": 55.42, + "learning_rate": 2.2301493319284923e-05, + "loss": 1.9831, + "step": 19145000 + }, + { + "epoch": 55.42, + "learning_rate": 2.2300769671637645e-05, + "loss": 1.9851, + "step": 19145500 + }, + { + "epoch": 55.42, + "learning_rate": 2.2300046023990368e-05, + "loss": 1.9521, + "step": 19146000 + }, + { + "epoch": 55.42, + "learning_rate": 2.229932237634309e-05, + "loss": 1.9861, + "step": 19146500 + }, + { + "epoch": 55.42, + "learning_rate": 2.2298598728695812e-05, + "loss": 1.9621, + "step": 19147000 + }, + { + "epoch": 55.42, + "learning_rate": 2.2297875081048538e-05, + "loss": 1.979, + "step": 19147500 + }, + { + "epoch": 55.43, + "learning_rate": 2.229715143340126e-05, + "loss": 1.9594, + "step": 19148000 + }, + { + "epoch": 55.43, + "learning_rate": 2.2296427785753986e-05, + "loss": 1.9741, + "step": 19148500 + }, + { + "epoch": 55.43, + "learning_rate": 2.2295704138106708e-05, + "loss": 1.9681, + "step": 19149000 + }, + { + "epoch": 55.43, + "learning_rate": 2.229498049045943e-05, + "loss": 1.9802, + "step": 19149500 + }, + { + "epoch": 55.43, + "learning_rate": 2.229425829010745e-05, + "loss": 1.9597, + "step": 19150000 + }, + { + "epoch": 55.43, + "learning_rate": 2.229353464246017e-05, + "loss": 1.9792, + "step": 19150500 + }, + { + "epoch": 55.43, + "learning_rate": 2.2292810994812897e-05, + "loss": 1.9746, + "step": 19151000 + }, + { + "epoch": 55.44, + "learning_rate": 2.229208734716562e-05, + "loss": 1.9711, + "step": 19151500 + }, + { + "epoch": 55.44, + "learning_rate": 2.229136369951834e-05, + "loss": 1.9759, + "step": 19152000 + }, + { + "epoch": 55.44, + "learning_rate": 2.229064149916636e-05, + "loss": 1.9741, + "step": 19152500 + }, + { + "epoch": 55.44, + "learning_rate": 2.2289917851519082e-05, + "loss": 1.9543, + "step": 19153000 + }, + { + "epoch": 55.44, + "learning_rate": 2.2289194203871805e-05, + "loss": 1.9704, + "step": 19153500 + }, + { + "epoch": 55.44, + "learning_rate": 2.2288470556224527e-05, + "loss": 1.9674, + "step": 19154000 + }, + { + "epoch": 55.44, + "learning_rate": 2.228774690857725e-05, + "loss": 1.9699, + "step": 19154500 + }, + { + "epoch": 55.45, + "learning_rate": 2.2287023260929975e-05, + "loss": 1.9622, + "step": 19155000 + }, + { + "epoch": 55.45, + "learning_rate": 2.22862996132827e-05, + "loss": 1.9614, + "step": 19155500 + }, + { + "epoch": 55.45, + "learning_rate": 2.2285575965635423e-05, + "loss": 1.9735, + "step": 19156000 + }, + { + "epoch": 55.45, + "learning_rate": 2.2284852317988145e-05, + "loss": 1.9586, + "step": 19156500 + }, + { + "epoch": 55.45, + "learning_rate": 2.2284128670340867e-05, + "loss": 1.9812, + "step": 19157000 + }, + { + "epoch": 55.45, + "learning_rate": 2.228340502269359e-05, + "loss": 1.9681, + "step": 19157500 + }, + { + "epoch": 55.45, + "learning_rate": 2.228268137504631e-05, + "loss": 1.9734, + "step": 19158000 + }, + { + "epoch": 55.46, + "learning_rate": 2.2281959174694334e-05, + "loss": 1.9999, + "step": 19158500 + }, + { + "epoch": 55.46, + "learning_rate": 2.2281235527047056e-05, + "loss": 1.9656, + "step": 19159000 + }, + { + "epoch": 55.46, + "learning_rate": 2.228051187939978e-05, + "loss": 1.9678, + "step": 19159500 + }, + { + "epoch": 55.46, + "learning_rate": 2.22797882317525e-05, + "loss": 1.9605, + "step": 19160000 + }, + { + "epoch": 55.46, + "learning_rate": 2.2279064584105226e-05, + "loss": 1.9665, + "step": 19160500 + }, + { + "epoch": 55.46, + "learning_rate": 2.227834093645795e-05, + "loss": 1.9725, + "step": 19161000 + }, + { + "epoch": 55.46, + "learning_rate": 2.227761728881067e-05, + "loss": 1.9716, + "step": 19161500 + }, + { + "epoch": 55.47, + "learning_rate": 2.227689508845869e-05, + "loss": 1.991, + "step": 19162000 + }, + { + "epoch": 55.47, + "learning_rate": 2.2276171440811415e-05, + "loss": 1.9843, + "step": 19162500 + }, + { + "epoch": 55.47, + "learning_rate": 2.227544924045943e-05, + "loss": 1.9714, + "step": 19163000 + }, + { + "epoch": 55.47, + "learning_rate": 2.2274725592812153e-05, + "loss": 1.9708, + "step": 19163500 + }, + { + "epoch": 55.47, + "learning_rate": 2.2274001945164875e-05, + "loss": 1.9525, + "step": 19164000 + }, + { + "epoch": 55.47, + "learning_rate": 2.22732782975176e-05, + "loss": 1.9675, + "step": 19164500 + }, + { + "epoch": 55.47, + "learning_rate": 2.2272554649870323e-05, + "loss": 1.9911, + "step": 19165000 + }, + { + "epoch": 55.48, + "learning_rate": 2.2271831002223045e-05, + "loss": 1.9776, + "step": 19165500 + }, + { + "epoch": 55.48, + "learning_rate": 2.227110735457577e-05, + "loss": 1.9788, + "step": 19166000 + }, + { + "epoch": 55.48, + "learning_rate": 2.2270383706928493e-05, + "loss": 1.9908, + "step": 19166500 + }, + { + "epoch": 55.48, + "learning_rate": 2.2269660059281215e-05, + "loss": 1.9539, + "step": 19167000 + }, + { + "epoch": 55.48, + "learning_rate": 2.226893641163394e-05, + "loss": 1.967, + "step": 19167500 + }, + { + "epoch": 55.48, + "learning_rate": 2.2268214211281957e-05, + "loss": 1.9919, + "step": 19168000 + }, + { + "epoch": 55.48, + "learning_rate": 2.2267492010929976e-05, + "loss": 1.9673, + "step": 19168500 + }, + { + "epoch": 55.49, + "learning_rate": 2.226676981057799e-05, + "loss": 1.9523, + "step": 19169000 + }, + { + "epoch": 55.49, + "learning_rate": 2.2266046162930713e-05, + "loss": 1.9853, + "step": 19169500 + }, + { + "epoch": 55.49, + "learning_rate": 2.226532251528344e-05, + "loss": 1.9496, + "step": 19170000 + }, + { + "epoch": 55.49, + "learning_rate": 2.2264600314931458e-05, + "loss": 1.9665, + "step": 19170500 + }, + { + "epoch": 55.49, + "learning_rate": 2.226387666728418e-05, + "loss": 1.9872, + "step": 19171000 + }, + { + "epoch": 55.49, + "learning_rate": 2.2263153019636902e-05, + "loss": 1.9616, + "step": 19171500 + }, + { + "epoch": 55.5, + "learning_rate": 2.2262429371989628e-05, + "loss": 1.9834, + "step": 19172000 + }, + { + "epoch": 55.5, + "learning_rate": 2.226170572434235e-05, + "loss": 1.9698, + "step": 19172500 + }, + { + "epoch": 55.5, + "learning_rate": 2.2260982076695072e-05, + "loss": 1.9507, + "step": 19173000 + }, + { + "epoch": 55.5, + "learning_rate": 2.2260258429047798e-05, + "loss": 1.9624, + "step": 19173500 + }, + { + "epoch": 55.5, + "learning_rate": 2.225953478140052e-05, + "loss": 1.9772, + "step": 19174000 + }, + { + "epoch": 55.5, + "learning_rate": 2.225881258104854e-05, + "loss": 1.9515, + "step": 19174500 + }, + { + "epoch": 55.5, + "learning_rate": 2.225808893340126e-05, + "loss": 1.9832, + "step": 19175000 + }, + { + "epoch": 55.51, + "learning_rate": 2.2257365285753984e-05, + "loss": 1.9881, + "step": 19175500 + }, + { + "epoch": 55.51, + "learning_rate": 2.2256641638106706e-05, + "loss": 1.9797, + "step": 19176000 + }, + { + "epoch": 55.51, + "learning_rate": 2.2255917990459428e-05, + "loss": 1.9531, + "step": 19176500 + }, + { + "epoch": 55.51, + "learning_rate": 2.2255194342812154e-05, + "loss": 1.9484, + "step": 19177000 + }, + { + "epoch": 55.51, + "learning_rate": 2.225447069516488e-05, + "loss": 1.9694, + "step": 19177500 + }, + { + "epoch": 55.51, + "learning_rate": 2.2253749942108192e-05, + "loss": 1.9642, + "step": 19178000 + }, + { + "epoch": 55.51, + "learning_rate": 2.2253026294460914e-05, + "loss": 1.9838, + "step": 19178500 + }, + { + "epoch": 55.52, + "learning_rate": 2.2252302646813636e-05, + "loss": 1.953, + "step": 19179000 + }, + { + "epoch": 55.52, + "learning_rate": 2.225157899916636e-05, + "loss": 1.9897, + "step": 19179500 + }, + { + "epoch": 55.52, + "learning_rate": 2.2250856798814377e-05, + "loss": 1.9698, + "step": 19180000 + }, + { + "epoch": 55.52, + "learning_rate": 2.22501331511671e-05, + "loss": 1.9551, + "step": 19180500 + }, + { + "epoch": 55.52, + "learning_rate": 2.2249409503519822e-05, + "loss": 1.9529, + "step": 19181000 + }, + { + "epoch": 55.52, + "learning_rate": 2.2248685855872547e-05, + "loss": 1.9917, + "step": 19181500 + }, + { + "epoch": 55.52, + "learning_rate": 2.224796220822527e-05, + "loss": 1.9372, + "step": 19182000 + }, + { + "epoch": 55.53, + "learning_rate": 2.2247238560577992e-05, + "loss": 1.972, + "step": 19182500 + }, + { + "epoch": 55.53, + "learning_rate": 2.2246514912930718e-05, + "loss": 1.9881, + "step": 19183000 + }, + { + "epoch": 55.53, + "learning_rate": 2.224579126528344e-05, + "loss": 1.9931, + "step": 19183500 + }, + { + "epoch": 55.53, + "learning_rate": 2.2245067617636162e-05, + "loss": 1.9481, + "step": 19184000 + }, + { + "epoch": 55.53, + "learning_rate": 2.2244343969988884e-05, + "loss": 1.9564, + "step": 19184500 + }, + { + "epoch": 55.53, + "learning_rate": 2.224362032234161e-05, + "loss": 1.9781, + "step": 19185000 + }, + { + "epoch": 55.53, + "learning_rate": 2.2242896674694332e-05, + "loss": 1.9885, + "step": 19185500 + }, + { + "epoch": 55.54, + "learning_rate": 2.2242173027047054e-05, + "loss": 1.9958, + "step": 19186000 + }, + { + "epoch": 55.54, + "learning_rate": 2.224144937939978e-05, + "loss": 1.9826, + "step": 19186500 + }, + { + "epoch": 55.54, + "learning_rate": 2.2240725731752502e-05, + "loss": 1.9864, + "step": 19187000 + }, + { + "epoch": 55.54, + "learning_rate": 2.2240002084105224e-05, + "loss": 1.9659, + "step": 19187500 + }, + { + "epoch": 55.54, + "learning_rate": 2.223927843645795e-05, + "loss": 1.9596, + "step": 19188000 + }, + { + "epoch": 55.54, + "learning_rate": 2.223855623610597e-05, + "loss": 1.9772, + "step": 19188500 + }, + { + "epoch": 55.54, + "learning_rate": 2.2237834035753985e-05, + "loss": 1.9786, + "step": 19189000 + }, + { + "epoch": 55.55, + "learning_rate": 2.2237110388106707e-05, + "loss": 2.0, + "step": 19189500 + }, + { + "epoch": 55.55, + "learning_rate": 2.223638674045943e-05, + "loss": 1.9451, + "step": 19190000 + }, + { + "epoch": 55.55, + "learning_rate": 2.2235663092812155e-05, + "loss": 1.9878, + "step": 19190500 + }, + { + "epoch": 55.55, + "learning_rate": 2.2234939445164877e-05, + "loss": 1.9642, + "step": 19191000 + }, + { + "epoch": 55.55, + "learning_rate": 2.22342157975176e-05, + "loss": 1.9613, + "step": 19191500 + }, + { + "epoch": 55.55, + "learning_rate": 2.2233492149870325e-05, + "loss": 1.9537, + "step": 19192000 + }, + { + "epoch": 55.55, + "learning_rate": 2.2232771396813637e-05, + "loss": 1.9756, + "step": 19192500 + }, + { + "epoch": 55.56, + "learning_rate": 2.223204774916636e-05, + "loss": 1.9688, + "step": 19193000 + }, + { + "epoch": 55.56, + "learning_rate": 2.223132410151908e-05, + "loss": 1.9654, + "step": 19193500 + }, + { + "epoch": 55.56, + "learning_rate": 2.2230600453871807e-05, + "loss": 1.9769, + "step": 19194000 + }, + { + "epoch": 55.56, + "learning_rate": 2.222987680622453e-05, + "loss": 1.9699, + "step": 19194500 + }, + { + "epoch": 55.56, + "learning_rate": 2.222915315857725e-05, + "loss": 1.9761, + "step": 19195000 + }, + { + "epoch": 55.56, + "learning_rate": 2.2228429510929974e-05, + "loss": 1.9728, + "step": 19195500 + }, + { + "epoch": 55.56, + "learning_rate": 2.22277058632827e-05, + "loss": 1.9707, + "step": 19196000 + }, + { + "epoch": 55.57, + "learning_rate": 2.222698221563542e-05, + "loss": 1.964, + "step": 19196500 + }, + { + "epoch": 55.57, + "learning_rate": 2.2226261462578734e-05, + "loss": 1.9599, + "step": 19197000 + }, + { + "epoch": 55.57, + "learning_rate": 2.2225537814931456e-05, + "loss": 1.9505, + "step": 19197500 + }, + { + "epoch": 55.57, + "learning_rate": 2.2224814167284182e-05, + "loss": 1.9927, + "step": 19198000 + }, + { + "epoch": 55.57, + "learning_rate": 2.2224090519636904e-05, + "loss": 1.9644, + "step": 19198500 + }, + { + "epoch": 55.57, + "learning_rate": 2.2223366871989626e-05, + "loss": 1.9925, + "step": 19199000 + }, + { + "epoch": 55.57, + "learning_rate": 2.222264322434235e-05, + "loss": 1.9408, + "step": 19199500 + }, + { + "epoch": 55.58, + "learning_rate": 2.2221919576695074e-05, + "loss": 1.981, + "step": 19200000 + }, + { + "epoch": 55.58, + "learning_rate": 2.2221195929047796e-05, + "loss": 1.9765, + "step": 19200500 + }, + { + "epoch": 55.58, + "learning_rate": 2.2220473728695815e-05, + "loss": 1.9599, + "step": 19201000 + }, + { + "epoch": 55.58, + "learning_rate": 2.2219750081048537e-05, + "loss": 1.944, + "step": 19201500 + }, + { + "epoch": 55.58, + "learning_rate": 2.221902643340126e-05, + "loss": 1.981, + "step": 19202000 + }, + { + "epoch": 55.58, + "learning_rate": 2.2218302785753982e-05, + "loss": 1.9578, + "step": 19202500 + }, + { + "epoch": 55.58, + "learning_rate": 2.2217579138106708e-05, + "loss": 1.9875, + "step": 19203000 + }, + { + "epoch": 55.59, + "learning_rate": 2.2216855490459433e-05, + "loss": 2.0035, + "step": 19203500 + }, + { + "epoch": 55.59, + "learning_rate": 2.2216131842812155e-05, + "loss": 1.9774, + "step": 19204000 + }, + { + "epoch": 55.59, + "learning_rate": 2.2215408195164878e-05, + "loss": 1.9562, + "step": 19204500 + }, + { + "epoch": 55.59, + "learning_rate": 2.22146845475176e-05, + "loss": 1.976, + "step": 19205000 + }, + { + "epoch": 55.59, + "learning_rate": 2.2213960899870322e-05, + "loss": 1.9784, + "step": 19205500 + }, + { + "epoch": 55.59, + "learning_rate": 2.221323869951834e-05, + "loss": 1.9634, + "step": 19206000 + }, + { + "epoch": 55.59, + "learning_rate": 2.2212516499166357e-05, + "loss": 1.9742, + "step": 19206500 + }, + { + "epoch": 55.6, + "learning_rate": 2.2211792851519082e-05, + "loss": 1.9518, + "step": 19207000 + }, + { + "epoch": 55.6, + "learning_rate": 2.2211069203871808e-05, + "loss": 1.9576, + "step": 19207500 + }, + { + "epoch": 55.6, + "learning_rate": 2.221034555622453e-05, + "loss": 1.931, + "step": 19208000 + }, + { + "epoch": 55.6, + "learning_rate": 2.2209621908577252e-05, + "loss": 1.9501, + "step": 19208500 + }, + { + "epoch": 55.6, + "learning_rate": 2.220889970822527e-05, + "loss": 1.9702, + "step": 19209000 + }, + { + "epoch": 55.6, + "learning_rate": 2.2208176060577993e-05, + "loss": 1.9813, + "step": 19209500 + }, + { + "epoch": 55.61, + "learning_rate": 2.2207452412930716e-05, + "loss": 1.9579, + "step": 19210000 + }, + { + "epoch": 55.61, + "learning_rate": 2.2206728765283438e-05, + "loss": 1.9628, + "step": 19210500 + }, + { + "epoch": 55.61, + "learning_rate": 2.2206005117636164e-05, + "loss": 1.9694, + "step": 19211000 + }, + { + "epoch": 55.61, + "learning_rate": 2.2205281469988886e-05, + "loss": 1.9811, + "step": 19211500 + }, + { + "epoch": 55.61, + "learning_rate": 2.2204557822341608e-05, + "loss": 1.9544, + "step": 19212000 + }, + { + "epoch": 55.61, + "learning_rate": 2.2203834174694334e-05, + "loss": 1.9718, + "step": 19212500 + }, + { + "epoch": 55.61, + "learning_rate": 2.2203110527047056e-05, + "loss": 1.9901, + "step": 19213000 + }, + { + "epoch": 55.62, + "learning_rate": 2.2202386879399778e-05, + "loss": 1.9665, + "step": 19213500 + }, + { + "epoch": 55.62, + "learning_rate": 2.22016632317525e-05, + "loss": 1.9769, + "step": 19214000 + }, + { + "epoch": 55.62, + "learning_rate": 2.2200939584105226e-05, + "loss": 1.9847, + "step": 19214500 + }, + { + "epoch": 55.62, + "learning_rate": 2.2200215936457948e-05, + "loss": 1.9539, + "step": 19215000 + }, + { + "epoch": 55.62, + "learning_rate": 2.219949228881067e-05, + "loss": 1.9736, + "step": 19215500 + }, + { + "epoch": 55.62, + "learning_rate": 2.2198768641163396e-05, + "loss": 1.9697, + "step": 19216000 + }, + { + "epoch": 55.62, + "learning_rate": 2.219804644081141e-05, + "loss": 1.9696, + "step": 19216500 + }, + { + "epoch": 55.63, + "learning_rate": 2.2197322793164134e-05, + "loss": 1.9756, + "step": 19217000 + }, + { + "epoch": 55.63, + "learning_rate": 2.2196600592812153e-05, + "loss": 1.9698, + "step": 19217500 + }, + { + "epoch": 55.63, + "learning_rate": 2.2195876945164875e-05, + "loss": 1.9561, + "step": 19218000 + }, + { + "epoch": 55.63, + "learning_rate": 2.21951532975176e-05, + "loss": 1.9648, + "step": 19218500 + }, + { + "epoch": 55.63, + "learning_rate": 2.2194429649870323e-05, + "loss": 1.9708, + "step": 19219000 + }, + { + "epoch": 55.63, + "learning_rate": 2.219370600222305e-05, + "loss": 1.9712, + "step": 19219500 + }, + { + "epoch": 55.63, + "learning_rate": 2.219298235457577e-05, + "loss": 1.974, + "step": 19220000 + }, + { + "epoch": 55.64, + "learning_rate": 2.2192260154223786e-05, + "loss": 1.9675, + "step": 19220500 + }, + { + "epoch": 55.64, + "learning_rate": 2.219153650657651e-05, + "loss": 1.9709, + "step": 19221000 + }, + { + "epoch": 55.64, + "learning_rate": 2.2190812858929234e-05, + "loss": 1.9767, + "step": 19221500 + }, + { + "epoch": 55.64, + "learning_rate": 2.219008921128196e-05, + "loss": 1.9769, + "step": 19222000 + }, + { + "epoch": 55.64, + "learning_rate": 2.2189365563634682e-05, + "loss": 1.968, + "step": 19222500 + }, + { + "epoch": 55.64, + "learning_rate": 2.2188641915987404e-05, + "loss": 1.9737, + "step": 19223000 + }, + { + "epoch": 55.64, + "learning_rate": 2.2187918268340126e-05, + "loss": 1.9645, + "step": 19223500 + }, + { + "epoch": 55.65, + "learning_rate": 2.218719462069285e-05, + "loss": 1.9678, + "step": 19224000 + }, + { + "epoch": 55.65, + "learning_rate": 2.2186472420340868e-05, + "loss": 2.0016, + "step": 19224500 + }, + { + "epoch": 55.65, + "learning_rate": 2.218574877269359e-05, + "loss": 1.9635, + "step": 19225000 + }, + { + "epoch": 55.65, + "learning_rate": 2.2185025125046312e-05, + "loss": 1.9921, + "step": 19225500 + }, + { + "epoch": 55.65, + "learning_rate": 2.2184301477399038e-05, + "loss": 1.9699, + "step": 19226000 + }, + { + "epoch": 55.65, + "learning_rate": 2.218357782975176e-05, + "loss": 1.9933, + "step": 19226500 + }, + { + "epoch": 55.65, + "learning_rate": 2.2182854182104486e-05, + "loss": 1.9863, + "step": 19227000 + }, + { + "epoch": 55.66, + "learning_rate": 2.2182130534457208e-05, + "loss": 1.9668, + "step": 19227500 + }, + { + "epoch": 55.66, + "learning_rate": 2.218140688680993e-05, + "loss": 2.0179, + "step": 19228000 + }, + { + "epoch": 55.66, + "learning_rate": 2.2180683239162652e-05, + "loss": 1.9674, + "step": 19228500 + }, + { + "epoch": 55.66, + "learning_rate": 2.217996103881067e-05, + "loss": 1.9657, + "step": 19229000 + }, + { + "epoch": 55.66, + "learning_rate": 2.2179237391163397e-05, + "loss": 2.0042, + "step": 19229500 + }, + { + "epoch": 55.66, + "learning_rate": 2.217851374351612e-05, + "loss": 1.9898, + "step": 19230000 + }, + { + "epoch": 55.66, + "learning_rate": 2.217779009586884e-05, + "loss": 1.9862, + "step": 19230500 + }, + { + "epoch": 55.67, + "learning_rate": 2.2177066448221564e-05, + "loss": 1.9672, + "step": 19231000 + }, + { + "epoch": 55.67, + "learning_rate": 2.2176344247869583e-05, + "loss": 1.9539, + "step": 19231500 + }, + { + "epoch": 55.67, + "learning_rate": 2.2175620600222305e-05, + "loss": 1.9895, + "step": 19232000 + }, + { + "epoch": 55.67, + "learning_rate": 2.2174896952575027e-05, + "loss": 1.9724, + "step": 19232500 + }, + { + "epoch": 55.67, + "learning_rate": 2.2174173304927753e-05, + "loss": 1.9628, + "step": 19233000 + }, + { + "epoch": 55.67, + "learning_rate": 2.217345110457577e-05, + "loss": 1.9613, + "step": 19233500 + }, + { + "epoch": 55.67, + "learning_rate": 2.2172727456928494e-05, + "loss": 1.9611, + "step": 19234000 + }, + { + "epoch": 55.68, + "learning_rate": 2.2172003809281216e-05, + "loss": 1.9654, + "step": 19234500 + }, + { + "epoch": 55.68, + "learning_rate": 2.2171280161633938e-05, + "loss": 1.9459, + "step": 19235000 + }, + { + "epoch": 55.68, + "learning_rate": 2.2170557961281957e-05, + "loss": 1.9587, + "step": 19235500 + }, + { + "epoch": 55.68, + "learning_rate": 2.216983431363468e-05, + "loss": 1.9408, + "step": 19236000 + }, + { + "epoch": 55.68, + "learning_rate": 2.21691106659874e-05, + "loss": 1.9651, + "step": 19236500 + }, + { + "epoch": 55.68, + "learning_rate": 2.2168387018340127e-05, + "loss": 1.9767, + "step": 19237000 + }, + { + "epoch": 55.68, + "learning_rate": 2.216766337069285e-05, + "loss": 1.962, + "step": 19237500 + }, + { + "epoch": 55.69, + "learning_rate": 2.216694117034087e-05, + "loss": 1.9853, + "step": 19238000 + }, + { + "epoch": 55.69, + "learning_rate": 2.216621752269359e-05, + "loss": 1.9753, + "step": 19238500 + }, + { + "epoch": 55.69, + "learning_rate": 2.216549532234161e-05, + "loss": 1.9981, + "step": 19239000 + }, + { + "epoch": 55.69, + "learning_rate": 2.2164771674694332e-05, + "loss": 1.9844, + "step": 19239500 + }, + { + "epoch": 55.69, + "learning_rate": 2.2164048027047054e-05, + "loss": 1.9816, + "step": 19240000 + }, + { + "epoch": 55.69, + "learning_rate": 2.2163325826695073e-05, + "loss": 1.9725, + "step": 19240500 + }, + { + "epoch": 55.69, + "learning_rate": 2.21626021790478e-05, + "loss": 1.9583, + "step": 19241000 + }, + { + "epoch": 55.7, + "learning_rate": 2.216187853140052e-05, + "loss": 2.0042, + "step": 19241500 + }, + { + "epoch": 55.7, + "learning_rate": 2.2161154883753243e-05, + "loss": 1.9533, + "step": 19242000 + }, + { + "epoch": 55.7, + "learning_rate": 2.2160431236105965e-05, + "loss": 1.9512, + "step": 19242500 + }, + { + "epoch": 55.7, + "learning_rate": 2.2159707588458688e-05, + "loss": 1.9887, + "step": 19243000 + }, + { + "epoch": 55.7, + "learning_rate": 2.2158983940811413e-05, + "loss": 1.962, + "step": 19243500 + }, + { + "epoch": 55.7, + "learning_rate": 2.2158260293164135e-05, + "loss": 1.9693, + "step": 19244000 + }, + { + "epoch": 55.7, + "learning_rate": 2.2157538092812154e-05, + "loss": 1.9691, + "step": 19244500 + }, + { + "epoch": 55.71, + "learning_rate": 2.2156814445164877e-05, + "loss": 1.9954, + "step": 19245000 + }, + { + "epoch": 55.71, + "learning_rate": 2.2156092244812896e-05, + "loss": 1.9551, + "step": 19245500 + }, + { + "epoch": 55.71, + "learning_rate": 2.2155368597165618e-05, + "loss": 1.9794, + "step": 19246000 + }, + { + "epoch": 55.71, + "learning_rate": 2.215464494951834e-05, + "loss": 1.9833, + "step": 19246500 + }, + { + "epoch": 55.71, + "learning_rate": 2.2153921301871062e-05, + "loss": 1.9619, + "step": 19247000 + }, + { + "epoch": 55.71, + "learning_rate": 2.2153197654223788e-05, + "loss": 1.9624, + "step": 19247500 + }, + { + "epoch": 55.72, + "learning_rate": 2.215247400657651e-05, + "loss": 1.978, + "step": 19248000 + }, + { + "epoch": 55.72, + "learning_rate": 2.2151750358929236e-05, + "loss": 1.9643, + "step": 19248500 + }, + { + "epoch": 55.72, + "learning_rate": 2.2151026711281958e-05, + "loss": 1.9695, + "step": 19249000 + }, + { + "epoch": 55.72, + "learning_rate": 2.215030306363468e-05, + "loss": 1.9683, + "step": 19249500 + }, + { + "epoch": 55.72, + "learning_rate": 2.2149579415987402e-05, + "loss": 1.9747, + "step": 19250000 + }, + { + "epoch": 55.72, + "learning_rate": 2.214885721563542e-05, + "loss": 1.9733, + "step": 19250500 + }, + { + "epoch": 55.72, + "learning_rate": 2.2148133567988144e-05, + "loss": 2.0025, + "step": 19251000 + }, + { + "epoch": 55.73, + "learning_rate": 2.2147409920340866e-05, + "loss": 1.9558, + "step": 19251500 + }, + { + "epoch": 55.73, + "learning_rate": 2.214668627269359e-05, + "loss": 1.986, + "step": 19252000 + }, + { + "epoch": 55.73, + "learning_rate": 2.2145962625046314e-05, + "loss": 1.9705, + "step": 19252500 + }, + { + "epoch": 55.73, + "learning_rate": 2.214523897739904e-05, + "loss": 1.9785, + "step": 19253000 + }, + { + "epoch": 55.73, + "learning_rate": 2.214451822434235e-05, + "loss": 1.9751, + "step": 19253500 + }, + { + "epoch": 55.73, + "learning_rate": 2.2143794576695074e-05, + "loss": 1.971, + "step": 19254000 + }, + { + "epoch": 55.73, + "learning_rate": 2.2143070929047796e-05, + "loss": 2.01, + "step": 19254500 + }, + { + "epoch": 55.74, + "learning_rate": 2.214234728140052e-05, + "loss": 1.9996, + "step": 19255000 + }, + { + "epoch": 55.74, + "learning_rate": 2.214162363375324e-05, + "loss": 1.9545, + "step": 19255500 + }, + { + "epoch": 55.74, + "learning_rate": 2.2140899986105966e-05, + "loss": 1.9642, + "step": 19256000 + }, + { + "epoch": 55.74, + "learning_rate": 2.2140176338458692e-05, + "loss": 1.9928, + "step": 19256500 + }, + { + "epoch": 55.74, + "learning_rate": 2.2139452690811414e-05, + "loss": 1.9499, + "step": 19257000 + }, + { + "epoch": 55.74, + "learning_rate": 2.2138729043164136e-05, + "loss": 1.9655, + "step": 19257500 + }, + { + "epoch": 55.74, + "learning_rate": 2.213800539551686e-05, + "loss": 1.9843, + "step": 19258000 + }, + { + "epoch": 55.75, + "learning_rate": 2.213728174786958e-05, + "loss": 1.9739, + "step": 19258500 + }, + { + "epoch": 55.75, + "learning_rate": 2.21365595475176e-05, + "loss": 1.9626, + "step": 19259000 + }, + { + "epoch": 55.75, + "learning_rate": 2.2135835899870325e-05, + "loss": 1.9899, + "step": 19259500 + }, + { + "epoch": 55.75, + "learning_rate": 2.2135112252223048e-05, + "loss": 1.9683, + "step": 19260000 + }, + { + "epoch": 55.75, + "learning_rate": 2.213438860457577e-05, + "loss": 1.9399, + "step": 19260500 + }, + { + "epoch": 55.75, + "learning_rate": 2.213366640422379e-05, + "loss": 1.9627, + "step": 19261000 + }, + { + "epoch": 55.75, + "learning_rate": 2.213294275657651e-05, + "loss": 1.9692, + "step": 19261500 + }, + { + "epoch": 55.76, + "learning_rate": 2.2132219108929233e-05, + "loss": 1.9768, + "step": 19262000 + }, + { + "epoch": 55.76, + "learning_rate": 2.2131496908577252e-05, + "loss": 1.9876, + "step": 19262500 + }, + { + "epoch": 55.76, + "learning_rate": 2.2130773260929974e-05, + "loss": 1.971, + "step": 19263000 + }, + { + "epoch": 55.76, + "learning_rate": 2.21300496132827e-05, + "loss": 2.0019, + "step": 19263500 + }, + { + "epoch": 55.76, + "learning_rate": 2.2129325965635422e-05, + "loss": 1.9609, + "step": 19264000 + }, + { + "epoch": 55.76, + "learning_rate": 2.2128602317988144e-05, + "loss": 1.9617, + "step": 19264500 + }, + { + "epoch": 55.76, + "learning_rate": 2.2127878670340867e-05, + "loss": 1.9924, + "step": 19265000 + }, + { + "epoch": 55.77, + "learning_rate": 2.2127155022693592e-05, + "loss": 1.9556, + "step": 19265500 + }, + { + "epoch": 55.77, + "learning_rate": 2.2126431375046315e-05, + "loss": 1.9518, + "step": 19266000 + }, + { + "epoch": 55.77, + "learning_rate": 2.212570917469433e-05, + "loss": 1.952, + "step": 19266500 + }, + { + "epoch": 55.77, + "learning_rate": 2.2124985527047056e-05, + "loss": 1.9779, + "step": 19267000 + }, + { + "epoch": 55.77, + "learning_rate": 2.212426187939978e-05, + "loss": 1.945, + "step": 19267500 + }, + { + "epoch": 55.77, + "learning_rate": 2.2123538231752504e-05, + "loss": 1.9528, + "step": 19268000 + }, + { + "epoch": 55.77, + "learning_rate": 2.2122814584105226e-05, + "loss": 1.9722, + "step": 19268500 + }, + { + "epoch": 55.78, + "learning_rate": 2.212209238375324e-05, + "loss": 1.984, + "step": 19269000 + }, + { + "epoch": 55.78, + "learning_rate": 2.2121368736105967e-05, + "loss": 1.9632, + "step": 19269500 + }, + { + "epoch": 55.78, + "learning_rate": 2.212064508845869e-05, + "loss": 1.9717, + "step": 19270000 + }, + { + "epoch": 55.78, + "learning_rate": 2.211992144081141e-05, + "loss": 1.9749, + "step": 19270500 + }, + { + "epoch": 55.78, + "learning_rate": 2.2119197793164137e-05, + "loss": 1.9556, + "step": 19271000 + }, + { + "epoch": 55.78, + "learning_rate": 2.211847414551686e-05, + "loss": 1.978, + "step": 19271500 + }, + { + "epoch": 55.78, + "learning_rate": 2.211775049786958e-05, + "loss": 1.9489, + "step": 19272000 + }, + { + "epoch": 55.79, + "learning_rate": 2.2117026850222307e-05, + "loss": 1.9922, + "step": 19272500 + }, + { + "epoch": 55.79, + "learning_rate": 2.211630320257503e-05, + "loss": 1.9945, + "step": 19273000 + }, + { + "epoch": 55.79, + "learning_rate": 2.211557955492775e-05, + "loss": 1.9898, + "step": 19273500 + }, + { + "epoch": 55.79, + "learning_rate": 2.2114857354575767e-05, + "loss": 1.9661, + "step": 19274000 + }, + { + "epoch": 55.79, + "learning_rate": 2.2114133706928493e-05, + "loss": 1.9753, + "step": 19274500 + }, + { + "epoch": 55.79, + "learning_rate": 2.211341005928122e-05, + "loss": 1.9609, + "step": 19275000 + }, + { + "epoch": 55.79, + "learning_rate": 2.211268641163394e-05, + "loss": 1.9783, + "step": 19275500 + }, + { + "epoch": 55.8, + "learning_rate": 2.2111964211281956e-05, + "loss": 1.9642, + "step": 19276000 + }, + { + "epoch": 55.8, + "learning_rate": 2.2111240563634682e-05, + "loss": 1.9697, + "step": 19276500 + }, + { + "epoch": 55.8, + "learning_rate": 2.2110516915987404e-05, + "loss": 1.9621, + "step": 19277000 + }, + { + "epoch": 55.8, + "learning_rate": 2.210979471563542e-05, + "loss": 1.9908, + "step": 19277500 + }, + { + "epoch": 55.8, + "learning_rate": 2.2109071067988145e-05, + "loss": 1.9974, + "step": 19278000 + }, + { + "epoch": 55.8, + "learning_rate": 2.2108347420340867e-05, + "loss": 1.9722, + "step": 19278500 + }, + { + "epoch": 55.8, + "learning_rate": 2.2107623772693593e-05, + "loss": 1.9666, + "step": 19279000 + }, + { + "epoch": 55.81, + "learning_rate": 2.2106900125046315e-05, + "loss": 1.9808, + "step": 19279500 + }, + { + "epoch": 55.81, + "learning_rate": 2.210617792469433e-05, + "loss": 1.9946, + "step": 19280000 + }, + { + "epoch": 55.81, + "learning_rate": 2.2105454277047056e-05, + "loss": 1.988, + "step": 19280500 + }, + { + "epoch": 55.81, + "learning_rate": 2.210473062939978e-05, + "loss": 1.9514, + "step": 19281000 + }, + { + "epoch": 55.81, + "learning_rate": 2.21040069817525e-05, + "loss": 1.9654, + "step": 19281500 + }, + { + "epoch": 55.81, + "learning_rate": 2.2103283334105227e-05, + "loss": 1.97, + "step": 19282000 + }, + { + "epoch": 55.81, + "learning_rate": 2.2102561133753246e-05, + "loss": 1.9823, + "step": 19282500 + }, + { + "epoch": 55.82, + "learning_rate": 2.210183893340126e-05, + "loss": 1.9317, + "step": 19283000 + }, + { + "epoch": 55.82, + "learning_rate": 2.2101115285753983e-05, + "loss": 1.9624, + "step": 19283500 + }, + { + "epoch": 55.82, + "learning_rate": 2.2100391638106706e-05, + "loss": 1.9901, + "step": 19284000 + }, + { + "epoch": 55.82, + "learning_rate": 2.209966799045943e-05, + "loss": 1.9803, + "step": 19284500 + }, + { + "epoch": 55.82, + "learning_rate": 2.2098944342812153e-05, + "loss": 1.9722, + "step": 19285000 + }, + { + "epoch": 55.82, + "learning_rate": 2.2098220695164876e-05, + "loss": 1.9793, + "step": 19285500 + }, + { + "epoch": 55.83, + "learning_rate": 2.20974970475176e-05, + "loss": 1.9595, + "step": 19286000 + }, + { + "epoch": 55.83, + "learning_rate": 2.209677484716562e-05, + "loss": 1.9758, + "step": 19286500 + }, + { + "epoch": 55.83, + "learning_rate": 2.2096051199518342e-05, + "loss": 1.9591, + "step": 19287000 + }, + { + "epoch": 55.83, + "learning_rate": 2.2095327551871065e-05, + "loss": 1.9679, + "step": 19287500 + }, + { + "epoch": 55.83, + "learning_rate": 2.2094603904223787e-05, + "loss": 1.9953, + "step": 19288000 + }, + { + "epoch": 55.83, + "learning_rate": 2.2093881703871806e-05, + "loss": 1.9685, + "step": 19288500 + }, + { + "epoch": 55.83, + "learning_rate": 2.2093158056224528e-05, + "loss": 1.97, + "step": 19289000 + }, + { + "epoch": 55.84, + "learning_rate": 2.209243440857725e-05, + "loss": 1.9684, + "step": 19289500 + }, + { + "epoch": 55.84, + "learning_rate": 2.2091710760929976e-05, + "loss": 1.9762, + "step": 19290000 + }, + { + "epoch": 55.84, + "learning_rate": 2.2090987113282698e-05, + "loss": 1.9743, + "step": 19290500 + }, + { + "epoch": 55.84, + "learning_rate": 2.209026346563542e-05, + "loss": 2.0042, + "step": 19291000 + }, + { + "epoch": 55.84, + "learning_rate": 2.2089539817988146e-05, + "loss": 1.9617, + "step": 19291500 + }, + { + "epoch": 55.84, + "learning_rate": 2.2088816170340868e-05, + "loss": 1.9708, + "step": 19292000 + }, + { + "epoch": 55.84, + "learning_rate": 2.208809252269359e-05, + "loss": 1.98, + "step": 19292500 + }, + { + "epoch": 55.85, + "learning_rate": 2.2087368875046313e-05, + "loss": 1.9524, + "step": 19293000 + }, + { + "epoch": 55.85, + "learning_rate": 2.208664522739904e-05, + "loss": 1.9921, + "step": 19293500 + }, + { + "epoch": 55.85, + "learning_rate": 2.208592157975176e-05, + "loss": 1.9902, + "step": 19294000 + }, + { + "epoch": 55.85, + "learning_rate": 2.208519937939978e-05, + "loss": 1.9496, + "step": 19294500 + }, + { + "epoch": 55.85, + "learning_rate": 2.2084475731752502e-05, + "loss": 1.9512, + "step": 19295000 + }, + { + "epoch": 55.85, + "learning_rate": 2.2083752084105224e-05, + "loss": 1.9751, + "step": 19295500 + }, + { + "epoch": 55.85, + "learning_rate": 2.2083028436457946e-05, + "loss": 1.99, + "step": 19296000 + }, + { + "epoch": 55.86, + "learning_rate": 2.2082304788810672e-05, + "loss": 1.9771, + "step": 19296500 + }, + { + "epoch": 55.86, + "learning_rate": 2.2081581141163397e-05, + "loss": 1.9683, + "step": 19297000 + }, + { + "epoch": 55.86, + "learning_rate": 2.208085749351612e-05, + "loss": 1.9831, + "step": 19297500 + }, + { + "epoch": 55.86, + "learning_rate": 2.2080133845868842e-05, + "loss": 1.9806, + "step": 19298000 + }, + { + "epoch": 55.86, + "learning_rate": 2.2079410198221564e-05, + "loss": 1.9811, + "step": 19298500 + }, + { + "epoch": 55.86, + "learning_rate": 2.2078686550574286e-05, + "loss": 1.9603, + "step": 19299000 + }, + { + "epoch": 55.86, + "learning_rate": 2.207796290292701e-05, + "loss": 1.9821, + "step": 19299500 + }, + { + "epoch": 55.87, + "learning_rate": 2.2077240702575028e-05, + "loss": 2.0051, + "step": 19300000 + }, + { + "epoch": 55.87, + "learning_rate": 2.2076517054927753e-05, + "loss": 1.962, + "step": 19300500 + }, + { + "epoch": 55.87, + "learning_rate": 2.2075794854575772e-05, + "loss": 1.9943, + "step": 19301000 + }, + { + "epoch": 55.87, + "learning_rate": 2.2075071206928494e-05, + "loss": 1.9714, + "step": 19301500 + }, + { + "epoch": 55.87, + "learning_rate": 2.2074347559281217e-05, + "loss": 1.9989, + "step": 19302000 + }, + { + "epoch": 55.87, + "learning_rate": 2.207362391163394e-05, + "loss": 1.9922, + "step": 19302500 + }, + { + "epoch": 55.87, + "learning_rate": 2.2072901711281958e-05, + "loss": 1.9631, + "step": 19303000 + }, + { + "epoch": 55.88, + "learning_rate": 2.207217806363468e-05, + "loss": 1.9736, + "step": 19303500 + }, + { + "epoch": 55.88, + "learning_rate": 2.2071454415987402e-05, + "loss": 1.9685, + "step": 19304000 + }, + { + "epoch": 55.88, + "learning_rate": 2.2070730768340128e-05, + "loss": 1.9666, + "step": 19304500 + }, + { + "epoch": 55.88, + "learning_rate": 2.2070008567988147e-05, + "loss": 1.9677, + "step": 19305000 + }, + { + "epoch": 55.88, + "learning_rate": 2.206928492034087e-05, + "loss": 1.9771, + "step": 19305500 + }, + { + "epoch": 55.88, + "learning_rate": 2.206856127269359e-05, + "loss": 1.9355, + "step": 19306000 + }, + { + "epoch": 55.88, + "learning_rate": 2.2067837625046314e-05, + "loss": 1.9794, + "step": 19306500 + }, + { + "epoch": 55.89, + "learning_rate": 2.2067113977399036e-05, + "loss": 1.9751, + "step": 19307000 + }, + { + "epoch": 55.89, + "learning_rate": 2.206639032975176e-05, + "loss": 1.9352, + "step": 19307500 + }, + { + "epoch": 55.89, + "learning_rate": 2.2065666682104487e-05, + "loss": 1.9953, + "step": 19308000 + }, + { + "epoch": 55.89, + "learning_rate": 2.206494303445721e-05, + "loss": 1.9688, + "step": 19308500 + }, + { + "epoch": 55.89, + "learning_rate": 2.2064220834105225e-05, + "loss": 1.9594, + "step": 19309000 + }, + { + "epoch": 55.89, + "learning_rate": 2.206349718645795e-05, + "loss": 1.9612, + "step": 19309500 + }, + { + "epoch": 55.89, + "learning_rate": 2.2062773538810673e-05, + "loss": 1.9503, + "step": 19310000 + }, + { + "epoch": 55.9, + "learning_rate": 2.2062049891163395e-05, + "loss": 1.9692, + "step": 19310500 + }, + { + "epoch": 55.9, + "learning_rate": 2.2061326243516117e-05, + "loss": 1.966, + "step": 19311000 + }, + { + "epoch": 55.9, + "learning_rate": 2.206060259586884e-05, + "loss": 1.9761, + "step": 19311500 + }, + { + "epoch": 55.9, + "learning_rate": 2.2059878948221565e-05, + "loss": 1.9775, + "step": 19312000 + }, + { + "epoch": 55.9, + "learning_rate": 2.2059155300574287e-05, + "loss": 1.9615, + "step": 19312500 + }, + { + "epoch": 55.9, + "learning_rate": 2.2058431652927013e-05, + "loss": 1.9589, + "step": 19313000 + }, + { + "epoch": 55.9, + "learning_rate": 2.2057708005279735e-05, + "loss": 1.9781, + "step": 19313500 + }, + { + "epoch": 55.91, + "learning_rate": 2.2056984357632457e-05, + "loss": 1.9627, + "step": 19314000 + }, + { + "epoch": 55.91, + "learning_rate": 2.2056262157280473e-05, + "loss": 1.9945, + "step": 19314500 + }, + { + "epoch": 55.91, + "learning_rate": 2.2055539956928492e-05, + "loss": 1.9583, + "step": 19315000 + }, + { + "epoch": 55.91, + "learning_rate": 2.2054816309281217e-05, + "loss": 1.97, + "step": 19315500 + }, + { + "epoch": 55.91, + "learning_rate": 2.205409266163394e-05, + "loss": 1.9735, + "step": 19316000 + }, + { + "epoch": 55.91, + "learning_rate": 2.2053369013986662e-05, + "loss": 1.9548, + "step": 19316500 + }, + { + "epoch": 55.91, + "learning_rate": 2.2052645366339387e-05, + "loss": 1.9682, + "step": 19317000 + }, + { + "epoch": 55.92, + "learning_rate": 2.205192171869211e-05, + "loss": 1.9606, + "step": 19317500 + }, + { + "epoch": 55.92, + "learning_rate": 2.2051198071044832e-05, + "loss": 1.9875, + "step": 19318000 + }, + { + "epoch": 55.92, + "learning_rate": 2.2050474423397554e-05, + "loss": 1.9607, + "step": 19318500 + }, + { + "epoch": 55.92, + "learning_rate": 2.204975077575028e-05, + "loss": 1.9703, + "step": 19319000 + }, + { + "epoch": 55.92, + "learning_rate": 2.2049027128103002e-05, + "loss": 1.9728, + "step": 19319500 + }, + { + "epoch": 55.92, + "learning_rate": 2.2048303480455724e-05, + "loss": 2.0002, + "step": 19320000 + }, + { + "epoch": 55.92, + "learning_rate": 2.204757983280845e-05, + "loss": 1.9444, + "step": 19320500 + }, + { + "epoch": 55.93, + "learning_rate": 2.2046856185161172e-05, + "loss": 1.9407, + "step": 19321000 + }, + { + "epoch": 55.93, + "learning_rate": 2.2046133984809188e-05, + "loss": 1.9571, + "step": 19321500 + }, + { + "epoch": 55.93, + "learning_rate": 2.2045410337161913e-05, + "loss": 1.9901, + "step": 19322000 + }, + { + "epoch": 55.93, + "learning_rate": 2.2044686689514636e-05, + "loss": 1.9768, + "step": 19322500 + }, + { + "epoch": 55.93, + "learning_rate": 2.204396304186736e-05, + "loss": 1.9879, + "step": 19323000 + }, + { + "epoch": 55.93, + "learning_rate": 2.2043240841515377e-05, + "loss": 1.9906, + "step": 19323500 + }, + { + "epoch": 55.94, + "learning_rate": 2.2042517193868102e-05, + "loss": 1.9731, + "step": 19324000 + }, + { + "epoch": 55.94, + "learning_rate": 2.2041793546220825e-05, + "loss": 2.0012, + "step": 19324500 + }, + { + "epoch": 55.94, + "learning_rate": 2.2041069898573547e-05, + "loss": 1.9789, + "step": 19325000 + }, + { + "epoch": 55.94, + "learning_rate": 2.204034625092627e-05, + "loss": 1.9804, + "step": 19325500 + }, + { + "epoch": 55.94, + "learning_rate": 2.203962260327899e-05, + "loss": 1.9711, + "step": 19326000 + }, + { + "epoch": 55.94, + "learning_rate": 2.2038898955631717e-05, + "loss": 1.949, + "step": 19326500 + }, + { + "epoch": 55.94, + "learning_rate": 2.203817530798444e-05, + "loss": 1.9857, + "step": 19327000 + }, + { + "epoch": 55.95, + "learning_rate": 2.2037453107632458e-05, + "loss": 1.974, + "step": 19327500 + }, + { + "epoch": 55.95, + "learning_rate": 2.203672945998518e-05, + "loss": 1.9673, + "step": 19328000 + }, + { + "epoch": 55.95, + "learning_rate": 2.2036005812337903e-05, + "loss": 1.9629, + "step": 19328500 + }, + { + "epoch": 55.95, + "learning_rate": 2.2035282164690628e-05, + "loss": 1.985, + "step": 19329000 + }, + { + "epoch": 55.95, + "learning_rate": 2.203455851704335e-05, + "loss": 1.9895, + "step": 19329500 + }, + { + "epoch": 55.95, + "learning_rate": 2.2033836316691366e-05, + "loss": 1.9902, + "step": 19330000 + }, + { + "epoch": 55.95, + "learning_rate": 2.2033114116339388e-05, + "loss": 1.9659, + "step": 19330500 + }, + { + "epoch": 55.96, + "learning_rate": 2.203239046869211e-05, + "loss": 1.977, + "step": 19331000 + }, + { + "epoch": 55.96, + "learning_rate": 2.2031666821044833e-05, + "loss": 1.9891, + "step": 19331500 + }, + { + "epoch": 55.96, + "learning_rate": 2.2030943173397555e-05, + "loss": 1.9624, + "step": 19332000 + }, + { + "epoch": 55.96, + "learning_rate": 2.2030220973045574e-05, + "loss": 1.9585, + "step": 19332500 + }, + { + "epoch": 55.96, + "learning_rate": 2.2029497325398296e-05, + "loss": 1.9748, + "step": 19333000 + }, + { + "epoch": 55.96, + "learning_rate": 2.2028775125046315e-05, + "loss": 1.9653, + "step": 19333500 + }, + { + "epoch": 55.96, + "learning_rate": 2.2028051477399037e-05, + "loss": 1.9973, + "step": 19334000 + }, + { + "epoch": 55.97, + "learning_rate": 2.2027327829751763e-05, + "loss": 1.969, + "step": 19334500 + }, + { + "epoch": 55.97, + "learning_rate": 2.2026604182104485e-05, + "loss": 1.9617, + "step": 19335000 + }, + { + "epoch": 55.97, + "learning_rate": 2.2025880534457207e-05, + "loss": 1.9862, + "step": 19335500 + }, + { + "epoch": 55.97, + "learning_rate": 2.202515688680993e-05, + "loss": 1.982, + "step": 19336000 + }, + { + "epoch": 55.97, + "learning_rate": 2.2024433239162652e-05, + "loss": 1.968, + "step": 19336500 + }, + { + "epoch": 55.97, + "learning_rate": 2.202371103881067e-05, + "loss": 1.9727, + "step": 19337000 + }, + { + "epoch": 55.97, + "learning_rate": 2.2022987391163393e-05, + "loss": 1.9719, + "step": 19337500 + }, + { + "epoch": 55.98, + "learning_rate": 2.202226374351612e-05, + "loss": 1.9769, + "step": 19338000 + }, + { + "epoch": 55.98, + "learning_rate": 2.202154009586884e-05, + "loss": 1.96, + "step": 19338500 + }, + { + "epoch": 55.98, + "learning_rate": 2.2020816448221567e-05, + "loss": 1.9648, + "step": 19339000 + }, + { + "epoch": 55.98, + "learning_rate": 2.202009280057429e-05, + "loss": 1.988, + "step": 19339500 + }, + { + "epoch": 55.98, + "learning_rate": 2.201936915292701e-05, + "loss": 1.9792, + "step": 19340000 + }, + { + "epoch": 55.98, + "learning_rate": 2.2018645505279733e-05, + "loss": 1.9794, + "step": 19340500 + }, + { + "epoch": 55.98, + "learning_rate": 2.2017921857632455e-05, + "loss": 1.9952, + "step": 19341000 + }, + { + "epoch": 55.99, + "learning_rate": 2.2017199657280474e-05, + "loss": 1.9716, + "step": 19341500 + }, + { + "epoch": 55.99, + "learning_rate": 2.20164760096332e-05, + "loss": 1.9693, + "step": 19342000 + }, + { + "epoch": 55.99, + "learning_rate": 2.2015752361985922e-05, + "loss": 1.9581, + "step": 19342500 + }, + { + "epoch": 55.99, + "learning_rate": 2.2015028714338645e-05, + "loss": 1.9645, + "step": 19343000 + }, + { + "epoch": 55.99, + "learning_rate": 2.2014305066691367e-05, + "loss": 1.9989, + "step": 19343500 + }, + { + "epoch": 55.99, + "learning_rate": 2.2013581419044092e-05, + "loss": 1.9718, + "step": 19344000 + }, + { + "epoch": 55.99, + "learning_rate": 2.2012859218692108e-05, + "loss": 1.9909, + "step": 19344500 + }, + { + "epoch": 56.0, + "learning_rate": 2.201213557104483e-05, + "loss": 1.9929, + "step": 19345000 + }, + { + "epoch": 56.0, + "learning_rate": 2.2011411923397556e-05, + "loss": 1.9667, + "step": 19345500 + }, + { + "epoch": 56.0, + "learning_rate": 2.2010688275750278e-05, + "loss": 1.9712, + "step": 19346000 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.6760111246694773, + "eval_accuracy_mlm": 0.6422216699057036, + "eval_accuracy_nsp": 0.8570142030220951, + "eval_loss": 2.155001163482666, + "eval_runtime": 331.9519, + "eval_samples_per_second": 1314.606, + "eval_steps_per_second": 54.776, + "step": 19346432 + }, + { + "epoch": 56.0, + "learning_rate": 2.2009966075398297e-05, + "loss": 1.9831, + "step": 19346500 + }, + { + "epoch": 56.0, + "learning_rate": 2.2009243875046316e-05, + "loss": 1.9425, + "step": 19347000 + }, + { + "epoch": 56.0, + "learning_rate": 2.2008520227399038e-05, + "loss": 1.953, + "step": 19347500 + }, + { + "epoch": 56.0, + "learning_rate": 2.200779657975176e-05, + "loss": 1.9466, + "step": 19348000 + }, + { + "epoch": 56.01, + "learning_rate": 2.2007072932104483e-05, + "loss": 1.9602, + "step": 19348500 + }, + { + "epoch": 56.01, + "learning_rate": 2.20063507317525e-05, + "loss": 1.947, + "step": 19349000 + }, + { + "epoch": 56.01, + "learning_rate": 2.2005627084105227e-05, + "loss": 1.9583, + "step": 19349500 + }, + { + "epoch": 56.01, + "learning_rate": 2.200490343645795e-05, + "loss": 1.9722, + "step": 19350000 + }, + { + "epoch": 56.01, + "learning_rate": 2.200417978881067e-05, + "loss": 1.9281, + "step": 19350500 + }, + { + "epoch": 56.01, + "learning_rate": 2.200345758845869e-05, + "loss": 1.9633, + "step": 19351000 + }, + { + "epoch": 56.01, + "learning_rate": 2.2002733940811413e-05, + "loss": 1.938, + "step": 19351500 + }, + { + "epoch": 56.02, + "learning_rate": 2.2002010293164135e-05, + "loss": 1.9654, + "step": 19352000 + }, + { + "epoch": 56.02, + "learning_rate": 2.2001286645516857e-05, + "loss": 1.9775, + "step": 19352500 + }, + { + "epoch": 56.02, + "learning_rate": 2.2000562997869583e-05, + "loss": 1.9549, + "step": 19353000 + }, + { + "epoch": 56.02, + "learning_rate": 2.1999839350222305e-05, + "loss": 1.964, + "step": 19353500 + }, + { + "epoch": 56.02, + "learning_rate": 2.199911570257503e-05, + "loss": 1.953, + "step": 19354000 + }, + { + "epoch": 56.02, + "learning_rate": 2.1998393502223046e-05, + "loss": 1.9697, + "step": 19354500 + }, + { + "epoch": 56.02, + "learning_rate": 2.199766985457577e-05, + "loss": 1.96, + "step": 19355000 + }, + { + "epoch": 56.03, + "learning_rate": 2.1996946206928494e-05, + "loss": 1.9548, + "step": 19355500 + }, + { + "epoch": 56.03, + "learning_rate": 2.1996222559281216e-05, + "loss": 1.931, + "step": 19356000 + }, + { + "epoch": 56.03, + "learning_rate": 2.199549891163394e-05, + "loss": 1.9517, + "step": 19356500 + }, + { + "epoch": 56.03, + "learning_rate": 2.1994775263986664e-05, + "loss": 1.9388, + "step": 19357000 + }, + { + "epoch": 56.03, + "learning_rate": 2.1994051616339386e-05, + "loss": 1.9672, + "step": 19357500 + }, + { + "epoch": 56.03, + "learning_rate": 2.199332796869211e-05, + "loss": 1.983, + "step": 19358000 + }, + { + "epoch": 56.03, + "learning_rate": 2.1992605768340128e-05, + "loss": 1.9552, + "step": 19358500 + }, + { + "epoch": 56.04, + "learning_rate": 2.199188212069285e-05, + "loss": 1.9521, + "step": 19359000 + }, + { + "epoch": 56.04, + "learning_rate": 2.1991158473045572e-05, + "loss": 1.9437, + "step": 19359500 + }, + { + "epoch": 56.04, + "learning_rate": 2.1990434825398294e-05, + "loss": 1.9451, + "step": 19360000 + }, + { + "epoch": 56.04, + "learning_rate": 2.198971117775102e-05, + "loss": 1.9409, + "step": 19360500 + }, + { + "epoch": 56.04, + "learning_rate": 2.1988987530103746e-05, + "loss": 1.9525, + "step": 19361000 + }, + { + "epoch": 56.04, + "learning_rate": 2.198826532975176e-05, + "loss": 1.9648, + "step": 19361500 + }, + { + "epoch": 56.05, + "learning_rate": 2.1987541682104483e-05, + "loss": 1.9468, + "step": 19362000 + }, + { + "epoch": 56.05, + "learning_rate": 2.1986818034457206e-05, + "loss": 1.967, + "step": 19362500 + }, + { + "epoch": 56.05, + "learning_rate": 2.198609438680993e-05, + "loss": 1.9682, + "step": 19363000 + }, + { + "epoch": 56.05, + "learning_rate": 2.1985370739162653e-05, + "loss": 1.9514, + "step": 19363500 + }, + { + "epoch": 56.05, + "learning_rate": 2.198464709151538e-05, + "loss": 1.9606, + "step": 19364000 + }, + { + "epoch": 56.05, + "learning_rate": 2.19839234438681e-05, + "loss": 1.9584, + "step": 19364500 + }, + { + "epoch": 56.05, + "learning_rate": 2.1983199796220824e-05, + "loss": 1.9355, + "step": 19365000 + }, + { + "epoch": 56.06, + "learning_rate": 2.1982477595868843e-05, + "loss": 1.9518, + "step": 19365500 + }, + { + "epoch": 56.06, + "learning_rate": 2.1981753948221565e-05, + "loss": 1.9823, + "step": 19366000 + }, + { + "epoch": 56.06, + "learning_rate": 2.1981030300574287e-05, + "loss": 1.9429, + "step": 19366500 + }, + { + "epoch": 56.06, + "learning_rate": 2.1980308100222306e-05, + "loss": 1.9619, + "step": 19367000 + }, + { + "epoch": 56.06, + "learning_rate": 2.1979584452575028e-05, + "loss": 1.9374, + "step": 19367500 + }, + { + "epoch": 56.06, + "learning_rate": 2.1978860804927754e-05, + "loss": 1.9503, + "step": 19368000 + }, + { + "epoch": 56.06, + "learning_rate": 2.1978137157280476e-05, + "loss": 1.9645, + "step": 19368500 + }, + { + "epoch": 56.07, + "learning_rate": 2.1977414956928495e-05, + "loss": 1.9417, + "step": 19369000 + }, + { + "epoch": 56.07, + "learning_rate": 2.1976691309281217e-05, + "loss": 1.9599, + "step": 19369500 + }, + { + "epoch": 56.07, + "learning_rate": 2.197596766163394e-05, + "loss": 1.9747, + "step": 19370000 + }, + { + "epoch": 56.07, + "learning_rate": 2.197524401398666e-05, + "loss": 1.9359, + "step": 19370500 + }, + { + "epoch": 56.07, + "learning_rate": 2.1974520366339384e-05, + "loss": 1.9546, + "step": 19371000 + }, + { + "epoch": 56.07, + "learning_rate": 2.197379671869211e-05, + "loss": 1.9521, + "step": 19371500 + }, + { + "epoch": 56.07, + "learning_rate": 2.1973073071044835e-05, + "loss": 1.9357, + "step": 19372000 + }, + { + "epoch": 56.08, + "learning_rate": 2.1972349423397557e-05, + "loss": 1.9646, + "step": 19372500 + }, + { + "epoch": 56.08, + "learning_rate": 2.1971627223045573e-05, + "loss": 1.987, + "step": 19373000 + }, + { + "epoch": 56.08, + "learning_rate": 2.1970905022693592e-05, + "loss": 1.9396, + "step": 19373500 + }, + { + "epoch": 56.08, + "learning_rate": 2.1970181375046314e-05, + "loss": 2.0014, + "step": 19374000 + }, + { + "epoch": 56.08, + "learning_rate": 2.1969457727399036e-05, + "loss": 1.9722, + "step": 19374500 + }, + { + "epoch": 56.08, + "learning_rate": 2.196873407975176e-05, + "loss": 1.9612, + "step": 19375000 + }, + { + "epoch": 56.08, + "learning_rate": 2.1968010432104484e-05, + "loss": 1.974, + "step": 19375500 + }, + { + "epoch": 56.09, + "learning_rate": 2.196728678445721e-05, + "loss": 1.9733, + "step": 19376000 + }, + { + "epoch": 56.09, + "learning_rate": 2.1966564584105225e-05, + "loss": 1.9367, + "step": 19376500 + }, + { + "epoch": 56.09, + "learning_rate": 2.1965842383753244e-05, + "loss": 1.9752, + "step": 19377000 + }, + { + "epoch": 56.09, + "learning_rate": 2.1965118736105967e-05, + "loss": 1.9618, + "step": 19377500 + }, + { + "epoch": 56.09, + "learning_rate": 2.196439508845869e-05, + "loss": 1.9661, + "step": 19378000 + }, + { + "epoch": 56.09, + "learning_rate": 2.196367144081141e-05, + "loss": 1.9782, + "step": 19378500 + }, + { + "epoch": 56.09, + "learning_rate": 2.1962947793164133e-05, + "loss": 1.9581, + "step": 19379000 + }, + { + "epoch": 56.1, + "learning_rate": 2.196222414551686e-05, + "loss": 1.9674, + "step": 19379500 + }, + { + "epoch": 56.1, + "learning_rate": 2.1961500497869584e-05, + "loss": 1.9614, + "step": 19380000 + }, + { + "epoch": 56.1, + "learning_rate": 2.19607782975176e-05, + "loss": 1.9471, + "step": 19380500 + }, + { + "epoch": 56.1, + "learning_rate": 2.1960054649870322e-05, + "loss": 1.9664, + "step": 19381000 + }, + { + "epoch": 56.1, + "learning_rate": 2.1959331002223048e-05, + "loss": 1.9748, + "step": 19381500 + }, + { + "epoch": 56.1, + "learning_rate": 2.195860735457577e-05, + "loss": 1.9631, + "step": 19382000 + }, + { + "epoch": 56.1, + "learning_rate": 2.1957883706928492e-05, + "loss": 1.9568, + "step": 19382500 + }, + { + "epoch": 56.11, + "learning_rate": 2.1957160059281218e-05, + "loss": 1.9484, + "step": 19383000 + }, + { + "epoch": 56.11, + "learning_rate": 2.195643641163394e-05, + "loss": 1.935, + "step": 19383500 + }, + { + "epoch": 56.11, + "learning_rate": 2.1955712763986662e-05, + "loss": 1.9515, + "step": 19384000 + }, + { + "epoch": 56.11, + "learning_rate": 2.1954989116339385e-05, + "loss": 1.9509, + "step": 19384500 + }, + { + "epoch": 56.11, + "learning_rate": 2.195426546869211e-05, + "loss": 1.9469, + "step": 19385000 + }, + { + "epoch": 56.11, + "learning_rate": 2.1953541821044833e-05, + "loss": 1.9665, + "step": 19385500 + }, + { + "epoch": 56.11, + "learning_rate": 2.1952818173397555e-05, + "loss": 1.9645, + "step": 19386000 + }, + { + "epoch": 56.12, + "learning_rate": 2.195209452575028e-05, + "loss": 1.96, + "step": 19386500 + }, + { + "epoch": 56.12, + "learning_rate": 2.1951370878103003e-05, + "loss": 1.9637, + "step": 19387000 + }, + { + "epoch": 56.12, + "learning_rate": 2.195064867775102e-05, + "loss": 1.9833, + "step": 19387500 + }, + { + "epoch": 56.12, + "learning_rate": 2.1949925030103744e-05, + "loss": 1.9664, + "step": 19388000 + }, + { + "epoch": 56.12, + "learning_rate": 2.1949201382456466e-05, + "loss": 1.9648, + "step": 19388500 + }, + { + "epoch": 56.12, + "learning_rate": 2.1948477734809188e-05, + "loss": 1.969, + "step": 19389000 + }, + { + "epoch": 56.12, + "learning_rate": 2.194775408716191e-05, + "loss": 1.9506, + "step": 19389500 + }, + { + "epoch": 56.13, + "learning_rate": 2.1947030439514636e-05, + "loss": 1.9424, + "step": 19390000 + }, + { + "epoch": 56.13, + "learning_rate": 2.1946306791867362e-05, + "loss": 1.9759, + "step": 19390500 + }, + { + "epoch": 56.13, + "learning_rate": 2.1945583144220084e-05, + "loss": 1.9766, + "step": 19391000 + }, + { + "epoch": 56.13, + "learning_rate": 2.1944859496572806e-05, + "loss": 1.9547, + "step": 19391500 + }, + { + "epoch": 56.13, + "learning_rate": 2.194413584892553e-05, + "loss": 1.9721, + "step": 19392000 + }, + { + "epoch": 56.13, + "learning_rate": 2.194341220127825e-05, + "loss": 1.9593, + "step": 19392500 + }, + { + "epoch": 56.13, + "learning_rate": 2.1942688553630973e-05, + "loss": 1.9522, + "step": 19393000 + }, + { + "epoch": 56.14, + "learning_rate": 2.19419649059837e-05, + "loss": 1.9712, + "step": 19393500 + }, + { + "epoch": 56.14, + "learning_rate": 2.1941241258336424e-05, + "loss": 1.9429, + "step": 19394000 + }, + { + "epoch": 56.14, + "learning_rate": 2.194051905798444e-05, + "loss": 1.958, + "step": 19394500 + }, + { + "epoch": 56.14, + "learning_rate": 2.1939795410337162e-05, + "loss": 1.94, + "step": 19395000 + }, + { + "epoch": 56.14, + "learning_rate": 2.1939071762689888e-05, + "loss": 1.9557, + "step": 19395500 + }, + { + "epoch": 56.14, + "learning_rate": 2.193834811504261e-05, + "loss": 1.949, + "step": 19396000 + }, + { + "epoch": 56.14, + "learning_rate": 2.1937625914690625e-05, + "loss": 1.946, + "step": 19396500 + }, + { + "epoch": 56.15, + "learning_rate": 2.193690226704335e-05, + "loss": 1.9743, + "step": 19397000 + }, + { + "epoch": 56.15, + "learning_rate": 2.1936180066691367e-05, + "loss": 1.9549, + "step": 19397500 + }, + { + "epoch": 56.15, + "learning_rate": 2.1935456419044092e-05, + "loss": 1.9558, + "step": 19398000 + }, + { + "epoch": 56.15, + "learning_rate": 2.1934732771396814e-05, + "loss": 1.9757, + "step": 19398500 + }, + { + "epoch": 56.15, + "learning_rate": 2.1934009123749537e-05, + "loss": 1.9587, + "step": 19399000 + }, + { + "epoch": 56.15, + "learning_rate": 2.1933285476102262e-05, + "loss": 1.9562, + "step": 19399500 + }, + { + "epoch": 56.16, + "learning_rate": 2.1932561828454984e-05, + "loss": 1.9692, + "step": 19400000 + }, + { + "epoch": 56.16, + "learning_rate": 2.1931839628103e-05, + "loss": 1.9862, + "step": 19400500 + }, + { + "epoch": 56.16, + "learning_rate": 2.1931115980455726e-05, + "loss": 1.9588, + "step": 19401000 + }, + { + "epoch": 56.16, + "learning_rate": 2.193039233280845e-05, + "loss": 1.959, + "step": 19401500 + }, + { + "epoch": 56.16, + "learning_rate": 2.1929668685161174e-05, + "loss": 1.9553, + "step": 19402000 + }, + { + "epoch": 56.16, + "learning_rate": 2.1928945037513896e-05, + "loss": 1.9625, + "step": 19402500 + }, + { + "epoch": 56.16, + "learning_rate": 2.1928221389866618e-05, + "loss": 1.9714, + "step": 19403000 + }, + { + "epoch": 56.17, + "learning_rate": 2.192749774221934e-05, + "loss": 1.9696, + "step": 19403500 + }, + { + "epoch": 56.17, + "learning_rate": 2.192677554186736e-05, + "loss": 1.9568, + "step": 19404000 + }, + { + "epoch": 56.17, + "learning_rate": 2.192605189422008e-05, + "loss": 1.9652, + "step": 19404500 + }, + { + "epoch": 56.17, + "learning_rate": 2.1925328246572807e-05, + "loss": 1.944, + "step": 19405000 + }, + { + "epoch": 56.17, + "learning_rate": 2.192460459892553e-05, + "loss": 1.974, + "step": 19405500 + }, + { + "epoch": 56.17, + "learning_rate": 2.192388095127825e-05, + "loss": 1.9775, + "step": 19406000 + }, + { + "epoch": 56.17, + "learning_rate": 2.1923157303630977e-05, + "loss": 1.951, + "step": 19406500 + }, + { + "epoch": 56.18, + "learning_rate": 2.19224336559837e-05, + "loss": 1.9589, + "step": 19407000 + }, + { + "epoch": 56.18, + "learning_rate": 2.192171000833642e-05, + "loss": 1.9567, + "step": 19407500 + }, + { + "epoch": 56.18, + "learning_rate": 2.192098780798444e-05, + "loss": 1.9587, + "step": 19408000 + }, + { + "epoch": 56.18, + "learning_rate": 2.1920264160337163e-05, + "loss": 1.9649, + "step": 19408500 + }, + { + "epoch": 56.18, + "learning_rate": 2.191954051268989e-05, + "loss": 1.9587, + "step": 19409000 + }, + { + "epoch": 56.18, + "learning_rate": 2.191881686504261e-05, + "loss": 1.9575, + "step": 19409500 + }, + { + "epoch": 56.18, + "learning_rate": 2.1918094664690626e-05, + "loss": 1.9492, + "step": 19410000 + }, + { + "epoch": 56.19, + "learning_rate": 2.1917371017043352e-05, + "loss": 1.9466, + "step": 19410500 + }, + { + "epoch": 56.19, + "learning_rate": 2.1916648816691367e-05, + "loss": 1.9927, + "step": 19411000 + }, + { + "epoch": 56.19, + "learning_rate": 2.191592516904409e-05, + "loss": 1.9742, + "step": 19411500 + }, + { + "epoch": 56.19, + "learning_rate": 2.1915201521396815e-05, + "loss": 1.9518, + "step": 19412000 + }, + { + "epoch": 56.19, + "learning_rate": 2.1914477873749537e-05, + "loss": 1.9662, + "step": 19412500 + }, + { + "epoch": 56.19, + "learning_rate": 2.1913754226102263e-05, + "loss": 1.9689, + "step": 19413000 + }, + { + "epoch": 56.19, + "learning_rate": 2.1913030578454985e-05, + "loss": 1.9532, + "step": 19413500 + }, + { + "epoch": 56.2, + "learning_rate": 2.1912306930807708e-05, + "loss": 1.9562, + "step": 19414000 + }, + { + "epoch": 56.2, + "learning_rate": 2.191158328316043e-05, + "loss": 1.9713, + "step": 19414500 + }, + { + "epoch": 56.2, + "learning_rate": 2.1910859635513152e-05, + "loss": 1.9379, + "step": 19415000 + }, + { + "epoch": 56.2, + "learning_rate": 2.1910135987865878e-05, + "loss": 1.9343, + "step": 19415500 + }, + { + "epoch": 56.2, + "learning_rate": 2.1909413787513893e-05, + "loss": 1.9692, + "step": 19416000 + }, + { + "epoch": 56.2, + "learning_rate": 2.190869013986662e-05, + "loss": 1.9345, + "step": 19416500 + }, + { + "epoch": 56.2, + "learning_rate": 2.190796649221934e-05, + "loss": 1.9385, + "step": 19417000 + }, + { + "epoch": 56.21, + "learning_rate": 2.1907242844572067e-05, + "loss": 1.9707, + "step": 19417500 + }, + { + "epoch": 56.21, + "learning_rate": 2.190651919692479e-05, + "loss": 1.9572, + "step": 19418000 + }, + { + "epoch": 56.21, + "learning_rate": 2.190579554927751e-05, + "loss": 1.9677, + "step": 19418500 + }, + { + "epoch": 56.21, + "learning_rate": 2.1905071901630233e-05, + "loss": 1.9485, + "step": 19419000 + }, + { + "epoch": 56.21, + "learning_rate": 2.1904348253982956e-05, + "loss": 1.9556, + "step": 19419500 + }, + { + "epoch": 56.21, + "learning_rate": 2.1903626053630978e-05, + "loss": 1.9368, + "step": 19420000 + }, + { + "epoch": 56.21, + "learning_rate": 2.19029024059837e-05, + "loss": 1.9755, + "step": 19420500 + }, + { + "epoch": 56.22, + "learning_rate": 2.1902178758336422e-05, + "loss": 1.959, + "step": 19421000 + }, + { + "epoch": 56.22, + "learning_rate": 2.190145655798444e-05, + "loss": 1.9558, + "step": 19421500 + }, + { + "epoch": 56.22, + "learning_rate": 2.1900732910337164e-05, + "loss": 1.9839, + "step": 19422000 + }, + { + "epoch": 56.22, + "learning_rate": 2.1900009262689886e-05, + "loss": 1.9413, + "step": 19422500 + }, + { + "epoch": 56.22, + "learning_rate": 2.1899285615042608e-05, + "loss": 1.94, + "step": 19423000 + }, + { + "epoch": 56.22, + "learning_rate": 2.189856196739533e-05, + "loss": 1.9779, + "step": 19423500 + }, + { + "epoch": 56.22, + "learning_rate": 2.1897839767043353e-05, + "loss": 1.9544, + "step": 19424000 + }, + { + "epoch": 56.23, + "learning_rate": 2.1897117566691368e-05, + "loss": 1.9438, + "step": 19424500 + }, + { + "epoch": 56.23, + "learning_rate": 2.189639391904409e-05, + "loss": 1.9468, + "step": 19425000 + }, + { + "epoch": 56.23, + "learning_rate": 2.1895670271396816e-05, + "loss": 1.9744, + "step": 19425500 + }, + { + "epoch": 56.23, + "learning_rate": 2.1894946623749538e-05, + "loss": 1.9535, + "step": 19426000 + }, + { + "epoch": 56.23, + "learning_rate": 2.189422297610226e-05, + "loss": 1.9538, + "step": 19426500 + }, + { + "epoch": 56.23, + "learning_rate": 2.1893499328454983e-05, + "loss": 1.9787, + "step": 19427000 + }, + { + "epoch": 56.23, + "learning_rate": 2.189277568080771e-05, + "loss": 1.9611, + "step": 19427500 + }, + { + "epoch": 56.24, + "learning_rate": 2.189205203316043e-05, + "loss": 1.9607, + "step": 19428000 + }, + { + "epoch": 56.24, + "learning_rate": 2.1891328385513156e-05, + "loss": 1.9552, + "step": 19428500 + }, + { + "epoch": 56.24, + "learning_rate": 2.189060473786588e-05, + "loss": 1.9486, + "step": 19429000 + }, + { + "epoch": 56.24, + "learning_rate": 2.18898810902186e-05, + "loss": 1.9545, + "step": 19429500 + }, + { + "epoch": 56.24, + "learning_rate": 2.1889157442571323e-05, + "loss": 1.9537, + "step": 19430000 + }, + { + "epoch": 56.24, + "learning_rate": 2.1888433794924045e-05, + "loss": 1.9523, + "step": 19430500 + }, + { + "epoch": 56.24, + "learning_rate": 2.1887711594572064e-05, + "loss": 1.9506, + "step": 19431000 + }, + { + "epoch": 56.25, + "learning_rate": 2.188698794692479e-05, + "loss": 1.9461, + "step": 19431500 + }, + { + "epoch": 56.25, + "learning_rate": 2.1886264299277512e-05, + "loss": 1.9764, + "step": 19432000 + }, + { + "epoch": 56.25, + "learning_rate": 2.1885540651630234e-05, + "loss": 1.9835, + "step": 19432500 + }, + { + "epoch": 56.25, + "learning_rate": 2.1884817003982956e-05, + "loss": 1.9384, + "step": 19433000 + }, + { + "epoch": 56.25, + "learning_rate": 2.1884093356335682e-05, + "loss": 1.9502, + "step": 19433500 + }, + { + "epoch": 56.25, + "learning_rate": 2.1883369708688404e-05, + "loss": 1.9572, + "step": 19434000 + }, + { + "epoch": 56.25, + "learning_rate": 2.1882646061041126e-05, + "loss": 1.9607, + "step": 19434500 + }, + { + "epoch": 56.26, + "learning_rate": 2.1881922413393852e-05, + "loss": 1.9853, + "step": 19435000 + }, + { + "epoch": 56.26, + "learning_rate": 2.1881198765746574e-05, + "loss": 1.9229, + "step": 19435500 + }, + { + "epoch": 56.26, + "learning_rate": 2.1880476565394593e-05, + "loss": 1.996, + "step": 19436000 + }, + { + "epoch": 56.26, + "learning_rate": 2.187975436504261e-05, + "loss": 1.9372, + "step": 19436500 + }, + { + "epoch": 56.26, + "learning_rate": 2.187903071739533e-05, + "loss": 1.9417, + "step": 19437000 + }, + { + "epoch": 56.26, + "learning_rate": 2.187830851704335e-05, + "loss": 1.9654, + "step": 19437500 + }, + { + "epoch": 56.27, + "learning_rate": 2.1877584869396072e-05, + "loss": 1.9656, + "step": 19438000 + }, + { + "epoch": 56.27, + "learning_rate": 2.1876861221748794e-05, + "loss": 1.9649, + "step": 19438500 + }, + { + "epoch": 56.27, + "learning_rate": 2.187613757410152e-05, + "loss": 1.9391, + "step": 19439000 + }, + { + "epoch": 56.27, + "learning_rate": 2.1875413926454246e-05, + "loss": 1.9451, + "step": 19439500 + }, + { + "epoch": 56.27, + "learning_rate": 2.1874690278806968e-05, + "loss": 1.9627, + "step": 19440000 + }, + { + "epoch": 56.27, + "learning_rate": 2.1873968078454983e-05, + "loss": 1.9544, + "step": 19440500 + }, + { + "epoch": 56.27, + "learning_rate": 2.1873245878103002e-05, + "loss": 1.9757, + "step": 19441000 + }, + { + "epoch": 56.28, + "learning_rate": 2.1872522230455725e-05, + "loss": 1.9785, + "step": 19441500 + }, + { + "epoch": 56.28, + "learning_rate": 2.1871798582808447e-05, + "loss": 1.9589, + "step": 19442000 + }, + { + "epoch": 56.28, + "learning_rate": 2.1871074935161173e-05, + "loss": 1.9502, + "step": 19442500 + }, + { + "epoch": 56.28, + "learning_rate": 2.187035273480919e-05, + "loss": 1.9486, + "step": 19443000 + }, + { + "epoch": 56.28, + "learning_rate": 2.1869629087161914e-05, + "loss": 1.9696, + "step": 19443500 + }, + { + "epoch": 56.28, + "learning_rate": 2.1868905439514636e-05, + "loss": 1.9572, + "step": 19444000 + }, + { + "epoch": 56.28, + "learning_rate": 2.1868181791867358e-05, + "loss": 1.9478, + "step": 19444500 + }, + { + "epoch": 56.29, + "learning_rate": 2.1867458144220084e-05, + "loss": 1.938, + "step": 19445000 + }, + { + "epoch": 56.29, + "learning_rate": 2.1866734496572806e-05, + "loss": 1.9682, + "step": 19445500 + }, + { + "epoch": 56.29, + "learning_rate": 2.1866010848925528e-05, + "loss": 1.9598, + "step": 19446000 + }, + { + "epoch": 56.29, + "learning_rate": 2.1865287201278254e-05, + "loss": 1.9499, + "step": 19446500 + }, + { + "epoch": 56.29, + "learning_rate": 2.1864563553630976e-05, + "loss": 1.9582, + "step": 19447000 + }, + { + "epoch": 56.29, + "learning_rate": 2.18638399059837e-05, + "loss": 1.977, + "step": 19447500 + }, + { + "epoch": 56.29, + "learning_rate": 2.186311625833642e-05, + "loss": 1.9693, + "step": 19448000 + }, + { + "epoch": 56.3, + "learning_rate": 2.1862392610689146e-05, + "loss": 1.9568, + "step": 19448500 + }, + { + "epoch": 56.3, + "learning_rate": 2.186166896304187e-05, + "loss": 1.9657, + "step": 19449000 + }, + { + "epoch": 56.3, + "learning_rate": 2.1860946762689884e-05, + "loss": 1.9443, + "step": 19449500 + }, + { + "epoch": 56.3, + "learning_rate": 2.1860224562337903e-05, + "loss": 1.9452, + "step": 19450000 + }, + { + "epoch": 56.3, + "learning_rate": 2.185950091469063e-05, + "loss": 1.9728, + "step": 19450500 + }, + { + "epoch": 56.3, + "learning_rate": 2.185877726704335e-05, + "loss": 1.9524, + "step": 19451000 + }, + { + "epoch": 56.3, + "learning_rate": 2.1858053619396073e-05, + "loss": 1.9745, + "step": 19451500 + }, + { + "epoch": 56.31, + "learning_rate": 2.1857329971748795e-05, + "loss": 1.9619, + "step": 19452000 + }, + { + "epoch": 56.31, + "learning_rate": 2.185660632410152e-05, + "loss": 1.9769, + "step": 19452500 + }, + { + "epoch": 56.31, + "learning_rate": 2.1855882676454243e-05, + "loss": 1.9634, + "step": 19453000 + }, + { + "epoch": 56.31, + "learning_rate": 2.185515902880697e-05, + "loss": 1.9797, + "step": 19453500 + }, + { + "epoch": 56.31, + "learning_rate": 2.185443538115969e-05, + "loss": 1.9526, + "step": 19454000 + }, + { + "epoch": 56.31, + "learning_rate": 2.1853711733512413e-05, + "loss": 1.9567, + "step": 19454500 + }, + { + "epoch": 56.31, + "learning_rate": 2.1852989533160432e-05, + "loss": 1.9715, + "step": 19455000 + }, + { + "epoch": 56.32, + "learning_rate": 2.1852265885513154e-05, + "loss": 1.9491, + "step": 19455500 + }, + { + "epoch": 56.32, + "learning_rate": 2.1851542237865877e-05, + "loss": 1.9527, + "step": 19456000 + }, + { + "epoch": 56.32, + "learning_rate": 2.1850820037513896e-05, + "loss": 1.9775, + "step": 19456500 + }, + { + "epoch": 56.32, + "learning_rate": 2.1850096389866618e-05, + "loss": 1.953, + "step": 19457000 + }, + { + "epoch": 56.32, + "learning_rate": 2.1849372742219343e-05, + "loss": 1.9632, + "step": 19457500 + }, + { + "epoch": 56.32, + "learning_rate": 2.1848649094572066e-05, + "loss": 1.9428, + "step": 19458000 + }, + { + "epoch": 56.32, + "learning_rate": 2.1847925446924788e-05, + "loss": 1.9642, + "step": 19458500 + }, + { + "epoch": 56.33, + "learning_rate": 2.184720179927751e-05, + "loss": 1.989, + "step": 19459000 + }, + { + "epoch": 56.33, + "learning_rate": 2.1846478151630236e-05, + "loss": 1.9518, + "step": 19459500 + }, + { + "epoch": 56.33, + "learning_rate": 2.1845754503982958e-05, + "loss": 1.9825, + "step": 19460000 + }, + { + "epoch": 56.33, + "learning_rate": 2.184503085633568e-05, + "loss": 1.9622, + "step": 19460500 + }, + { + "epoch": 56.33, + "learning_rate": 2.1844308655983696e-05, + "loss": 1.96, + "step": 19461000 + }, + { + "epoch": 56.33, + "learning_rate": 2.184358500833642e-05, + "loss": 1.9404, + "step": 19461500 + }, + { + "epoch": 56.33, + "learning_rate": 2.1842861360689147e-05, + "loss": 1.9777, + "step": 19462000 + }, + { + "epoch": 56.34, + "learning_rate": 2.1842139160337163e-05, + "loss": 1.9839, + "step": 19462500 + }, + { + "epoch": 56.34, + "learning_rate": 2.1841415512689885e-05, + "loss": 1.9685, + "step": 19463000 + }, + { + "epoch": 56.34, + "learning_rate": 2.184069186504261e-05, + "loss": 1.9757, + "step": 19463500 + }, + { + "epoch": 56.34, + "learning_rate": 2.1839968217395333e-05, + "loss": 1.9793, + "step": 19464000 + }, + { + "epoch": 56.34, + "learning_rate": 2.1839244569748055e-05, + "loss": 1.9721, + "step": 19464500 + }, + { + "epoch": 56.34, + "learning_rate": 2.183852092210078e-05, + "loss": 1.9572, + "step": 19465000 + }, + { + "epoch": 56.34, + "learning_rate": 2.1837797274453503e-05, + "loss": 1.9414, + "step": 19465500 + }, + { + "epoch": 56.35, + "learning_rate": 2.1837073626806225e-05, + "loss": 1.9384, + "step": 19466000 + }, + { + "epoch": 56.35, + "learning_rate": 2.1836349979158947e-05, + "loss": 1.9497, + "step": 19466500 + }, + { + "epoch": 56.35, + "learning_rate": 2.1835626331511673e-05, + "loss": 1.9577, + "step": 19467000 + }, + { + "epoch": 56.35, + "learning_rate": 2.1834902683864395e-05, + "loss": 1.9583, + "step": 19467500 + }, + { + "epoch": 56.35, + "learning_rate": 2.1834179036217117e-05, + "loss": 1.953, + "step": 19468000 + }, + { + "epoch": 56.35, + "learning_rate": 2.1833455388569843e-05, + "loss": 1.9791, + "step": 19468500 + }, + { + "epoch": 56.35, + "learning_rate": 2.1832733188217862e-05, + "loss": 1.9562, + "step": 19469000 + }, + { + "epoch": 56.36, + "learning_rate": 2.1832009540570584e-05, + "loss": 1.975, + "step": 19469500 + }, + { + "epoch": 56.36, + "learning_rate": 2.18312873402186e-05, + "loss": 1.9485, + "step": 19470000 + }, + { + "epoch": 56.36, + "learning_rate": 2.1830563692571325e-05, + "loss": 1.9521, + "step": 19470500 + }, + { + "epoch": 56.36, + "learning_rate": 2.1829840044924047e-05, + "loss": 1.9538, + "step": 19471000 + }, + { + "epoch": 56.36, + "learning_rate": 2.182911639727677e-05, + "loss": 1.959, + "step": 19471500 + }, + { + "epoch": 56.36, + "learning_rate": 2.1828392749629492e-05, + "loss": 1.9574, + "step": 19472000 + }, + { + "epoch": 56.36, + "learning_rate": 2.1827669101982218e-05, + "loss": 1.9554, + "step": 19472500 + }, + { + "epoch": 56.37, + "learning_rate": 2.1826946901630237e-05, + "loss": 1.9575, + "step": 19473000 + }, + { + "epoch": 56.37, + "learning_rate": 2.1826224701278252e-05, + "loss": 1.9494, + "step": 19473500 + }, + { + "epoch": 56.37, + "learning_rate": 2.1825501053630974e-05, + "loss": 1.9362, + "step": 19474000 + }, + { + "epoch": 56.37, + "learning_rate": 2.18247774059837e-05, + "loss": 1.9768, + "step": 19474500 + }, + { + "epoch": 56.37, + "learning_rate": 2.1824053758336422e-05, + "loss": 1.9462, + "step": 19475000 + }, + { + "epoch": 56.37, + "learning_rate": 2.1823330110689144e-05, + "loss": 1.9456, + "step": 19475500 + }, + { + "epoch": 56.38, + "learning_rate": 2.182260646304187e-05, + "loss": 1.9612, + "step": 19476000 + }, + { + "epoch": 56.38, + "learning_rate": 2.1821882815394592e-05, + "loss": 1.9441, + "step": 19476500 + }, + { + "epoch": 56.38, + "learning_rate": 2.1821159167747314e-05, + "loss": 1.9724, + "step": 19477000 + }, + { + "epoch": 56.38, + "learning_rate": 2.1820435520100037e-05, + "loss": 1.963, + "step": 19477500 + }, + { + "epoch": 56.38, + "learning_rate": 2.1819711872452762e-05, + "loss": 1.9662, + "step": 19478000 + }, + { + "epoch": 56.38, + "learning_rate": 2.1818988224805485e-05, + "loss": 1.9509, + "step": 19478500 + }, + { + "epoch": 56.38, + "learning_rate": 2.1818264577158207e-05, + "loss": 1.9514, + "step": 19479000 + }, + { + "epoch": 56.39, + "learning_rate": 2.1817542376806226e-05, + "loss": 1.9434, + "step": 19479500 + }, + { + "epoch": 56.39, + "learning_rate": 2.181681872915895e-05, + "loss": 1.9664, + "step": 19480000 + }, + { + "epoch": 56.39, + "learning_rate": 2.1816096528806967e-05, + "loss": 1.9677, + "step": 19480500 + }, + { + "epoch": 56.39, + "learning_rate": 2.181537288115969e-05, + "loss": 1.98, + "step": 19481000 + }, + { + "epoch": 56.39, + "learning_rate": 2.181464923351241e-05, + "loss": 1.9451, + "step": 19481500 + }, + { + "epoch": 56.39, + "learning_rate": 2.1813925585865137e-05, + "loss": 1.9783, + "step": 19482000 + }, + { + "epoch": 56.39, + "learning_rate": 2.181320193821786e-05, + "loss": 1.9271, + "step": 19482500 + }, + { + "epoch": 56.4, + "learning_rate": 2.181247829057058e-05, + "loss": 1.969, + "step": 19483000 + }, + { + "epoch": 56.4, + "learning_rate": 2.18117560902186e-05, + "loss": 1.9516, + "step": 19483500 + }, + { + "epoch": 56.4, + "learning_rate": 2.1811032442571326e-05, + "loss": 1.9584, + "step": 19484000 + }, + { + "epoch": 56.4, + "learning_rate": 2.1810308794924048e-05, + "loss": 1.951, + "step": 19484500 + }, + { + "epoch": 56.4, + "learning_rate": 2.180958514727677e-05, + "loss": 1.9454, + "step": 19485000 + }, + { + "epoch": 56.4, + "learning_rate": 2.1808861499629493e-05, + "loss": 1.9623, + "step": 19485500 + }, + { + "epoch": 56.4, + "learning_rate": 2.1808137851982215e-05, + "loss": 1.965, + "step": 19486000 + }, + { + "epoch": 56.41, + "learning_rate": 2.1807414204334937e-05, + "loss": 1.9667, + "step": 19486500 + }, + { + "epoch": 56.41, + "learning_rate": 2.1806690556687663e-05, + "loss": 1.9701, + "step": 19487000 + }, + { + "epoch": 56.41, + "learning_rate": 2.180596690904039e-05, + "loss": 1.9742, + "step": 19487500 + }, + { + "epoch": 56.41, + "learning_rate": 2.180524326139311e-05, + "loss": 1.9599, + "step": 19488000 + }, + { + "epoch": 56.41, + "learning_rate": 2.1804519613745833e-05, + "loss": 1.9469, + "step": 19488500 + }, + { + "epoch": 56.41, + "learning_rate": 2.1803795966098555e-05, + "loss": 1.9584, + "step": 19489000 + }, + { + "epoch": 56.41, + "learning_rate": 2.1803073765746574e-05, + "loss": 1.9631, + "step": 19489500 + }, + { + "epoch": 56.42, + "learning_rate": 2.1802350118099296e-05, + "loss": 1.9903, + "step": 19490000 + }, + { + "epoch": 56.42, + "learning_rate": 2.180162647045202e-05, + "loss": 1.9666, + "step": 19490500 + }, + { + "epoch": 56.42, + "learning_rate": 2.180090427010004e-05, + "loss": 1.9591, + "step": 19491000 + }, + { + "epoch": 56.42, + "learning_rate": 2.1800180622452763e-05, + "loss": 1.9772, + "step": 19491500 + }, + { + "epoch": 56.42, + "learning_rate": 2.1799456974805485e-05, + "loss": 1.9617, + "step": 19492000 + }, + { + "epoch": 56.42, + "learning_rate": 2.1798733327158208e-05, + "loss": 1.9978, + "step": 19492500 + }, + { + "epoch": 56.42, + "learning_rate": 2.179800967951093e-05, + "loss": 1.9514, + "step": 19493000 + }, + { + "epoch": 56.43, + "learning_rate": 2.1797286031863652e-05, + "loss": 1.9589, + "step": 19493500 + }, + { + "epoch": 56.43, + "learning_rate": 2.1796562384216378e-05, + "loss": 1.962, + "step": 19494000 + }, + { + "epoch": 56.43, + "learning_rate": 2.1795838736569103e-05, + "loss": 1.9705, + "step": 19494500 + }, + { + "epoch": 56.43, + "learning_rate": 2.1795115088921826e-05, + "loss": 1.9747, + "step": 19495000 + }, + { + "epoch": 56.43, + "learning_rate": 2.179439288856984e-05, + "loss": 1.9458, + "step": 19495500 + }, + { + "epoch": 56.43, + "learning_rate": 2.1793669240922567e-05, + "loss": 1.9647, + "step": 19496000 + }, + { + "epoch": 56.43, + "learning_rate": 2.179294559327529e-05, + "loss": 1.9747, + "step": 19496500 + }, + { + "epoch": 56.44, + "learning_rate": 2.179222194562801e-05, + "loss": 1.9595, + "step": 19497000 + }, + { + "epoch": 56.44, + "learning_rate": 2.1791498297980733e-05, + "loss": 1.9751, + "step": 19497500 + }, + { + "epoch": 56.44, + "learning_rate": 2.179077465033346e-05, + "loss": 1.9913, + "step": 19498000 + }, + { + "epoch": 56.44, + "learning_rate": 2.1790052449981478e-05, + "loss": 1.9576, + "step": 19498500 + }, + { + "epoch": 56.44, + "learning_rate": 2.17893288023342e-05, + "loss": 1.9665, + "step": 19499000 + }, + { + "epoch": 56.44, + "learning_rate": 2.1788605154686922e-05, + "loss": 1.9482, + "step": 19499500 + }, + { + "epoch": 56.44, + "learning_rate": 2.178788295433494e-05, + "loss": 1.9574, + "step": 19500000 + }, + { + "epoch": 56.45, + "learning_rate": 2.1787159306687664e-05, + "loss": 1.9675, + "step": 19500500 + }, + { + "epoch": 56.45, + "learning_rate": 2.1786435659040386e-05, + "loss": 1.9761, + "step": 19501000 + }, + { + "epoch": 56.45, + "learning_rate": 2.1785712011393108e-05, + "loss": 1.9557, + "step": 19501500 + }, + { + "epoch": 56.45, + "learning_rate": 2.1784988363745834e-05, + "loss": 1.9765, + "step": 19502000 + }, + { + "epoch": 56.45, + "learning_rate": 2.1784264716098556e-05, + "loss": 1.9692, + "step": 19502500 + }, + { + "epoch": 56.45, + "learning_rate": 2.1783541068451278e-05, + "loss": 1.9537, + "step": 19503000 + }, + { + "epoch": 56.45, + "learning_rate": 2.1782817420804004e-05, + "loss": 1.9706, + "step": 19503500 + }, + { + "epoch": 56.46, + "learning_rate": 2.178209522045202e-05, + "loss": 1.9747, + "step": 19504000 + }, + { + "epoch": 56.46, + "learning_rate": 2.178137157280474e-05, + "loss": 1.9688, + "step": 19504500 + }, + { + "epoch": 56.46, + "learning_rate": 2.1780647925157467e-05, + "loss": 2.0037, + "step": 19505000 + }, + { + "epoch": 56.46, + "learning_rate": 2.1779925724805483e-05, + "loss": 1.9535, + "step": 19505500 + }, + { + "epoch": 56.46, + "learning_rate": 2.177920207715821e-05, + "loss": 1.9549, + "step": 19506000 + }, + { + "epoch": 56.46, + "learning_rate": 2.177847842951093e-05, + "loss": 1.9746, + "step": 19506500 + }, + { + "epoch": 56.46, + "learning_rate": 2.1777754781863656e-05, + "loss": 1.9607, + "step": 19507000 + }, + { + "epoch": 56.47, + "learning_rate": 2.177703113421638e-05, + "loss": 1.9827, + "step": 19507500 + }, + { + "epoch": 56.47, + "learning_rate": 2.17763074865691e-05, + "loss": 1.9656, + "step": 19508000 + }, + { + "epoch": 56.47, + "learning_rate": 2.1775583838921823e-05, + "loss": 1.9584, + "step": 19508500 + }, + { + "epoch": 56.47, + "learning_rate": 2.1774860191274545e-05, + "loss": 1.9855, + "step": 19509000 + }, + { + "epoch": 56.47, + "learning_rate": 2.177413654362727e-05, + "loss": 1.9694, + "step": 19509500 + }, + { + "epoch": 56.47, + "learning_rate": 2.1773412895979993e-05, + "loss": 1.955, + "step": 19510000 + }, + { + "epoch": 56.47, + "learning_rate": 2.177268924833272e-05, + "loss": 1.973, + "step": 19510500 + }, + { + "epoch": 56.48, + "learning_rate": 2.177196560068544e-05, + "loss": 1.9514, + "step": 19511000 + }, + { + "epoch": 56.48, + "learning_rate": 2.1771243400333456e-05, + "loss": 1.9532, + "step": 19511500 + }, + { + "epoch": 56.48, + "learning_rate": 2.1770521199981475e-05, + "loss": 1.9757, + "step": 19512000 + }, + { + "epoch": 56.48, + "learning_rate": 2.1769797552334198e-05, + "loss": 1.9614, + "step": 19512500 + }, + { + "epoch": 56.48, + "learning_rate": 2.176907390468692e-05, + "loss": 1.952, + "step": 19513000 + }, + { + "epoch": 56.48, + "learning_rate": 2.1768350257039645e-05, + "loss": 1.9384, + "step": 19513500 + }, + { + "epoch": 56.49, + "learning_rate": 2.1767626609392368e-05, + "loss": 1.9741, + "step": 19514000 + }, + { + "epoch": 56.49, + "learning_rate": 2.1766902961745093e-05, + "loss": 1.9701, + "step": 19514500 + }, + { + "epoch": 56.49, + "learning_rate": 2.1766179314097816e-05, + "loss": 1.9615, + "step": 19515000 + }, + { + "epoch": 56.49, + "learning_rate": 2.176545711374583e-05, + "loss": 1.9527, + "step": 19515500 + }, + { + "epoch": 56.49, + "learning_rate": 2.1764733466098557e-05, + "loss": 1.9619, + "step": 19516000 + }, + { + "epoch": 56.49, + "learning_rate": 2.176400981845128e-05, + "loss": 1.9659, + "step": 19516500 + }, + { + "epoch": 56.49, + "learning_rate": 2.1763286170804005e-05, + "loss": 1.9538, + "step": 19517000 + }, + { + "epoch": 56.5, + "learning_rate": 2.1762562523156727e-05, + "loss": 1.9963, + "step": 19517500 + }, + { + "epoch": 56.5, + "learning_rate": 2.176183887550945e-05, + "loss": 1.9385, + "step": 19518000 + }, + { + "epoch": 56.5, + "learning_rate": 2.176111522786217e-05, + "loss": 1.96, + "step": 19518500 + }, + { + "epoch": 56.5, + "learning_rate": 2.1760391580214894e-05, + "loss": 1.9426, + "step": 19519000 + }, + { + "epoch": 56.5, + "learning_rate": 2.1759669379862912e-05, + "loss": 1.956, + "step": 19519500 + }, + { + "epoch": 56.5, + "learning_rate": 2.175894717951093e-05, + "loss": 1.9538, + "step": 19520000 + }, + { + "epoch": 56.5, + "learning_rate": 2.1758224979158947e-05, + "loss": 1.9802, + "step": 19520500 + }, + { + "epoch": 56.51, + "learning_rate": 2.1757501331511673e-05, + "loss": 1.946, + "step": 19521000 + }, + { + "epoch": 56.51, + "learning_rate": 2.1756777683864395e-05, + "loss": 1.961, + "step": 19521500 + }, + { + "epoch": 56.51, + "learning_rate": 2.175605403621712e-05, + "loss": 1.9763, + "step": 19522000 + }, + { + "epoch": 56.51, + "learning_rate": 2.1755330388569843e-05, + "loss": 1.984, + "step": 19522500 + }, + { + "epoch": 56.51, + "learning_rate": 2.1754606740922565e-05, + "loss": 1.9752, + "step": 19523000 + }, + { + "epoch": 56.51, + "learning_rate": 2.1753883093275287e-05, + "loss": 1.9515, + "step": 19523500 + }, + { + "epoch": 56.51, + "learning_rate": 2.175315944562801e-05, + "loss": 1.9864, + "step": 19524000 + }, + { + "epoch": 56.52, + "learning_rate": 2.1752435797980735e-05, + "loss": 1.9673, + "step": 19524500 + }, + { + "epoch": 56.52, + "learning_rate": 2.1751712150333457e-05, + "loss": 1.9852, + "step": 19525000 + }, + { + "epoch": 56.52, + "learning_rate": 2.1750988502686183e-05, + "loss": 1.9637, + "step": 19525500 + }, + { + "epoch": 56.52, + "learning_rate": 2.1750264855038905e-05, + "loss": 1.9646, + "step": 19526000 + }, + { + "epoch": 56.52, + "learning_rate": 2.1749541207391627e-05, + "loss": 1.9938, + "step": 19526500 + }, + { + "epoch": 56.52, + "learning_rate": 2.1748819007039646e-05, + "loss": 1.9633, + "step": 19527000 + }, + { + "epoch": 56.52, + "learning_rate": 2.174809535939237e-05, + "loss": 1.9901, + "step": 19527500 + }, + { + "epoch": 56.53, + "learning_rate": 2.1747371711745094e-05, + "loss": 1.9657, + "step": 19528000 + }, + { + "epoch": 56.53, + "learning_rate": 2.1746648064097816e-05, + "loss": 1.984, + "step": 19528500 + }, + { + "epoch": 56.53, + "learning_rate": 2.174592441645054e-05, + "loss": 1.9659, + "step": 19529000 + }, + { + "epoch": 56.53, + "learning_rate": 2.174520076880326e-05, + "loss": 1.96, + "step": 19529500 + }, + { + "epoch": 56.53, + "learning_rate": 2.1744477121155983e-05, + "loss": 1.9616, + "step": 19530000 + }, + { + "epoch": 56.53, + "learning_rate": 2.174375347350871e-05, + "loss": 1.9323, + "step": 19530500 + }, + { + "epoch": 56.53, + "learning_rate": 2.174302982586143e-05, + "loss": 1.955, + "step": 19531000 + }, + { + "epoch": 56.54, + "learning_rate": 2.1742309072804743e-05, + "loss": 1.9706, + "step": 19531500 + }, + { + "epoch": 56.54, + "learning_rate": 2.174158542515747e-05, + "loss": 1.9721, + "step": 19532000 + }, + { + "epoch": 56.54, + "learning_rate": 2.1740863224805484e-05, + "loss": 1.9608, + "step": 19532500 + }, + { + "epoch": 56.54, + "learning_rate": 2.174013957715821e-05, + "loss": 1.9433, + "step": 19533000 + }, + { + "epoch": 56.54, + "learning_rate": 2.1739415929510932e-05, + "loss": 1.9564, + "step": 19533500 + }, + { + "epoch": 56.54, + "learning_rate": 2.1738692281863654e-05, + "loss": 1.9846, + "step": 19534000 + }, + { + "epoch": 56.54, + "learning_rate": 2.1737968634216377e-05, + "loss": 1.9571, + "step": 19534500 + }, + { + "epoch": 56.55, + "learning_rate": 2.17372449865691e-05, + "loss": 1.9383, + "step": 19535000 + }, + { + "epoch": 56.55, + "learning_rate": 2.1736521338921825e-05, + "loss": 1.9733, + "step": 19535500 + }, + { + "epoch": 56.55, + "learning_rate": 2.1735797691274547e-05, + "loss": 1.9553, + "step": 19536000 + }, + { + "epoch": 56.55, + "learning_rate": 2.1735074043627272e-05, + "loss": 1.9618, + "step": 19536500 + }, + { + "epoch": 56.55, + "learning_rate": 2.1734350395979995e-05, + "loss": 1.9754, + "step": 19537000 + }, + { + "epoch": 56.55, + "learning_rate": 2.173362819562801e-05, + "loss": 1.9582, + "step": 19537500 + }, + { + "epoch": 56.55, + "learning_rate": 2.1732904547980736e-05, + "loss": 1.9621, + "step": 19538000 + }, + { + "epoch": 56.56, + "learning_rate": 2.1732180900333458e-05, + "loss": 1.9749, + "step": 19538500 + }, + { + "epoch": 56.56, + "learning_rate": 2.173145725268618e-05, + "loss": 1.9758, + "step": 19539000 + }, + { + "epoch": 56.56, + "learning_rate": 2.17307350523342e-05, + "loss": 1.9638, + "step": 19539500 + }, + { + "epoch": 56.56, + "learning_rate": 2.173001140468692e-05, + "loss": 1.9547, + "step": 19540000 + }, + { + "epoch": 56.56, + "learning_rate": 2.1729287757039647e-05, + "loss": 1.9686, + "step": 19540500 + }, + { + "epoch": 56.56, + "learning_rate": 2.172856410939237e-05, + "loss": 1.9894, + "step": 19541000 + }, + { + "epoch": 56.56, + "learning_rate": 2.172784046174509e-05, + "loss": 1.9523, + "step": 19541500 + }, + { + "epoch": 56.57, + "learning_rate": 2.1727116814097814e-05, + "loss": 1.9627, + "step": 19542000 + }, + { + "epoch": 56.57, + "learning_rate": 2.1726396061041126e-05, + "loss": 1.9781, + "step": 19542500 + }, + { + "epoch": 56.57, + "learning_rate": 2.1725672413393848e-05, + "loss": 1.9493, + "step": 19543000 + }, + { + "epoch": 56.57, + "learning_rate": 2.1724948765746574e-05, + "loss": 1.9739, + "step": 19543500 + }, + { + "epoch": 56.57, + "learning_rate": 2.17242251180993e-05, + "loss": 1.9441, + "step": 19544000 + }, + { + "epoch": 56.57, + "learning_rate": 2.1723501470452022e-05, + "loss": 1.9658, + "step": 19544500 + }, + { + "epoch": 56.57, + "learning_rate": 2.1722777822804744e-05, + "loss": 1.9591, + "step": 19545000 + }, + { + "epoch": 56.58, + "learning_rate": 2.172205562245276e-05, + "loss": 1.9811, + "step": 19545500 + }, + { + "epoch": 56.58, + "learning_rate": 2.1721331974805485e-05, + "loss": 1.9441, + "step": 19546000 + }, + { + "epoch": 56.58, + "learning_rate": 2.1720608327158207e-05, + "loss": 1.9683, + "step": 19546500 + }, + { + "epoch": 56.58, + "learning_rate": 2.1719884679510933e-05, + "loss": 1.9501, + "step": 19547000 + }, + { + "epoch": 56.58, + "learning_rate": 2.1719161031863655e-05, + "loss": 1.9634, + "step": 19547500 + }, + { + "epoch": 56.58, + "learning_rate": 2.1718437384216377e-05, + "loss": 1.9604, + "step": 19548000 + }, + { + "epoch": 56.58, + "learning_rate": 2.1717715183864396e-05, + "loss": 1.9641, + "step": 19548500 + }, + { + "epoch": 56.59, + "learning_rate": 2.171699153621712e-05, + "loss": 1.9767, + "step": 19549000 + }, + { + "epoch": 56.59, + "learning_rate": 2.171626788856984e-05, + "loss": 1.9408, + "step": 19549500 + }, + { + "epoch": 56.59, + "learning_rate": 2.1715544240922563e-05, + "loss": 1.9645, + "step": 19550000 + }, + { + "epoch": 56.59, + "learning_rate": 2.1714822040570582e-05, + "loss": 1.9519, + "step": 19550500 + }, + { + "epoch": 56.59, + "learning_rate": 2.1714098392923308e-05, + "loss": 1.9431, + "step": 19551000 + }, + { + "epoch": 56.59, + "learning_rate": 2.171337474527603e-05, + "loss": 1.9453, + "step": 19551500 + }, + { + "epoch": 56.6, + "learning_rate": 2.1712651097628752e-05, + "loss": 1.96, + "step": 19552000 + }, + { + "epoch": 56.6, + "learning_rate": 2.1711927449981474e-05, + "loss": 1.9692, + "step": 19552500 + }, + { + "epoch": 56.6, + "learning_rate": 2.17112038023342e-05, + "loss": 1.9421, + "step": 19553000 + }, + { + "epoch": 56.6, + "learning_rate": 2.1710480154686922e-05, + "loss": 1.9616, + "step": 19553500 + }, + { + "epoch": 56.6, + "learning_rate": 2.1709756507039644e-05, + "loss": 1.9577, + "step": 19554000 + }, + { + "epoch": 56.6, + "learning_rate": 2.170903285939237e-05, + "loss": 1.999, + "step": 19554500 + }, + { + "epoch": 56.6, + "learning_rate": 2.1708309211745092e-05, + "loss": 1.951, + "step": 19555000 + }, + { + "epoch": 56.61, + "learning_rate": 2.1707585564097815e-05, + "loss": 1.9699, + "step": 19555500 + }, + { + "epoch": 56.61, + "learning_rate": 2.1706861916450537e-05, + "loss": 1.9742, + "step": 19556000 + }, + { + "epoch": 56.61, + "learning_rate": 2.1706138268803262e-05, + "loss": 1.9591, + "step": 19556500 + }, + { + "epoch": 56.61, + "learning_rate": 2.1705414621155985e-05, + "loss": 1.9609, + "step": 19557000 + }, + { + "epoch": 56.61, + "learning_rate": 2.1704692420804e-05, + "loss": 1.9421, + "step": 19557500 + }, + { + "epoch": 56.61, + "learning_rate": 2.1703968773156726e-05, + "loss": 1.9604, + "step": 19558000 + }, + { + "epoch": 56.61, + "learning_rate": 2.170324512550945e-05, + "loss": 1.9562, + "step": 19558500 + }, + { + "epoch": 56.62, + "learning_rate": 2.1702521477862174e-05, + "loss": 1.9531, + "step": 19559000 + }, + { + "epoch": 56.62, + "learning_rate": 2.1701797830214896e-05, + "loss": 1.9705, + "step": 19559500 + }, + { + "epoch": 56.62, + "learning_rate": 2.170107562986291e-05, + "loss": 1.9877, + "step": 19560000 + }, + { + "epoch": 56.62, + "learning_rate": 2.1700351982215637e-05, + "loss": 1.9784, + "step": 19560500 + }, + { + "epoch": 56.62, + "learning_rate": 2.169962833456836e-05, + "loss": 1.9802, + "step": 19561000 + }, + { + "epoch": 56.62, + "learning_rate": 2.169890468692108e-05, + "loss": 1.9876, + "step": 19561500 + }, + { + "epoch": 56.62, + "learning_rate": 2.1698181039273807e-05, + "loss": 1.9396, + "step": 19562000 + }, + { + "epoch": 56.63, + "learning_rate": 2.169745739162653e-05, + "loss": 1.9755, + "step": 19562500 + }, + { + "epoch": 56.63, + "learning_rate": 2.169673519127455e-05, + "loss": 1.9666, + "step": 19563000 + }, + { + "epoch": 56.63, + "learning_rate": 2.169601154362727e-05, + "loss": 1.9597, + "step": 19563500 + }, + { + "epoch": 56.63, + "learning_rate": 2.1695287895979993e-05, + "loss": 1.9616, + "step": 19564000 + }, + { + "epoch": 56.63, + "learning_rate": 2.1694564248332715e-05, + "loss": 1.9583, + "step": 19564500 + }, + { + "epoch": 56.63, + "learning_rate": 2.1693840600685437e-05, + "loss": 1.9627, + "step": 19565000 + }, + { + "epoch": 56.63, + "learning_rate": 2.1693116953038163e-05, + "loss": 1.9424, + "step": 19565500 + }, + { + "epoch": 56.64, + "learning_rate": 2.169239330539089e-05, + "loss": 1.9684, + "step": 19566000 + }, + { + "epoch": 56.64, + "learning_rate": 2.169166965774361e-05, + "loss": 1.963, + "step": 19566500 + }, + { + "epoch": 56.64, + "learning_rate": 2.1690947457391626e-05, + "loss": 1.9817, + "step": 19567000 + }, + { + "epoch": 56.64, + "learning_rate": 2.1690223809744352e-05, + "loss": 1.9232, + "step": 19567500 + }, + { + "epoch": 56.64, + "learning_rate": 2.1689500162097074e-05, + "loss": 1.9583, + "step": 19568000 + }, + { + "epoch": 56.64, + "learning_rate": 2.1688776514449796e-05, + "loss": 1.9473, + "step": 19568500 + }, + { + "epoch": 56.64, + "learning_rate": 2.1688054314097815e-05, + "loss": 1.9817, + "step": 19569000 + }, + { + "epoch": 56.65, + "learning_rate": 2.168733066645054e-05, + "loss": 1.9466, + "step": 19569500 + }, + { + "epoch": 56.65, + "learning_rate": 2.1686607018803263e-05, + "loss": 1.9812, + "step": 19570000 + }, + { + "epoch": 56.65, + "learning_rate": 2.1685883371155985e-05, + "loss": 1.9565, + "step": 19570500 + }, + { + "epoch": 56.65, + "learning_rate": 2.1685159723508708e-05, + "loss": 1.9494, + "step": 19571000 + }, + { + "epoch": 56.65, + "learning_rate": 2.1684437523156727e-05, + "loss": 1.9936, + "step": 19571500 + }, + { + "epoch": 56.65, + "learning_rate": 2.1683715322804742e-05, + "loss": 1.9859, + "step": 19572000 + }, + { + "epoch": 56.65, + "learning_rate": 2.1682991675157464e-05, + "loss": 1.947, + "step": 19572500 + }, + { + "epoch": 56.66, + "learning_rate": 2.168226802751019e-05, + "loss": 1.9835, + "step": 19573000 + }, + { + "epoch": 56.66, + "learning_rate": 2.1681544379862916e-05, + "loss": 1.9452, + "step": 19573500 + }, + { + "epoch": 56.66, + "learning_rate": 2.1680820732215638e-05, + "loss": 1.9639, + "step": 19574000 + }, + { + "epoch": 56.66, + "learning_rate": 2.168009708456836e-05, + "loss": 1.9737, + "step": 19574500 + }, + { + "epoch": 56.66, + "learning_rate": 2.1679373436921082e-05, + "loss": 1.9689, + "step": 19575000 + }, + { + "epoch": 56.66, + "learning_rate": 2.1678649789273805e-05, + "loss": 1.9437, + "step": 19575500 + }, + { + "epoch": 56.66, + "learning_rate": 2.1677927588921824e-05, + "loss": 1.9667, + "step": 19576000 + }, + { + "epoch": 56.67, + "learning_rate": 2.1677203941274546e-05, + "loss": 1.9592, + "step": 19576500 + }, + { + "epoch": 56.67, + "learning_rate": 2.167648029362727e-05, + "loss": 1.9729, + "step": 19577000 + }, + { + "epoch": 56.67, + "learning_rate": 2.1675756645979994e-05, + "loss": 1.9647, + "step": 19577500 + }, + { + "epoch": 56.67, + "learning_rate": 2.1675032998332716e-05, + "loss": 1.949, + "step": 19578000 + }, + { + "epoch": 56.67, + "learning_rate": 2.167430935068544e-05, + "loss": 1.9583, + "step": 19578500 + }, + { + "epoch": 56.67, + "learning_rate": 2.1673585703038164e-05, + "loss": 1.9791, + "step": 19579000 + }, + { + "epoch": 56.67, + "learning_rate": 2.1672862055390886e-05, + "loss": 1.9835, + "step": 19579500 + }, + { + "epoch": 56.68, + "learning_rate": 2.1672138407743608e-05, + "loss": 1.9565, + "step": 19580000 + }, + { + "epoch": 56.68, + "learning_rate": 2.1671414760096334e-05, + "loss": 1.996, + "step": 19580500 + }, + { + "epoch": 56.68, + "learning_rate": 2.1670691112449056e-05, + "loss": 1.9642, + "step": 19581000 + }, + { + "epoch": 56.68, + "learning_rate": 2.1669968912097075e-05, + "loss": 1.9712, + "step": 19581500 + }, + { + "epoch": 56.68, + "learning_rate": 2.166924671174509e-05, + "loss": 1.9704, + "step": 19582000 + }, + { + "epoch": 56.68, + "learning_rate": 2.1668523064097816e-05, + "loss": 1.9702, + "step": 19582500 + }, + { + "epoch": 56.68, + "learning_rate": 2.166779941645054e-05, + "loss": 1.9773, + "step": 19583000 + }, + { + "epoch": 56.69, + "learning_rate": 2.166707576880326e-05, + "loss": 1.9663, + "step": 19583500 + }, + { + "epoch": 56.69, + "learning_rate": 2.166635356845128e-05, + "loss": 1.97, + "step": 19584000 + }, + { + "epoch": 56.69, + "learning_rate": 2.16656313680993e-05, + "loss": 1.9693, + "step": 19584500 + }, + { + "epoch": 56.69, + "learning_rate": 2.166490772045202e-05, + "loss": 1.9759, + "step": 19585000 + }, + { + "epoch": 56.69, + "learning_rate": 2.1664184072804743e-05, + "loss": 1.9802, + "step": 19585500 + }, + { + "epoch": 56.69, + "learning_rate": 2.1663460425157465e-05, + "loss": 1.9853, + "step": 19586000 + }, + { + "epoch": 56.69, + "learning_rate": 2.166273677751019e-05, + "loss": 1.9624, + "step": 19586500 + }, + { + "epoch": 56.7, + "learning_rate": 2.1662013129862913e-05, + "loss": 1.9783, + "step": 19587000 + }, + { + "epoch": 56.7, + "learning_rate": 2.1661289482215635e-05, + "loss": 1.9611, + "step": 19587500 + }, + { + "epoch": 56.7, + "learning_rate": 2.166056583456836e-05, + "loss": 1.9719, + "step": 19588000 + }, + { + "epoch": 56.7, + "learning_rate": 2.1659842186921083e-05, + "loss": 1.9477, + "step": 19588500 + }, + { + "epoch": 56.7, + "learning_rate": 2.1659118539273805e-05, + "loss": 1.9462, + "step": 19589000 + }, + { + "epoch": 56.7, + "learning_rate": 2.1658396338921824e-05, + "loss": 1.9791, + "step": 19589500 + }, + { + "epoch": 56.71, + "learning_rate": 2.1657672691274547e-05, + "loss": 1.9708, + "step": 19590000 + }, + { + "epoch": 56.71, + "learning_rate": 2.1656950490922566e-05, + "loss": 1.9485, + "step": 19590500 + }, + { + "epoch": 56.71, + "learning_rate": 2.1656226843275288e-05, + "loss": 1.9679, + "step": 19591000 + }, + { + "epoch": 56.71, + "learning_rate": 2.165550319562801e-05, + "loss": 1.973, + "step": 19591500 + }, + { + "epoch": 56.71, + "learning_rate": 2.1654779547980736e-05, + "loss": 1.9795, + "step": 19592000 + }, + { + "epoch": 56.71, + "learning_rate": 2.1654055900333458e-05, + "loss": 1.9678, + "step": 19592500 + }, + { + "epoch": 56.71, + "learning_rate": 2.165333225268618e-05, + "loss": 1.9546, + "step": 19593000 + }, + { + "epoch": 56.72, + "learning_rate": 2.16526100523342e-05, + "loss": 1.9478, + "step": 19593500 + }, + { + "epoch": 56.72, + "learning_rate": 2.165188640468692e-05, + "loss": 1.9738, + "step": 19594000 + }, + { + "epoch": 56.72, + "learning_rate": 2.1651162757039643e-05, + "loss": 1.9854, + "step": 19594500 + }, + { + "epoch": 56.72, + "learning_rate": 2.165043910939237e-05, + "loss": 1.9496, + "step": 19595000 + }, + { + "epoch": 56.72, + "learning_rate": 2.1649715461745095e-05, + "loss": 1.9479, + "step": 19595500 + }, + { + "epoch": 56.72, + "learning_rate": 2.1648991814097817e-05, + "loss": 1.9655, + "step": 19596000 + }, + { + "epoch": 56.72, + "learning_rate": 2.164826816645054e-05, + "loss": 1.9627, + "step": 19596500 + }, + { + "epoch": 56.73, + "learning_rate": 2.164754451880326e-05, + "loss": 1.9507, + "step": 19597000 + }, + { + "epoch": 56.73, + "learning_rate": 2.1646820871155984e-05, + "loss": 1.9623, + "step": 19597500 + }, + { + "epoch": 56.73, + "learning_rate": 2.1646097223508706e-05, + "loss": 1.9745, + "step": 19598000 + }, + { + "epoch": 56.73, + "learning_rate": 2.164537357586143e-05, + "loss": 1.9535, + "step": 19598500 + }, + { + "epoch": 56.73, + "learning_rate": 2.1644649928214157e-05, + "loss": 1.9725, + "step": 19599000 + }, + { + "epoch": 56.73, + "learning_rate": 2.164392628056688e-05, + "loss": 1.9421, + "step": 19599500 + }, + { + "epoch": 56.73, + "learning_rate": 2.164320552751019e-05, + "loss": 1.9616, + "step": 19600000 + }, + { + "epoch": 56.74, + "learning_rate": 2.1642481879862914e-05, + "loss": 1.9441, + "step": 19600500 + }, + { + "epoch": 56.74, + "learning_rate": 2.1641758232215636e-05, + "loss": 1.9535, + "step": 19601000 + }, + { + "epoch": 56.74, + "learning_rate": 2.1641036031863655e-05, + "loss": 1.9688, + "step": 19601500 + }, + { + "epoch": 56.74, + "learning_rate": 2.1640312384216377e-05, + "loss": 1.9548, + "step": 19602000 + }, + { + "epoch": 56.74, + "learning_rate": 2.16395887365691e-05, + "loss": 1.9623, + "step": 19602500 + }, + { + "epoch": 56.74, + "learning_rate": 2.163886653621712e-05, + "loss": 1.9804, + "step": 19603000 + }, + { + "epoch": 56.74, + "learning_rate": 2.1638142888569844e-05, + "loss": 1.9564, + "step": 19603500 + }, + { + "epoch": 56.75, + "learning_rate": 2.1637419240922566e-05, + "loss": 1.9593, + "step": 19604000 + }, + { + "epoch": 56.75, + "learning_rate": 2.163669559327529e-05, + "loss": 1.9601, + "step": 19604500 + }, + { + "epoch": 56.75, + "learning_rate": 2.163597194562801e-05, + "loss": 1.9593, + "step": 19605000 + }, + { + "epoch": 56.75, + "learning_rate": 2.1635248297980733e-05, + "loss": 1.9812, + "step": 19605500 + }, + { + "epoch": 56.75, + "learning_rate": 2.1634526097628752e-05, + "loss": 1.9767, + "step": 19606000 + }, + { + "epoch": 56.75, + "learning_rate": 2.1633802449981474e-05, + "loss": 1.9852, + "step": 19606500 + }, + { + "epoch": 56.75, + "learning_rate": 2.16330788023342e-05, + "loss": 1.9489, + "step": 19607000 + }, + { + "epoch": 56.76, + "learning_rate": 2.1632355154686922e-05, + "loss": 1.9537, + "step": 19607500 + }, + { + "epoch": 56.76, + "learning_rate": 2.1631631507039644e-05, + "loss": 1.9589, + "step": 19608000 + }, + { + "epoch": 56.76, + "learning_rate": 2.1630909306687663e-05, + "loss": 1.9645, + "step": 19608500 + }, + { + "epoch": 56.76, + "learning_rate": 2.1630185659040385e-05, + "loss": 1.9843, + "step": 19609000 + }, + { + "epoch": 56.76, + "learning_rate": 2.1629462011393108e-05, + "loss": 1.9551, + "step": 19609500 + }, + { + "epoch": 56.76, + "learning_rate": 2.1628738363745833e-05, + "loss": 1.9733, + "step": 19610000 + }, + { + "epoch": 56.76, + "learning_rate": 2.1628014716098556e-05, + "loss": 1.9675, + "step": 19610500 + }, + { + "epoch": 56.77, + "learning_rate": 2.162729106845128e-05, + "loss": 1.9801, + "step": 19611000 + }, + { + "epoch": 56.77, + "learning_rate": 2.1626567420804003e-05, + "loss": 1.9728, + "step": 19611500 + }, + { + "epoch": 56.77, + "learning_rate": 2.1625845220452022e-05, + "loss": 1.9836, + "step": 19612000 + }, + { + "epoch": 56.77, + "learning_rate": 2.1625123020100038e-05, + "loss": 1.9514, + "step": 19612500 + }, + { + "epoch": 56.77, + "learning_rate": 2.162439937245276e-05, + "loss": 1.9732, + "step": 19613000 + }, + { + "epoch": 56.77, + "learning_rate": 2.1623675724805482e-05, + "loss": 1.9761, + "step": 19613500 + }, + { + "epoch": 56.77, + "learning_rate": 2.1622952077158208e-05, + "loss": 1.9617, + "step": 19614000 + }, + { + "epoch": 56.78, + "learning_rate": 2.1622228429510934e-05, + "loss": 1.982, + "step": 19614500 + }, + { + "epoch": 56.78, + "learning_rate": 2.1621504781863656e-05, + "loss": 1.956, + "step": 19615000 + }, + { + "epoch": 56.78, + "learning_rate": 2.1620781134216378e-05, + "loss": 1.9666, + "step": 19615500 + }, + { + "epoch": 56.78, + "learning_rate": 2.16200574865691e-05, + "loss": 1.966, + "step": 19616000 + }, + { + "epoch": 56.78, + "learning_rate": 2.161933528621712e-05, + "loss": 1.9663, + "step": 19616500 + }, + { + "epoch": 56.78, + "learning_rate": 2.161861163856984e-05, + "loss": 1.9762, + "step": 19617000 + }, + { + "epoch": 56.78, + "learning_rate": 2.1617887990922564e-05, + "loss": 1.9689, + "step": 19617500 + }, + { + "epoch": 56.79, + "learning_rate": 2.1617164343275286e-05, + "loss": 1.972, + "step": 19618000 + }, + { + "epoch": 56.79, + "learning_rate": 2.161644069562801e-05, + "loss": 1.9811, + "step": 19618500 + }, + { + "epoch": 56.79, + "learning_rate": 2.1615717047980734e-05, + "loss": 1.9938, + "step": 19619000 + }, + { + "epoch": 56.79, + "learning_rate": 2.1614994847628753e-05, + "loss": 1.9598, + "step": 19619500 + }, + { + "epoch": 56.79, + "learning_rate": 2.1614271199981475e-05, + "loss": 1.9559, + "step": 19620000 + }, + { + "epoch": 56.79, + "learning_rate": 2.1613547552334197e-05, + "loss": 1.9538, + "step": 19620500 + }, + { + "epoch": 56.79, + "learning_rate": 2.1612823904686923e-05, + "loss": 1.9417, + "step": 19621000 + }, + { + "epoch": 56.8, + "learning_rate": 2.1612100257039645e-05, + "loss": 1.9832, + "step": 19621500 + }, + { + "epoch": 56.8, + "learning_rate": 2.161137660939237e-05, + "loss": 1.9684, + "step": 19622000 + }, + { + "epoch": 56.8, + "learning_rate": 2.1610654409040386e-05, + "loss": 1.9461, + "step": 19622500 + }, + { + "epoch": 56.8, + "learning_rate": 2.160993076139311e-05, + "loss": 1.9817, + "step": 19623000 + }, + { + "epoch": 56.8, + "learning_rate": 2.1609207113745834e-05, + "loss": 1.9616, + "step": 19623500 + }, + { + "epoch": 56.8, + "learning_rate": 2.1608483466098556e-05, + "loss": 1.9839, + "step": 19624000 + }, + { + "epoch": 56.8, + "learning_rate": 2.160775981845128e-05, + "loss": 1.9638, + "step": 19624500 + }, + { + "epoch": 56.81, + "learning_rate": 2.1607036170804e-05, + "loss": 1.9631, + "step": 19625000 + }, + { + "epoch": 56.81, + "learning_rate": 2.1606312523156726e-05, + "loss": 1.9483, + "step": 19625500 + }, + { + "epoch": 56.81, + "learning_rate": 2.160558887550945e-05, + "loss": 1.9391, + "step": 19626000 + }, + { + "epoch": 56.81, + "learning_rate": 2.1604865227862174e-05, + "loss": 1.9661, + "step": 19626500 + }, + { + "epoch": 56.81, + "learning_rate": 2.1604141580214897e-05, + "loss": 1.9768, + "step": 19627000 + }, + { + "epoch": 56.81, + "learning_rate": 2.160341793256762e-05, + "loss": 1.9541, + "step": 19627500 + }, + { + "epoch": 56.82, + "learning_rate": 2.160269428492034e-05, + "loss": 1.9827, + "step": 19628000 + }, + { + "epoch": 56.82, + "learning_rate": 2.1601970637273063e-05, + "loss": 1.9915, + "step": 19628500 + }, + { + "epoch": 56.82, + "learning_rate": 2.1601248436921082e-05, + "loss": 1.9656, + "step": 19629000 + }, + { + "epoch": 56.82, + "learning_rate": 2.1600524789273808e-05, + "loss": 1.961, + "step": 19629500 + }, + { + "epoch": 56.82, + "learning_rate": 2.159980114162653e-05, + "loss": 1.958, + "step": 19630000 + }, + { + "epoch": 56.82, + "learning_rate": 2.1599077493979252e-05, + "loss": 1.9883, + "step": 19630500 + }, + { + "epoch": 56.82, + "learning_rate": 2.1598353846331974e-05, + "loss": 1.9683, + "step": 19631000 + }, + { + "epoch": 56.83, + "learning_rate": 2.15976301986847e-05, + "loss": 1.9795, + "step": 19631500 + }, + { + "epoch": 56.83, + "learning_rate": 2.1596906551037422e-05, + "loss": 1.9859, + "step": 19632000 + }, + { + "epoch": 56.83, + "learning_rate": 2.1596182903390145e-05, + "loss": 1.9696, + "step": 19632500 + }, + { + "epoch": 56.83, + "learning_rate": 2.159545925574287e-05, + "loss": 1.982, + "step": 19633000 + }, + { + "epoch": 56.83, + "learning_rate": 2.1594737055390886e-05, + "loss": 1.9465, + "step": 19633500 + }, + { + "epoch": 56.83, + "learning_rate": 2.159401340774361e-05, + "loss": 1.9697, + "step": 19634000 + }, + { + "epoch": 56.83, + "learning_rate": 2.1593291207391627e-05, + "loss": 1.9841, + "step": 19634500 + }, + { + "epoch": 56.84, + "learning_rate": 2.1592569007039646e-05, + "loss": 1.9683, + "step": 19635000 + }, + { + "epoch": 56.84, + "learning_rate": 2.1591845359392368e-05, + "loss": 1.9764, + "step": 19635500 + }, + { + "epoch": 56.84, + "learning_rate": 2.159112171174509e-05, + "loss": 1.9724, + "step": 19636000 + }, + { + "epoch": 56.84, + "learning_rate": 2.1590398064097813e-05, + "loss": 1.9535, + "step": 19636500 + }, + { + "epoch": 56.84, + "learning_rate": 2.1589674416450538e-05, + "loss": 1.9707, + "step": 19637000 + }, + { + "epoch": 56.84, + "learning_rate": 2.1588950768803264e-05, + "loss": 1.9496, + "step": 19637500 + }, + { + "epoch": 56.84, + "learning_rate": 2.1588227121155986e-05, + "loss": 1.9384, + "step": 19638000 + }, + { + "epoch": 56.85, + "learning_rate": 2.1587504920804e-05, + "loss": 1.9661, + "step": 19638500 + }, + { + "epoch": 56.85, + "learning_rate": 2.1586781273156724e-05, + "loss": 1.9421, + "step": 19639000 + }, + { + "epoch": 56.85, + "learning_rate": 2.158605762550945e-05, + "loss": 1.9665, + "step": 19639500 + }, + { + "epoch": 56.85, + "learning_rate": 2.158533397786217e-05, + "loss": 1.9631, + "step": 19640000 + }, + { + "epoch": 56.85, + "learning_rate": 2.1584610330214897e-05, + "loss": 1.9525, + "step": 19640500 + }, + { + "epoch": 56.85, + "learning_rate": 2.158388668256762e-05, + "loss": 1.9611, + "step": 19641000 + }, + { + "epoch": 56.85, + "learning_rate": 2.1583163034920342e-05, + "loss": 1.9642, + "step": 19641500 + }, + { + "epoch": 56.86, + "learning_rate": 2.1582439387273064e-05, + "loss": 1.9614, + "step": 19642000 + }, + { + "epoch": 56.86, + "learning_rate": 2.158171573962579e-05, + "loss": 1.9788, + "step": 19642500 + }, + { + "epoch": 56.86, + "learning_rate": 2.1580992091978512e-05, + "loss": 1.9642, + "step": 19643000 + }, + { + "epoch": 56.86, + "learning_rate": 2.1580268444331234e-05, + "loss": 1.9552, + "step": 19643500 + }, + { + "epoch": 56.86, + "learning_rate": 2.157954479668396e-05, + "loss": 1.9702, + "step": 19644000 + }, + { + "epoch": 56.86, + "learning_rate": 2.1578822596331975e-05, + "loss": 1.9677, + "step": 19644500 + }, + { + "epoch": 56.86, + "learning_rate": 2.15780989486847e-05, + "loss": 1.9631, + "step": 19645000 + }, + { + "epoch": 56.87, + "learning_rate": 2.1577375301037423e-05, + "loss": 1.9803, + "step": 19645500 + }, + { + "epoch": 56.87, + "learning_rate": 2.157665310068544e-05, + "loss": 1.9615, + "step": 19646000 + }, + { + "epoch": 56.87, + "learning_rate": 2.1575930900333458e-05, + "loss": 1.9631, + "step": 19646500 + }, + { + "epoch": 56.87, + "learning_rate": 2.1575208699981477e-05, + "loss": 1.9578, + "step": 19647000 + }, + { + "epoch": 56.87, + "learning_rate": 2.15744850523342e-05, + "loss": 1.9515, + "step": 19647500 + }, + { + "epoch": 56.87, + "learning_rate": 2.157376140468692e-05, + "loss": 1.976, + "step": 19648000 + }, + { + "epoch": 56.87, + "learning_rate": 2.1573037757039647e-05, + "loss": 1.9958, + "step": 19648500 + }, + { + "epoch": 56.88, + "learning_rate": 2.157231410939237e-05, + "loss": 1.9684, + "step": 19649000 + }, + { + "epoch": 56.88, + "learning_rate": 2.157159046174509e-05, + "loss": 1.948, + "step": 19649500 + }, + { + "epoch": 56.88, + "learning_rate": 2.1570866814097813e-05, + "loss": 1.9849, + "step": 19650000 + }, + { + "epoch": 56.88, + "learning_rate": 2.157014316645054e-05, + "loss": 1.9866, + "step": 19650500 + }, + { + "epoch": 56.88, + "learning_rate": 2.156941951880326e-05, + "loss": 1.9624, + "step": 19651000 + }, + { + "epoch": 56.88, + "learning_rate": 2.1568695871155987e-05, + "loss": 1.9847, + "step": 19651500 + }, + { + "epoch": 56.88, + "learning_rate": 2.156797222350871e-05, + "loss": 1.9736, + "step": 19652000 + }, + { + "epoch": 56.89, + "learning_rate": 2.156724857586143e-05, + "loss": 1.9606, + "step": 19652500 + }, + { + "epoch": 56.89, + "learning_rate": 2.1566524928214154e-05, + "loss": 1.9559, + "step": 19653000 + }, + { + "epoch": 56.89, + "learning_rate": 2.1565802727862172e-05, + "loss": 1.9505, + "step": 19653500 + }, + { + "epoch": 56.89, + "learning_rate": 2.1565079080214895e-05, + "loss": 1.971, + "step": 19654000 + }, + { + "epoch": 56.89, + "learning_rate": 2.1564355432567617e-05, + "loss": 1.9673, + "step": 19654500 + }, + { + "epoch": 56.89, + "learning_rate": 2.156363178492034e-05, + "loss": 1.9632, + "step": 19655000 + }, + { + "epoch": 56.89, + "learning_rate": 2.1562908137273065e-05, + "loss": 1.9599, + "step": 19655500 + }, + { + "epoch": 56.9, + "learning_rate": 2.1562185936921084e-05, + "loss": 1.9625, + "step": 19656000 + }, + { + "epoch": 56.9, + "learning_rate": 2.1561462289273806e-05, + "loss": 1.9913, + "step": 19656500 + }, + { + "epoch": 56.9, + "learning_rate": 2.1560738641626528e-05, + "loss": 1.981, + "step": 19657000 + }, + { + "epoch": 56.9, + "learning_rate": 2.1560016441274547e-05, + "loss": 1.9743, + "step": 19657500 + }, + { + "epoch": 56.9, + "learning_rate": 2.155929279362727e-05, + "loss": 1.9534, + "step": 19658000 + }, + { + "epoch": 56.9, + "learning_rate": 2.155856914597999e-05, + "loss": 1.966, + "step": 19658500 + }, + { + "epoch": 56.9, + "learning_rate": 2.1557845498332714e-05, + "loss": 1.9804, + "step": 19659000 + }, + { + "epoch": 56.91, + "learning_rate": 2.155712185068544e-05, + "loss": 1.9401, + "step": 19659500 + }, + { + "epoch": 56.91, + "learning_rate": 2.1556398203038165e-05, + "loss": 1.9734, + "step": 19660000 + }, + { + "epoch": 56.91, + "learning_rate": 2.1555674555390887e-05, + "loss": 1.9583, + "step": 19660500 + }, + { + "epoch": 56.91, + "learning_rate": 2.155495090774361e-05, + "loss": 1.964, + "step": 19661000 + }, + { + "epoch": 56.91, + "learning_rate": 2.1554227260096332e-05, + "loss": 1.9666, + "step": 19661500 + }, + { + "epoch": 56.91, + "learning_rate": 2.1553503612449054e-05, + "loss": 1.976, + "step": 19662000 + }, + { + "epoch": 56.91, + "learning_rate": 2.155277996480178e-05, + "loss": 1.9587, + "step": 19662500 + }, + { + "epoch": 56.92, + "learning_rate": 2.15520577644498e-05, + "loss": 1.9517, + "step": 19663000 + }, + { + "epoch": 56.92, + "learning_rate": 2.155133411680252e-05, + "loss": 1.9782, + "step": 19663500 + }, + { + "epoch": 56.92, + "learning_rate": 2.1550610469155243e-05, + "loss": 1.9648, + "step": 19664000 + }, + { + "epoch": 56.92, + "learning_rate": 2.1549886821507965e-05, + "loss": 1.9659, + "step": 19664500 + }, + { + "epoch": 56.92, + "learning_rate": 2.1549164621155984e-05, + "loss": 1.9844, + "step": 19665000 + }, + { + "epoch": 56.92, + "learning_rate": 2.1548440973508706e-05, + "loss": 1.9753, + "step": 19665500 + }, + { + "epoch": 56.93, + "learning_rate": 2.154771732586143e-05, + "loss": 1.9553, + "step": 19666000 + }, + { + "epoch": 56.93, + "learning_rate": 2.1546993678214154e-05, + "loss": 2.0003, + "step": 19666500 + }, + { + "epoch": 56.93, + "learning_rate": 2.154627003056688e-05, + "loss": 1.9837, + "step": 19667000 + }, + { + "epoch": 56.93, + "learning_rate": 2.1545546382919602e-05, + "loss": 1.9762, + "step": 19667500 + }, + { + "epoch": 56.93, + "learning_rate": 2.1544824182567618e-05, + "loss": 1.9743, + "step": 19668000 + }, + { + "epoch": 56.93, + "learning_rate": 2.1544100534920343e-05, + "loss": 1.9557, + "step": 19668500 + }, + { + "epoch": 56.93, + "learning_rate": 2.1543376887273066e-05, + "loss": 1.9737, + "step": 19669000 + }, + { + "epoch": 56.94, + "learning_rate": 2.1542653239625788e-05, + "loss": 1.9606, + "step": 19669500 + }, + { + "epoch": 56.94, + "learning_rate": 2.154192959197851e-05, + "loss": 1.9955, + "step": 19670000 + }, + { + "epoch": 56.94, + "learning_rate": 2.1541205944331236e-05, + "loss": 1.9554, + "step": 19670500 + }, + { + "epoch": 56.94, + "learning_rate": 2.1540482296683958e-05, + "loss": 1.973, + "step": 19671000 + }, + { + "epoch": 56.94, + "learning_rate": 2.153975864903668e-05, + "loss": 1.9724, + "step": 19671500 + }, + { + "epoch": 56.94, + "learning_rate": 2.1539035001389406e-05, + "loss": 1.9744, + "step": 19672000 + }, + { + "epoch": 56.94, + "learning_rate": 2.1538311353742128e-05, + "loss": 1.9584, + "step": 19672500 + }, + { + "epoch": 56.95, + "learning_rate": 2.153758770609485e-05, + "loss": 1.9721, + "step": 19673000 + }, + { + "epoch": 56.95, + "learning_rate": 2.1536864058447572e-05, + "loss": 1.9791, + "step": 19673500 + }, + { + "epoch": 56.95, + "learning_rate": 2.1536140410800298e-05, + "loss": 1.9572, + "step": 19674000 + }, + { + "epoch": 56.95, + "learning_rate": 2.153541676315302e-05, + "loss": 1.9361, + "step": 19674500 + }, + { + "epoch": 56.95, + "learning_rate": 2.1534693115505743e-05, + "loss": 1.9845, + "step": 19675000 + }, + { + "epoch": 56.95, + "learning_rate": 2.1533969467858468e-05, + "loss": 1.9507, + "step": 19675500 + }, + { + "epoch": 56.95, + "learning_rate": 2.1533247267506484e-05, + "loss": 1.9672, + "step": 19676000 + }, + { + "epoch": 56.96, + "learning_rate": 2.1532523619859206e-05, + "loss": 1.987, + "step": 19676500 + }, + { + "epoch": 56.96, + "learning_rate": 2.153179997221193e-05, + "loss": 1.9599, + "step": 19677000 + }, + { + "epoch": 56.96, + "learning_rate": 2.153107777185995e-05, + "loss": 1.9523, + "step": 19677500 + }, + { + "epoch": 56.96, + "learning_rate": 2.1530354124212673e-05, + "loss": 1.9628, + "step": 19678000 + }, + { + "epoch": 56.96, + "learning_rate": 2.1529630476565395e-05, + "loss": 1.985, + "step": 19678500 + }, + { + "epoch": 56.96, + "learning_rate": 2.1528906828918117e-05, + "loss": 1.95, + "step": 19679000 + }, + { + "epoch": 56.96, + "learning_rate": 2.1528184628566136e-05, + "loss": 1.9915, + "step": 19679500 + }, + { + "epoch": 56.97, + "learning_rate": 2.152746098091886e-05, + "loss": 1.9632, + "step": 19680000 + }, + { + "epoch": 56.97, + "learning_rate": 2.152673733327158e-05, + "loss": 1.9815, + "step": 19680500 + }, + { + "epoch": 56.97, + "learning_rate": 2.1526013685624306e-05, + "loss": 1.9794, + "step": 19681000 + }, + { + "epoch": 56.97, + "learning_rate": 2.1525291485272325e-05, + "loss": 1.9876, + "step": 19681500 + }, + { + "epoch": 56.97, + "learning_rate": 2.1524567837625047e-05, + "loss": 1.9702, + "step": 19682000 + }, + { + "epoch": 56.97, + "learning_rate": 2.152384418997777e-05, + "loss": 1.9883, + "step": 19682500 + }, + { + "epoch": 56.97, + "learning_rate": 2.152312198962579e-05, + "loss": 1.9543, + "step": 19683000 + }, + { + "epoch": 56.98, + "learning_rate": 2.152239834197851e-05, + "loss": 1.969, + "step": 19683500 + }, + { + "epoch": 56.98, + "learning_rate": 2.1521674694331233e-05, + "loss": 1.9501, + "step": 19684000 + }, + { + "epoch": 56.98, + "learning_rate": 2.1520951046683955e-05, + "loss": 1.979, + "step": 19684500 + }, + { + "epoch": 56.98, + "learning_rate": 2.152022739903668e-05, + "loss": 1.9686, + "step": 19685000 + }, + { + "epoch": 56.98, + "learning_rate": 2.15195051986847e-05, + "loss": 1.9717, + "step": 19685500 + }, + { + "epoch": 56.98, + "learning_rate": 2.1518781551037422e-05, + "loss": 1.9929, + "step": 19686000 + }, + { + "epoch": 56.98, + "learning_rate": 2.1518057903390144e-05, + "loss": 1.9657, + "step": 19686500 + }, + { + "epoch": 56.99, + "learning_rate": 2.151733425574287e-05, + "loss": 1.949, + "step": 19687000 + }, + { + "epoch": 56.99, + "learning_rate": 2.1516610608095592e-05, + "loss": 1.9635, + "step": 19687500 + }, + { + "epoch": 56.99, + "learning_rate": 2.1515886960448314e-05, + "loss": 1.9742, + "step": 19688000 + }, + { + "epoch": 56.99, + "learning_rate": 2.1515163312801037e-05, + "loss": 1.953, + "step": 19688500 + }, + { + "epoch": 56.99, + "learning_rate": 2.1514439665153762e-05, + "loss": 1.9405, + "step": 19689000 + }, + { + "epoch": 56.99, + "learning_rate": 2.1513716017506485e-05, + "loss": 1.9907, + "step": 19689500 + }, + { + "epoch": 56.99, + "learning_rate": 2.1512992369859207e-05, + "loss": 1.9667, + "step": 19690000 + }, + { + "epoch": 57.0, + "learning_rate": 2.1512268722211932e-05, + "loss": 1.9593, + "step": 19690500 + }, + { + "epoch": 57.0, + "learning_rate": 2.1511545074564655e-05, + "loss": 1.9784, + "step": 19691000 + }, + { + "epoch": 57.0, + "learning_rate": 2.1510821426917377e-05, + "loss": 1.9584, + "step": 19691500 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.6764066632297837, + "eval_accuracy_mlm": 0.6428924696020535, + "eval_accuracy_nsp": 0.8560700847414904, + "eval_loss": 2.166233777999878, + "eval_runtime": 331.9117, + "eval_samples_per_second": 1314.765, + "eval_steps_per_second": 54.783, + "step": 19691904 + }, + { + "epoch": 57.0, + "learning_rate": 2.1510099226565396e-05, + "loss": 1.9737, + "step": 19692000 + }, + { + "epoch": 57.0, + "learning_rate": 2.150937557891812e-05, + "loss": 1.9697, + "step": 19692500 + }, + { + "epoch": 57.0, + "learning_rate": 2.1508651931270844e-05, + "loss": 1.9555, + "step": 19693000 + }, + { + "epoch": 57.0, + "learning_rate": 2.150792973091886e-05, + "loss": 1.9462, + "step": 19693500 + }, + { + "epoch": 57.01, + "learning_rate": 2.1507206083271585e-05, + "loss": 1.9601, + "step": 19694000 + }, + { + "epoch": 57.01, + "learning_rate": 2.1506482435624307e-05, + "loss": 1.9598, + "step": 19694500 + }, + { + "epoch": 57.01, + "learning_rate": 2.150575878797703e-05, + "loss": 1.9496, + "step": 19695000 + }, + { + "epoch": 57.01, + "learning_rate": 2.150503514032975e-05, + "loss": 1.9817, + "step": 19695500 + }, + { + "epoch": 57.01, + "learning_rate": 2.1504311492682477e-05, + "loss": 1.9445, + "step": 19696000 + }, + { + "epoch": 57.01, + "learning_rate": 2.15035878450352e-05, + "loss": 1.9501, + "step": 19696500 + }, + { + "epoch": 57.01, + "learning_rate": 2.150286419738792e-05, + "loss": 1.9462, + "step": 19697000 + }, + { + "epoch": 57.02, + "learning_rate": 2.150214199703594e-05, + "loss": 1.9322, + "step": 19697500 + }, + { + "epoch": 57.02, + "learning_rate": 2.1501418349388663e-05, + "loss": 1.9825, + "step": 19698000 + }, + { + "epoch": 57.02, + "learning_rate": 2.1500694701741385e-05, + "loss": 1.9342, + "step": 19698500 + }, + { + "epoch": 57.02, + "learning_rate": 2.149997105409411e-05, + "loss": 1.9475, + "step": 19699000 + }, + { + "epoch": 57.02, + "learning_rate": 2.1499247406446833e-05, + "loss": 1.9187, + "step": 19699500 + }, + { + "epoch": 57.02, + "learning_rate": 2.149852375879956e-05, + "loss": 1.9676, + "step": 19700000 + }, + { + "epoch": 57.02, + "learning_rate": 2.1497801558447574e-05, + "loss": 1.9836, + "step": 19700500 + }, + { + "epoch": 57.03, + "learning_rate": 2.1497077910800296e-05, + "loss": 1.9237, + "step": 19701000 + }, + { + "epoch": 57.03, + "learning_rate": 2.149635715774361e-05, + "loss": 1.9256, + "step": 19701500 + }, + { + "epoch": 57.03, + "learning_rate": 2.1495633510096334e-05, + "loss": 1.9687, + "step": 19702000 + }, + { + "epoch": 57.03, + "learning_rate": 2.1494909862449056e-05, + "loss": 1.9474, + "step": 19702500 + }, + { + "epoch": 57.03, + "learning_rate": 2.149418621480178e-05, + "loss": 1.9543, + "step": 19703000 + }, + { + "epoch": 57.03, + "learning_rate": 2.14934625671545e-05, + "loss": 1.9065, + "step": 19703500 + }, + { + "epoch": 57.04, + "learning_rate": 2.1492738919507227e-05, + "loss": 1.9547, + "step": 19704000 + }, + { + "epoch": 57.04, + "learning_rate": 2.149201527185995e-05, + "loss": 1.9595, + "step": 19704500 + }, + { + "epoch": 57.04, + "learning_rate": 2.1491291624212674e-05, + "loss": 1.9643, + "step": 19705000 + }, + { + "epoch": 57.04, + "learning_rate": 2.1490567976565397e-05, + "loss": 1.9671, + "step": 19705500 + }, + { + "epoch": 57.04, + "learning_rate": 2.148984432891812e-05, + "loss": 1.9669, + "step": 19706000 + }, + { + "epoch": 57.04, + "learning_rate": 2.148912068127084e-05, + "loss": 1.9643, + "step": 19706500 + }, + { + "epoch": 57.04, + "learning_rate": 2.1488397033623563e-05, + "loss": 1.9543, + "step": 19707000 + }, + { + "epoch": 57.05, + "learning_rate": 2.1487674833271586e-05, + "loss": 1.963, + "step": 19707500 + }, + { + "epoch": 57.05, + "learning_rate": 2.1486951185624308e-05, + "loss": 1.9328, + "step": 19708000 + }, + { + "epoch": 57.05, + "learning_rate": 2.148622753797703e-05, + "loss": 1.971, + "step": 19708500 + }, + { + "epoch": 57.05, + "learning_rate": 2.1485503890329752e-05, + "loss": 1.957, + "step": 19709000 + }, + { + "epoch": 57.05, + "learning_rate": 2.1484780242682475e-05, + "loss": 1.9477, + "step": 19709500 + }, + { + "epoch": 57.05, + "learning_rate": 2.14840565950352e-05, + "loss": 1.9622, + "step": 19710000 + }, + { + "epoch": 57.05, + "learning_rate": 2.1483332947387922e-05, + "loss": 1.9549, + "step": 19710500 + }, + { + "epoch": 57.06, + "learning_rate": 2.1482610747035938e-05, + "loss": 1.9372, + "step": 19711000 + }, + { + "epoch": 57.06, + "learning_rate": 2.1481887099388664e-05, + "loss": 1.9336, + "step": 19711500 + }, + { + "epoch": 57.06, + "learning_rate": 2.1481163451741386e-05, + "loss": 1.9321, + "step": 19712000 + }, + { + "epoch": 57.06, + "learning_rate": 2.148043980409411e-05, + "loss": 1.9475, + "step": 19712500 + }, + { + "epoch": 57.06, + "learning_rate": 2.1479716156446834e-05, + "loss": 1.9703, + "step": 19713000 + }, + { + "epoch": 57.06, + "learning_rate": 2.1478992508799556e-05, + "loss": 1.9622, + "step": 19713500 + }, + { + "epoch": 57.06, + "learning_rate": 2.1478268861152278e-05, + "loss": 1.9357, + "step": 19714000 + }, + { + "epoch": 57.07, + "learning_rate": 2.1477545213505004e-05, + "loss": 1.9427, + "step": 19714500 + }, + { + "epoch": 57.07, + "learning_rate": 2.1476821565857726e-05, + "loss": 1.9726, + "step": 19715000 + }, + { + "epoch": 57.07, + "learning_rate": 2.1476097918210448e-05, + "loss": 1.9291, + "step": 19715500 + }, + { + "epoch": 57.07, + "learning_rate": 2.1475375717858467e-05, + "loss": 1.9686, + "step": 19716000 + }, + { + "epoch": 57.07, + "learning_rate": 2.147465207021119e-05, + "loss": 1.9627, + "step": 19716500 + }, + { + "epoch": 57.07, + "learning_rate": 2.147392842256391e-05, + "loss": 1.9598, + "step": 19717000 + }, + { + "epoch": 57.07, + "learning_rate": 2.1473204774916637e-05, + "loss": 1.9804, + "step": 19717500 + }, + { + "epoch": 57.08, + "learning_rate": 2.147248112726936e-05, + "loss": 1.9423, + "step": 19718000 + }, + { + "epoch": 57.08, + "learning_rate": 2.1471760374212672e-05, + "loss": 1.9469, + "step": 19718500 + }, + { + "epoch": 57.08, + "learning_rate": 2.1471036726565397e-05, + "loss": 1.9656, + "step": 19719000 + }, + { + "epoch": 57.08, + "learning_rate": 2.1470314526213413e-05, + "loss": 1.9548, + "step": 19719500 + }, + { + "epoch": 57.08, + "learning_rate": 2.146959087856614e-05, + "loss": 1.9448, + "step": 19720000 + }, + { + "epoch": 57.08, + "learning_rate": 2.146886723091886e-05, + "loss": 1.9403, + "step": 19720500 + }, + { + "epoch": 57.08, + "learning_rate": 2.1468143583271583e-05, + "loss": 1.9558, + "step": 19721000 + }, + { + "epoch": 57.09, + "learning_rate": 2.1467419935624305e-05, + "loss": 1.955, + "step": 19721500 + }, + { + "epoch": 57.09, + "learning_rate": 2.1466696287977028e-05, + "loss": 1.9367, + "step": 19722000 + }, + { + "epoch": 57.09, + "learning_rate": 2.1465972640329753e-05, + "loss": 1.9489, + "step": 19722500 + }, + { + "epoch": 57.09, + "learning_rate": 2.1465248992682475e-05, + "loss": 1.935, + "step": 19723000 + }, + { + "epoch": 57.09, + "learning_rate": 2.1464526792330494e-05, + "loss": 1.9602, + "step": 19723500 + }, + { + "epoch": 57.09, + "learning_rate": 2.1463803144683217e-05, + "loss": 1.9601, + "step": 19724000 + }, + { + "epoch": 57.09, + "learning_rate": 2.146307949703594e-05, + "loss": 1.9496, + "step": 19724500 + }, + { + "epoch": 57.1, + "learning_rate": 2.1462355849388664e-05, + "loss": 1.9541, + "step": 19725000 + }, + { + "epoch": 57.1, + "learning_rate": 2.1461632201741387e-05, + "loss": 1.9418, + "step": 19725500 + }, + { + "epoch": 57.1, + "learning_rate": 2.1460910001389402e-05, + "loss": 1.9731, + "step": 19726000 + }, + { + "epoch": 57.1, + "learning_rate": 2.1460186353742128e-05, + "loss": 1.9493, + "step": 19726500 + }, + { + "epoch": 57.1, + "learning_rate": 2.145946270609485e-05, + "loss": 1.9662, + "step": 19727000 + }, + { + "epoch": 57.1, + "learning_rate": 2.1458739058447576e-05, + "loss": 1.9501, + "step": 19727500 + }, + { + "epoch": 57.1, + "learning_rate": 2.145801685809559e-05, + "loss": 1.9657, + "step": 19728000 + }, + { + "epoch": 57.11, + "learning_rate": 2.1457293210448313e-05, + "loss": 1.9531, + "step": 19728500 + }, + { + "epoch": 57.11, + "learning_rate": 2.145656956280104e-05, + "loss": 1.9828, + "step": 19729000 + }, + { + "epoch": 57.11, + "learning_rate": 2.145584591515376e-05, + "loss": 1.9449, + "step": 19729500 + }, + { + "epoch": 57.11, + "learning_rate": 2.1455122267506487e-05, + "loss": 1.971, + "step": 19730000 + }, + { + "epoch": 57.11, + "learning_rate": 2.145439861985921e-05, + "loss": 1.9665, + "step": 19730500 + }, + { + "epoch": 57.11, + "learning_rate": 2.145367497221193e-05, + "loss": 1.963, + "step": 19731000 + }, + { + "epoch": 57.11, + "learning_rate": 2.1452951324564654e-05, + "loss": 1.9337, + "step": 19731500 + }, + { + "epoch": 57.12, + "learning_rate": 2.1452227676917376e-05, + "loss": 1.9466, + "step": 19732000 + }, + { + "epoch": 57.12, + "learning_rate": 2.14515040292701e-05, + "loss": 1.9525, + "step": 19732500 + }, + { + "epoch": 57.12, + "learning_rate": 2.1450781828918117e-05, + "loss": 1.9539, + "step": 19733000 + }, + { + "epoch": 57.12, + "learning_rate": 2.1450058181270843e-05, + "loss": 1.9659, + "step": 19733500 + }, + { + "epoch": 57.12, + "learning_rate": 2.144933598091886e-05, + "loss": 1.9598, + "step": 19734000 + }, + { + "epoch": 57.12, + "learning_rate": 2.1448612333271584e-05, + "loss": 1.9683, + "step": 19734500 + }, + { + "epoch": 57.12, + "learning_rate": 2.1447888685624306e-05, + "loss": 1.9468, + "step": 19735000 + }, + { + "epoch": 57.13, + "learning_rate": 2.144716503797703e-05, + "loss": 1.9474, + "step": 19735500 + }, + { + "epoch": 57.13, + "learning_rate": 2.1446441390329754e-05, + "loss": 1.9648, + "step": 19736000 + }, + { + "epoch": 57.13, + "learning_rate": 2.144571918997777e-05, + "loss": 1.9377, + "step": 19736500 + }, + { + "epoch": 57.13, + "learning_rate": 2.1444995542330492e-05, + "loss": 1.9758, + "step": 19737000 + }, + { + "epoch": 57.13, + "learning_rate": 2.1444271894683217e-05, + "loss": 1.9498, + "step": 19737500 + }, + { + "epoch": 57.13, + "learning_rate": 2.144354824703594e-05, + "loss": 1.9327, + "step": 19738000 + }, + { + "epoch": 57.13, + "learning_rate": 2.1442824599388665e-05, + "loss": 1.9401, + "step": 19738500 + }, + { + "epoch": 57.14, + "learning_rate": 2.144210239903668e-05, + "loss": 1.9361, + "step": 19739000 + }, + { + "epoch": 57.14, + "learning_rate": 2.1441378751389403e-05, + "loss": 1.9593, + "step": 19739500 + }, + { + "epoch": 57.14, + "learning_rate": 2.1440656551037422e-05, + "loss": 1.964, + "step": 19740000 + }, + { + "epoch": 57.14, + "learning_rate": 2.1439932903390144e-05, + "loss": 1.9345, + "step": 19740500 + }, + { + "epoch": 57.14, + "learning_rate": 2.1439210703038163e-05, + "loss": 1.9596, + "step": 19741000 + }, + { + "epoch": 57.14, + "learning_rate": 2.143848705539089e-05, + "loss": 1.9451, + "step": 19741500 + }, + { + "epoch": 57.15, + "learning_rate": 2.143776340774361e-05, + "loss": 1.9658, + "step": 19742000 + }, + { + "epoch": 57.15, + "learning_rate": 2.1437039760096333e-05, + "loss": 1.9373, + "step": 19742500 + }, + { + "epoch": 57.15, + "learning_rate": 2.1436316112449055e-05, + "loss": 1.9361, + "step": 19743000 + }, + { + "epoch": 57.15, + "learning_rate": 2.1435592464801778e-05, + "loss": 1.9661, + "step": 19743500 + }, + { + "epoch": 57.15, + "learning_rate": 2.1434868817154503e-05, + "loss": 1.9849, + "step": 19744000 + }, + { + "epoch": 57.15, + "learning_rate": 2.1434145169507226e-05, + "loss": 1.938, + "step": 19744500 + }, + { + "epoch": 57.15, + "learning_rate": 2.143342152185995e-05, + "loss": 1.9264, + "step": 19745000 + }, + { + "epoch": 57.16, + "learning_rate": 2.1432697874212673e-05, + "loss": 1.9563, + "step": 19745500 + }, + { + "epoch": 57.16, + "learning_rate": 2.1431974226565396e-05, + "loss": 1.9528, + "step": 19746000 + }, + { + "epoch": 57.16, + "learning_rate": 2.1431250578918118e-05, + "loss": 1.9728, + "step": 19746500 + }, + { + "epoch": 57.16, + "learning_rate": 2.143052693127084e-05, + "loss": 1.9444, + "step": 19747000 + }, + { + "epoch": 57.16, + "learning_rate": 2.1429803283623566e-05, + "loss": 1.9606, + "step": 19747500 + }, + { + "epoch": 57.16, + "learning_rate": 2.1429079635976288e-05, + "loss": 1.9634, + "step": 19748000 + }, + { + "epoch": 57.16, + "learning_rate": 2.1428357435624303e-05, + "loss": 1.9331, + "step": 19748500 + }, + { + "epoch": 57.17, + "learning_rate": 2.142763378797703e-05, + "loss": 1.9711, + "step": 19749000 + }, + { + "epoch": 57.17, + "learning_rate": 2.1426910140329755e-05, + "loss": 1.9593, + "step": 19749500 + }, + { + "epoch": 57.17, + "learning_rate": 2.1426186492682477e-05, + "loss": 1.9574, + "step": 19750000 + }, + { + "epoch": 57.17, + "learning_rate": 2.14254628450352e-05, + "loss": 1.957, + "step": 19750500 + }, + { + "epoch": 57.17, + "learning_rate": 2.1424740644683218e-05, + "loss": 1.9436, + "step": 19751000 + }, + { + "epoch": 57.17, + "learning_rate": 2.1424018444331234e-05, + "loss": 1.9631, + "step": 19751500 + }, + { + "epoch": 57.17, + "learning_rate": 2.1423294796683956e-05, + "loss": 1.9408, + "step": 19752000 + }, + { + "epoch": 57.18, + "learning_rate": 2.142257114903668e-05, + "loss": 1.9415, + "step": 19752500 + }, + { + "epoch": 57.18, + "learning_rate": 2.1421847501389404e-05, + "loss": 1.9568, + "step": 19753000 + }, + { + "epoch": 57.18, + "learning_rate": 2.142112385374213e-05, + "loss": 1.9674, + "step": 19753500 + }, + { + "epoch": 57.18, + "learning_rate": 2.142040020609485e-05, + "loss": 1.9618, + "step": 19754000 + }, + { + "epoch": 57.18, + "learning_rate": 2.1419676558447574e-05, + "loss": 1.952, + "step": 19754500 + }, + { + "epoch": 57.18, + "learning_rate": 2.1418952910800296e-05, + "loss": 1.9549, + "step": 19755000 + }, + { + "epoch": 57.18, + "learning_rate": 2.141822926315302e-05, + "loss": 1.9581, + "step": 19755500 + }, + { + "epoch": 57.19, + "learning_rate": 2.1417505615505744e-05, + "loss": 1.9635, + "step": 19756000 + }, + { + "epoch": 57.19, + "learning_rate": 2.141678196785847e-05, + "loss": 1.9483, + "step": 19756500 + }, + { + "epoch": 57.19, + "learning_rate": 2.1416058320211192e-05, + "loss": 1.9716, + "step": 19757000 + }, + { + "epoch": 57.19, + "learning_rate": 2.1415334672563914e-05, + "loss": 1.9463, + "step": 19757500 + }, + { + "epoch": 57.19, + "learning_rate": 2.1414611024916636e-05, + "loss": 1.9547, + "step": 19758000 + }, + { + "epoch": 57.19, + "learning_rate": 2.1413888824564655e-05, + "loss": 1.9518, + "step": 19758500 + }, + { + "epoch": 57.19, + "learning_rate": 2.141316662421267e-05, + "loss": 1.967, + "step": 19759000 + }, + { + "epoch": 57.2, + "learning_rate": 2.1412442976565393e-05, + "loss": 1.9507, + "step": 19759500 + }, + { + "epoch": 57.2, + "learning_rate": 2.141171932891812e-05, + "loss": 1.9626, + "step": 19760000 + }, + { + "epoch": 57.2, + "learning_rate": 2.1410995681270844e-05, + "loss": 1.9543, + "step": 19760500 + }, + { + "epoch": 57.2, + "learning_rate": 2.1410272033623566e-05, + "loss": 1.9688, + "step": 19761000 + }, + { + "epoch": 57.2, + "learning_rate": 2.1409549833271582e-05, + "loss": 1.9466, + "step": 19761500 + }, + { + "epoch": 57.2, + "learning_rate": 2.14088276329196e-05, + "loss": 1.949, + "step": 19762000 + }, + { + "epoch": 57.2, + "learning_rate": 2.1408103985272323e-05, + "loss": 1.9629, + "step": 19762500 + }, + { + "epoch": 57.21, + "learning_rate": 2.1407380337625045e-05, + "loss": 1.9606, + "step": 19763000 + }, + { + "epoch": 57.21, + "learning_rate": 2.1406656689977768e-05, + "loss": 1.9438, + "step": 19763500 + }, + { + "epoch": 57.21, + "learning_rate": 2.1405933042330493e-05, + "loss": 1.9493, + "step": 19764000 + }, + { + "epoch": 57.21, + "learning_rate": 2.140520939468322e-05, + "loss": 1.974, + "step": 19764500 + }, + { + "epoch": 57.21, + "learning_rate": 2.140448574703594e-05, + "loss": 1.9745, + "step": 19765000 + }, + { + "epoch": 57.21, + "learning_rate": 2.1403763546683957e-05, + "loss": 1.9416, + "step": 19765500 + }, + { + "epoch": 57.21, + "learning_rate": 2.1403041346331976e-05, + "loss": 1.9349, + "step": 19766000 + }, + { + "epoch": 57.22, + "learning_rate": 2.1402317698684698e-05, + "loss": 1.9434, + "step": 19766500 + }, + { + "epoch": 57.22, + "learning_rate": 2.140159405103742e-05, + "loss": 1.9495, + "step": 19767000 + }, + { + "epoch": 57.22, + "learning_rate": 2.1400870403390146e-05, + "loss": 1.9646, + "step": 19767500 + }, + { + "epoch": 57.22, + "learning_rate": 2.140014675574287e-05, + "loss": 1.9622, + "step": 19768000 + }, + { + "epoch": 57.22, + "learning_rate": 2.1399423108095594e-05, + "loss": 1.9786, + "step": 19768500 + }, + { + "epoch": 57.22, + "learning_rate": 2.1398699460448316e-05, + "loss": 1.963, + "step": 19769000 + }, + { + "epoch": 57.22, + "learning_rate": 2.1397975812801038e-05, + "loss": 1.9716, + "step": 19769500 + }, + { + "epoch": 57.23, + "learning_rate": 2.139725216515376e-05, + "loss": 1.9503, + "step": 19770000 + }, + { + "epoch": 57.23, + "learning_rate": 2.1396528517506483e-05, + "loss": 1.9769, + "step": 19770500 + }, + { + "epoch": 57.23, + "learning_rate": 2.13958063171545e-05, + "loss": 1.9487, + "step": 19771000 + }, + { + "epoch": 57.23, + "learning_rate": 2.1395082669507227e-05, + "loss": 1.9827, + "step": 19771500 + }, + { + "epoch": 57.23, + "learning_rate": 2.139435902185995e-05, + "loss": 1.9532, + "step": 19772000 + }, + { + "epoch": 57.23, + "learning_rate": 2.139363537421267e-05, + "loss": 1.9581, + "step": 19772500 + }, + { + "epoch": 57.23, + "learning_rate": 2.1392911726565397e-05, + "loss": 1.9602, + "step": 19773000 + }, + { + "epoch": 57.24, + "learning_rate": 2.139218807891812e-05, + "loss": 1.9741, + "step": 19773500 + }, + { + "epoch": 57.24, + "learning_rate": 2.139146443127084e-05, + "loss": 1.9686, + "step": 19774000 + }, + { + "epoch": 57.24, + "learning_rate": 2.1390740783623564e-05, + "loss": 1.9353, + "step": 19774500 + }, + { + "epoch": 57.24, + "learning_rate": 2.139001713597629e-05, + "loss": 1.959, + "step": 19775000 + }, + { + "epoch": 57.24, + "learning_rate": 2.1389293488329012e-05, + "loss": 1.964, + "step": 19775500 + }, + { + "epoch": 57.24, + "learning_rate": 2.138857128797703e-05, + "loss": 1.9587, + "step": 19776000 + }, + { + "epoch": 57.24, + "learning_rate": 2.1387847640329753e-05, + "loss": 1.9707, + "step": 19776500 + }, + { + "epoch": 57.25, + "learning_rate": 2.1387123992682475e-05, + "loss": 1.9609, + "step": 19777000 + }, + { + "epoch": 57.25, + "learning_rate": 2.1386400345035197e-05, + "loss": 1.9425, + "step": 19777500 + }, + { + "epoch": 57.25, + "learning_rate": 2.1385676697387923e-05, + "loss": 1.9757, + "step": 19778000 + }, + { + "epoch": 57.25, + "learning_rate": 2.138495304974065e-05, + "loss": 1.9741, + "step": 19778500 + }, + { + "epoch": 57.25, + "learning_rate": 2.1384230849388664e-05, + "loss": 1.971, + "step": 19779000 + }, + { + "epoch": 57.25, + "learning_rate": 2.1383507201741386e-05, + "loss": 1.9623, + "step": 19779500 + }, + { + "epoch": 57.26, + "learning_rate": 2.138278355409411e-05, + "loss": 1.9754, + "step": 19780000 + }, + { + "epoch": 57.26, + "learning_rate": 2.1382059906446834e-05, + "loss": 1.9516, + "step": 19780500 + }, + { + "epoch": 57.26, + "learning_rate": 2.1381336258799557e-05, + "loss": 1.9529, + "step": 19781000 + }, + { + "epoch": 57.26, + "learning_rate": 2.138061261115228e-05, + "loss": 1.9603, + "step": 19781500 + }, + { + "epoch": 57.26, + "learning_rate": 2.1379888963505004e-05, + "loss": 1.9657, + "step": 19782000 + }, + { + "epoch": 57.26, + "learning_rate": 2.1379165315857727e-05, + "loss": 1.9526, + "step": 19782500 + }, + { + "epoch": 57.26, + "learning_rate": 2.137844166821045e-05, + "loss": 1.9525, + "step": 19783000 + }, + { + "epoch": 57.27, + "learning_rate": 2.137772091515376e-05, + "loss": 1.952, + "step": 19783500 + }, + { + "epoch": 57.27, + "learning_rate": 2.1376997267506483e-05, + "loss": 1.9671, + "step": 19784000 + }, + { + "epoch": 57.27, + "learning_rate": 2.137627361985921e-05, + "loss": 1.9515, + "step": 19784500 + }, + { + "epoch": 57.27, + "learning_rate": 2.137554997221193e-05, + "loss": 1.9538, + "step": 19785000 + }, + { + "epoch": 57.27, + "learning_rate": 2.1374826324564653e-05, + "loss": 1.976, + "step": 19785500 + }, + { + "epoch": 57.27, + "learning_rate": 2.137410267691738e-05, + "loss": 1.9589, + "step": 19786000 + }, + { + "epoch": 57.27, + "learning_rate": 2.13733790292701e-05, + "loss": 1.9411, + "step": 19786500 + }, + { + "epoch": 57.28, + "learning_rate": 2.1372655381622824e-05, + "loss": 1.9813, + "step": 19787000 + }, + { + "epoch": 57.28, + "learning_rate": 2.137193173397555e-05, + "loss": 1.9522, + "step": 19787500 + }, + { + "epoch": 57.28, + "learning_rate": 2.1371209533623565e-05, + "loss": 1.9503, + "step": 19788000 + }, + { + "epoch": 57.28, + "learning_rate": 2.1370485885976287e-05, + "loss": 1.9275, + "step": 19788500 + }, + { + "epoch": 57.28, + "learning_rate": 2.136976223832901e-05, + "loss": 1.9423, + "step": 19789000 + }, + { + "epoch": 57.28, + "learning_rate": 2.1369038590681735e-05, + "loss": 1.9591, + "step": 19789500 + }, + { + "epoch": 57.28, + "learning_rate": 2.1368317837625047e-05, + "loss": 1.9517, + "step": 19790000 + }, + { + "epoch": 57.29, + "learning_rate": 2.1367594189977773e-05, + "loss": 1.9491, + "step": 19790500 + }, + { + "epoch": 57.29, + "learning_rate": 2.1366870542330495e-05, + "loss": 1.948, + "step": 19791000 + }, + { + "epoch": 57.29, + "learning_rate": 2.1366146894683217e-05, + "loss": 1.9519, + "step": 19791500 + }, + { + "epoch": 57.29, + "learning_rate": 2.136542324703594e-05, + "loss": 1.9701, + "step": 19792000 + }, + { + "epoch": 57.29, + "learning_rate": 2.136469959938866e-05, + "loss": 1.9565, + "step": 19792500 + }, + { + "epoch": 57.29, + "learning_rate": 2.136397739903668e-05, + "loss": 1.9463, + "step": 19793000 + }, + { + "epoch": 57.29, + "learning_rate": 2.1363253751389403e-05, + "loss": 1.9298, + "step": 19793500 + }, + { + "epoch": 57.3, + "learning_rate": 2.136253010374213e-05, + "loss": 1.9587, + "step": 19794000 + }, + { + "epoch": 57.3, + "learning_rate": 2.136180645609485e-05, + "loss": 1.9681, + "step": 19794500 + }, + { + "epoch": 57.3, + "learning_rate": 2.1361082808447573e-05, + "loss": 1.9853, + "step": 19795000 + }, + { + "epoch": 57.3, + "learning_rate": 2.13603591608003e-05, + "loss": 1.9567, + "step": 19795500 + }, + { + "epoch": 57.3, + "learning_rate": 2.1359636960448314e-05, + "loss": 1.9499, + "step": 19796000 + }, + { + "epoch": 57.3, + "learning_rate": 2.1358913312801036e-05, + "loss": 1.9789, + "step": 19796500 + }, + { + "epoch": 57.3, + "learning_rate": 2.1358189665153762e-05, + "loss": 1.9675, + "step": 19797000 + }, + { + "epoch": 57.31, + "learning_rate": 2.1357466017506488e-05, + "loss": 1.9409, + "step": 19797500 + }, + { + "epoch": 57.31, + "learning_rate": 2.1356743817154503e-05, + "loss": 1.9398, + "step": 19798000 + }, + { + "epoch": 57.31, + "learning_rate": 2.1356020169507225e-05, + "loss": 1.9579, + "step": 19798500 + }, + { + "epoch": 57.31, + "learning_rate": 2.135529652185995e-05, + "loss": 1.9517, + "step": 19799000 + }, + { + "epoch": 57.31, + "learning_rate": 2.1354574321507966e-05, + "loss": 1.9684, + "step": 19799500 + }, + { + "epoch": 57.31, + "learning_rate": 2.135385067386069e-05, + "loss": 1.9568, + "step": 19800000 + }, + { + "epoch": 57.31, + "learning_rate": 2.135312702621341e-05, + "loss": 1.9409, + "step": 19800500 + }, + { + "epoch": 57.32, + "learning_rate": 2.1352403378566137e-05, + "loss": 1.9484, + "step": 19801000 + }, + { + "epoch": 57.32, + "learning_rate": 2.1351679730918862e-05, + "loss": 1.9627, + "step": 19801500 + }, + { + "epoch": 57.32, + "learning_rate": 2.1350956083271584e-05, + "loss": 1.959, + "step": 19802000 + }, + { + "epoch": 57.32, + "learning_rate": 2.1350232435624307e-05, + "loss": 1.959, + "step": 19802500 + }, + { + "epoch": 57.32, + "learning_rate": 2.134950878797703e-05, + "loss": 1.9662, + "step": 19803000 + }, + { + "epoch": 57.32, + "learning_rate": 2.134878514032975e-05, + "loss": 1.9552, + "step": 19803500 + }, + { + "epoch": 57.32, + "learning_rate": 2.134806293997777e-05, + "loss": 1.9576, + "step": 19804000 + }, + { + "epoch": 57.33, + "learning_rate": 2.134734073962579e-05, + "loss": 1.9727, + "step": 19804500 + }, + { + "epoch": 57.33, + "learning_rate": 2.134661709197851e-05, + "loss": 1.9428, + "step": 19805000 + }, + { + "epoch": 57.33, + "learning_rate": 2.1345893444331237e-05, + "loss": 1.9771, + "step": 19805500 + }, + { + "epoch": 57.33, + "learning_rate": 2.134516979668396e-05, + "loss": 1.9404, + "step": 19806000 + }, + { + "epoch": 57.33, + "learning_rate": 2.134444614903668e-05, + "loss": 1.9657, + "step": 19806500 + }, + { + "epoch": 57.33, + "learning_rate": 2.13437239486847e-05, + "loss": 1.9265, + "step": 19807000 + }, + { + "epoch": 57.33, + "learning_rate": 2.1343000301037423e-05, + "loss": 1.9487, + "step": 19807500 + }, + { + "epoch": 57.34, + "learning_rate": 2.1342276653390145e-05, + "loss": 1.9411, + "step": 19808000 + }, + { + "epoch": 57.34, + "learning_rate": 2.1341553005742867e-05, + "loss": 1.9782, + "step": 19808500 + }, + { + "epoch": 57.34, + "learning_rate": 2.1340829358095593e-05, + "loss": 1.9532, + "step": 19809000 + }, + { + "epoch": 57.34, + "learning_rate": 2.1340105710448315e-05, + "loss": 1.9512, + "step": 19809500 + }, + { + "epoch": 57.34, + "learning_rate": 2.1339383510096334e-05, + "loss": 1.9803, + "step": 19810000 + }, + { + "epoch": 57.34, + "learning_rate": 2.1338659862449056e-05, + "loss": 1.9378, + "step": 19810500 + }, + { + "epoch": 57.34, + "learning_rate": 2.1337936214801778e-05, + "loss": 1.9344, + "step": 19811000 + }, + { + "epoch": 57.35, + "learning_rate": 2.13372125671545e-05, + "loss": 1.9352, + "step": 19811500 + }, + { + "epoch": 57.35, + "learning_rate": 2.1336488919507226e-05, + "loss": 1.9559, + "step": 19812000 + }, + { + "epoch": 57.35, + "learning_rate": 2.1335765271859952e-05, + "loss": 1.9754, + "step": 19812500 + }, + { + "epoch": 57.35, + "learning_rate": 2.1335041624212674e-05, + "loss": 1.9375, + "step": 19813000 + }, + { + "epoch": 57.35, + "learning_rate": 2.133431942386069e-05, + "loss": 1.9474, + "step": 19813500 + }, + { + "epoch": 57.35, + "learning_rate": 2.1333595776213415e-05, + "loss": 1.9267, + "step": 19814000 + }, + { + "epoch": 57.35, + "learning_rate": 2.133287357586143e-05, + "loss": 1.9443, + "step": 19814500 + }, + { + "epoch": 57.36, + "learning_rate": 2.1332149928214153e-05, + "loss": 1.9431, + "step": 19815000 + }, + { + "epoch": 57.36, + "learning_rate": 2.133142628056688e-05, + "loss": 1.9689, + "step": 19815500 + }, + { + "epoch": 57.36, + "learning_rate": 2.13307026329196e-05, + "loss": 1.9719, + "step": 19816000 + }, + { + "epoch": 57.36, + "learning_rate": 2.1329978985272326e-05, + "loss": 1.955, + "step": 19816500 + }, + { + "epoch": 57.36, + "learning_rate": 2.132925533762505e-05, + "loss": 1.9399, + "step": 19817000 + }, + { + "epoch": 57.36, + "learning_rate": 2.1328533137273064e-05, + "loss": 1.964, + "step": 19817500 + }, + { + "epoch": 57.36, + "learning_rate": 2.132780948962579e-05, + "loss": 1.9794, + "step": 19818000 + }, + { + "epoch": 57.37, + "learning_rate": 2.1327085841978512e-05, + "loss": 1.9703, + "step": 19818500 + }, + { + "epoch": 57.37, + "learning_rate": 2.1326362194331234e-05, + "loss": 1.9401, + "step": 19819000 + }, + { + "epoch": 57.37, + "learning_rate": 2.1325638546683957e-05, + "loss": 1.967, + "step": 19819500 + }, + { + "epoch": 57.37, + "learning_rate": 2.1324914899036682e-05, + "loss": 1.9667, + "step": 19820000 + }, + { + "epoch": 57.37, + "learning_rate": 2.1324191251389404e-05, + "loss": 1.9595, + "step": 19820500 + }, + { + "epoch": 57.37, + "learning_rate": 2.1323469051037423e-05, + "loss": 1.9764, + "step": 19821000 + }, + { + "epoch": 57.38, + "learning_rate": 2.1322745403390146e-05, + "loss": 1.9579, + "step": 19821500 + }, + { + "epoch": 57.38, + "learning_rate": 2.1322021755742868e-05, + "loss": 1.9469, + "step": 19822000 + }, + { + "epoch": 57.38, + "learning_rate": 2.132129810809559e-05, + "loss": 1.9619, + "step": 19822500 + }, + { + "epoch": 57.38, + "learning_rate": 2.132057590774361e-05, + "loss": 1.9498, + "step": 19823000 + }, + { + "epoch": 57.38, + "learning_rate": 2.131985226009633e-05, + "loss": 1.9767, + "step": 19823500 + }, + { + "epoch": 57.38, + "learning_rate": 2.1319128612449057e-05, + "loss": 1.9533, + "step": 19824000 + }, + { + "epoch": 57.38, + "learning_rate": 2.131840496480178e-05, + "loss": 1.9631, + "step": 19824500 + }, + { + "epoch": 57.39, + "learning_rate": 2.1317681317154505e-05, + "loss": 1.9616, + "step": 19825000 + }, + { + "epoch": 57.39, + "learning_rate": 2.131695911680252e-05, + "loss": 1.962, + "step": 19825500 + }, + { + "epoch": 57.39, + "learning_rate": 2.1316235469155242e-05, + "loss": 1.9401, + "step": 19826000 + }, + { + "epoch": 57.39, + "learning_rate": 2.1315511821507965e-05, + "loss": 1.9676, + "step": 19826500 + }, + { + "epoch": 57.39, + "learning_rate": 2.131478817386069e-05, + "loss": 1.9684, + "step": 19827000 + }, + { + "epoch": 57.39, + "learning_rate": 2.1314064526213416e-05, + "loss": 1.9607, + "step": 19827500 + }, + { + "epoch": 57.39, + "learning_rate": 2.1313340878566138e-05, + "loss": 1.9839, + "step": 19828000 + }, + { + "epoch": 57.4, + "learning_rate": 2.131261723091886e-05, + "loss": 1.9703, + "step": 19828500 + }, + { + "epoch": 57.4, + "learning_rate": 2.1311893583271583e-05, + "loss": 1.9395, + "step": 19829000 + }, + { + "epoch": 57.4, + "learning_rate": 2.1311169935624305e-05, + "loss": 1.9428, + "step": 19829500 + }, + { + "epoch": 57.4, + "learning_rate": 2.131044628797703e-05, + "loss": 1.9351, + "step": 19830000 + }, + { + "epoch": 57.4, + "learning_rate": 2.1309722640329753e-05, + "loss": 1.984, + "step": 19830500 + }, + { + "epoch": 57.4, + "learning_rate": 2.130899899268248e-05, + "loss": 1.9647, + "step": 19831000 + }, + { + "epoch": 57.4, + "learning_rate": 2.13082753450352e-05, + "loss": 1.9535, + "step": 19831500 + }, + { + "epoch": 57.41, + "learning_rate": 2.1307551697387923e-05, + "loss": 1.9651, + "step": 19832000 + }, + { + "epoch": 57.41, + "learning_rate": 2.1306828049740645e-05, + "loss": 1.9326, + "step": 19832500 + }, + { + "epoch": 57.41, + "learning_rate": 2.1306104402093367e-05, + "loss": 1.963, + "step": 19833000 + }, + { + "epoch": 57.41, + "learning_rate": 2.1305380754446093e-05, + "loss": 1.9621, + "step": 19833500 + }, + { + "epoch": 57.41, + "learning_rate": 2.1304657106798815e-05, + "loss": 1.9707, + "step": 19834000 + }, + { + "epoch": 57.41, + "learning_rate": 2.130393490644683e-05, + "loss": 1.9607, + "step": 19834500 + }, + { + "epoch": 57.41, + "learning_rate": 2.1303211258799556e-05, + "loss": 1.9453, + "step": 19835000 + }, + { + "epoch": 57.42, + "learning_rate": 2.1302487611152282e-05, + "loss": 1.9538, + "step": 19835500 + }, + { + "epoch": 57.42, + "learning_rate": 2.1301765410800297e-05, + "loss": 1.9387, + "step": 19836000 + }, + { + "epoch": 57.42, + "learning_rate": 2.130104176315302e-05, + "loss": 1.9556, + "step": 19836500 + }, + { + "epoch": 57.42, + "learning_rate": 2.1300318115505742e-05, + "loss": 1.9629, + "step": 19837000 + }, + { + "epoch": 57.42, + "learning_rate": 2.129959591515376e-05, + "loss": 1.995, + "step": 19837500 + }, + { + "epoch": 57.42, + "learning_rate": 2.1298872267506483e-05, + "loss": 1.9498, + "step": 19838000 + }, + { + "epoch": 57.42, + "learning_rate": 2.129814861985921e-05, + "loss": 1.9662, + "step": 19838500 + }, + { + "epoch": 57.43, + "learning_rate": 2.129742497221193e-05, + "loss": 1.9455, + "step": 19839000 + }, + { + "epoch": 57.43, + "learning_rate": 2.1296701324564657e-05, + "loss": 1.9674, + "step": 19839500 + }, + { + "epoch": 57.43, + "learning_rate": 2.129597767691738e-05, + "loss": 1.9617, + "step": 19840000 + }, + { + "epoch": 57.43, + "learning_rate": 2.12952540292701e-05, + "loss": 1.9521, + "step": 19840500 + }, + { + "epoch": 57.43, + "learning_rate": 2.1294530381622823e-05, + "loss": 1.9322, + "step": 19841000 + }, + { + "epoch": 57.43, + "learning_rate": 2.1293808181270842e-05, + "loss": 1.966, + "step": 19841500 + }, + { + "epoch": 57.43, + "learning_rate": 2.1293084533623564e-05, + "loss": 1.9536, + "step": 19842000 + }, + { + "epoch": 57.44, + "learning_rate": 2.1292362333271583e-05, + "loss": 1.9731, + "step": 19842500 + }, + { + "epoch": 57.44, + "learning_rate": 2.1291638685624306e-05, + "loss": 1.9523, + "step": 19843000 + }, + { + "epoch": 57.44, + "learning_rate": 2.129091503797703e-05, + "loss": 1.9508, + "step": 19843500 + }, + { + "epoch": 57.44, + "learning_rate": 2.1290191390329754e-05, + "loss": 1.9524, + "step": 19844000 + }, + { + "epoch": 57.44, + "learning_rate": 2.1289467742682476e-05, + "loss": 1.9561, + "step": 19844500 + }, + { + "epoch": 57.44, + "learning_rate": 2.1288744095035198e-05, + "loss": 1.978, + "step": 19845000 + }, + { + "epoch": 57.44, + "learning_rate": 2.128802044738792e-05, + "loss": 1.9472, + "step": 19845500 + }, + { + "epoch": 57.45, + "learning_rate": 2.1287296799740646e-05, + "loss": 1.9513, + "step": 19846000 + }, + { + "epoch": 57.45, + "learning_rate": 2.128657315209337e-05, + "loss": 1.9577, + "step": 19846500 + }, + { + "epoch": 57.45, + "learning_rate": 2.1285850951741387e-05, + "loss": 1.9712, + "step": 19847000 + }, + { + "epoch": 57.45, + "learning_rate": 2.128512730409411e-05, + "loss": 1.9863, + "step": 19847500 + }, + { + "epoch": 57.45, + "learning_rate": 2.128440365644683e-05, + "loss": 1.9522, + "step": 19848000 + }, + { + "epoch": 57.45, + "learning_rate": 2.1283680008799557e-05, + "loss": 1.9557, + "step": 19848500 + }, + { + "epoch": 57.45, + "learning_rate": 2.128295636115228e-05, + "loss": 1.9677, + "step": 19849000 + }, + { + "epoch": 57.46, + "learning_rate": 2.1282232713505005e-05, + "loss": 1.9736, + "step": 19849500 + }, + { + "epoch": 57.46, + "learning_rate": 2.1281509065857727e-05, + "loss": 1.9749, + "step": 19850000 + }, + { + "epoch": 57.46, + "learning_rate": 2.128078541821045e-05, + "loss": 1.9483, + "step": 19850500 + }, + { + "epoch": 57.46, + "learning_rate": 2.128006321785847e-05, + "loss": 1.9539, + "step": 19851000 + }, + { + "epoch": 57.46, + "learning_rate": 2.1279341017506484e-05, + "loss": 1.9658, + "step": 19851500 + }, + { + "epoch": 57.46, + "learning_rate": 2.1278617369859206e-05, + "loss": 1.9287, + "step": 19852000 + }, + { + "epoch": 57.46, + "learning_rate": 2.1277893722211932e-05, + "loss": 1.9462, + "step": 19852500 + }, + { + "epoch": 57.47, + "learning_rate": 2.1277170074564654e-05, + "loss": 1.9679, + "step": 19853000 + }, + { + "epoch": 57.47, + "learning_rate": 2.127644642691738e-05, + "loss": 1.9637, + "step": 19853500 + }, + { + "epoch": 57.47, + "learning_rate": 2.1275722779270102e-05, + "loss": 1.9512, + "step": 19854000 + }, + { + "epoch": 57.47, + "learning_rate": 2.127500057891812e-05, + "loss": 1.9583, + "step": 19854500 + }, + { + "epoch": 57.47, + "learning_rate": 2.1274276931270843e-05, + "loss": 1.9738, + "step": 19855000 + }, + { + "epoch": 57.47, + "learning_rate": 2.1273553283623565e-05, + "loss": 1.9565, + "step": 19855500 + }, + { + "epoch": 57.47, + "learning_rate": 2.1272829635976288e-05, + "loss": 1.966, + "step": 19856000 + }, + { + "epoch": 57.48, + "learning_rate": 2.127210598832901e-05, + "loss": 1.9642, + "step": 19856500 + }, + { + "epoch": 57.48, + "learning_rate": 2.1271382340681732e-05, + "loss": 1.9657, + "step": 19857000 + }, + { + "epoch": 57.48, + "learning_rate": 2.1270658693034458e-05, + "loss": 1.9619, + "step": 19857500 + }, + { + "epoch": 57.48, + "learning_rate": 2.1269935045387183e-05, + "loss": 1.9582, + "step": 19858000 + }, + { + "epoch": 57.48, + "learning_rate": 2.1269211397739905e-05, + "loss": 1.964, + "step": 19858500 + }, + { + "epoch": 57.48, + "learning_rate": 2.126848919738792e-05, + "loss": 1.9455, + "step": 19859000 + }, + { + "epoch": 57.49, + "learning_rate": 2.1267765549740647e-05, + "loss": 1.9461, + "step": 19859500 + }, + { + "epoch": 57.49, + "learning_rate": 2.1267043349388662e-05, + "loss": 1.9756, + "step": 19860000 + }, + { + "epoch": 57.49, + "learning_rate": 2.1266319701741384e-05, + "loss": 1.9466, + "step": 19860500 + }, + { + "epoch": 57.49, + "learning_rate": 2.126559605409411e-05, + "loss": 1.9761, + "step": 19861000 + }, + { + "epoch": 57.49, + "learning_rate": 2.1264872406446836e-05, + "loss": 1.9465, + "step": 19861500 + }, + { + "epoch": 57.49, + "learning_rate": 2.1264148758799558e-05, + "loss": 1.9317, + "step": 19862000 + }, + { + "epoch": 57.49, + "learning_rate": 2.126342511115228e-05, + "loss": 1.9709, + "step": 19862500 + }, + { + "epoch": 57.5, + "learning_rate": 2.1262701463505002e-05, + "loss": 1.9505, + "step": 19863000 + }, + { + "epoch": 57.5, + "learning_rate": 2.1261977815857725e-05, + "loss": 1.9404, + "step": 19863500 + }, + { + "epoch": 57.5, + "learning_rate": 2.1261254168210447e-05, + "loss": 1.9366, + "step": 19864000 + }, + { + "epoch": 57.5, + "learning_rate": 2.1260530520563172e-05, + "loss": 1.9612, + "step": 19864500 + }, + { + "epoch": 57.5, + "learning_rate": 2.1259806872915898e-05, + "loss": 1.9559, + "step": 19865000 + }, + { + "epoch": 57.5, + "learning_rate": 2.1259084672563914e-05, + "loss": 1.9551, + "step": 19865500 + }, + { + "epoch": 57.5, + "learning_rate": 2.1258361024916636e-05, + "loss": 1.9597, + "step": 19866000 + }, + { + "epoch": 57.51, + "learning_rate": 2.125763737726936e-05, + "loss": 1.9395, + "step": 19866500 + }, + { + "epoch": 57.51, + "learning_rate": 2.1256915176917377e-05, + "loss": 1.9903, + "step": 19867000 + }, + { + "epoch": 57.51, + "learning_rate": 2.12561915292701e-05, + "loss": 1.9688, + "step": 19867500 + }, + { + "epoch": 57.51, + "learning_rate": 2.125546788162282e-05, + "loss": 1.9467, + "step": 19868000 + }, + { + "epoch": 57.51, + "learning_rate": 2.1254744233975547e-05, + "loss": 1.9372, + "step": 19868500 + }, + { + "epoch": 57.51, + "learning_rate": 2.1254020586328273e-05, + "loss": 1.9373, + "step": 19869000 + }, + { + "epoch": 57.51, + "learning_rate": 2.1253296938680995e-05, + "loss": 1.9501, + "step": 19869500 + }, + { + "epoch": 57.52, + "learning_rate": 2.1252573291033717e-05, + "loss": 1.9541, + "step": 19870000 + }, + { + "epoch": 57.52, + "learning_rate": 2.1251851090681736e-05, + "loss": 1.9338, + "step": 19870500 + }, + { + "epoch": 57.52, + "learning_rate": 2.125112744303446e-05, + "loss": 1.9398, + "step": 19871000 + }, + { + "epoch": 57.52, + "learning_rate": 2.125040379538718e-05, + "loss": 1.9688, + "step": 19871500 + }, + { + "epoch": 57.52, + "learning_rate": 2.1249680147739906e-05, + "loss": 1.9699, + "step": 19872000 + }, + { + "epoch": 57.52, + "learning_rate": 2.124895650009263e-05, + "loss": 1.9604, + "step": 19872500 + }, + { + "epoch": 57.52, + "learning_rate": 2.1248234299740647e-05, + "loss": 1.9434, + "step": 19873000 + }, + { + "epoch": 57.53, + "learning_rate": 2.124751065209337e-05, + "loss": 1.9415, + "step": 19873500 + }, + { + "epoch": 57.53, + "learning_rate": 2.1246788451741385e-05, + "loss": 1.9706, + "step": 19874000 + }, + { + "epoch": 57.53, + "learning_rate": 2.124606480409411e-05, + "loss": 1.9858, + "step": 19874500 + }, + { + "epoch": 57.53, + "learning_rate": 2.1245341156446833e-05, + "loss": 1.9766, + "step": 19875000 + }, + { + "epoch": 57.53, + "learning_rate": 2.1244617508799555e-05, + "loss": 1.9625, + "step": 19875500 + }, + { + "epoch": 57.53, + "learning_rate": 2.1243895308447574e-05, + "loss": 1.962, + "step": 19876000 + }, + { + "epoch": 57.53, + "learning_rate": 2.12431716608003e-05, + "loss": 1.9693, + "step": 19876500 + }, + { + "epoch": 57.54, + "learning_rate": 2.1242448013153022e-05, + "loss": 1.9482, + "step": 19877000 + }, + { + "epoch": 57.54, + "learning_rate": 2.1241724365505744e-05, + "loss": 1.9654, + "step": 19877500 + }, + { + "epoch": 57.54, + "learning_rate": 2.1241002165153763e-05, + "loss": 1.946, + "step": 19878000 + }, + { + "epoch": 57.54, + "learning_rate": 2.1240278517506486e-05, + "loss": 1.9605, + "step": 19878500 + }, + { + "epoch": 57.54, + "learning_rate": 2.1239554869859208e-05, + "loss": 1.9599, + "step": 19879000 + }, + { + "epoch": 57.54, + "learning_rate": 2.123883122221193e-05, + "loss": 1.9658, + "step": 19879500 + }, + { + "epoch": 57.54, + "learning_rate": 2.1238107574564656e-05, + "loss": 1.9496, + "step": 19880000 + }, + { + "epoch": 57.55, + "learning_rate": 2.1237383926917378e-05, + "loss": 1.9289, + "step": 19880500 + }, + { + "epoch": 57.55, + "learning_rate": 2.12366602792701e-05, + "loss": 1.9731, + "step": 19881000 + }, + { + "epoch": 57.55, + "learning_rate": 2.123593807891812e-05, + "loss": 1.9794, + "step": 19881500 + }, + { + "epoch": 57.55, + "learning_rate": 2.123521443127084e-05, + "loss": 1.9687, + "step": 19882000 + }, + { + "epoch": 57.55, + "learning_rate": 2.123449223091886e-05, + "loss": 1.9579, + "step": 19882500 + }, + { + "epoch": 57.55, + "learning_rate": 2.1233768583271582e-05, + "loss": 1.9705, + "step": 19883000 + }, + { + "epoch": 57.55, + "learning_rate": 2.1233044935624305e-05, + "loss": 1.9593, + "step": 19883500 + }, + { + "epoch": 57.56, + "learning_rate": 2.123232128797703e-05, + "loss": 1.954, + "step": 19884000 + }, + { + "epoch": 57.56, + "learning_rate": 2.1231597640329753e-05, + "loss": 1.9392, + "step": 19884500 + }, + { + "epoch": 57.56, + "learning_rate": 2.1230873992682475e-05, + "loss": 1.9656, + "step": 19885000 + }, + { + "epoch": 57.56, + "learning_rate": 2.12301503450352e-05, + "loss": 1.9653, + "step": 19885500 + }, + { + "epoch": 57.56, + "learning_rate": 2.1229426697387923e-05, + "loss": 1.9715, + "step": 19886000 + }, + { + "epoch": 57.56, + "learning_rate": 2.1228703049740645e-05, + "loss": 1.9451, + "step": 19886500 + }, + { + "epoch": 57.56, + "learning_rate": 2.1227979402093367e-05, + "loss": 1.9661, + "step": 19887000 + }, + { + "epoch": 57.57, + "learning_rate": 2.1227255754446093e-05, + "loss": 1.9684, + "step": 19887500 + }, + { + "epoch": 57.57, + "learning_rate": 2.1226532106798815e-05, + "loss": 1.9555, + "step": 19888000 + }, + { + "epoch": 57.57, + "learning_rate": 2.1225808459151537e-05, + "loss": 1.9639, + "step": 19888500 + }, + { + "epoch": 57.57, + "learning_rate": 2.1225084811504263e-05, + "loss": 1.9546, + "step": 19889000 + }, + { + "epoch": 57.57, + "learning_rate": 2.122436261115228e-05, + "loss": 1.9457, + "step": 19889500 + }, + { + "epoch": 57.57, + "learning_rate": 2.1223638963505e-05, + "loss": 1.9495, + "step": 19890000 + }, + { + "epoch": 57.57, + "learning_rate": 2.122291676315302e-05, + "loss": 1.9593, + "step": 19890500 + }, + { + "epoch": 57.58, + "learning_rate": 2.122219456280104e-05, + "loss": 1.9497, + "step": 19891000 + }, + { + "epoch": 57.58, + "learning_rate": 2.1221470915153764e-05, + "loss": 1.9716, + "step": 19891500 + }, + { + "epoch": 57.58, + "learning_rate": 2.1220747267506486e-05, + "loss": 1.9618, + "step": 19892000 + }, + { + "epoch": 57.58, + "learning_rate": 2.122002361985921e-05, + "loss": 1.9667, + "step": 19892500 + }, + { + "epoch": 57.58, + "learning_rate": 2.121929997221193e-05, + "loss": 1.9629, + "step": 19893000 + }, + { + "epoch": 57.58, + "learning_rate": 2.1218576324564653e-05, + "loss": 1.964, + "step": 19893500 + }, + { + "epoch": 57.58, + "learning_rate": 2.1217852676917375e-05, + "loss": 1.9712, + "step": 19894000 + }, + { + "epoch": 57.59, + "learning_rate": 2.12171290292701e-05, + "loss": 1.9445, + "step": 19894500 + }, + { + "epoch": 57.59, + "learning_rate": 2.1216405381622826e-05, + "loss": 1.9284, + "step": 19895000 + }, + { + "epoch": 57.59, + "learning_rate": 2.121568173397555e-05, + "loss": 1.9629, + "step": 19895500 + }, + { + "epoch": 57.59, + "learning_rate": 2.121495808632827e-05, + "loss": 1.9904, + "step": 19896000 + }, + { + "epoch": 57.59, + "learning_rate": 2.1214234438680993e-05, + "loss": 1.9861, + "step": 19896500 + }, + { + "epoch": 57.59, + "learning_rate": 2.1213510791033715e-05, + "loss": 1.967, + "step": 19897000 + }, + { + "epoch": 57.6, + "learning_rate": 2.121278714338644e-05, + "loss": 1.9946, + "step": 19897500 + }, + { + "epoch": 57.6, + "learning_rate": 2.1212064943034457e-05, + "loss": 1.9671, + "step": 19898000 + }, + { + "epoch": 57.6, + "learning_rate": 2.1211341295387182e-05, + "loss": 1.9619, + "step": 19898500 + }, + { + "epoch": 57.6, + "learning_rate": 2.1210617647739904e-05, + "loss": 1.9653, + "step": 19899000 + }, + { + "epoch": 57.6, + "learning_rate": 2.1209896894683217e-05, + "loss": 1.9587, + "step": 19899500 + }, + { + "epoch": 57.6, + "learning_rate": 2.120917324703594e-05, + "loss": 1.9448, + "step": 19900000 + }, + { + "epoch": 57.6, + "learning_rate": 2.1208449599388665e-05, + "loss": 1.9584, + "step": 19900500 + }, + { + "epoch": 57.61, + "learning_rate": 2.1207725951741387e-05, + "loss": 1.9578, + "step": 19901000 + }, + { + "epoch": 57.61, + "learning_rate": 2.120700230409411e-05, + "loss": 1.9543, + "step": 19901500 + }, + { + "epoch": 57.61, + "learning_rate": 2.120627865644683e-05, + "loss": 1.9571, + "step": 19902000 + }, + { + "epoch": 57.61, + "learning_rate": 2.1205555008799557e-05, + "loss": 1.9666, + "step": 19902500 + }, + { + "epoch": 57.61, + "learning_rate": 2.120483136115228e-05, + "loss": 1.9622, + "step": 19903000 + }, + { + "epoch": 57.61, + "learning_rate": 2.1204107713505005e-05, + "loss": 1.9154, + "step": 19903500 + }, + { + "epoch": 57.61, + "learning_rate": 2.1203384065857727e-05, + "loss": 1.9603, + "step": 19904000 + }, + { + "epoch": 57.62, + "learning_rate": 2.120266041821045e-05, + "loss": 1.9656, + "step": 19904500 + }, + { + "epoch": 57.62, + "learning_rate": 2.1201938217858465e-05, + "loss": 1.9622, + "step": 19905000 + }, + { + "epoch": 57.62, + "learning_rate": 2.120121457021119e-05, + "loss": 1.9464, + "step": 19905500 + }, + { + "epoch": 57.62, + "learning_rate": 2.1200490922563916e-05, + "loss": 1.9568, + "step": 19906000 + }, + { + "epoch": 57.62, + "learning_rate": 2.119976872221193e-05, + "loss": 1.9552, + "step": 19906500 + }, + { + "epoch": 57.62, + "learning_rate": 2.1199045074564654e-05, + "loss": 1.9571, + "step": 19907000 + }, + { + "epoch": 57.62, + "learning_rate": 2.119832142691738e-05, + "loss": 1.9699, + "step": 19907500 + }, + { + "epoch": 57.63, + "learning_rate": 2.11975977792701e-05, + "loss": 1.9798, + "step": 19908000 + }, + { + "epoch": 57.63, + "learning_rate": 2.1196874131622824e-05, + "loss": 1.9592, + "step": 19908500 + }, + { + "epoch": 57.63, + "learning_rate": 2.1196150483975546e-05, + "loss": 1.9668, + "step": 19909000 + }, + { + "epoch": 57.63, + "learning_rate": 2.1195428283623565e-05, + "loss": 1.9733, + "step": 19909500 + }, + { + "epoch": 57.63, + "learning_rate": 2.119470463597629e-05, + "loss": 1.955, + "step": 19910000 + }, + { + "epoch": 57.63, + "learning_rate": 2.1193980988329013e-05, + "loss": 1.9724, + "step": 19910500 + }, + { + "epoch": 57.63, + "learning_rate": 2.1193257340681735e-05, + "loss": 1.9653, + "step": 19911000 + }, + { + "epoch": 57.64, + "learning_rate": 2.1192533693034457e-05, + "loss": 1.9464, + "step": 19911500 + }, + { + "epoch": 57.64, + "learning_rate": 2.119181004538718e-05, + "loss": 1.9243, + "step": 19912000 + }, + { + "epoch": 57.64, + "learning_rate": 2.1191086397739905e-05, + "loss": 1.9631, + "step": 19912500 + }, + { + "epoch": 57.64, + "learning_rate": 2.1190362750092627e-05, + "loss": 1.9624, + "step": 19913000 + }, + { + "epoch": 57.64, + "learning_rate": 2.1189639102445353e-05, + "loss": 1.971, + "step": 19913500 + }, + { + "epoch": 57.64, + "learning_rate": 2.1188915454798075e-05, + "loss": 1.9665, + "step": 19914000 + }, + { + "epoch": 57.64, + "learning_rate": 2.1188191807150798e-05, + "loss": 1.9473, + "step": 19914500 + }, + { + "epoch": 57.65, + "learning_rate": 2.118746815950352e-05, + "loss": 1.954, + "step": 19915000 + }, + { + "epoch": 57.65, + "learning_rate": 2.1186744511856242e-05, + "loss": 1.9689, + "step": 19915500 + }, + { + "epoch": 57.65, + "learning_rate": 2.1186020864208968e-05, + "loss": 1.9518, + "step": 19916000 + }, + { + "epoch": 57.65, + "learning_rate": 2.118529721656169e-05, + "loss": 1.9413, + "step": 19916500 + }, + { + "epoch": 57.65, + "learning_rate": 2.1184573568914416e-05, + "loss": 1.9559, + "step": 19917000 + }, + { + "epoch": 57.65, + "learning_rate": 2.1183849921267138e-05, + "loss": 1.982, + "step": 19917500 + }, + { + "epoch": 57.65, + "learning_rate": 2.1183127720915157e-05, + "loss": 1.9617, + "step": 19918000 + }, + { + "epoch": 57.66, + "learning_rate": 2.1182405520563172e-05, + "loss": 1.9661, + "step": 19918500 + }, + { + "epoch": 57.66, + "learning_rate": 2.118168332021119e-05, + "loss": 1.972, + "step": 19919000 + }, + { + "epoch": 57.66, + "learning_rate": 2.1180959672563913e-05, + "loss": 1.9558, + "step": 19919500 + }, + { + "epoch": 57.66, + "learning_rate": 2.1180236024916636e-05, + "loss": 1.9745, + "step": 19920000 + }, + { + "epoch": 57.66, + "learning_rate": 2.1179512377269358e-05, + "loss": 1.9621, + "step": 19920500 + }, + { + "epoch": 57.66, + "learning_rate": 2.1178788729622084e-05, + "loss": 1.9785, + "step": 19921000 + }, + { + "epoch": 57.66, + "learning_rate": 2.1178065081974806e-05, + "loss": 1.9823, + "step": 19921500 + }, + { + "epoch": 57.67, + "learning_rate": 2.117734143432753e-05, + "loss": 1.9511, + "step": 19922000 + }, + { + "epoch": 57.67, + "learning_rate": 2.1176617786680254e-05, + "loss": 1.9704, + "step": 19922500 + }, + { + "epoch": 57.67, + "learning_rate": 2.1175894139032976e-05, + "loss": 1.9337, + "step": 19923000 + }, + { + "epoch": 57.67, + "learning_rate": 2.1175171938680995e-05, + "loss": 1.9653, + "step": 19923500 + }, + { + "epoch": 57.67, + "learning_rate": 2.1174448291033717e-05, + "loss": 1.9702, + "step": 19924000 + }, + { + "epoch": 57.67, + "learning_rate": 2.1173724643386443e-05, + "loss": 1.9776, + "step": 19924500 + }, + { + "epoch": 57.67, + "learning_rate": 2.1173000995739165e-05, + "loss": 1.9904, + "step": 19925000 + }, + { + "epoch": 57.68, + "learning_rate": 2.1172277348091887e-05, + "loss": 1.9552, + "step": 19925500 + }, + { + "epoch": 57.68, + "learning_rate": 2.117155370044461e-05, + "loss": 1.9677, + "step": 19926000 + }, + { + "epoch": 57.68, + "learning_rate": 2.117083005279733e-05, + "loss": 1.9749, + "step": 19926500 + }, + { + "epoch": 57.68, + "learning_rate": 2.1170106405150057e-05, + "loss": 1.9658, + "step": 19927000 + }, + { + "epoch": 57.68, + "learning_rate": 2.116938565209337e-05, + "loss": 1.9675, + "step": 19927500 + }, + { + "epoch": 57.68, + "learning_rate": 2.1168662004446092e-05, + "loss": 1.9814, + "step": 19928000 + }, + { + "epoch": 57.68, + "learning_rate": 2.1167938356798817e-05, + "loss": 1.9813, + "step": 19928500 + }, + { + "epoch": 57.69, + "learning_rate": 2.116721470915154e-05, + "loss": 1.9537, + "step": 19929000 + }, + { + "epoch": 57.69, + "learning_rate": 2.1166491061504262e-05, + "loss": 1.9777, + "step": 19929500 + }, + { + "epoch": 57.69, + "learning_rate": 2.1165767413856984e-05, + "loss": 1.9821, + "step": 19930000 + }, + { + "epoch": 57.69, + "learning_rate": 2.1165045213505003e-05, + "loss": 1.9759, + "step": 19930500 + }, + { + "epoch": 57.69, + "learning_rate": 2.116432301315302e-05, + "loss": 1.9728, + "step": 19931000 + }, + { + "epoch": 57.69, + "learning_rate": 2.1163599365505744e-05, + "loss": 1.9607, + "step": 19931500 + }, + { + "epoch": 57.69, + "learning_rate": 2.1162875717858466e-05, + "loss": 1.9651, + "step": 19932000 + }, + { + "epoch": 57.7, + "learning_rate": 2.1162152070211192e-05, + "loss": 1.9585, + "step": 19932500 + }, + { + "epoch": 57.7, + "learning_rate": 2.1161428422563914e-05, + "loss": 1.9594, + "step": 19933000 + }, + { + "epoch": 57.7, + "learning_rate": 2.1160706222211933e-05, + "loss": 2.0025, + "step": 19933500 + }, + { + "epoch": 57.7, + "learning_rate": 2.1159982574564655e-05, + "loss": 1.9695, + "step": 19934000 + }, + { + "epoch": 57.7, + "learning_rate": 2.1159258926917378e-05, + "loss": 1.9557, + "step": 19934500 + }, + { + "epoch": 57.7, + "learning_rate": 2.11585352792701e-05, + "loss": 1.9759, + "step": 19935000 + }, + { + "epoch": 57.71, + "learning_rate": 2.1157811631622822e-05, + "loss": 1.9569, + "step": 19935500 + }, + { + "epoch": 57.71, + "learning_rate": 2.1157087983975548e-05, + "loss": 1.9579, + "step": 19936000 + }, + { + "epoch": 57.71, + "learning_rate": 2.115636433632827e-05, + "loss": 1.9714, + "step": 19936500 + }, + { + "epoch": 57.71, + "learning_rate": 2.1155640688680996e-05, + "loss": 1.9923, + "step": 19937000 + }, + { + "epoch": 57.71, + "learning_rate": 2.1154917041033718e-05, + "loss": 2.0006, + "step": 19937500 + }, + { + "epoch": 57.71, + "learning_rate": 2.115419339338644e-05, + "loss": 1.9866, + "step": 19938000 + }, + { + "epoch": 57.71, + "learning_rate": 2.1153469745739162e-05, + "loss": 1.9405, + "step": 19938500 + }, + { + "epoch": 57.72, + "learning_rate": 2.1152746098091885e-05, + "loss": 1.9563, + "step": 19939000 + }, + { + "epoch": 57.72, + "learning_rate": 2.115202245044461e-05, + "loss": 1.9628, + "step": 19939500 + }, + { + "epoch": 57.72, + "learning_rate": 2.115130025009263e-05, + "loss": 1.9866, + "step": 19940000 + }, + { + "epoch": 57.72, + "learning_rate": 2.115057660244535e-05, + "loss": 1.9511, + "step": 19940500 + }, + { + "epoch": 57.72, + "learning_rate": 2.1149852954798074e-05, + "loss": 1.9593, + "step": 19941000 + }, + { + "epoch": 57.72, + "learning_rate": 2.1149129307150796e-05, + "loss": 1.9588, + "step": 19941500 + }, + { + "epoch": 57.72, + "learning_rate": 2.114840565950352e-05, + "loss": 1.9959, + "step": 19942000 + }, + { + "epoch": 57.73, + "learning_rate": 2.1147682011856244e-05, + "loss": 1.9874, + "step": 19942500 + }, + { + "epoch": 57.73, + "learning_rate": 2.114695981150426e-05, + "loss": 1.9618, + "step": 19943000 + }, + { + "epoch": 57.73, + "learning_rate": 2.114623905844757e-05, + "loss": 1.9668, + "step": 19943500 + }, + { + "epoch": 57.73, + "learning_rate": 2.1145515410800297e-05, + "loss": 1.9525, + "step": 19944000 + }, + { + "epoch": 57.73, + "learning_rate": 2.1144791763153023e-05, + "loss": 1.967, + "step": 19944500 + }, + { + "epoch": 57.73, + "learning_rate": 2.1144069562801038e-05, + "loss": 1.9709, + "step": 19945000 + }, + { + "epoch": 57.73, + "learning_rate": 2.114334591515376e-05, + "loss": 1.9409, + "step": 19945500 + }, + { + "epoch": 57.74, + "learning_rate": 2.1142622267506486e-05, + "loss": 1.9676, + "step": 19946000 + }, + { + "epoch": 57.74, + "learning_rate": 2.114189861985921e-05, + "loss": 1.9579, + "step": 19946500 + }, + { + "epoch": 57.74, + "learning_rate": 2.114117497221193e-05, + "loss": 1.9647, + "step": 19947000 + }, + { + "epoch": 57.74, + "learning_rate": 2.1140451324564656e-05, + "loss": 1.9754, + "step": 19947500 + }, + { + "epoch": 57.74, + "learning_rate": 2.113972767691738e-05, + "loss": 1.9559, + "step": 19948000 + }, + { + "epoch": 57.74, + "learning_rate": 2.11390040292701e-05, + "loss": 1.964, + "step": 19948500 + }, + { + "epoch": 57.74, + "learning_rate": 2.1138280381622823e-05, + "loss": 1.9683, + "step": 19949000 + }, + { + "epoch": 57.75, + "learning_rate": 2.113755673397555e-05, + "loss": 1.9667, + "step": 19949500 + }, + { + "epoch": 57.75, + "learning_rate": 2.1136834533623564e-05, + "loss": 1.9533, + "step": 19950000 + }, + { + "epoch": 57.75, + "learning_rate": 2.1136110885976286e-05, + "loss": 1.9748, + "step": 19950500 + }, + { + "epoch": 57.75, + "learning_rate": 2.1135387238329012e-05, + "loss": 1.946, + "step": 19951000 + }, + { + "epoch": 57.75, + "learning_rate": 2.1134663590681734e-05, + "loss": 1.9407, + "step": 19951500 + }, + { + "epoch": 57.75, + "learning_rate": 2.113393994303446e-05, + "loss": 1.9488, + "step": 19952000 + }, + { + "epoch": 57.75, + "learning_rate": 2.1133216295387182e-05, + "loss": 1.9609, + "step": 19952500 + }, + { + "epoch": 57.76, + "learning_rate": 2.1132492647739904e-05, + "loss": 1.9669, + "step": 19953000 + }, + { + "epoch": 57.76, + "learning_rate": 2.1131769000092626e-05, + "loss": 1.9952, + "step": 19953500 + }, + { + "epoch": 57.76, + "learning_rate": 2.113104535244535e-05, + "loss": 1.9416, + "step": 19954000 + }, + { + "epoch": 57.76, + "learning_rate": 2.1130321704798074e-05, + "loss": 1.9689, + "step": 19954500 + }, + { + "epoch": 57.76, + "learning_rate": 2.11295980571508e-05, + "loss": 1.9576, + "step": 19955000 + }, + { + "epoch": 57.76, + "learning_rate": 2.1128874409503522e-05, + "loss": 1.9607, + "step": 19955500 + }, + { + "epoch": 57.76, + "learning_rate": 2.1128150761856244e-05, + "loss": 1.9532, + "step": 19956000 + }, + { + "epoch": 57.77, + "learning_rate": 2.1127427114208967e-05, + "loss": 1.9478, + "step": 19956500 + }, + { + "epoch": 57.77, + "learning_rate": 2.112670346656169e-05, + "loss": 1.987, + "step": 19957000 + }, + { + "epoch": 57.77, + "learning_rate": 2.112597981891441e-05, + "loss": 1.9671, + "step": 19957500 + }, + { + "epoch": 57.77, + "learning_rate": 2.1125256171267137e-05, + "loss": 1.9663, + "step": 19958000 + }, + { + "epoch": 57.77, + "learning_rate": 2.1124533970915156e-05, + "loss": 1.9587, + "step": 19958500 + }, + { + "epoch": 57.77, + "learning_rate": 2.1123810323267878e-05, + "loss": 1.9774, + "step": 19959000 + }, + { + "epoch": 57.77, + "learning_rate": 2.112308957021119e-05, + "loss": 1.967, + "step": 19959500 + }, + { + "epoch": 57.78, + "learning_rate": 2.1122365922563912e-05, + "loss": 1.9666, + "step": 19960000 + }, + { + "epoch": 57.78, + "learning_rate": 2.1121642274916638e-05, + "loss": 1.9921, + "step": 19960500 + }, + { + "epoch": 57.78, + "learning_rate": 2.112091862726936e-05, + "loss": 1.9687, + "step": 19961000 + }, + { + "epoch": 57.78, + "learning_rate": 2.1120194979622083e-05, + "loss": 1.9573, + "step": 19961500 + }, + { + "epoch": 57.78, + "learning_rate": 2.1119472779270098e-05, + "loss": 1.9537, + "step": 19962000 + }, + { + "epoch": 57.78, + "learning_rate": 2.1118749131622824e-05, + "loss": 1.97, + "step": 19962500 + }, + { + "epoch": 57.78, + "learning_rate": 2.1118026931270843e-05, + "loss": 1.965, + "step": 19963000 + }, + { + "epoch": 57.79, + "learning_rate": 2.1117306178214155e-05, + "loss": 1.9656, + "step": 19963500 + }, + { + "epoch": 57.79, + "learning_rate": 2.1116582530566877e-05, + "loss": 1.9641, + "step": 19964000 + }, + { + "epoch": 57.79, + "learning_rate": 2.1115860330214896e-05, + "loss": 1.9862, + "step": 19964500 + }, + { + "epoch": 57.79, + "learning_rate": 2.111513668256762e-05, + "loss": 1.954, + "step": 19965000 + }, + { + "epoch": 57.79, + "learning_rate": 2.111441303492034e-05, + "loss": 1.9682, + "step": 19965500 + }, + { + "epoch": 57.79, + "learning_rate": 2.1113689387273063e-05, + "loss": 1.9617, + "step": 19966000 + }, + { + "epoch": 57.79, + "learning_rate": 2.111296573962579e-05, + "loss": 1.9631, + "step": 19966500 + }, + { + "epoch": 57.8, + "learning_rate": 2.1112242091978514e-05, + "loss": 1.959, + "step": 19967000 + }, + { + "epoch": 57.8, + "learning_rate": 2.1111518444331236e-05, + "loss": 1.9609, + "step": 19967500 + }, + { + "epoch": 57.8, + "learning_rate": 2.111079479668396e-05, + "loss": 1.9591, + "step": 19968000 + }, + { + "epoch": 57.8, + "learning_rate": 2.111007114903668e-05, + "loss": 1.9736, + "step": 19968500 + }, + { + "epoch": 57.8, + "learning_rate": 2.1109347501389403e-05, + "loss": 1.9568, + "step": 19969000 + }, + { + "epoch": 57.8, + "learning_rate": 2.1108623853742125e-05, + "loss": 1.9657, + "step": 19969500 + }, + { + "epoch": 57.8, + "learning_rate": 2.110790020609485e-05, + "loss": 1.9744, + "step": 19970000 + }, + { + "epoch": 57.81, + "learning_rate": 2.1107176558447576e-05, + "loss": 1.9527, + "step": 19970500 + }, + { + "epoch": 57.81, + "learning_rate": 2.11064529108003e-05, + "loss": 1.9875, + "step": 19971000 + }, + { + "epoch": 57.81, + "learning_rate": 2.110572926315302e-05, + "loss": 1.9795, + "step": 19971500 + }, + { + "epoch": 57.81, + "learning_rate": 2.1105005615505743e-05, + "loss": 1.9676, + "step": 19972000 + }, + { + "epoch": 57.81, + "learning_rate": 2.1104281967858465e-05, + "loss": 1.9447, + "step": 19972500 + }, + { + "epoch": 57.81, + "learning_rate": 2.1103558320211188e-05, + "loss": 1.9585, + "step": 19973000 + }, + { + "epoch": 57.82, + "learning_rate": 2.110283611985921e-05, + "loss": 1.9598, + "step": 19973500 + }, + { + "epoch": 57.82, + "learning_rate": 2.1102112472211932e-05, + "loss": 1.9608, + "step": 19974000 + }, + { + "epoch": 57.82, + "learning_rate": 2.110139027185995e-05, + "loss": 1.9577, + "step": 19974500 + }, + { + "epoch": 57.82, + "learning_rate": 2.1100666624212673e-05, + "loss": 1.989, + "step": 19975000 + }, + { + "epoch": 57.82, + "learning_rate": 2.1099942976565396e-05, + "loss": 1.9726, + "step": 19975500 + }, + { + "epoch": 57.82, + "learning_rate": 2.1099219328918118e-05, + "loss": 1.9818, + "step": 19976000 + }, + { + "epoch": 57.82, + "learning_rate": 2.109849568127084e-05, + "loss": 1.9596, + "step": 19976500 + }, + { + "epoch": 57.83, + "learning_rate": 2.1097772033623566e-05, + "loss": 1.9781, + "step": 19977000 + }, + { + "epoch": 57.83, + "learning_rate": 2.109704838597629e-05, + "loss": 1.9587, + "step": 19977500 + }, + { + "epoch": 57.83, + "learning_rate": 2.1096324738329014e-05, + "loss": 1.9693, + "step": 19978000 + }, + { + "epoch": 57.83, + "learning_rate": 2.109560253797703e-05, + "loss": 1.969, + "step": 19978500 + }, + { + "epoch": 57.83, + "learning_rate": 2.109487889032975e-05, + "loss": 1.9657, + "step": 19979000 + }, + { + "epoch": 57.83, + "learning_rate": 2.1094155242682477e-05, + "loss": 1.9685, + "step": 19979500 + }, + { + "epoch": 57.83, + "learning_rate": 2.10934315950352e-05, + "loss": 1.9537, + "step": 19980000 + }, + { + "epoch": 57.84, + "learning_rate": 2.109270794738792e-05, + "loss": 1.9505, + "step": 19980500 + }, + { + "epoch": 57.84, + "learning_rate": 2.1091984299740647e-05, + "loss": 1.9875, + "step": 19981000 + }, + { + "epoch": 57.84, + "learning_rate": 2.109126065209337e-05, + "loss": 1.9757, + "step": 19981500 + }, + { + "epoch": 57.84, + "learning_rate": 2.109053700444609e-05, + "loss": 1.9678, + "step": 19982000 + }, + { + "epoch": 57.84, + "learning_rate": 2.1089813356798817e-05, + "loss": 1.9902, + "step": 19982500 + }, + { + "epoch": 57.84, + "learning_rate": 2.108908970915154e-05, + "loss": 1.956, + "step": 19983000 + }, + { + "epoch": 57.84, + "learning_rate": 2.108836606150426e-05, + "loss": 1.9486, + "step": 19983500 + }, + { + "epoch": 57.85, + "learning_rate": 2.1087642413856984e-05, + "loss": 1.9535, + "step": 19984000 + }, + { + "epoch": 57.85, + "learning_rate": 2.1086920213505003e-05, + "loss": 1.9682, + "step": 19984500 + }, + { + "epoch": 57.85, + "learning_rate": 2.108619656585773e-05, + "loss": 1.9611, + "step": 19985000 + }, + { + "epoch": 57.85, + "learning_rate": 2.108547291821045e-05, + "loss": 1.9468, + "step": 19985500 + }, + { + "epoch": 57.85, + "learning_rate": 2.1084749270563173e-05, + "loss": 1.9689, + "step": 19986000 + }, + { + "epoch": 57.85, + "learning_rate": 2.1084025622915895e-05, + "loss": 1.9674, + "step": 19986500 + }, + { + "epoch": 57.85, + "learning_rate": 2.1083301975268617e-05, + "loss": 1.9879, + "step": 19987000 + }, + { + "epoch": 57.86, + "learning_rate": 2.108257832762134e-05, + "loss": 1.9773, + "step": 19987500 + }, + { + "epoch": 57.86, + "learning_rate": 2.1081854679974065e-05, + "loss": 1.9425, + "step": 19988000 + }, + { + "epoch": 57.86, + "learning_rate": 2.108113103232679e-05, + "loss": 1.9602, + "step": 19988500 + }, + { + "epoch": 57.86, + "learning_rate": 2.1080408831974806e-05, + "loss": 1.9556, + "step": 19989000 + }, + { + "epoch": 57.86, + "learning_rate": 2.1079686631622825e-05, + "loss": 1.9719, + "step": 19989500 + }, + { + "epoch": 57.86, + "learning_rate": 2.1078962983975548e-05, + "loss": 1.9588, + "step": 19990000 + }, + { + "epoch": 57.86, + "learning_rate": 2.107823933632827e-05, + "loss": 1.981, + "step": 19990500 + }, + { + "epoch": 57.87, + "learning_rate": 2.1077515688680992e-05, + "loss": 1.9864, + "step": 19991000 + }, + { + "epoch": 57.87, + "learning_rate": 2.1076792041033718e-05, + "loss": 1.9525, + "step": 19991500 + }, + { + "epoch": 57.87, + "learning_rate": 2.1076068393386443e-05, + "loss": 1.9566, + "step": 19992000 + }, + { + "epoch": 57.87, + "learning_rate": 2.1075344745739165e-05, + "loss": 1.9269, + "step": 19992500 + }, + { + "epoch": 57.87, + "learning_rate": 2.1074621098091888e-05, + "loss": 1.9688, + "step": 19993000 + }, + { + "epoch": 57.87, + "learning_rate": 2.107389745044461e-05, + "loss": 1.9473, + "step": 19993500 + }, + { + "epoch": 57.87, + "learning_rate": 2.1073173802797332e-05, + "loss": 1.9643, + "step": 19994000 + }, + { + "epoch": 57.88, + "learning_rate": 2.1072450155150054e-05, + "loss": 1.9613, + "step": 19994500 + }, + { + "epoch": 57.88, + "learning_rate": 2.107172650750278e-05, + "loss": 1.9751, + "step": 19995000 + }, + { + "epoch": 57.88, + "learning_rate": 2.1071002859855506e-05, + "loss": 1.9751, + "step": 19995500 + }, + { + "epoch": 57.88, + "learning_rate": 2.1070282106798818e-05, + "loss": 1.9593, + "step": 19996000 + }, + { + "epoch": 57.88, + "learning_rate": 2.106955845915154e-05, + "loss": 1.9593, + "step": 19996500 + }, + { + "epoch": 57.88, + "learning_rate": 2.1068834811504262e-05, + "loss": 1.9877, + "step": 19997000 + }, + { + "epoch": 57.88, + "learning_rate": 2.1068111163856985e-05, + "loss": 1.966, + "step": 19997500 + }, + { + "epoch": 57.89, + "learning_rate": 2.1067388963505004e-05, + "loss": 1.9641, + "step": 19998000 + }, + { + "epoch": 57.89, + "learning_rate": 2.1066665315857726e-05, + "loss": 1.9598, + "step": 19998500 + }, + { + "epoch": 57.89, + "learning_rate": 2.1065941668210448e-05, + "loss": 1.9751, + "step": 19999000 + }, + { + "epoch": 57.89, + "learning_rate": 2.1065218020563174e-05, + "loss": 1.9557, + "step": 19999500 + }, + { + "epoch": 57.89, + "learning_rate": 2.1064494372915896e-05, + "loss": 1.9762, + "step": 20000000 + }, + { + "epoch": 57.89, + "learning_rate": 2.1063770725268618e-05, + "loss": 1.9719, + "step": 20000500 + }, + { + "epoch": 57.89, + "learning_rate": 2.1063047077621344e-05, + "loss": 1.9548, + "step": 20001000 + }, + { + "epoch": 57.9, + "learning_rate": 2.1062323429974066e-05, + "loss": 1.941, + "step": 20001500 + }, + { + "epoch": 57.9, + "learning_rate": 2.1061599782326788e-05, + "loss": 1.9545, + "step": 20002000 + }, + { + "epoch": 57.9, + "learning_rate": 2.1060877581974807e-05, + "loss": 1.9847, + "step": 20002500 + }, + { + "epoch": 57.9, + "learning_rate": 2.1060155381622823e-05, + "loss": 1.9563, + "step": 20003000 + }, + { + "epoch": 57.9, + "learning_rate": 2.105943173397555e-05, + "loss": 1.9785, + "step": 20003500 + }, + { + "epoch": 57.9, + "learning_rate": 2.105870808632827e-05, + "loss": 1.965, + "step": 20004000 + }, + { + "epoch": 57.9, + "learning_rate": 2.1057984438680993e-05, + "loss": 1.9668, + "step": 20004500 + }, + { + "epoch": 57.91, + "learning_rate": 2.105726079103372e-05, + "loss": 1.954, + "step": 20005000 + }, + { + "epoch": 57.91, + "learning_rate": 2.105653714338644e-05, + "loss": 1.9704, + "step": 20005500 + }, + { + "epoch": 57.91, + "learning_rate": 2.1055813495739163e-05, + "loss": 1.9697, + "step": 20006000 + }, + { + "epoch": 57.91, + "learning_rate": 2.1055089848091885e-05, + "loss": 1.9478, + "step": 20006500 + }, + { + "epoch": 57.91, + "learning_rate": 2.105436620044461e-05, + "loss": 1.9644, + "step": 20007000 + }, + { + "epoch": 57.91, + "learning_rate": 2.1053642552797333e-05, + "loss": 1.9774, + "step": 20007500 + }, + { + "epoch": 57.91, + "learning_rate": 2.105291890515006e-05, + "loss": 1.9703, + "step": 20008000 + }, + { + "epoch": 57.92, + "learning_rate": 2.105219525750278e-05, + "loss": 1.9901, + "step": 20008500 + }, + { + "epoch": 57.92, + "learning_rate": 2.1051473057150796e-05, + "loss": 1.9731, + "step": 20009000 + }, + { + "epoch": 57.92, + "learning_rate": 2.1050750856798815e-05, + "loss": 1.9601, + "step": 20009500 + }, + { + "epoch": 57.92, + "learning_rate": 2.1050027209151538e-05, + "loss": 1.9563, + "step": 20010000 + }, + { + "epoch": 57.92, + "learning_rate": 2.104930356150426e-05, + "loss": 1.9911, + "step": 20010500 + }, + { + "epoch": 57.92, + "learning_rate": 2.1048579913856985e-05, + "loss": 1.9503, + "step": 20011000 + }, + { + "epoch": 57.93, + "learning_rate": 2.1047856266209708e-05, + "loss": 1.9525, + "step": 20011500 + }, + { + "epoch": 57.93, + "learning_rate": 2.1047132618562433e-05, + "loss": 1.9734, + "step": 20012000 + }, + { + "epoch": 57.93, + "learning_rate": 2.1046408970915155e-05, + "loss": 1.971, + "step": 20012500 + }, + { + "epoch": 57.93, + "learning_rate": 2.104568677056317e-05, + "loss": 1.9778, + "step": 20013000 + }, + { + "epoch": 57.93, + "learning_rate": 2.1044963122915897e-05, + "loss": 1.9742, + "step": 20013500 + }, + { + "epoch": 57.93, + "learning_rate": 2.104423947526862e-05, + "loss": 1.9502, + "step": 20014000 + }, + { + "epoch": 57.93, + "learning_rate": 2.1043515827621345e-05, + "loss": 1.951, + "step": 20014500 + }, + { + "epoch": 57.94, + "learning_rate": 2.104279362726936e-05, + "loss": 1.9754, + "step": 20015000 + }, + { + "epoch": 57.94, + "learning_rate": 2.1042069979622082e-05, + "loss": 1.9484, + "step": 20015500 + }, + { + "epoch": 57.94, + "learning_rate": 2.1041346331974808e-05, + "loss": 1.9554, + "step": 20016000 + }, + { + "epoch": 57.94, + "learning_rate": 2.104062268432753e-05, + "loss": 1.9466, + "step": 20016500 + }, + { + "epoch": 57.94, + "learning_rate": 2.1039899036680252e-05, + "loss": 1.9806, + "step": 20017000 + }, + { + "epoch": 57.94, + "learning_rate": 2.1039175389032975e-05, + "loss": 1.9551, + "step": 20017500 + }, + { + "epoch": 57.94, + "learning_rate": 2.10384517413857e-05, + "loss": 1.9886, + "step": 20018000 + }, + { + "epoch": 57.95, + "learning_rate": 2.1037728093738422e-05, + "loss": 1.9568, + "step": 20018500 + }, + { + "epoch": 57.95, + "learning_rate": 2.1037004446091145e-05, + "loss": 1.9626, + "step": 20019000 + }, + { + "epoch": 57.95, + "learning_rate": 2.103628079844387e-05, + "loss": 1.9658, + "step": 20019500 + }, + { + "epoch": 57.95, + "learning_rate": 2.1035557150796593e-05, + "loss": 1.951, + "step": 20020000 + }, + { + "epoch": 57.95, + "learning_rate": 2.1034833503149315e-05, + "loss": 1.9926, + "step": 20020500 + }, + { + "epoch": 57.95, + "learning_rate": 2.1034109855502037e-05, + "loss": 1.974, + "step": 20021000 + }, + { + "epoch": 57.95, + "learning_rate": 2.1033387655150056e-05, + "loss": 1.9644, + "step": 20021500 + }, + { + "epoch": 57.96, + "learning_rate": 2.1032665454798075e-05, + "loss": 1.9588, + "step": 20022000 + }, + { + "epoch": 57.96, + "learning_rate": 2.1031941807150797e-05, + "loss": 1.987, + "step": 20022500 + }, + { + "epoch": 57.96, + "learning_rate": 2.1031218159503523e-05, + "loss": 1.9317, + "step": 20023000 + }, + { + "epoch": 57.96, + "learning_rate": 2.1030494511856245e-05, + "loss": 1.9692, + "step": 20023500 + }, + { + "epoch": 57.96, + "learning_rate": 2.1029770864208967e-05, + "loss": 1.9744, + "step": 20024000 + }, + { + "epoch": 57.96, + "learning_rate": 2.102904721656169e-05, + "loss": 1.9559, + "step": 20024500 + }, + { + "epoch": 57.96, + "learning_rate": 2.1028323568914412e-05, + "loss": 1.9539, + "step": 20025000 + }, + { + "epoch": 57.97, + "learning_rate": 2.1027601368562434e-05, + "loss": 1.9591, + "step": 20025500 + }, + { + "epoch": 57.97, + "learning_rate": 2.102687916821045e-05, + "loss": 1.9876, + "step": 20026000 + }, + { + "epoch": 57.97, + "learning_rate": 2.1026155520563172e-05, + "loss": 1.9682, + "step": 20026500 + }, + { + "epoch": 57.97, + "learning_rate": 2.1025431872915897e-05, + "loss": 1.962, + "step": 20027000 + }, + { + "epoch": 57.97, + "learning_rate": 2.102470822526862e-05, + "loss": 1.9751, + "step": 20027500 + }, + { + "epoch": 57.97, + "learning_rate": 2.1023984577621342e-05, + "loss": 1.9698, + "step": 20028000 + }, + { + "epoch": 57.97, + "learning_rate": 2.1023260929974064e-05, + "loss": 1.9724, + "step": 20028500 + }, + { + "epoch": 57.98, + "learning_rate": 2.1022537282326786e-05, + "loss": 1.9752, + "step": 20029000 + }, + { + "epoch": 57.98, + "learning_rate": 2.1021813634679512e-05, + "loss": 1.945, + "step": 20029500 + }, + { + "epoch": 57.98, + "learning_rate": 2.1021089987032234e-05, + "loss": 1.9908, + "step": 20030000 + }, + { + "epoch": 57.98, + "learning_rate": 2.102036633938496e-05, + "loss": 1.9757, + "step": 20030500 + }, + { + "epoch": 57.98, + "learning_rate": 2.1019644139032975e-05, + "loss": 1.9814, + "step": 20031000 + }, + { + "epoch": 57.98, + "learning_rate": 2.1018920491385698e-05, + "loss": 1.9823, + "step": 20031500 + }, + { + "epoch": 57.98, + "learning_rate": 2.1018198291033717e-05, + "loss": 1.9496, + "step": 20032000 + }, + { + "epoch": 57.99, + "learning_rate": 2.101747464338644e-05, + "loss": 1.9681, + "step": 20032500 + }, + { + "epoch": 57.99, + "learning_rate": 2.101675099573916e-05, + "loss": 1.9887, + "step": 20033000 + }, + { + "epoch": 57.99, + "learning_rate": 2.1016027348091887e-05, + "loss": 1.9705, + "step": 20033500 + }, + { + "epoch": 57.99, + "learning_rate": 2.1015303700444612e-05, + "loss": 1.9888, + "step": 20034000 + }, + { + "epoch": 57.99, + "learning_rate": 2.1014580052797335e-05, + "loss": 1.966, + "step": 20034500 + }, + { + "epoch": 57.99, + "learning_rate": 2.1013856405150057e-05, + "loss": 1.9558, + "step": 20035000 + }, + { + "epoch": 57.99, + "learning_rate": 2.101313275750278e-05, + "loss": 1.9812, + "step": 20035500 + }, + { + "epoch": 58.0, + "learning_rate": 2.10124091098555e-05, + "loss": 1.968, + "step": 20036000 + }, + { + "epoch": 58.0, + "learning_rate": 2.1011685462208227e-05, + "loss": 1.9581, + "step": 20036500 + }, + { + "epoch": 58.0, + "learning_rate": 2.1010963261856246e-05, + "loss": 1.9541, + "step": 20037000 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.6768148279968789, + "eval_accuracy_mlm": 0.6435821900945138, + "eval_accuracy_nsp": 0.8549815988597251, + "eval_loss": 2.161170721054077, + "eval_runtime": 332.2544, + "eval_samples_per_second": 1313.409, + "eval_steps_per_second": 54.726, + "step": 20037376 + }, + { + "epoch": 58.0, + "learning_rate": 2.1010239614208968e-05, + "loss": 1.9466, + "step": 20037500 + }, + { + "epoch": 58.0, + "learning_rate": 2.1009520308447574e-05, + "loss": 1.923, + "step": 20038000 + }, + { + "epoch": 58.0, + "learning_rate": 2.10087966608003e-05, + "loss": 1.9453, + "step": 20038500 + }, + { + "epoch": 58.0, + "learning_rate": 2.100807301315302e-05, + "loss": 1.9405, + "step": 20039000 + }, + { + "epoch": 58.01, + "learning_rate": 2.1007349365505744e-05, + "loss": 1.9429, + "step": 20039500 + }, + { + "epoch": 58.01, + "learning_rate": 2.1006625717858466e-05, + "loss": 1.9558, + "step": 20040000 + }, + { + "epoch": 58.01, + "learning_rate": 2.1005902070211188e-05, + "loss": 1.9251, + "step": 20040500 + }, + { + "epoch": 58.01, + "learning_rate": 2.1005178422563914e-05, + "loss": 1.9413, + "step": 20041000 + }, + { + "epoch": 58.01, + "learning_rate": 2.1004454774916636e-05, + "loss": 1.9536, + "step": 20041500 + }, + { + "epoch": 58.01, + "learning_rate": 2.100373112726936e-05, + "loss": 1.9315, + "step": 20042000 + }, + { + "epoch": 58.01, + "learning_rate": 2.1003007479622084e-05, + "loss": 1.9507, + "step": 20042500 + }, + { + "epoch": 58.02, + "learning_rate": 2.1002283831974806e-05, + "loss": 1.9219, + "step": 20043000 + }, + { + "epoch": 58.02, + "learning_rate": 2.100156018432753e-05, + "loss": 1.9073, + "step": 20043500 + }, + { + "epoch": 58.02, + "learning_rate": 2.100083653668025e-05, + "loss": 1.9858, + "step": 20044000 + }, + { + "epoch": 58.02, + "learning_rate": 2.1000112889032976e-05, + "loss": 1.9526, + "step": 20044500 + }, + { + "epoch": 58.02, + "learning_rate": 2.0999389241385702e-05, + "loss": 1.9469, + "step": 20045000 + }, + { + "epoch": 58.02, + "learning_rate": 2.0998667041033717e-05, + "loss": 1.9319, + "step": 20045500 + }, + { + "epoch": 58.02, + "learning_rate": 2.099794339338644e-05, + "loss": 1.9208, + "step": 20046000 + }, + { + "epoch": 58.03, + "learning_rate": 2.0997219745739162e-05, + "loss": 1.9513, + "step": 20046500 + }, + { + "epoch": 58.03, + "learning_rate": 2.099649754538718e-05, + "loss": 1.9387, + "step": 20047000 + }, + { + "epoch": 58.03, + "learning_rate": 2.0995773897739903e-05, + "loss": 1.9501, + "step": 20047500 + }, + { + "epoch": 58.03, + "learning_rate": 2.0995050250092625e-05, + "loss": 1.9281, + "step": 20048000 + }, + { + "epoch": 58.03, + "learning_rate": 2.099432660244535e-05, + "loss": 1.9667, + "step": 20048500 + }, + { + "epoch": 58.03, + "learning_rate": 2.0993602954798077e-05, + "loss": 1.9531, + "step": 20049000 + }, + { + "epoch": 58.04, + "learning_rate": 2.09928793071508e-05, + "loss": 1.9427, + "step": 20049500 + }, + { + "epoch": 58.04, + "learning_rate": 2.099215565950352e-05, + "loss": 1.9487, + "step": 20050000 + }, + { + "epoch": 58.04, + "learning_rate": 2.0991432011856243e-05, + "loss": 1.9504, + "step": 20050500 + }, + { + "epoch": 58.04, + "learning_rate": 2.0990708364208965e-05, + "loss": 1.9451, + "step": 20051000 + }, + { + "epoch": 58.04, + "learning_rate": 2.0989984716561688e-05, + "loss": 1.9576, + "step": 20051500 + }, + { + "epoch": 58.04, + "learning_rate": 2.0989261068914413e-05, + "loss": 1.9416, + "step": 20052000 + }, + { + "epoch": 58.04, + "learning_rate": 2.098853742126714e-05, + "loss": 1.933, + "step": 20052500 + }, + { + "epoch": 58.05, + "learning_rate": 2.098781377361986e-05, + "loss": 1.9273, + "step": 20053000 + }, + { + "epoch": 58.05, + "learning_rate": 2.0987091573267877e-05, + "loss": 1.938, + "step": 20053500 + }, + { + "epoch": 58.05, + "learning_rate": 2.0986367925620602e-05, + "loss": 1.9291, + "step": 20054000 + }, + { + "epoch": 58.05, + "learning_rate": 2.0985644277973325e-05, + "loss": 1.9634, + "step": 20054500 + }, + { + "epoch": 58.05, + "learning_rate": 2.0984920630326047e-05, + "loss": 1.945, + "step": 20055000 + }, + { + "epoch": 58.05, + "learning_rate": 2.0984198429974066e-05, + "loss": 1.9418, + "step": 20055500 + }, + { + "epoch": 58.05, + "learning_rate": 2.0983474782326788e-05, + "loss": 1.9454, + "step": 20056000 + }, + { + "epoch": 58.06, + "learning_rate": 2.0982751134679514e-05, + "loss": 1.9819, + "step": 20056500 + }, + { + "epoch": 58.06, + "learning_rate": 2.0982027487032236e-05, + "loss": 1.9666, + "step": 20057000 + }, + { + "epoch": 58.06, + "learning_rate": 2.0981303839384958e-05, + "loss": 1.9393, + "step": 20057500 + }, + { + "epoch": 58.06, + "learning_rate": 2.098058019173768e-05, + "loss": 1.9587, + "step": 20058000 + }, + { + "epoch": 58.06, + "learning_rate": 2.0979856544090403e-05, + "loss": 1.9365, + "step": 20058500 + }, + { + "epoch": 58.06, + "learning_rate": 2.0979132896443128e-05, + "loss": 1.9501, + "step": 20059000 + }, + { + "epoch": 58.06, + "learning_rate": 2.0978410696091147e-05, + "loss": 1.9515, + "step": 20059500 + }, + { + "epoch": 58.07, + "learning_rate": 2.097768704844387e-05, + "loss": 1.9175, + "step": 20060000 + }, + { + "epoch": 58.07, + "learning_rate": 2.097696340079659e-05, + "loss": 1.9313, + "step": 20060500 + }, + { + "epoch": 58.07, + "learning_rate": 2.097624120044461e-05, + "loss": 1.9715, + "step": 20061000 + }, + { + "epoch": 58.07, + "learning_rate": 2.0975517552797333e-05, + "loss": 1.9559, + "step": 20061500 + }, + { + "epoch": 58.07, + "learning_rate": 2.0974793905150055e-05, + "loss": 1.9455, + "step": 20062000 + }, + { + "epoch": 58.07, + "learning_rate": 2.0974070257502777e-05, + "loss": 1.9482, + "step": 20062500 + }, + { + "epoch": 58.07, + "learning_rate": 2.0973346609855503e-05, + "loss": 1.9805, + "step": 20063000 + }, + { + "epoch": 58.08, + "learning_rate": 2.097262296220823e-05, + "loss": 1.9625, + "step": 20063500 + }, + { + "epoch": 58.08, + "learning_rate": 2.097189931456095e-05, + "loss": 1.9367, + "step": 20064000 + }, + { + "epoch": 58.08, + "learning_rate": 2.0971175666913673e-05, + "loss": 1.9569, + "step": 20064500 + }, + { + "epoch": 58.08, + "learning_rate": 2.0970452019266395e-05, + "loss": 1.9367, + "step": 20065000 + }, + { + "epoch": 58.08, + "learning_rate": 2.0969728371619117e-05, + "loss": 1.9408, + "step": 20065500 + }, + { + "epoch": 58.08, + "learning_rate": 2.0969006171267136e-05, + "loss": 1.9539, + "step": 20066000 + }, + { + "epoch": 58.08, + "learning_rate": 2.0968282523619862e-05, + "loss": 1.929, + "step": 20066500 + }, + { + "epoch": 58.09, + "learning_rate": 2.0967558875972584e-05, + "loss": 1.9431, + "step": 20067000 + }, + { + "epoch": 58.09, + "learning_rate": 2.0966835228325306e-05, + "loss": 1.9357, + "step": 20067500 + }, + { + "epoch": 58.09, + "learning_rate": 2.0966113027973325e-05, + "loss": 1.9687, + "step": 20068000 + }, + { + "epoch": 58.09, + "learning_rate": 2.0965389380326048e-05, + "loss": 1.9497, + "step": 20068500 + }, + { + "epoch": 58.09, + "learning_rate": 2.096466573267877e-05, + "loss": 1.9726, + "step": 20069000 + }, + { + "epoch": 58.09, + "learning_rate": 2.0963942085031492e-05, + "loss": 1.9785, + "step": 20069500 + }, + { + "epoch": 58.09, + "learning_rate": 2.0963218437384218e-05, + "loss": 1.9488, + "step": 20070000 + }, + { + "epoch": 58.1, + "learning_rate": 2.0962494789736943e-05, + "loss": 1.9687, + "step": 20070500 + }, + { + "epoch": 58.1, + "learning_rate": 2.0961771142089666e-05, + "loss": 1.9458, + "step": 20071000 + }, + { + "epoch": 58.1, + "learning_rate": 2.0961047494442388e-05, + "loss": 1.961, + "step": 20071500 + }, + { + "epoch": 58.1, + "learning_rate": 2.096032384679511e-05, + "loss": 1.9437, + "step": 20072000 + }, + { + "epoch": 58.1, + "learning_rate": 2.095960164644313e-05, + "loss": 1.9292, + "step": 20072500 + }, + { + "epoch": 58.1, + "learning_rate": 2.0958879446091145e-05, + "loss": 1.9406, + "step": 20073000 + }, + { + "epoch": 58.1, + "learning_rate": 2.0958157245739163e-05, + "loss": 1.9592, + "step": 20073500 + }, + { + "epoch": 58.11, + "learning_rate": 2.0957433598091886e-05, + "loss": 1.945, + "step": 20074000 + }, + { + "epoch": 58.11, + "learning_rate": 2.095670995044461e-05, + "loss": 1.9634, + "step": 20074500 + }, + { + "epoch": 58.11, + "learning_rate": 2.0955986302797334e-05, + "loss": 1.9575, + "step": 20075000 + }, + { + "epoch": 58.11, + "learning_rate": 2.0955262655150056e-05, + "loss": 1.9302, + "step": 20075500 + }, + { + "epoch": 58.11, + "learning_rate": 2.095453900750278e-05, + "loss": 1.9618, + "step": 20076000 + }, + { + "epoch": 58.11, + "learning_rate": 2.0953815359855504e-05, + "loss": 1.9495, + "step": 20076500 + }, + { + "epoch": 58.11, + "learning_rate": 2.0953091712208226e-05, + "loss": 1.9402, + "step": 20077000 + }, + { + "epoch": 58.12, + "learning_rate": 2.0952368064560948e-05, + "loss": 1.9402, + "step": 20077500 + }, + { + "epoch": 58.12, + "learning_rate": 2.0951644416913674e-05, + "loss": 1.9412, + "step": 20078000 + }, + { + "epoch": 58.12, + "learning_rate": 2.0950920769266396e-05, + "loss": 1.9584, + "step": 20078500 + }, + { + "epoch": 58.12, + "learning_rate": 2.0950197121619118e-05, + "loss": 1.9509, + "step": 20079000 + }, + { + "epoch": 58.12, + "learning_rate": 2.0949474921267137e-05, + "loss": 1.9666, + "step": 20079500 + }, + { + "epoch": 58.12, + "learning_rate": 2.094875127361986e-05, + "loss": 1.9573, + "step": 20080000 + }, + { + "epoch": 58.12, + "learning_rate": 2.094802762597258e-05, + "loss": 1.9536, + "step": 20080500 + }, + { + "epoch": 58.13, + "learning_rate": 2.09473054256206e-05, + "loss": 1.9362, + "step": 20081000 + }, + { + "epoch": 58.13, + "learning_rate": 2.0946581777973323e-05, + "loss": 1.9528, + "step": 20081500 + }, + { + "epoch": 58.13, + "learning_rate": 2.094585813032605e-05, + "loss": 1.9624, + "step": 20082000 + }, + { + "epoch": 58.13, + "learning_rate": 2.094513448267877e-05, + "loss": 1.972, + "step": 20082500 + }, + { + "epoch": 58.13, + "learning_rate": 2.0944410835031493e-05, + "loss": 1.9463, + "step": 20083000 + }, + { + "epoch": 58.13, + "learning_rate": 2.0943688634679512e-05, + "loss": 1.9431, + "step": 20083500 + }, + { + "epoch": 58.13, + "learning_rate": 2.0942964987032234e-05, + "loss": 1.9464, + "step": 20084000 + }, + { + "epoch": 58.14, + "learning_rate": 2.0942242786680253e-05, + "loss": 1.9631, + "step": 20084500 + }, + { + "epoch": 58.14, + "learning_rate": 2.0941519139032975e-05, + "loss": 1.9498, + "step": 20085000 + }, + { + "epoch": 58.14, + "learning_rate": 2.09407954913857e-05, + "loss": 1.9484, + "step": 20085500 + }, + { + "epoch": 58.14, + "learning_rate": 2.0940071843738423e-05, + "loss": 1.9203, + "step": 20086000 + }, + { + "epoch": 58.14, + "learning_rate": 2.0939348196091145e-05, + "loss": 1.933, + "step": 20086500 + }, + { + "epoch": 58.14, + "learning_rate": 2.0938625995739164e-05, + "loss": 1.9238, + "step": 20087000 + }, + { + "epoch": 58.15, + "learning_rate": 2.0937902348091886e-05, + "loss": 1.9487, + "step": 20087500 + }, + { + "epoch": 58.15, + "learning_rate": 2.093717870044461e-05, + "loss": 1.9413, + "step": 20088000 + }, + { + "epoch": 58.15, + "learning_rate": 2.093645505279733e-05, + "loss": 1.9495, + "step": 20088500 + }, + { + "epoch": 58.15, + "learning_rate": 2.0935731405150057e-05, + "loss": 1.9487, + "step": 20089000 + }, + { + "epoch": 58.15, + "learning_rate": 2.0935007757502782e-05, + "loss": 1.957, + "step": 20089500 + }, + { + "epoch": 58.15, + "learning_rate": 2.0934284109855504e-05, + "loss": 1.9503, + "step": 20090000 + }, + { + "epoch": 58.15, + "learning_rate": 2.0933560462208227e-05, + "loss": 1.9597, + "step": 20090500 + }, + { + "epoch": 58.16, + "learning_rate": 2.0932838261856246e-05, + "loss": 1.9676, + "step": 20091000 + }, + { + "epoch": 58.16, + "learning_rate": 2.0932114614208968e-05, + "loss": 1.9425, + "step": 20091500 + }, + { + "epoch": 58.16, + "learning_rate": 2.0931392413856983e-05, + "loss": 1.9225, + "step": 20092000 + }, + { + "epoch": 58.16, + "learning_rate": 2.0930668766209706e-05, + "loss": 1.9783, + "step": 20092500 + }, + { + "epoch": 58.16, + "learning_rate": 2.092994511856243e-05, + "loss": 1.943, + "step": 20093000 + }, + { + "epoch": 58.16, + "learning_rate": 2.0929221470915157e-05, + "loss": 1.9642, + "step": 20093500 + }, + { + "epoch": 58.16, + "learning_rate": 2.092849782326788e-05, + "loss": 1.9329, + "step": 20094000 + }, + { + "epoch": 58.17, + "learning_rate": 2.09277741756206e-05, + "loss": 1.9493, + "step": 20094500 + }, + { + "epoch": 58.17, + "learning_rate": 2.0927050527973324e-05, + "loss": 1.9423, + "step": 20095000 + }, + { + "epoch": 58.17, + "learning_rate": 2.0926326880326046e-05, + "loss": 1.9514, + "step": 20095500 + }, + { + "epoch": 58.17, + "learning_rate": 2.092560323267877e-05, + "loss": 1.9319, + "step": 20096000 + }, + { + "epoch": 58.17, + "learning_rate": 2.0924879585031497e-05, + "loss": 1.9518, + "step": 20096500 + }, + { + "epoch": 58.17, + "learning_rate": 2.092415593738422e-05, + "loss": 1.9422, + "step": 20097000 + }, + { + "epoch": 58.17, + "learning_rate": 2.092343228973694e-05, + "loss": 1.9457, + "step": 20097500 + }, + { + "epoch": 58.18, + "learning_rate": 2.0922708642089664e-05, + "loss": 1.9639, + "step": 20098000 + }, + { + "epoch": 58.18, + "learning_rate": 2.0921986441737683e-05, + "loss": 1.9403, + "step": 20098500 + }, + { + "epoch": 58.18, + "learning_rate": 2.0921262794090405e-05, + "loss": 1.9481, + "step": 20099000 + }, + { + "epoch": 58.18, + "learning_rate": 2.0920539146443127e-05, + "loss": 1.9679, + "step": 20099500 + }, + { + "epoch": 58.18, + "learning_rate": 2.091981549879585e-05, + "loss": 1.9556, + "step": 20100000 + }, + { + "epoch": 58.18, + "learning_rate": 2.0919093298443872e-05, + "loss": 1.955, + "step": 20100500 + }, + { + "epoch": 58.18, + "learning_rate": 2.0918369650796594e-05, + "loss": 1.9502, + "step": 20101000 + }, + { + "epoch": 58.19, + "learning_rate": 2.0917646003149316e-05, + "loss": 1.9502, + "step": 20101500 + }, + { + "epoch": 58.19, + "learning_rate": 2.0916923802797335e-05, + "loss": 1.9516, + "step": 20102000 + }, + { + "epoch": 58.19, + "learning_rate": 2.0916200155150057e-05, + "loss": 1.9671, + "step": 20102500 + }, + { + "epoch": 58.19, + "learning_rate": 2.091547650750278e-05, + "loss": 1.9331, + "step": 20103000 + }, + { + "epoch": 58.19, + "learning_rate": 2.0914754307150795e-05, + "loss": 1.9707, + "step": 20103500 + }, + { + "epoch": 58.19, + "learning_rate": 2.091403065950352e-05, + "loss": 1.9536, + "step": 20104000 + }, + { + "epoch": 58.19, + "learning_rate": 2.0913307011856246e-05, + "loss": 1.939, + "step": 20104500 + }, + { + "epoch": 58.2, + "learning_rate": 2.091258336420897e-05, + "loss": 1.9889, + "step": 20105000 + }, + { + "epoch": 58.2, + "learning_rate": 2.091185971656169e-05, + "loss": 1.9611, + "step": 20105500 + }, + { + "epoch": 58.2, + "learning_rate": 2.0911136068914413e-05, + "loss": 1.9507, + "step": 20106000 + }, + { + "epoch": 58.2, + "learning_rate": 2.0910412421267135e-05, + "loss": 1.9548, + "step": 20106500 + }, + { + "epoch": 58.2, + "learning_rate": 2.0909690220915154e-05, + "loss": 1.9535, + "step": 20107000 + }, + { + "epoch": 58.2, + "learning_rate": 2.0908966573267877e-05, + "loss": 1.9493, + "step": 20107500 + }, + { + "epoch": 58.2, + "learning_rate": 2.0908242925620602e-05, + "loss": 1.9411, + "step": 20108000 + }, + { + "epoch": 58.21, + "learning_rate": 2.0907519277973324e-05, + "loss": 1.9552, + "step": 20108500 + }, + { + "epoch": 58.21, + "learning_rate": 2.0906795630326047e-05, + "loss": 1.9482, + "step": 20109000 + }, + { + "epoch": 58.21, + "learning_rate": 2.0906073429974066e-05, + "loss": 1.9463, + "step": 20109500 + }, + { + "epoch": 58.21, + "learning_rate": 2.0905349782326788e-05, + "loss": 1.9617, + "step": 20110000 + }, + { + "epoch": 58.21, + "learning_rate": 2.090462613467951e-05, + "loss": 1.9305, + "step": 20110500 + }, + { + "epoch": 58.21, + "learning_rate": 2.0903902487032236e-05, + "loss": 1.9801, + "step": 20111000 + }, + { + "epoch": 58.21, + "learning_rate": 2.0903178839384958e-05, + "loss": 1.9664, + "step": 20111500 + }, + { + "epoch": 58.22, + "learning_rate": 2.0902455191737683e-05, + "loss": 1.9487, + "step": 20112000 + }, + { + "epoch": 58.22, + "learning_rate": 2.0901731544090406e-05, + "loss": 1.9637, + "step": 20112500 + }, + { + "epoch": 58.22, + "learning_rate": 2.0901007896443128e-05, + "loss": 1.945, + "step": 20113000 + }, + { + "epoch": 58.22, + "learning_rate": 2.090028424879585e-05, + "loss": 1.9478, + "step": 20113500 + }, + { + "epoch": 58.22, + "learning_rate": 2.0899560601148572e-05, + "loss": 1.9591, + "step": 20114000 + }, + { + "epoch": 58.22, + "learning_rate": 2.089883840079659e-05, + "loss": 1.9653, + "step": 20114500 + }, + { + "epoch": 58.22, + "learning_rate": 2.0898114753149314e-05, + "loss": 1.9626, + "step": 20115000 + }, + { + "epoch": 58.23, + "learning_rate": 2.089739110550204e-05, + "loss": 1.9296, + "step": 20115500 + }, + { + "epoch": 58.23, + "learning_rate": 2.089666745785476e-05, + "loss": 1.9642, + "step": 20116000 + }, + { + "epoch": 58.23, + "learning_rate": 2.0895943810207487e-05, + "loss": 1.9734, + "step": 20116500 + }, + { + "epoch": 58.23, + "learning_rate": 2.089522016256021e-05, + "loss": 1.9383, + "step": 20117000 + }, + { + "epoch": 58.23, + "learning_rate": 2.089449651491293e-05, + "loss": 1.9666, + "step": 20117500 + }, + { + "epoch": 58.23, + "learning_rate": 2.0893772867265654e-05, + "loss": 1.9575, + "step": 20118000 + }, + { + "epoch": 58.23, + "learning_rate": 2.0893050666913673e-05, + "loss": 1.943, + "step": 20118500 + }, + { + "epoch": 58.24, + "learning_rate": 2.08923270192664e-05, + "loss": 1.9397, + "step": 20119000 + }, + { + "epoch": 58.24, + "learning_rate": 2.089160337161912e-05, + "loss": 1.9535, + "step": 20119500 + }, + { + "epoch": 58.24, + "learning_rate": 2.0890879723971843e-05, + "loss": 1.9455, + "step": 20120000 + }, + { + "epoch": 58.24, + "learning_rate": 2.0890157523619862e-05, + "loss": 1.9538, + "step": 20120500 + }, + { + "epoch": 58.24, + "learning_rate": 2.0889433875972584e-05, + "loss": 1.9429, + "step": 20121000 + }, + { + "epoch": 58.24, + "learning_rate": 2.0888710228325306e-05, + "loss": 1.9751, + "step": 20121500 + }, + { + "epoch": 58.24, + "learning_rate": 2.088798658067803e-05, + "loss": 1.9561, + "step": 20122000 + }, + { + "epoch": 58.25, + "learning_rate": 2.088726293303075e-05, + "loss": 1.9714, + "step": 20122500 + }, + { + "epoch": 58.25, + "learning_rate": 2.0886539285383476e-05, + "loss": 1.9916, + "step": 20123000 + }, + { + "epoch": 58.25, + "learning_rate": 2.0885817085031495e-05, + "loss": 1.9506, + "step": 20123500 + }, + { + "epoch": 58.25, + "learning_rate": 2.0885093437384217e-05, + "loss": 1.9493, + "step": 20124000 + }, + { + "epoch": 58.25, + "learning_rate": 2.0884371237032236e-05, + "loss": 1.9616, + "step": 20124500 + }, + { + "epoch": 58.25, + "learning_rate": 2.0883649036680252e-05, + "loss": 1.9537, + "step": 20125000 + }, + { + "epoch": 58.26, + "learning_rate": 2.0882925389032974e-05, + "loss": 1.9377, + "step": 20125500 + }, + { + "epoch": 58.26, + "learning_rate": 2.08822017413857e-05, + "loss": 1.9953, + "step": 20126000 + }, + { + "epoch": 58.26, + "learning_rate": 2.0881478093738422e-05, + "loss": 1.9819, + "step": 20126500 + }, + { + "epoch": 58.26, + "learning_rate": 2.0880754446091148e-05, + "loss": 1.9556, + "step": 20127000 + }, + { + "epoch": 58.26, + "learning_rate": 2.088003079844387e-05, + "loss": 1.9485, + "step": 20127500 + }, + { + "epoch": 58.26, + "learning_rate": 2.0879307150796592e-05, + "loss": 1.9512, + "step": 20128000 + }, + { + "epoch": 58.26, + "learning_rate": 2.0878583503149314e-05, + "loss": 1.9305, + "step": 20128500 + }, + { + "epoch": 58.27, + "learning_rate": 2.0877859855502037e-05, + "loss": 1.9554, + "step": 20129000 + }, + { + "epoch": 58.27, + "learning_rate": 2.0877136207854762e-05, + "loss": 1.9511, + "step": 20129500 + }, + { + "epoch": 58.27, + "learning_rate": 2.0876412560207484e-05, + "loss": 1.9422, + "step": 20130000 + }, + { + "epoch": 58.27, + "learning_rate": 2.0875690359855503e-05, + "loss": 1.9493, + "step": 20130500 + }, + { + "epoch": 58.27, + "learning_rate": 2.0874966712208226e-05, + "loss": 1.9492, + "step": 20131000 + }, + { + "epoch": 58.27, + "learning_rate": 2.087424306456095e-05, + "loss": 1.9517, + "step": 20131500 + }, + { + "epoch": 58.27, + "learning_rate": 2.0873519416913674e-05, + "loss": 1.9604, + "step": 20132000 + }, + { + "epoch": 58.28, + "learning_rate": 2.0872795769266396e-05, + "loss": 1.9577, + "step": 20132500 + }, + { + "epoch": 58.28, + "learning_rate": 2.0872072121619118e-05, + "loss": 1.9585, + "step": 20133000 + }, + { + "epoch": 58.28, + "learning_rate": 2.087134847397184e-05, + "loss": 1.9407, + "step": 20133500 + }, + { + "epoch": 58.28, + "learning_rate": 2.0870624826324566e-05, + "loss": 1.9521, + "step": 20134000 + }, + { + "epoch": 58.28, + "learning_rate": 2.0869902625972585e-05, + "loss": 1.972, + "step": 20134500 + }, + { + "epoch": 58.28, + "learning_rate": 2.0869178978325307e-05, + "loss": 1.9464, + "step": 20135000 + }, + { + "epoch": 58.28, + "learning_rate": 2.086845533067803e-05, + "loss": 1.9442, + "step": 20135500 + }, + { + "epoch": 58.29, + "learning_rate": 2.086773168303075e-05, + "loss": 1.9533, + "step": 20136000 + }, + { + "epoch": 58.29, + "learning_rate": 2.086700948267877e-05, + "loss": 1.9582, + "step": 20136500 + }, + { + "epoch": 58.29, + "learning_rate": 2.0866285835031493e-05, + "loss": 1.9603, + "step": 20137000 + }, + { + "epoch": 58.29, + "learning_rate": 2.0865562187384215e-05, + "loss": 1.9491, + "step": 20137500 + }, + { + "epoch": 58.29, + "learning_rate": 2.086483853973694e-05, + "loss": 1.9428, + "step": 20138000 + }, + { + "epoch": 58.29, + "learning_rate": 2.0864114892089666e-05, + "loss": 1.9488, + "step": 20138500 + }, + { + "epoch": 58.29, + "learning_rate": 2.086339124444239e-05, + "loss": 1.9725, + "step": 20139000 + }, + { + "epoch": 58.3, + "learning_rate": 2.086266759679511e-05, + "loss": 1.9408, + "step": 20139500 + }, + { + "epoch": 58.3, + "learning_rate": 2.0861943949147833e-05, + "loss": 1.948, + "step": 20140000 + }, + { + "epoch": 58.3, + "learning_rate": 2.0861221748795852e-05, + "loss": 1.9447, + "step": 20140500 + }, + { + "epoch": 58.3, + "learning_rate": 2.0860498101148574e-05, + "loss": 1.9211, + "step": 20141000 + }, + { + "epoch": 58.3, + "learning_rate": 2.08597744535013e-05, + "loss": 1.956, + "step": 20141500 + }, + { + "epoch": 58.3, + "learning_rate": 2.0859050805854022e-05, + "loss": 1.9478, + "step": 20142000 + }, + { + "epoch": 58.3, + "learning_rate": 2.0858327158206744e-05, + "loss": 1.92, + "step": 20142500 + }, + { + "epoch": 58.31, + "learning_rate": 2.0857603510559466e-05, + "loss": 1.9693, + "step": 20143000 + }, + { + "epoch": 58.31, + "learning_rate": 2.0856881310207485e-05, + "loss": 1.9998, + "step": 20143500 + }, + { + "epoch": 58.31, + "learning_rate": 2.0856157662560208e-05, + "loss": 1.9555, + "step": 20144000 + }, + { + "epoch": 58.31, + "learning_rate": 2.085543401491293e-05, + "loss": 1.9447, + "step": 20144500 + }, + { + "epoch": 58.31, + "learning_rate": 2.0854710367265655e-05, + "loss": 1.9755, + "step": 20145000 + }, + { + "epoch": 58.31, + "learning_rate": 2.0853986719618378e-05, + "loss": 1.9789, + "step": 20145500 + }, + { + "epoch": 58.31, + "learning_rate": 2.0853263071971103e-05, + "loss": 1.9683, + "step": 20146000 + }, + { + "epoch": 58.32, + "learning_rate": 2.0852539424323825e-05, + "loss": 1.9993, + "step": 20146500 + }, + { + "epoch": 58.32, + "learning_rate": 2.085181722397184e-05, + "loss": 1.9432, + "step": 20147000 + }, + { + "epoch": 58.32, + "learning_rate": 2.0851093576324567e-05, + "loss": 1.9549, + "step": 20147500 + }, + { + "epoch": 58.32, + "learning_rate": 2.085036992867729e-05, + "loss": 1.9573, + "step": 20148000 + }, + { + "epoch": 58.32, + "learning_rate": 2.084964628103001e-05, + "loss": 1.9547, + "step": 20148500 + }, + { + "epoch": 58.32, + "learning_rate": 2.0848922633382737e-05, + "loss": 1.9802, + "step": 20149000 + }, + { + "epoch": 58.32, + "learning_rate": 2.0848200433030756e-05, + "loss": 1.9423, + "step": 20149500 + }, + { + "epoch": 58.33, + "learning_rate": 2.0847476785383478e-05, + "loss": 1.9566, + "step": 20150000 + }, + { + "epoch": 58.33, + "learning_rate": 2.08467531377362e-05, + "loss": 1.9421, + "step": 20150500 + }, + { + "epoch": 58.33, + "learning_rate": 2.0846029490088922e-05, + "loss": 1.9574, + "step": 20151000 + }, + { + "epoch": 58.33, + "learning_rate": 2.0845305842441645e-05, + "loss": 1.9485, + "step": 20151500 + }, + { + "epoch": 58.33, + "learning_rate": 2.0844582194794367e-05, + "loss": 1.9567, + "step": 20152000 + }, + { + "epoch": 58.33, + "learning_rate": 2.0843858547147092e-05, + "loss": 1.9743, + "step": 20152500 + }, + { + "epoch": 58.33, + "learning_rate": 2.0843134899499818e-05, + "loss": 1.9677, + "step": 20153000 + }, + { + "epoch": 58.34, + "learning_rate": 2.084241125185254e-05, + "loss": 1.9665, + "step": 20153500 + }, + { + "epoch": 58.34, + "learning_rate": 2.0841687604205263e-05, + "loss": 1.9451, + "step": 20154000 + }, + { + "epoch": 58.34, + "learning_rate": 2.0840963956557985e-05, + "loss": 1.9459, + "step": 20154500 + }, + { + "epoch": 58.34, + "learning_rate": 2.0840240308910707e-05, + "loss": 1.9464, + "step": 20155000 + }, + { + "epoch": 58.34, + "learning_rate": 2.0839518108558726e-05, + "loss": 1.9538, + "step": 20155500 + }, + { + "epoch": 58.34, + "learning_rate": 2.083879446091145e-05, + "loss": 1.9662, + "step": 20156000 + }, + { + "epoch": 58.34, + "learning_rate": 2.0838070813264174e-05, + "loss": 1.9547, + "step": 20156500 + }, + { + "epoch": 58.35, + "learning_rate": 2.0837347165616896e-05, + "loss": 1.9581, + "step": 20157000 + }, + { + "epoch": 58.35, + "learning_rate": 2.0836623517969618e-05, + "loss": 1.9401, + "step": 20157500 + }, + { + "epoch": 58.35, + "learning_rate": 2.0835899870322344e-05, + "loss": 1.9359, + "step": 20158000 + }, + { + "epoch": 58.35, + "learning_rate": 2.083517766997036e-05, + "loss": 1.9505, + "step": 20158500 + }, + { + "epoch": 58.35, + "learning_rate": 2.0834454022323082e-05, + "loss": 1.9607, + "step": 20159000 + }, + { + "epoch": 58.35, + "learning_rate": 2.0833730374675804e-05, + "loss": 1.9797, + "step": 20159500 + }, + { + "epoch": 58.35, + "learning_rate": 2.083300672702853e-05, + "loss": 1.9693, + "step": 20160000 + }, + { + "epoch": 58.36, + "learning_rate": 2.083228452667655e-05, + "loss": 1.9591, + "step": 20160500 + }, + { + "epoch": 58.36, + "learning_rate": 2.083156087902927e-05, + "loss": 1.9565, + "step": 20161000 + }, + { + "epoch": 58.36, + "learning_rate": 2.0830837231381993e-05, + "loss": 1.9478, + "step": 20161500 + }, + { + "epoch": 58.36, + "learning_rate": 2.083011358373472e-05, + "loss": 1.9256, + "step": 20162000 + }, + { + "epoch": 58.36, + "learning_rate": 2.082938993608744e-05, + "loss": 1.9396, + "step": 20162500 + }, + { + "epoch": 58.36, + "learning_rate": 2.0828666288440163e-05, + "loss": 1.98, + "step": 20163000 + }, + { + "epoch": 58.37, + "learning_rate": 2.082794264079289e-05, + "loss": 1.9623, + "step": 20163500 + }, + { + "epoch": 58.37, + "learning_rate": 2.0827220440440908e-05, + "loss": 1.9628, + "step": 20164000 + }, + { + "epoch": 58.37, + "learning_rate": 2.082649679279363e-05, + "loss": 1.931, + "step": 20164500 + }, + { + "epoch": 58.37, + "learning_rate": 2.0825773145146352e-05, + "loss": 1.9573, + "step": 20165000 + }, + { + "epoch": 58.37, + "learning_rate": 2.0825050944794368e-05, + "loss": 1.9494, + "step": 20165500 + }, + { + "epoch": 58.37, + "learning_rate": 2.0824327297147093e-05, + "loss": 1.934, + "step": 20166000 + }, + { + "epoch": 58.37, + "learning_rate": 2.0823603649499815e-05, + "loss": 1.9515, + "step": 20166500 + }, + { + "epoch": 58.38, + "learning_rate": 2.0822880001852538e-05, + "loss": 1.9609, + "step": 20167000 + }, + { + "epoch": 58.38, + "learning_rate": 2.0822156354205263e-05, + "loss": 1.9297, + "step": 20167500 + }, + { + "epoch": 58.38, + "learning_rate": 2.0821434153853282e-05, + "loss": 1.942, + "step": 20168000 + }, + { + "epoch": 58.38, + "learning_rate": 2.0820710506206005e-05, + "loss": 1.9562, + "step": 20168500 + }, + { + "epoch": 58.38, + "learning_rate": 2.0819986858558727e-05, + "loss": 1.9667, + "step": 20169000 + }, + { + "epoch": 58.38, + "learning_rate": 2.081926321091145e-05, + "loss": 1.9511, + "step": 20169500 + }, + { + "epoch": 58.38, + "learning_rate": 2.081853956326417e-05, + "loss": 1.9687, + "step": 20170000 + }, + { + "epoch": 58.39, + "learning_rate": 2.0817815915616893e-05, + "loss": 1.9632, + "step": 20170500 + }, + { + "epoch": 58.39, + "learning_rate": 2.081709226796962e-05, + "loss": 1.9483, + "step": 20171000 + }, + { + "epoch": 58.39, + "learning_rate": 2.0816368620322345e-05, + "loss": 1.9271, + "step": 20171500 + }, + { + "epoch": 58.39, + "learning_rate": 2.0815644972675067e-05, + "loss": 1.9864, + "step": 20172000 + }, + { + "epoch": 58.39, + "learning_rate": 2.081492421961838e-05, + "loss": 1.9382, + "step": 20172500 + }, + { + "epoch": 58.39, + "learning_rate": 2.08142005719711e-05, + "loss": 1.9285, + "step": 20173000 + }, + { + "epoch": 58.39, + "learning_rate": 2.0813476924323824e-05, + "loss": 1.9505, + "step": 20173500 + }, + { + "epoch": 58.4, + "learning_rate": 2.0812753276676546e-05, + "loss": 1.9495, + "step": 20174000 + }, + { + "epoch": 58.4, + "learning_rate": 2.081202962902927e-05, + "loss": 1.959, + "step": 20174500 + }, + { + "epoch": 58.4, + "learning_rate": 2.0811305981381997e-05, + "loss": 1.9737, + "step": 20175000 + }, + { + "epoch": 58.4, + "learning_rate": 2.081058233373472e-05, + "loss": 1.9541, + "step": 20175500 + }, + { + "epoch": 58.4, + "learning_rate": 2.080985868608744e-05, + "loss": 1.9759, + "step": 20176000 + }, + { + "epoch": 58.4, + "learning_rate": 2.0809135038440164e-05, + "loss": 1.9648, + "step": 20176500 + }, + { + "epoch": 58.4, + "learning_rate": 2.0808411390792886e-05, + "loss": 1.9587, + "step": 20177000 + }, + { + "epoch": 58.41, + "learning_rate": 2.080768774314561e-05, + "loss": 1.969, + "step": 20177500 + }, + { + "epoch": 58.41, + "learning_rate": 2.0806964095498334e-05, + "loss": 1.9648, + "step": 20178000 + }, + { + "epoch": 58.41, + "learning_rate": 2.080624044785106e-05, + "loss": 1.9694, + "step": 20178500 + }, + { + "epoch": 58.41, + "learning_rate": 2.0805518247499075e-05, + "loss": 1.9447, + "step": 20179000 + }, + { + "epoch": 58.41, + "learning_rate": 2.0804796047147094e-05, + "loss": 1.9518, + "step": 20179500 + }, + { + "epoch": 58.41, + "learning_rate": 2.0804072399499816e-05, + "loss": 1.9536, + "step": 20180000 + }, + { + "epoch": 58.41, + "learning_rate": 2.080334875185254e-05, + "loss": 1.9334, + "step": 20180500 + }, + { + "epoch": 58.42, + "learning_rate": 2.0802626551500557e-05, + "loss": 1.9574, + "step": 20181000 + }, + { + "epoch": 58.42, + "learning_rate": 2.080190290385328e-05, + "loss": 1.9584, + "step": 20181500 + }, + { + "epoch": 58.42, + "learning_rate": 2.0801179256206002e-05, + "loss": 1.9515, + "step": 20182000 + }, + { + "epoch": 58.42, + "learning_rate": 2.0800455608558728e-05, + "loss": 1.9677, + "step": 20182500 + }, + { + "epoch": 58.42, + "learning_rate": 2.079973196091145e-05, + "loss": 1.9505, + "step": 20183000 + }, + { + "epoch": 58.42, + "learning_rate": 2.079900976055947e-05, + "loss": 1.9584, + "step": 20183500 + }, + { + "epoch": 58.42, + "learning_rate": 2.079828611291219e-05, + "loss": 1.9457, + "step": 20184000 + }, + { + "epoch": 58.43, + "learning_rate": 2.0797562465264913e-05, + "loss": 1.9404, + "step": 20184500 + }, + { + "epoch": 58.43, + "learning_rate": 2.0796838817617635e-05, + "loss": 1.9727, + "step": 20185000 + }, + { + "epoch": 58.43, + "learning_rate": 2.0796115169970358e-05, + "loss": 1.9615, + "step": 20185500 + }, + { + "epoch": 58.43, + "learning_rate": 2.0795391522323083e-05, + "loss": 1.9403, + "step": 20186000 + }, + { + "epoch": 58.43, + "learning_rate": 2.079466787467581e-05, + "loss": 1.9741, + "step": 20186500 + }, + { + "epoch": 58.43, + "learning_rate": 2.079394422702853e-05, + "loss": 1.9636, + "step": 20187000 + }, + { + "epoch": 58.43, + "learning_rate": 2.0793222026676547e-05, + "loss": 1.9442, + "step": 20187500 + }, + { + "epoch": 58.44, + "learning_rate": 2.0792498379029272e-05, + "loss": 1.9396, + "step": 20188000 + }, + { + "epoch": 58.44, + "learning_rate": 2.0791774731381995e-05, + "loss": 1.9693, + "step": 20188500 + }, + { + "epoch": 58.44, + "learning_rate": 2.0791051083734717e-05, + "loss": 1.9685, + "step": 20189000 + }, + { + "epoch": 58.44, + "learning_rate": 2.079032743608744e-05, + "loss": 1.9689, + "step": 20189500 + }, + { + "epoch": 58.44, + "learning_rate": 2.078960523573546e-05, + "loss": 1.9764, + "step": 20190000 + }, + { + "epoch": 58.44, + "learning_rate": 2.0788881588088184e-05, + "loss": 1.9411, + "step": 20190500 + }, + { + "epoch": 58.44, + "learning_rate": 2.0788157940440906e-05, + "loss": 1.9715, + "step": 20191000 + }, + { + "epoch": 58.45, + "learning_rate": 2.0787434292793628e-05, + "loss": 1.972, + "step": 20191500 + }, + { + "epoch": 58.45, + "learning_rate": 2.078671064514635e-05, + "loss": 1.9898, + "step": 20192000 + }, + { + "epoch": 58.45, + "learning_rate": 2.0785986997499073e-05, + "loss": 1.9422, + "step": 20192500 + }, + { + "epoch": 58.45, + "learning_rate": 2.0785263349851798e-05, + "loss": 1.956, + "step": 20193000 + }, + { + "epoch": 58.45, + "learning_rate": 2.0784539702204524e-05, + "loss": 1.9542, + "step": 20193500 + }, + { + "epoch": 58.45, + "learning_rate": 2.0783816054557246e-05, + "loss": 1.9565, + "step": 20194000 + }, + { + "epoch": 58.45, + "learning_rate": 2.0783092406909968e-05, + "loss": 1.9592, + "step": 20194500 + }, + { + "epoch": 58.46, + "learning_rate": 2.0782370206557987e-05, + "loss": 1.9749, + "step": 20195000 + }, + { + "epoch": 58.46, + "learning_rate": 2.078164655891071e-05, + "loss": 1.9478, + "step": 20195500 + }, + { + "epoch": 58.46, + "learning_rate": 2.078092291126343e-05, + "loss": 1.9252, + "step": 20196000 + }, + { + "epoch": 58.46, + "learning_rate": 2.0780199263616154e-05, + "loss": 1.9227, + "step": 20196500 + }, + { + "epoch": 58.46, + "learning_rate": 2.0779477063264173e-05, + "loss": 1.9556, + "step": 20197000 + }, + { + "epoch": 58.46, + "learning_rate": 2.07787534156169e-05, + "loss": 1.9589, + "step": 20197500 + }, + { + "epoch": 58.46, + "learning_rate": 2.077802976796962e-05, + "loss": 1.9515, + "step": 20198000 + }, + { + "epoch": 58.47, + "learning_rate": 2.0777306120322343e-05, + "loss": 1.9522, + "step": 20198500 + }, + { + "epoch": 58.47, + "learning_rate": 2.0776582472675065e-05, + "loss": 1.9496, + "step": 20199000 + }, + { + "epoch": 58.47, + "learning_rate": 2.0775858825027787e-05, + "loss": 1.9362, + "step": 20199500 + }, + { + "epoch": 58.47, + "learning_rate": 2.0775135177380513e-05, + "loss": 1.9341, + "step": 20200000 + }, + { + "epoch": 58.47, + "learning_rate": 2.0774411529733235e-05, + "loss": 1.9461, + "step": 20200500 + }, + { + "epoch": 58.47, + "learning_rate": 2.077368788208596e-05, + "loss": 1.9678, + "step": 20201000 + }, + { + "epoch": 58.48, + "learning_rate": 2.0772964234438683e-05, + "loss": 1.932, + "step": 20201500 + }, + { + "epoch": 58.48, + "learning_rate": 2.07722420340867e-05, + "loss": 1.9575, + "step": 20202000 + }, + { + "epoch": 58.48, + "learning_rate": 2.0771518386439424e-05, + "loss": 1.9688, + "step": 20202500 + }, + { + "epoch": 58.48, + "learning_rate": 2.0770794738792146e-05, + "loss": 1.9436, + "step": 20203000 + }, + { + "epoch": 58.48, + "learning_rate": 2.077007109114487e-05, + "loss": 1.9683, + "step": 20203500 + }, + { + "epoch": 58.48, + "learning_rate": 2.076934744349759e-05, + "loss": 1.9444, + "step": 20204000 + }, + { + "epoch": 58.48, + "learning_rate": 2.0768623795850317e-05, + "loss": 1.9396, + "step": 20204500 + }, + { + "epoch": 58.49, + "learning_rate": 2.076790014820304e-05, + "loss": 1.9478, + "step": 20205000 + }, + { + "epoch": 58.49, + "learning_rate": 2.0767176500555764e-05, + "loss": 1.9535, + "step": 20205500 + }, + { + "epoch": 58.49, + "learning_rate": 2.0766455747499077e-05, + "loss": 1.9622, + "step": 20206000 + }, + { + "epoch": 58.49, + "learning_rate": 2.07657320998518e-05, + "loss": 1.9601, + "step": 20206500 + }, + { + "epoch": 58.49, + "learning_rate": 2.076500845220452e-05, + "loss": 1.9923, + "step": 20207000 + }, + { + "epoch": 58.49, + "learning_rate": 2.0764284804557243e-05, + "loss": 1.9399, + "step": 20207500 + }, + { + "epoch": 58.49, + "learning_rate": 2.0763561156909966e-05, + "loss": 1.9767, + "step": 20208000 + }, + { + "epoch": 58.5, + "learning_rate": 2.076283750926269e-05, + "loss": 1.9457, + "step": 20208500 + }, + { + "epoch": 58.5, + "learning_rate": 2.0762113861615413e-05, + "loss": 1.9285, + "step": 20209000 + }, + { + "epoch": 58.5, + "learning_rate": 2.076139021396814e-05, + "loss": 1.9557, + "step": 20209500 + }, + { + "epoch": 58.5, + "learning_rate": 2.076066656632086e-05, + "loss": 1.986, + "step": 20210000 + }, + { + "epoch": 58.5, + "learning_rate": 2.0759944365968877e-05, + "loss": 1.9399, + "step": 20210500 + }, + { + "epoch": 58.5, + "learning_rate": 2.0759220718321603e-05, + "loss": 1.9591, + "step": 20211000 + }, + { + "epoch": 58.5, + "learning_rate": 2.0758499965264915e-05, + "loss": 1.9679, + "step": 20211500 + }, + { + "epoch": 58.51, + "learning_rate": 2.0757776317617637e-05, + "loss": 1.969, + "step": 20212000 + }, + { + "epoch": 58.51, + "learning_rate": 2.0757052669970363e-05, + "loss": 1.9617, + "step": 20212500 + }, + { + "epoch": 58.51, + "learning_rate": 2.0756329022323085e-05, + "loss": 1.9461, + "step": 20213000 + }, + { + "epoch": 58.51, + "learning_rate": 2.0755605374675807e-05, + "loss": 1.9518, + "step": 20213500 + }, + { + "epoch": 58.51, + "learning_rate": 2.075488172702853e-05, + "loss": 1.9872, + "step": 20214000 + }, + { + "epoch": 58.51, + "learning_rate": 2.075415807938125e-05, + "loss": 1.9346, + "step": 20214500 + }, + { + "epoch": 58.51, + "learning_rate": 2.0753434431733977e-05, + "loss": 1.9446, + "step": 20215000 + }, + { + "epoch": 58.52, + "learning_rate": 2.0752712231381993e-05, + "loss": 1.9577, + "step": 20215500 + }, + { + "epoch": 58.52, + "learning_rate": 2.075198858373472e-05, + "loss": 1.9706, + "step": 20216000 + }, + { + "epoch": 58.52, + "learning_rate": 2.075126493608744e-05, + "loss": 1.9714, + "step": 20216500 + }, + { + "epoch": 58.52, + "learning_rate": 2.075054273573546e-05, + "loss": 1.9501, + "step": 20217000 + }, + { + "epoch": 58.52, + "learning_rate": 2.074982053538348e-05, + "loss": 1.9665, + "step": 20217500 + }, + { + "epoch": 58.52, + "learning_rate": 2.07490968877362e-05, + "loss": 1.9478, + "step": 20218000 + }, + { + "epoch": 58.52, + "learning_rate": 2.0748373240088923e-05, + "loss": 1.9275, + "step": 20218500 + }, + { + "epoch": 58.53, + "learning_rate": 2.0747649592441645e-05, + "loss": 1.9729, + "step": 20219000 + }, + { + "epoch": 58.53, + "learning_rate": 2.0746925944794367e-05, + "loss": 1.9515, + "step": 20219500 + }, + { + "epoch": 58.53, + "learning_rate": 2.0746202297147093e-05, + "loss": 1.9265, + "step": 20220000 + }, + { + "epoch": 58.53, + "learning_rate": 2.0745478649499815e-05, + "loss": 1.9707, + "step": 20220500 + }, + { + "epoch": 58.53, + "learning_rate": 2.074475500185254e-05, + "loss": 1.9705, + "step": 20221000 + }, + { + "epoch": 58.53, + "learning_rate": 2.0744031354205263e-05, + "loss": 1.9365, + "step": 20221500 + }, + { + "epoch": 58.53, + "learning_rate": 2.0743307706557985e-05, + "loss": 1.9621, + "step": 20222000 + }, + { + "epoch": 58.54, + "learning_rate": 2.0742584058910708e-05, + "loss": 1.9622, + "step": 20222500 + }, + { + "epoch": 58.54, + "learning_rate": 2.074186041126343e-05, + "loss": 1.9596, + "step": 20223000 + }, + { + "epoch": 58.54, + "learning_rate": 2.0741136763616155e-05, + "loss": 1.9569, + "step": 20223500 + }, + { + "epoch": 58.54, + "learning_rate": 2.0740413115968878e-05, + "loss": 1.9286, + "step": 20224000 + }, + { + "epoch": 58.54, + "learning_rate": 2.0739689468321603e-05, + "loss": 1.9216, + "step": 20224500 + }, + { + "epoch": 58.54, + "learning_rate": 2.0738965820674326e-05, + "loss": 1.9636, + "step": 20225000 + }, + { + "epoch": 58.54, + "learning_rate": 2.0738242173027048e-05, + "loss": 1.9481, + "step": 20225500 + }, + { + "epoch": 58.55, + "learning_rate": 2.073751852537977e-05, + "loss": 1.9491, + "step": 20226000 + }, + { + "epoch": 58.55, + "learning_rate": 2.073679632502779e-05, + "loss": 1.962, + "step": 20226500 + }, + { + "epoch": 58.55, + "learning_rate": 2.0736072677380515e-05, + "loss": 1.9241, + "step": 20227000 + }, + { + "epoch": 58.55, + "learning_rate": 2.0735349029733237e-05, + "loss": 1.9502, + "step": 20227500 + }, + { + "epoch": 58.55, + "learning_rate": 2.073462538208596e-05, + "loss": 1.9661, + "step": 20228000 + }, + { + "epoch": 58.55, + "learning_rate": 2.0733903181733978e-05, + "loss": 1.9553, + "step": 20228500 + }, + { + "epoch": 58.55, + "learning_rate": 2.07331795340867e-05, + "loss": 1.9625, + "step": 20229000 + }, + { + "epoch": 58.56, + "learning_rate": 2.0732455886439422e-05, + "loss": 1.9471, + "step": 20229500 + }, + { + "epoch": 58.56, + "learning_rate": 2.0731732238792145e-05, + "loss": 1.9708, + "step": 20230000 + }, + { + "epoch": 58.56, + "learning_rate": 2.0731008591144867e-05, + "loss": 1.9424, + "step": 20230500 + }, + { + "epoch": 58.56, + "learning_rate": 2.0730284943497593e-05, + "loss": 1.9732, + "step": 20231000 + }, + { + "epoch": 58.56, + "learning_rate": 2.072956274314561e-05, + "loss": 1.962, + "step": 20231500 + }, + { + "epoch": 58.56, + "learning_rate": 2.0728839095498334e-05, + "loss": 1.9379, + "step": 20232000 + }, + { + "epoch": 58.56, + "learning_rate": 2.0728115447851056e-05, + "loss": 1.9663, + "step": 20232500 + }, + { + "epoch": 58.57, + "learning_rate": 2.0727391800203778e-05, + "loss": 1.9528, + "step": 20233000 + }, + { + "epoch": 58.57, + "learning_rate": 2.0726668152556504e-05, + "loss": 1.9629, + "step": 20233500 + }, + { + "epoch": 58.57, + "learning_rate": 2.0725944504909226e-05, + "loss": 1.9465, + "step": 20234000 + }, + { + "epoch": 58.57, + "learning_rate": 2.072522085726195e-05, + "loss": 1.9669, + "step": 20234500 + }, + { + "epoch": 58.57, + "learning_rate": 2.0724497209614674e-05, + "loss": 1.953, + "step": 20235000 + }, + { + "epoch": 58.57, + "learning_rate": 2.0723776456557986e-05, + "loss": 1.9531, + "step": 20235500 + }, + { + "epoch": 58.57, + "learning_rate": 2.072305280891071e-05, + "loss": 1.9795, + "step": 20236000 + }, + { + "epoch": 58.58, + "learning_rate": 2.072232916126343e-05, + "loss": 1.9461, + "step": 20236500 + }, + { + "epoch": 58.58, + "learning_rate": 2.0721605513616156e-05, + "loss": 1.9677, + "step": 20237000 + }, + { + "epoch": 58.58, + "learning_rate": 2.072088186596888e-05, + "loss": 1.9702, + "step": 20237500 + }, + { + "epoch": 58.58, + "learning_rate": 2.0720159665616894e-05, + "loss": 1.9802, + "step": 20238000 + }, + { + "epoch": 58.58, + "learning_rate": 2.071943601796962e-05, + "loss": 1.9635, + "step": 20238500 + }, + { + "epoch": 58.58, + "learning_rate": 2.0718712370322342e-05, + "loss": 1.9694, + "step": 20239000 + }, + { + "epoch": 58.59, + "learning_rate": 2.0717988722675068e-05, + "loss": 1.9674, + "step": 20239500 + }, + { + "epoch": 58.59, + "learning_rate": 2.071726507502779e-05, + "loss": 1.9321, + "step": 20240000 + }, + { + "epoch": 58.59, + "learning_rate": 2.0716541427380512e-05, + "loss": 1.9527, + "step": 20240500 + }, + { + "epoch": 58.59, + "learning_rate": 2.0715817779733234e-05, + "loss": 1.9583, + "step": 20241000 + }, + { + "epoch": 58.59, + "learning_rate": 2.0715095579381253e-05, + "loss": 1.9695, + "step": 20241500 + }, + { + "epoch": 58.59, + "learning_rate": 2.0714371931733975e-05, + "loss": 1.9531, + "step": 20242000 + }, + { + "epoch": 58.59, + "learning_rate": 2.07136482840867e-05, + "loss": 1.9641, + "step": 20242500 + }, + { + "epoch": 58.6, + "learning_rate": 2.0712924636439423e-05, + "loss": 1.9649, + "step": 20243000 + }, + { + "epoch": 58.6, + "learning_rate": 2.0712200988792145e-05, + "loss": 1.9695, + "step": 20243500 + }, + { + "epoch": 58.6, + "learning_rate": 2.0711477341144868e-05, + "loss": 1.9512, + "step": 20244000 + }, + { + "epoch": 58.6, + "learning_rate": 2.0710755140792887e-05, + "loss": 1.9818, + "step": 20244500 + }, + { + "epoch": 58.6, + "learning_rate": 2.0710032940440906e-05, + "loss": 1.9621, + "step": 20245000 + }, + { + "epoch": 58.6, + "learning_rate": 2.0709309292793628e-05, + "loss": 1.9413, + "step": 20245500 + }, + { + "epoch": 58.6, + "learning_rate": 2.0708585645146353e-05, + "loss": 1.9686, + "step": 20246000 + }, + { + "epoch": 58.61, + "learning_rate": 2.0707861997499076e-05, + "loss": 1.9582, + "step": 20246500 + }, + { + "epoch": 58.61, + "learning_rate": 2.0707139797147095e-05, + "loss": 1.9545, + "step": 20247000 + }, + { + "epoch": 58.61, + "learning_rate": 2.0706416149499817e-05, + "loss": 1.9621, + "step": 20247500 + }, + { + "epoch": 58.61, + "learning_rate": 2.070569250185254e-05, + "loss": 1.9679, + "step": 20248000 + }, + { + "epoch": 58.61, + "learning_rate": 2.070496885420526e-05, + "loss": 1.9754, + "step": 20248500 + }, + { + "epoch": 58.61, + "learning_rate": 2.0704245206557984e-05, + "loss": 1.9661, + "step": 20249000 + }, + { + "epoch": 58.61, + "learning_rate": 2.0703521558910706e-05, + "loss": 1.9584, + "step": 20249500 + }, + { + "epoch": 58.62, + "learning_rate": 2.070279791126343e-05, + "loss": 1.9483, + "step": 20250000 + }, + { + "epoch": 58.62, + "learning_rate": 2.070207571091145e-05, + "loss": 1.9697, + "step": 20250500 + }, + { + "epoch": 58.62, + "learning_rate": 2.0701352063264173e-05, + "loss": 1.9657, + "step": 20251000 + }, + { + "epoch": 58.62, + "learning_rate": 2.070062986291219e-05, + "loss": 1.9276, + "step": 20251500 + }, + { + "epoch": 58.62, + "learning_rate": 2.0699906215264914e-05, + "loss": 1.9603, + "step": 20252000 + }, + { + "epoch": 58.62, + "learning_rate": 2.0699182567617636e-05, + "loss": 1.9361, + "step": 20252500 + }, + { + "epoch": 58.62, + "learning_rate": 2.0698458919970358e-05, + "loss": 1.9757, + "step": 20253000 + }, + { + "epoch": 58.63, + "learning_rate": 2.0697735272323084e-05, + "loss": 1.9547, + "step": 20253500 + }, + { + "epoch": 58.63, + "learning_rate": 2.0697011624675806e-05, + "loss": 1.9737, + "step": 20254000 + }, + { + "epoch": 58.63, + "learning_rate": 2.0696287977028532e-05, + "loss": 1.9571, + "step": 20254500 + }, + { + "epoch": 58.63, + "learning_rate": 2.0695564329381254e-05, + "loss": 1.9499, + "step": 20255000 + }, + { + "epoch": 58.63, + "learning_rate": 2.0694840681733976e-05, + "loss": 1.9648, + "step": 20255500 + }, + { + "epoch": 58.63, + "learning_rate": 2.06941170340867e-05, + "loss": 1.9602, + "step": 20256000 + }, + { + "epoch": 58.63, + "learning_rate": 2.069339338643942e-05, + "loss": 1.9781, + "step": 20256500 + }, + { + "epoch": 58.64, + "learning_rate": 2.0692669738792146e-05, + "loss": 1.9497, + "step": 20257000 + }, + { + "epoch": 58.64, + "learning_rate": 2.0691946091144872e-05, + "loss": 1.9296, + "step": 20257500 + }, + { + "epoch": 58.64, + "learning_rate": 2.0691222443497594e-05, + "loss": 1.9537, + "step": 20258000 + }, + { + "epoch": 58.64, + "learning_rate": 2.0690498795850316e-05, + "loss": 1.9722, + "step": 20258500 + }, + { + "epoch": 58.64, + "learning_rate": 2.068977514820304e-05, + "loss": 1.9544, + "step": 20259000 + }, + { + "epoch": 58.64, + "learning_rate": 2.068905150055576e-05, + "loss": 1.9638, + "step": 20259500 + }, + { + "epoch": 58.64, + "learning_rate": 2.0688327852908483e-05, + "loss": 1.957, + "step": 20260000 + }, + { + "epoch": 58.65, + "learning_rate": 2.0687605652556502e-05, + "loss": 1.9563, + "step": 20260500 + }, + { + "epoch": 58.65, + "learning_rate": 2.0686882004909228e-05, + "loss": 1.9536, + "step": 20261000 + }, + { + "epoch": 58.65, + "learning_rate": 2.068615835726195e-05, + "loss": 1.9561, + "step": 20261500 + }, + { + "epoch": 58.65, + "learning_rate": 2.0685434709614672e-05, + "loss": 1.9626, + "step": 20262000 + }, + { + "epoch": 58.65, + "learning_rate": 2.0684711061967398e-05, + "loss": 1.9595, + "step": 20262500 + }, + { + "epoch": 58.65, + "learning_rate": 2.068398741432012e-05, + "loss": 1.975, + "step": 20263000 + }, + { + "epoch": 58.65, + "learning_rate": 2.0683263766672842e-05, + "loss": 1.9614, + "step": 20263500 + }, + { + "epoch": 58.66, + "learning_rate": 2.0682541566320858e-05, + "loss": 1.9418, + "step": 20264000 + }, + { + "epoch": 58.66, + "learning_rate": 2.0681817918673583e-05, + "loss": 1.9546, + "step": 20264500 + }, + { + "epoch": 58.66, + "learning_rate": 2.068109427102631e-05, + "loss": 1.982, + "step": 20265000 + }, + { + "epoch": 58.66, + "learning_rate": 2.0680372070674325e-05, + "loss": 1.9773, + "step": 20265500 + }, + { + "epoch": 58.66, + "learning_rate": 2.0679648423027047e-05, + "loss": 1.9391, + "step": 20266000 + }, + { + "epoch": 58.66, + "learning_rate": 2.0678924775379772e-05, + "loss": 1.955, + "step": 20266500 + }, + { + "epoch": 58.66, + "learning_rate": 2.0678201127732495e-05, + "loss": 1.9487, + "step": 20267000 + }, + { + "epoch": 58.67, + "learning_rate": 2.0677477480085217e-05, + "loss": 1.9309, + "step": 20267500 + }, + { + "epoch": 58.67, + "learning_rate": 2.0676753832437942e-05, + "loss": 1.9632, + "step": 20268000 + }, + { + "epoch": 58.67, + "learning_rate": 2.0676030184790665e-05, + "loss": 1.9609, + "step": 20268500 + }, + { + "epoch": 58.67, + "learning_rate": 2.0675306537143387e-05, + "loss": 1.95, + "step": 20269000 + }, + { + "epoch": 58.67, + "learning_rate": 2.067458288949611e-05, + "loss": 1.9598, + "step": 20269500 + }, + { + "epoch": 58.67, + "learning_rate": 2.0673859241848835e-05, + "loss": 1.9562, + "step": 20270000 + }, + { + "epoch": 58.67, + "learning_rate": 2.067313704149685e-05, + "loss": 1.9787, + "step": 20270500 + }, + { + "epoch": 58.68, + "learning_rate": 2.0672413393849573e-05, + "loss": 1.9356, + "step": 20271000 + }, + { + "epoch": 58.68, + "learning_rate": 2.0671689746202298e-05, + "loss": 1.9542, + "step": 20271500 + }, + { + "epoch": 58.68, + "learning_rate": 2.0670966098555024e-05, + "loss": 1.9598, + "step": 20272000 + }, + { + "epoch": 58.68, + "learning_rate": 2.0670242450907746e-05, + "loss": 1.9611, + "step": 20272500 + }, + { + "epoch": 58.68, + "learning_rate": 2.066951880326047e-05, + "loss": 1.9836, + "step": 20273000 + }, + { + "epoch": 58.68, + "learning_rate": 2.066879515561319e-05, + "loss": 1.9674, + "step": 20273500 + }, + { + "epoch": 58.68, + "learning_rate": 2.066807295526121e-05, + "loss": 1.9463, + "step": 20274000 + }, + { + "epoch": 58.69, + "learning_rate": 2.0667350754909225e-05, + "loss": 1.9602, + "step": 20274500 + }, + { + "epoch": 58.69, + "learning_rate": 2.0666627107261947e-05, + "loss": 1.9343, + "step": 20275000 + }, + { + "epoch": 58.69, + "learning_rate": 2.0665903459614673e-05, + "loss": 1.9513, + "step": 20275500 + }, + { + "epoch": 58.69, + "learning_rate": 2.06651798119674e-05, + "loss": 1.951, + "step": 20276000 + }, + { + "epoch": 58.69, + "learning_rate": 2.066445616432012e-05, + "loss": 1.9665, + "step": 20276500 + }, + { + "epoch": 58.69, + "learning_rate": 2.0663732516672843e-05, + "loss": 1.95, + "step": 20277000 + }, + { + "epoch": 58.7, + "learning_rate": 2.0663008869025565e-05, + "loss": 1.9545, + "step": 20277500 + }, + { + "epoch": 58.7, + "learning_rate": 2.0662285221378287e-05, + "loss": 1.9468, + "step": 20278000 + }, + { + "epoch": 58.7, + "learning_rate": 2.0661561573731013e-05, + "loss": 1.9628, + "step": 20278500 + }, + { + "epoch": 58.7, + "learning_rate": 2.066083937337903e-05, + "loss": 1.9652, + "step": 20279000 + }, + { + "epoch": 58.7, + "learning_rate": 2.0660115725731754e-05, + "loss": 1.9666, + "step": 20279500 + }, + { + "epoch": 58.7, + "learning_rate": 2.0659392078084476e-05, + "loss": 1.9473, + "step": 20280000 + }, + { + "epoch": 58.7, + "learning_rate": 2.06586684304372e-05, + "loss": 1.9485, + "step": 20280500 + }, + { + "epoch": 58.71, + "learning_rate": 2.0657944782789924e-05, + "loss": 1.9297, + "step": 20281000 + }, + { + "epoch": 58.71, + "learning_rate": 2.065722258243794e-05, + "loss": 1.959, + "step": 20281500 + }, + { + "epoch": 58.71, + "learning_rate": 2.0656498934790662e-05, + "loss": 1.9686, + "step": 20282000 + }, + { + "epoch": 58.71, + "learning_rate": 2.0655775287143388e-05, + "loss": 1.9639, + "step": 20282500 + }, + { + "epoch": 58.71, + "learning_rate": 2.0655051639496113e-05, + "loss": 1.9463, + "step": 20283000 + }, + { + "epoch": 58.71, + "learning_rate": 2.0654327991848836e-05, + "loss": 1.9527, + "step": 20283500 + }, + { + "epoch": 58.71, + "learning_rate": 2.0653604344201558e-05, + "loss": 1.9605, + "step": 20284000 + }, + { + "epoch": 58.72, + "learning_rate": 2.065288069655428e-05, + "loss": 1.9502, + "step": 20284500 + }, + { + "epoch": 58.72, + "learning_rate": 2.0652157048907002e-05, + "loss": 1.9299, + "step": 20285000 + }, + { + "epoch": 58.72, + "learning_rate": 2.0651433401259725e-05, + "loss": 1.9507, + "step": 20285500 + }, + { + "epoch": 58.72, + "learning_rate": 2.065070975361245e-05, + "loss": 1.9602, + "step": 20286000 + }, + { + "epoch": 58.72, + "learning_rate": 2.0649986105965176e-05, + "loss": 1.9447, + "step": 20286500 + }, + { + "epoch": 58.72, + "learning_rate": 2.0649262458317898e-05, + "loss": 1.9501, + "step": 20287000 + }, + { + "epoch": 58.72, + "learning_rate": 2.064853881067062e-05, + "loss": 1.9573, + "step": 20287500 + }, + { + "epoch": 58.73, + "learning_rate": 2.064781661031864e-05, + "loss": 1.9614, + "step": 20288000 + }, + { + "epoch": 58.73, + "learning_rate": 2.064709296267136e-05, + "loss": 1.9666, + "step": 20288500 + }, + { + "epoch": 58.73, + "learning_rate": 2.0646369315024084e-05, + "loss": 1.9401, + "step": 20289000 + }, + { + "epoch": 58.73, + "learning_rate": 2.0645645667376806e-05, + "loss": 1.9718, + "step": 20289500 + }, + { + "epoch": 58.73, + "learning_rate": 2.0644924914320118e-05, + "loss": 1.9769, + "step": 20290000 + }, + { + "epoch": 58.73, + "learning_rate": 2.0644201266672844e-05, + "loss": 1.9587, + "step": 20290500 + }, + { + "epoch": 58.73, + "learning_rate": 2.0643477619025566e-05, + "loss": 1.9676, + "step": 20291000 + }, + { + "epoch": 58.74, + "learning_rate": 2.0642753971378288e-05, + "loss": 1.9488, + "step": 20291500 + }, + { + "epoch": 58.74, + "learning_rate": 2.0642030323731014e-05, + "loss": 1.9656, + "step": 20292000 + }, + { + "epoch": 58.74, + "learning_rate": 2.0641306676083736e-05, + "loss": 1.9621, + "step": 20292500 + }, + { + "epoch": 58.74, + "learning_rate": 2.064058302843646e-05, + "loss": 1.9494, + "step": 20293000 + }, + { + "epoch": 58.74, + "learning_rate": 2.063985938078918e-05, + "loss": 1.959, + "step": 20293500 + }, + { + "epoch": 58.74, + "learning_rate": 2.0639135733141906e-05, + "loss": 1.9411, + "step": 20294000 + }, + { + "epoch": 58.74, + "learning_rate": 2.0638413532789925e-05, + "loss": 1.9729, + "step": 20294500 + }, + { + "epoch": 58.75, + "learning_rate": 2.0637689885142647e-05, + "loss": 1.9627, + "step": 20295000 + }, + { + "epoch": 58.75, + "learning_rate": 2.0636967684790663e-05, + "loss": 1.9403, + "step": 20295500 + }, + { + "epoch": 58.75, + "learning_rate": 2.063624403714339e-05, + "loss": 1.933, + "step": 20296000 + }, + { + "epoch": 58.75, + "learning_rate": 2.063552038949611e-05, + "loss": 1.9507, + "step": 20296500 + }, + { + "epoch": 58.75, + "learning_rate": 2.0634796741848833e-05, + "loss": 1.9682, + "step": 20297000 + }, + { + "epoch": 58.75, + "learning_rate": 2.0634074541496852e-05, + "loss": 1.9749, + "step": 20297500 + }, + { + "epoch": 58.75, + "learning_rate": 2.0633350893849578e-05, + "loss": 1.9664, + "step": 20298000 + }, + { + "epoch": 58.76, + "learning_rate": 2.0632628693497593e-05, + "loss": 1.9928, + "step": 20298500 + }, + { + "epoch": 58.76, + "learning_rate": 2.0631905045850315e-05, + "loss": 1.9544, + "step": 20299000 + }, + { + "epoch": 58.76, + "learning_rate": 2.063118139820304e-05, + "loss": 1.9494, + "step": 20299500 + }, + { + "epoch": 58.76, + "learning_rate": 2.0630457750555763e-05, + "loss": 1.9848, + "step": 20300000 + }, + { + "epoch": 58.76, + "learning_rate": 2.062973555020378e-05, + "loss": 1.9637, + "step": 20300500 + }, + { + "epoch": 58.76, + "learning_rate": 2.06290119025565e-05, + "loss": 1.9499, + "step": 20301000 + }, + { + "epoch": 58.76, + "learning_rate": 2.0628288254909227e-05, + "loss": 1.967, + "step": 20301500 + }, + { + "epoch": 58.77, + "learning_rate": 2.0627564607261952e-05, + "loss": 1.9479, + "step": 20302000 + }, + { + "epoch": 58.77, + "learning_rate": 2.0626840959614674e-05, + "loss": 1.9665, + "step": 20302500 + }, + { + "epoch": 58.77, + "learning_rate": 2.062611875926269e-05, + "loss": 1.9642, + "step": 20303000 + }, + { + "epoch": 58.77, + "learning_rate": 2.0625395111615416e-05, + "loss": 1.9336, + "step": 20303500 + }, + { + "epoch": 58.77, + "learning_rate": 2.0624671463968138e-05, + "loss": 1.9625, + "step": 20304000 + }, + { + "epoch": 58.77, + "learning_rate": 2.062394781632086e-05, + "loss": 1.9875, + "step": 20304500 + }, + { + "epoch": 58.77, + "learning_rate": 2.0623224168673582e-05, + "loss": 1.9581, + "step": 20305000 + }, + { + "epoch": 58.78, + "learning_rate": 2.06225019683216e-05, + "loss": 1.9785, + "step": 20305500 + }, + { + "epoch": 58.78, + "learning_rate": 2.0621778320674327e-05, + "loss": 1.9256, + "step": 20306000 + }, + { + "epoch": 58.78, + "learning_rate": 2.062105467302705e-05, + "loss": 1.957, + "step": 20306500 + }, + { + "epoch": 58.78, + "learning_rate": 2.062033102537977e-05, + "loss": 1.9579, + "step": 20307000 + }, + { + "epoch": 58.78, + "learning_rate": 2.0619607377732494e-05, + "loss": 1.9504, + "step": 20307500 + }, + { + "epoch": 58.78, + "learning_rate": 2.0618883730085216e-05, + "loss": 1.9569, + "step": 20308000 + }, + { + "epoch": 58.78, + "learning_rate": 2.061816008243794e-05, + "loss": 1.9466, + "step": 20308500 + }, + { + "epoch": 58.79, + "learning_rate": 2.0617436434790664e-05, + "loss": 1.9495, + "step": 20309000 + }, + { + "epoch": 58.79, + "learning_rate": 2.061671278714339e-05, + "loss": 1.9319, + "step": 20309500 + }, + { + "epoch": 58.79, + "learning_rate": 2.061598913949611e-05, + "loss": 1.9586, + "step": 20310000 + }, + { + "epoch": 58.79, + "learning_rate": 2.0615265491848834e-05, + "loss": 1.9602, + "step": 20310500 + }, + { + "epoch": 58.79, + "learning_rate": 2.0614543291496853e-05, + "loss": 1.9682, + "step": 20311000 + }, + { + "epoch": 58.79, + "learning_rate": 2.0613819643849575e-05, + "loss": 1.957, + "step": 20311500 + }, + { + "epoch": 58.79, + "learning_rate": 2.0613095996202297e-05, + "loss": 1.9739, + "step": 20312000 + }, + { + "epoch": 58.8, + "learning_rate": 2.061237234855502e-05, + "loss": 1.9931, + "step": 20312500 + }, + { + "epoch": 58.8, + "learning_rate": 2.0611648700907745e-05, + "loss": 1.9516, + "step": 20313000 + }, + { + "epoch": 58.8, + "learning_rate": 2.0610926500555764e-05, + "loss": 1.9573, + "step": 20313500 + }, + { + "epoch": 58.8, + "learning_rate": 2.0610202852908486e-05, + "loss": 1.9913, + "step": 20314000 + }, + { + "epoch": 58.8, + "learning_rate": 2.060947920526121e-05, + "loss": 1.9618, + "step": 20314500 + }, + { + "epoch": 58.8, + "learning_rate": 2.060875555761393e-05, + "loss": 1.9588, + "step": 20315000 + }, + { + "epoch": 58.81, + "learning_rate": 2.0608031909966656e-05, + "loss": 1.9803, + "step": 20315500 + }, + { + "epoch": 58.81, + "learning_rate": 2.060730826231938e-05, + "loss": 1.9528, + "step": 20316000 + }, + { + "epoch": 58.81, + "learning_rate": 2.0606584614672104e-05, + "loss": 1.9407, + "step": 20316500 + }, + { + "epoch": 58.81, + "learning_rate": 2.0605860967024826e-05, + "loss": 1.9593, + "step": 20317000 + }, + { + "epoch": 58.81, + "learning_rate": 2.060513731937755e-05, + "loss": 1.9481, + "step": 20317500 + }, + { + "epoch": 58.81, + "learning_rate": 2.0604415119025568e-05, + "loss": 1.9643, + "step": 20318000 + }, + { + "epoch": 58.81, + "learning_rate": 2.0603692918673583e-05, + "loss": 1.9461, + "step": 20318500 + }, + { + "epoch": 58.82, + "learning_rate": 2.0602969271026305e-05, + "loss": 1.9778, + "step": 20319000 + }, + { + "epoch": 58.82, + "learning_rate": 2.060224562337903e-05, + "loss": 1.9482, + "step": 20319500 + }, + { + "epoch": 58.82, + "learning_rate": 2.0601521975731753e-05, + "loss": 1.9398, + "step": 20320000 + }, + { + "epoch": 58.82, + "learning_rate": 2.060079832808448e-05, + "loss": 1.9587, + "step": 20320500 + }, + { + "epoch": 58.82, + "learning_rate": 2.06000746804372e-05, + "loss": 1.9717, + "step": 20321000 + }, + { + "epoch": 58.82, + "learning_rate": 2.0599351032789923e-05, + "loss": 1.9706, + "step": 20321500 + }, + { + "epoch": 58.82, + "learning_rate": 2.0598627385142646e-05, + "loss": 1.979, + "step": 20322000 + }, + { + "epoch": 58.83, + "learning_rate": 2.0597905184790665e-05, + "loss": 1.9781, + "step": 20322500 + }, + { + "epoch": 58.83, + "learning_rate": 2.0597181537143387e-05, + "loss": 1.9524, + "step": 20323000 + }, + { + "epoch": 58.83, + "learning_rate": 2.059645788949611e-05, + "loss": 1.9632, + "step": 20323500 + }, + { + "epoch": 58.83, + "learning_rate": 2.0595735689144128e-05, + "loss": 1.9922, + "step": 20324000 + }, + { + "epoch": 58.83, + "learning_rate": 2.0595012041496854e-05, + "loss": 1.9493, + "step": 20324500 + }, + { + "epoch": 58.83, + "learning_rate": 2.0594288393849576e-05, + "loss": 1.9764, + "step": 20325000 + }, + { + "epoch": 58.83, + "learning_rate": 2.0593564746202298e-05, + "loss": 1.9598, + "step": 20325500 + }, + { + "epoch": 58.84, + "learning_rate": 2.059284109855502e-05, + "loss": 1.9669, + "step": 20326000 + }, + { + "epoch": 58.84, + "learning_rate": 2.0592117450907742e-05, + "loss": 1.9716, + "step": 20326500 + }, + { + "epoch": 58.84, + "learning_rate": 2.0591393803260468e-05, + "loss": 1.9839, + "step": 20327000 + }, + { + "epoch": 58.84, + "learning_rate": 2.059067015561319e-05, + "loss": 1.9399, + "step": 20327500 + }, + { + "epoch": 58.84, + "learning_rate": 2.058994795526121e-05, + "loss": 1.9581, + "step": 20328000 + }, + { + "epoch": 58.84, + "learning_rate": 2.0589225754909228e-05, + "loss": 1.955, + "step": 20328500 + }, + { + "epoch": 58.84, + "learning_rate": 2.058850210726195e-05, + "loss": 1.9469, + "step": 20329000 + }, + { + "epoch": 58.85, + "learning_rate": 2.0587778459614673e-05, + "loss": 1.9583, + "step": 20329500 + }, + { + "epoch": 58.85, + "learning_rate": 2.0587054811967395e-05, + "loss": 1.9547, + "step": 20330000 + }, + { + "epoch": 58.85, + "learning_rate": 2.0586334058910707e-05, + "loss": 1.9404, + "step": 20330500 + }, + { + "epoch": 58.85, + "learning_rate": 2.0585610411263433e-05, + "loss": 1.9648, + "step": 20331000 + }, + { + "epoch": 58.85, + "learning_rate": 2.0584886763616155e-05, + "loss": 1.9721, + "step": 20331500 + }, + { + "epoch": 58.85, + "learning_rate": 2.058416311596888e-05, + "loss": 1.9401, + "step": 20332000 + }, + { + "epoch": 58.85, + "learning_rate": 2.0583439468321603e-05, + "loss": 1.9602, + "step": 20332500 + }, + { + "epoch": 58.86, + "learning_rate": 2.0582715820674325e-05, + "loss": 1.9817, + "step": 20333000 + }, + { + "epoch": 58.86, + "learning_rate": 2.0581992173027047e-05, + "loss": 1.9475, + "step": 20333500 + }, + { + "epoch": 58.86, + "learning_rate": 2.0581269972675066e-05, + "loss": 1.9787, + "step": 20334000 + }, + { + "epoch": 58.86, + "learning_rate": 2.058054632502779e-05, + "loss": 1.9469, + "step": 20334500 + }, + { + "epoch": 58.86, + "learning_rate": 2.057982267738051e-05, + "loss": 1.9748, + "step": 20335000 + }, + { + "epoch": 58.86, + "learning_rate": 2.0579099029733233e-05, + "loss": 1.9572, + "step": 20335500 + }, + { + "epoch": 58.86, + "learning_rate": 2.057837538208596e-05, + "loss": 1.9479, + "step": 20336000 + }, + { + "epoch": 58.87, + "learning_rate": 2.0577651734438684e-05, + "loss": 1.9661, + "step": 20336500 + }, + { + "epoch": 58.87, + "learning_rate": 2.0576928086791406e-05, + "loss": 1.95, + "step": 20337000 + }, + { + "epoch": 58.87, + "learning_rate": 2.057620443914413e-05, + "loss": 1.9622, + "step": 20337500 + }, + { + "epoch": 58.87, + "learning_rate": 2.057548079149685e-05, + "loss": 1.9649, + "step": 20338000 + }, + { + "epoch": 58.87, + "learning_rate": 2.0574757143849573e-05, + "loss": 1.9558, + "step": 20338500 + }, + { + "epoch": 58.87, + "learning_rate": 2.0574033496202295e-05, + "loss": 1.9363, + "step": 20339000 + }, + { + "epoch": 58.87, + "learning_rate": 2.057330984855502e-05, + "loss": 1.9329, + "step": 20339500 + }, + { + "epoch": 58.88, + "learning_rate": 2.057258764820304e-05, + "loss": 1.9449, + "step": 20340000 + }, + { + "epoch": 58.88, + "learning_rate": 2.0571864000555762e-05, + "loss": 1.975, + "step": 20340500 + }, + { + "epoch": 58.88, + "learning_rate": 2.0571140352908484e-05, + "loss": 1.933, + "step": 20341000 + }, + { + "epoch": 58.88, + "learning_rate": 2.057041670526121e-05, + "loss": 1.9475, + "step": 20341500 + }, + { + "epoch": 58.88, + "learning_rate": 2.0569694504909226e-05, + "loss": 1.9633, + "step": 20342000 + }, + { + "epoch": 58.88, + "learning_rate": 2.0568970857261948e-05, + "loss": 1.9448, + "step": 20342500 + }, + { + "epoch": 58.88, + "learning_rate": 2.0568247209614673e-05, + "loss": 1.9562, + "step": 20343000 + }, + { + "epoch": 58.89, + "learning_rate": 2.0567523561967396e-05, + "loss": 1.9359, + "step": 20343500 + }, + { + "epoch": 58.89, + "learning_rate": 2.056679991432012e-05, + "loss": 1.9454, + "step": 20344000 + }, + { + "epoch": 58.89, + "learning_rate": 2.0566076266672844e-05, + "loss": 1.9517, + "step": 20344500 + }, + { + "epoch": 58.89, + "learning_rate": 2.0565352619025566e-05, + "loss": 1.9343, + "step": 20345000 + }, + { + "epoch": 58.89, + "learning_rate": 2.0564628971378288e-05, + "loss": 1.9431, + "step": 20345500 + }, + { + "epoch": 58.89, + "learning_rate": 2.0563906771026307e-05, + "loss": 1.9612, + "step": 20346000 + }, + { + "epoch": 58.89, + "learning_rate": 2.056318312337903e-05, + "loss": 1.9561, + "step": 20346500 + }, + { + "epoch": 58.9, + "learning_rate": 2.0562459475731755e-05, + "loss": 1.965, + "step": 20347000 + }, + { + "epoch": 58.9, + "learning_rate": 2.0561735828084477e-05, + "loss": 1.9628, + "step": 20347500 + }, + { + "epoch": 58.9, + "learning_rate": 2.05610121804372e-05, + "loss": 1.9578, + "step": 20348000 + }, + { + "epoch": 58.9, + "learning_rate": 2.0560289980085218e-05, + "loss": 1.9666, + "step": 20348500 + }, + { + "epoch": 58.9, + "learning_rate": 2.055956633243794e-05, + "loss": 1.937, + "step": 20349000 + }, + { + "epoch": 58.9, + "learning_rate": 2.0558842684790663e-05, + "loss": 1.9754, + "step": 20349500 + }, + { + "epoch": 58.9, + "learning_rate": 2.0558119037143385e-05, + "loss": 1.9389, + "step": 20350000 + }, + { + "epoch": 58.91, + "learning_rate": 2.055739538949611e-05, + "loss": 1.9511, + "step": 20350500 + }, + { + "epoch": 58.91, + "learning_rate": 2.0556671741848836e-05, + "loss": 1.9652, + "step": 20351000 + }, + { + "epoch": 58.91, + "learning_rate": 2.0555949541496852e-05, + "loss": 1.9605, + "step": 20351500 + }, + { + "epoch": 58.91, + "learning_rate": 2.0555225893849574e-05, + "loss": 1.964, + "step": 20352000 + }, + { + "epoch": 58.91, + "learning_rate": 2.0554502246202296e-05, + "loss": 1.941, + "step": 20352500 + }, + { + "epoch": 58.91, + "learning_rate": 2.0553778598555022e-05, + "loss": 1.9493, + "step": 20353000 + }, + { + "epoch": 58.92, + "learning_rate": 2.0553054950907744e-05, + "loss": 1.9605, + "step": 20353500 + }, + { + "epoch": 58.92, + "learning_rate": 2.055233130326047e-05, + "loss": 1.9504, + "step": 20354000 + }, + { + "epoch": 58.92, + "learning_rate": 2.0551607655613192e-05, + "loss": 1.9508, + "step": 20354500 + }, + { + "epoch": 58.92, + "learning_rate": 2.0550884007965914e-05, + "loss": 1.9476, + "step": 20355000 + }, + { + "epoch": 58.92, + "learning_rate": 2.0550161807613933e-05, + "loss": 1.9557, + "step": 20355500 + }, + { + "epoch": 58.92, + "learning_rate": 2.0549438159966655e-05, + "loss": 1.9543, + "step": 20356000 + }, + { + "epoch": 58.92, + "learning_rate": 2.0548715959614674e-05, + "loss": 1.966, + "step": 20356500 + }, + { + "epoch": 58.93, + "learning_rate": 2.0547992311967397e-05, + "loss": 1.9588, + "step": 20357000 + }, + { + "epoch": 58.93, + "learning_rate": 2.054726866432012e-05, + "loss": 1.9583, + "step": 20357500 + }, + { + "epoch": 58.93, + "learning_rate": 2.0546545016672844e-05, + "loss": 1.9796, + "step": 20358000 + }, + { + "epoch": 58.93, + "learning_rate": 2.0545821369025567e-05, + "loss": 1.9543, + "step": 20358500 + }, + { + "epoch": 58.93, + "learning_rate": 2.054509772137829e-05, + "loss": 1.9392, + "step": 20359000 + }, + { + "epoch": 58.93, + "learning_rate": 2.054437407373101e-05, + "loss": 1.9423, + "step": 20359500 + }, + { + "epoch": 58.93, + "learning_rate": 2.0543650426083737e-05, + "loss": 2.0011, + "step": 20360000 + }, + { + "epoch": 58.94, + "learning_rate": 2.0542928225731752e-05, + "loss": 1.9607, + "step": 20360500 + }, + { + "epoch": 58.94, + "learning_rate": 2.0542204578084474e-05, + "loss": 1.9706, + "step": 20361000 + }, + { + "epoch": 58.94, + "learning_rate": 2.0541482377732493e-05, + "loss": 1.9519, + "step": 20361500 + }, + { + "epoch": 58.94, + "learning_rate": 2.054075873008522e-05, + "loss": 1.9593, + "step": 20362000 + }, + { + "epoch": 58.94, + "learning_rate": 2.054003508243794e-05, + "loss": 1.9501, + "step": 20362500 + }, + { + "epoch": 58.94, + "learning_rate": 2.053931288208596e-05, + "loss": 1.9836, + "step": 20363000 + }, + { + "epoch": 58.94, + "learning_rate": 2.0538589234438682e-05, + "loss": 1.9673, + "step": 20363500 + }, + { + "epoch": 58.95, + "learning_rate": 2.0537865586791405e-05, + "loss": 1.9734, + "step": 20364000 + }, + { + "epoch": 58.95, + "learning_rate": 2.0537141939144127e-05, + "loss": 1.9381, + "step": 20364500 + }, + { + "epoch": 58.95, + "learning_rate": 2.0536419738792146e-05, + "loss": 1.9612, + "step": 20365000 + }, + { + "epoch": 58.95, + "learning_rate": 2.0535696091144868e-05, + "loss": 1.9585, + "step": 20365500 + }, + { + "epoch": 58.95, + "learning_rate": 2.0534972443497594e-05, + "loss": 1.9699, + "step": 20366000 + }, + { + "epoch": 58.95, + "learning_rate": 2.0534248795850316e-05, + "loss": 1.9627, + "step": 20366500 + }, + { + "epoch": 58.95, + "learning_rate": 2.0533526595498335e-05, + "loss": 1.9586, + "step": 20367000 + }, + { + "epoch": 58.96, + "learning_rate": 2.0532802947851057e-05, + "loss": 1.9635, + "step": 20367500 + }, + { + "epoch": 58.96, + "learning_rate": 2.0532080747499076e-05, + "loss": 1.9493, + "step": 20368000 + }, + { + "epoch": 58.96, + "learning_rate": 2.05313570998518e-05, + "loss": 1.9367, + "step": 20368500 + }, + { + "epoch": 58.96, + "learning_rate": 2.053063345220452e-05, + "loss": 1.9641, + "step": 20369000 + }, + { + "epoch": 58.96, + "learning_rate": 2.0529909804557246e-05, + "loss": 1.9711, + "step": 20369500 + }, + { + "epoch": 58.96, + "learning_rate": 2.052918615690997e-05, + "loss": 1.977, + "step": 20370000 + }, + { + "epoch": 58.96, + "learning_rate": 2.052846250926269e-05, + "loss": 1.9572, + "step": 20370500 + }, + { + "epoch": 58.97, + "learning_rate": 2.052774030891071e-05, + "loss": 1.9676, + "step": 20371000 + }, + { + "epoch": 58.97, + "learning_rate": 2.0527016661263432e-05, + "loss": 1.9606, + "step": 20371500 + }, + { + "epoch": 58.97, + "learning_rate": 2.0526293013616154e-05, + "loss": 1.9497, + "step": 20372000 + }, + { + "epoch": 58.97, + "learning_rate": 2.0525569365968876e-05, + "loss": 1.9623, + "step": 20372500 + }, + { + "epoch": 58.97, + "learning_rate": 2.0524845718321602e-05, + "loss": 1.978, + "step": 20373000 + }, + { + "epoch": 58.97, + "learning_rate": 2.052412351796962e-05, + "loss": 1.9524, + "step": 20373500 + }, + { + "epoch": 58.97, + "learning_rate": 2.0523399870322343e-05, + "loss": 1.9383, + "step": 20374000 + }, + { + "epoch": 58.98, + "learning_rate": 2.0522676222675065e-05, + "loss": 1.9599, + "step": 20374500 + }, + { + "epoch": 58.98, + "learning_rate": 2.0521952575027788e-05, + "loss": 1.9951, + "step": 20375000 + }, + { + "epoch": 58.98, + "learning_rate": 2.0521228927380513e-05, + "loss": 1.9568, + "step": 20375500 + }, + { + "epoch": 58.98, + "learning_rate": 2.0520505279733235e-05, + "loss": 1.9543, + "step": 20376000 + }, + { + "epoch": 58.98, + "learning_rate": 2.0519781632085958e-05, + "loss": 1.945, + "step": 20376500 + }, + { + "epoch": 58.98, + "learning_rate": 2.0519057984438683e-05, + "loss": 1.9668, + "step": 20377000 + }, + { + "epoch": 58.98, + "learning_rate": 2.0518334336791405e-05, + "loss": 1.9473, + "step": 20377500 + }, + { + "epoch": 58.99, + "learning_rate": 2.0517610689144128e-05, + "loss": 1.9654, + "step": 20378000 + }, + { + "epoch": 58.99, + "learning_rate": 2.0516887041496853e-05, + "loss": 1.9528, + "step": 20378500 + }, + { + "epoch": 58.99, + "learning_rate": 2.0516163393849576e-05, + "loss": 1.9764, + "step": 20379000 + }, + { + "epoch": 58.99, + "learning_rate": 2.0515439746202298e-05, + "loss": 1.9597, + "step": 20379500 + }, + { + "epoch": 58.99, + "learning_rate": 2.051471609855502e-05, + "loss": 1.9549, + "step": 20380000 + }, + { + "epoch": 58.99, + "learning_rate": 2.0513992450907746e-05, + "loss": 1.913, + "step": 20380500 + }, + { + "epoch": 58.99, + "learning_rate": 2.0513270250555765e-05, + "loss": 1.9601, + "step": 20381000 + }, + { + "epoch": 59.0, + "learning_rate": 2.0512546602908487e-05, + "loss": 1.9474, + "step": 20381500 + }, + { + "epoch": 59.0, + "learning_rate": 2.051182295526121e-05, + "loss": 1.9347, + "step": 20382000 + }, + { + "epoch": 59.0, + "learning_rate": 2.051109930761393e-05, + "loss": 1.9616, + "step": 20382500 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.6762973060311479, + "eval_accuracy_mlm": 0.6429612446253726, + "eval_accuracy_nsp": 0.8552428354713487, + "eval_loss": 2.1759543418884277, + "eval_runtime": 331.5891, + "eval_samples_per_second": 1316.045, + "eval_steps_per_second": 54.836, + "step": 20382848 + }, + { + "epoch": 59.0, + "learning_rate": 2.0510375659966654e-05, + "loss": 1.9577, + "step": 20383000 + }, + { + "epoch": 59.0, + "learning_rate": 2.050965201231938e-05, + "loss": 1.9604, + "step": 20383500 + }, + { + "epoch": 59.0, + "learning_rate": 2.05089283646721e-05, + "loss": 1.9313, + "step": 20384000 + }, + { + "epoch": 59.0, + "learning_rate": 2.050820616432012e-05, + "loss": 1.9366, + "step": 20384500 + }, + { + "epoch": 59.01, + "learning_rate": 2.0507482516672843e-05, + "loss": 1.9608, + "step": 20385000 + }, + { + "epoch": 59.01, + "learning_rate": 2.0506758869025565e-05, + "loss": 1.9263, + "step": 20385500 + }, + { + "epoch": 59.01, + "learning_rate": 2.050603522137829e-05, + "loss": 1.9303, + "step": 20386000 + }, + { + "epoch": 59.01, + "learning_rate": 2.0505311573731013e-05, + "loss": 1.9689, + "step": 20386500 + }, + { + "epoch": 59.01, + "learning_rate": 2.0504587926083735e-05, + "loss": 1.9523, + "step": 20387000 + }, + { + "epoch": 59.01, + "learning_rate": 2.0503865725731754e-05, + "loss": 1.9189, + "step": 20387500 + }, + { + "epoch": 59.01, + "learning_rate": 2.050314207808448e-05, + "loss": 1.9391, + "step": 20388000 + }, + { + "epoch": 59.02, + "learning_rate": 2.0502418430437202e-05, + "loss": 1.9374, + "step": 20388500 + }, + { + "epoch": 59.02, + "learning_rate": 2.0501696230085217e-05, + "loss": 1.9482, + "step": 20389000 + }, + { + "epoch": 59.02, + "learning_rate": 2.050097258243794e-05, + "loss": 1.9405, + "step": 20389500 + }, + { + "epoch": 59.02, + "learning_rate": 2.0500248934790665e-05, + "loss": 1.9344, + "step": 20390000 + }, + { + "epoch": 59.02, + "learning_rate": 2.0499525287143387e-05, + "loss": 1.9323, + "step": 20390500 + }, + { + "epoch": 59.02, + "learning_rate": 2.049880163949611e-05, + "loss": 1.9639, + "step": 20391000 + }, + { + "epoch": 59.03, + "learning_rate": 2.0498077991848835e-05, + "loss": 1.9495, + "step": 20391500 + }, + { + "epoch": 59.03, + "learning_rate": 2.0497354344201557e-05, + "loss": 1.948, + "step": 20392000 + }, + { + "epoch": 59.03, + "learning_rate": 2.049663069655428e-05, + "loss": 1.9315, + "step": 20392500 + }, + { + "epoch": 59.03, + "learning_rate": 2.0495907048907005e-05, + "loss": 1.9473, + "step": 20393000 + }, + { + "epoch": 59.03, + "learning_rate": 2.0495183401259728e-05, + "loss": 1.9319, + "step": 20393500 + }, + { + "epoch": 59.03, + "learning_rate": 2.049445975361245e-05, + "loss": 1.9524, + "step": 20394000 + }, + { + "epoch": 59.03, + "learning_rate": 2.0493736105965172e-05, + "loss": 1.9315, + "step": 20394500 + }, + { + "epoch": 59.04, + "learning_rate": 2.0493012458317898e-05, + "loss": 1.9416, + "step": 20395000 + }, + { + "epoch": 59.04, + "learning_rate": 2.049228881067062e-05, + "loss": 1.9262, + "step": 20395500 + }, + { + "epoch": 59.04, + "learning_rate": 2.0491565163023342e-05, + "loss": 1.9373, + "step": 20396000 + }, + { + "epoch": 59.04, + "learning_rate": 2.049084296267136e-05, + "loss": 1.9445, + "step": 20396500 + }, + { + "epoch": 59.04, + "learning_rate": 2.0490119315024083e-05, + "loss": 1.9467, + "step": 20397000 + }, + { + "epoch": 59.04, + "learning_rate": 2.0489395667376805e-05, + "loss": 1.9296, + "step": 20397500 + }, + { + "epoch": 59.04, + "learning_rate": 2.0488673467024824e-05, + "loss": 1.9437, + "step": 20398000 + }, + { + "epoch": 59.05, + "learning_rate": 2.0487949819377547e-05, + "loss": 1.9305, + "step": 20398500 + }, + { + "epoch": 59.05, + "learning_rate": 2.0487226171730272e-05, + "loss": 1.9498, + "step": 20399000 + }, + { + "epoch": 59.05, + "learning_rate": 2.0486502524082995e-05, + "loss": 1.9505, + "step": 20399500 + }, + { + "epoch": 59.05, + "learning_rate": 2.0485778876435717e-05, + "loss": 1.9368, + "step": 20400000 + }, + { + "epoch": 59.05, + "learning_rate": 2.0485056676083736e-05, + "loss": 1.9414, + "step": 20400500 + }, + { + "epoch": 59.05, + "learning_rate": 2.0484333028436458e-05, + "loss": 1.946, + "step": 20401000 + }, + { + "epoch": 59.05, + "learning_rate": 2.048360938078918e-05, + "loss": 1.9513, + "step": 20401500 + }, + { + "epoch": 59.06, + "learning_rate": 2.0482885733141906e-05, + "loss": 1.9274, + "step": 20402000 + }, + { + "epoch": 59.06, + "learning_rate": 2.048216208549463e-05, + "loss": 1.9329, + "step": 20402500 + }, + { + "epoch": 59.06, + "learning_rate": 2.0481438437847354e-05, + "loss": 1.9886, + "step": 20403000 + }, + { + "epoch": 59.06, + "learning_rate": 2.0480714790200076e-05, + "loss": 1.9792, + "step": 20403500 + }, + { + "epoch": 59.06, + "learning_rate": 2.0479991142552798e-05, + "loss": 1.9375, + "step": 20404000 + }, + { + "epoch": 59.06, + "learning_rate": 2.0479268942200817e-05, + "loss": 1.9591, + "step": 20404500 + }, + { + "epoch": 59.06, + "learning_rate": 2.0478546741848833e-05, + "loss": 1.9119, + "step": 20405000 + }, + { + "epoch": 59.07, + "learning_rate": 2.0477823094201555e-05, + "loss": 1.9413, + "step": 20405500 + }, + { + "epoch": 59.07, + "learning_rate": 2.047709944655428e-05, + "loss": 1.9445, + "step": 20406000 + }, + { + "epoch": 59.07, + "learning_rate": 2.0476375798907006e-05, + "loss": 1.9583, + "step": 20406500 + }, + { + "epoch": 59.07, + "learning_rate": 2.047565215125973e-05, + "loss": 1.9356, + "step": 20407000 + }, + { + "epoch": 59.07, + "learning_rate": 2.047492850361245e-05, + "loss": 1.9111, + "step": 20407500 + }, + { + "epoch": 59.07, + "learning_rate": 2.0474204855965173e-05, + "loss": 1.9347, + "step": 20408000 + }, + { + "epoch": 59.07, + "learning_rate": 2.0473481208317895e-05, + "loss": 1.9255, + "step": 20408500 + }, + { + "epoch": 59.08, + "learning_rate": 2.047275756067062e-05, + "loss": 1.9525, + "step": 20409000 + }, + { + "epoch": 59.08, + "learning_rate": 2.0472036807613933e-05, + "loss": 1.9425, + "step": 20409500 + }, + { + "epoch": 59.08, + "learning_rate": 2.047131460726195e-05, + "loss": 1.9396, + "step": 20410000 + }, + { + "epoch": 59.08, + "learning_rate": 2.0470590959614674e-05, + "loss": 1.9461, + "step": 20410500 + }, + { + "epoch": 59.08, + "learning_rate": 2.0469867311967396e-05, + "loss": 1.9432, + "step": 20411000 + }, + { + "epoch": 59.08, + "learning_rate": 2.046914366432012e-05, + "loss": 1.9371, + "step": 20411500 + }, + { + "epoch": 59.08, + "learning_rate": 2.0468420016672844e-05, + "loss": 1.9559, + "step": 20412000 + }, + { + "epoch": 59.09, + "learning_rate": 2.0467696369025566e-05, + "loss": 1.933, + "step": 20412500 + }, + { + "epoch": 59.09, + "learning_rate": 2.046697272137829e-05, + "loss": 1.947, + "step": 20413000 + }, + { + "epoch": 59.09, + "learning_rate": 2.046624907373101e-05, + "loss": 1.9564, + "step": 20413500 + }, + { + "epoch": 59.09, + "learning_rate": 2.0465525426083736e-05, + "loss": 1.9404, + "step": 20414000 + }, + { + "epoch": 59.09, + "learning_rate": 2.046480177843646e-05, + "loss": 1.9361, + "step": 20414500 + }, + { + "epoch": 59.09, + "learning_rate": 2.0464078130789184e-05, + "loss": 1.9474, + "step": 20415000 + }, + { + "epoch": 59.09, + "learning_rate": 2.0463354483141907e-05, + "loss": 1.9334, + "step": 20415500 + }, + { + "epoch": 59.1, + "learning_rate": 2.0462632282789922e-05, + "loss": 1.9334, + "step": 20416000 + }, + { + "epoch": 59.1, + "learning_rate": 2.0461908635142644e-05, + "loss": 1.9485, + "step": 20416500 + }, + { + "epoch": 59.1, + "learning_rate": 2.046118498749537e-05, + "loss": 1.9242, + "step": 20417000 + }, + { + "epoch": 59.1, + "learning_rate": 2.0460461339848092e-05, + "loss": 1.9248, + "step": 20417500 + }, + { + "epoch": 59.1, + "learning_rate": 2.0459737692200818e-05, + "loss": 1.9382, + "step": 20418000 + }, + { + "epoch": 59.1, + "learning_rate": 2.045901404455354e-05, + "loss": 1.9198, + "step": 20418500 + }, + { + "epoch": 59.1, + "learning_rate": 2.045829184420156e-05, + "loss": 1.9529, + "step": 20419000 + }, + { + "epoch": 59.11, + "learning_rate": 2.0457569643849575e-05, + "loss": 1.937, + "step": 20419500 + }, + { + "epoch": 59.11, + "learning_rate": 2.0456845996202297e-05, + "loss": 1.9305, + "step": 20420000 + }, + { + "epoch": 59.11, + "learning_rate": 2.0456123795850316e-05, + "loss": 1.9356, + "step": 20420500 + }, + { + "epoch": 59.11, + "learning_rate": 2.0455400148203038e-05, + "loss": 1.9118, + "step": 20421000 + }, + { + "epoch": 59.11, + "learning_rate": 2.045467650055576e-05, + "loss": 1.9437, + "step": 20421500 + }, + { + "epoch": 59.11, + "learning_rate": 2.0453952852908486e-05, + "loss": 1.9433, + "step": 20422000 + }, + { + "epoch": 59.11, + "learning_rate": 2.0453230652556505e-05, + "loss": 1.944, + "step": 20422500 + }, + { + "epoch": 59.12, + "learning_rate": 2.0452507004909227e-05, + "loss": 1.9242, + "step": 20423000 + }, + { + "epoch": 59.12, + "learning_rate": 2.0451784804557246e-05, + "loss": 1.9702, + "step": 20423500 + }, + { + "epoch": 59.12, + "learning_rate": 2.0451061156909968e-05, + "loss": 1.9244, + "step": 20424000 + }, + { + "epoch": 59.12, + "learning_rate": 2.045033750926269e-05, + "loss": 1.9526, + "step": 20424500 + }, + { + "epoch": 59.12, + "learning_rate": 2.0449613861615413e-05, + "loss": 1.924, + "step": 20425000 + }, + { + "epoch": 59.12, + "learning_rate": 2.0448890213968135e-05, + "loss": 1.95, + "step": 20425500 + }, + { + "epoch": 59.12, + "learning_rate": 2.044816656632086e-05, + "loss": 1.9505, + "step": 20426000 + }, + { + "epoch": 59.13, + "learning_rate": 2.0447442918673583e-05, + "loss": 1.931, + "step": 20426500 + }, + { + "epoch": 59.13, + "learning_rate": 2.044671927102631e-05, + "loss": 1.9585, + "step": 20427000 + }, + { + "epoch": 59.13, + "learning_rate": 2.044599562337903e-05, + "loss": 1.9326, + "step": 20427500 + }, + { + "epoch": 59.13, + "learning_rate": 2.0445273423027046e-05, + "loss": 1.9445, + "step": 20428000 + }, + { + "epoch": 59.13, + "learning_rate": 2.0444549775379772e-05, + "loss": 1.9555, + "step": 20428500 + }, + { + "epoch": 59.13, + "learning_rate": 2.0443826127732494e-05, + "loss": 1.9233, + "step": 20429000 + }, + { + "epoch": 59.14, + "learning_rate": 2.044310248008522e-05, + "loss": 1.9617, + "step": 20429500 + }, + { + "epoch": 59.14, + "learning_rate": 2.0442378832437942e-05, + "loss": 1.9487, + "step": 20430000 + }, + { + "epoch": 59.14, + "learning_rate": 2.0441655184790664e-05, + "loss": 1.9397, + "step": 20430500 + }, + { + "epoch": 59.14, + "learning_rate": 2.0440931537143386e-05, + "loss": 1.9457, + "step": 20431000 + }, + { + "epoch": 59.14, + "learning_rate": 2.044020788949611e-05, + "loss": 1.9323, + "step": 20431500 + }, + { + "epoch": 59.14, + "learning_rate": 2.0439484241848834e-05, + "loss": 1.9447, + "step": 20432000 + }, + { + "epoch": 59.14, + "learning_rate": 2.0438760594201556e-05, + "loss": 1.9445, + "step": 20432500 + }, + { + "epoch": 59.15, + "learning_rate": 2.0438036946554282e-05, + "loss": 1.9288, + "step": 20433000 + }, + { + "epoch": 59.15, + "learning_rate": 2.0437313298907004e-05, + "loss": 1.9596, + "step": 20433500 + }, + { + "epoch": 59.15, + "learning_rate": 2.0436589651259727e-05, + "loss": 1.9437, + "step": 20434000 + }, + { + "epoch": 59.15, + "learning_rate": 2.043586600361245e-05, + "loss": 1.9683, + "step": 20434500 + }, + { + "epoch": 59.15, + "learning_rate": 2.0435142355965174e-05, + "loss": 1.9551, + "step": 20435000 + }, + { + "epoch": 59.15, + "learning_rate": 2.0434418708317897e-05, + "loss": 1.9545, + "step": 20435500 + }, + { + "epoch": 59.15, + "learning_rate": 2.043369506067062e-05, + "loss": 1.944, + "step": 20436000 + }, + { + "epoch": 59.16, + "learning_rate": 2.0432972860318638e-05, + "loss": 1.9439, + "step": 20436500 + }, + { + "epoch": 59.16, + "learning_rate": 2.043224921267136e-05, + "loss": 1.9537, + "step": 20437000 + }, + { + "epoch": 59.16, + "learning_rate": 2.0431525565024086e-05, + "loss": 1.9416, + "step": 20437500 + }, + { + "epoch": 59.16, + "learning_rate": 2.0430801917376808e-05, + "loss": 1.9543, + "step": 20438000 + }, + { + "epoch": 59.16, + "learning_rate": 2.043007826972953e-05, + "loss": 1.9525, + "step": 20438500 + }, + { + "epoch": 59.16, + "learning_rate": 2.042935606937755e-05, + "loss": 1.949, + "step": 20439000 + }, + { + "epoch": 59.16, + "learning_rate": 2.042863242173027e-05, + "loss": 1.9393, + "step": 20439500 + }, + { + "epoch": 59.17, + "learning_rate": 2.0427908774082994e-05, + "loss": 1.934, + "step": 20440000 + }, + { + "epoch": 59.17, + "learning_rate": 2.0427186573731012e-05, + "loss": 1.9411, + "step": 20440500 + }, + { + "epoch": 59.17, + "learning_rate": 2.0426462926083738e-05, + "loss": 1.9383, + "step": 20441000 + }, + { + "epoch": 59.17, + "learning_rate": 2.042573927843646e-05, + "loss": 1.9485, + "step": 20441500 + }, + { + "epoch": 59.17, + "learning_rate": 2.0425015630789183e-05, + "loss": 1.9448, + "step": 20442000 + }, + { + "epoch": 59.17, + "learning_rate": 2.0424293430437198e-05, + "loss": 1.9524, + "step": 20442500 + }, + { + "epoch": 59.17, + "learning_rate": 2.0423569782789924e-05, + "loss": 1.941, + "step": 20443000 + }, + { + "epoch": 59.18, + "learning_rate": 2.0422846135142646e-05, + "loss": 1.948, + "step": 20443500 + }, + { + "epoch": 59.18, + "learning_rate": 2.042212248749537e-05, + "loss": 1.9416, + "step": 20444000 + }, + { + "epoch": 59.18, + "learning_rate": 2.0421398839848094e-05, + "loss": 1.9579, + "step": 20444500 + }, + { + "epoch": 59.18, + "learning_rate": 2.0420675192200816e-05, + "loss": 1.9436, + "step": 20445000 + }, + { + "epoch": 59.18, + "learning_rate": 2.0419951544553538e-05, + "loss": 1.9426, + "step": 20445500 + }, + { + "epoch": 59.18, + "learning_rate": 2.0419227896906264e-05, + "loss": 1.9377, + "step": 20446000 + }, + { + "epoch": 59.18, + "learning_rate": 2.0418504249258986e-05, + "loss": 1.921, + "step": 20446500 + }, + { + "epoch": 59.19, + "learning_rate": 2.0417782048907002e-05, + "loss": 1.9812, + "step": 20447000 + }, + { + "epoch": 59.19, + "learning_rate": 2.0417058401259724e-05, + "loss": 1.937, + "step": 20447500 + }, + { + "epoch": 59.19, + "learning_rate": 2.041633475361245e-05, + "loss": 1.9674, + "step": 20448000 + }, + { + "epoch": 59.19, + "learning_rate": 2.041561255326047e-05, + "loss": 1.9281, + "step": 20448500 + }, + { + "epoch": 59.19, + "learning_rate": 2.041488890561319e-05, + "loss": 1.9181, + "step": 20449000 + }, + { + "epoch": 59.19, + "learning_rate": 2.0414165257965913e-05, + "loss": 1.9242, + "step": 20449500 + }, + { + "epoch": 59.19, + "learning_rate": 2.041344161031864e-05, + "loss": 1.942, + "step": 20450000 + }, + { + "epoch": 59.2, + "learning_rate": 2.041271796267136e-05, + "loss": 1.9376, + "step": 20450500 + }, + { + "epoch": 59.2, + "learning_rate": 2.0411995762319376e-05, + "loss": 1.9469, + "step": 20451000 + }, + { + "epoch": 59.2, + "learning_rate": 2.0411272114672102e-05, + "loss": 1.9366, + "step": 20451500 + }, + { + "epoch": 59.2, + "learning_rate": 2.0410548467024824e-05, + "loss": 1.93, + "step": 20452000 + }, + { + "epoch": 59.2, + "learning_rate": 2.040982481937755e-05, + "loss": 1.9672, + "step": 20452500 + }, + { + "epoch": 59.2, + "learning_rate": 2.0409101171730272e-05, + "loss": 1.9611, + "step": 20453000 + }, + { + "epoch": 59.2, + "learning_rate": 2.0408377524082994e-05, + "loss": 1.9665, + "step": 20453500 + }, + { + "epoch": 59.21, + "learning_rate": 2.0407653876435717e-05, + "loss": 1.9447, + "step": 20454000 + }, + { + "epoch": 59.21, + "learning_rate": 2.0406931676083735e-05, + "loss": 1.9573, + "step": 20454500 + }, + { + "epoch": 59.21, + "learning_rate": 2.0406208028436458e-05, + "loss": 1.9599, + "step": 20455000 + }, + { + "epoch": 59.21, + "learning_rate": 2.0405484380789183e-05, + "loss": 1.9383, + "step": 20455500 + }, + { + "epoch": 59.21, + "learning_rate": 2.0404762180437202e-05, + "loss": 1.9701, + "step": 20456000 + }, + { + "epoch": 59.21, + "learning_rate": 2.0404038532789925e-05, + "loss": 1.9467, + "step": 20456500 + }, + { + "epoch": 59.21, + "learning_rate": 2.0403314885142647e-05, + "loss": 1.9386, + "step": 20457000 + }, + { + "epoch": 59.22, + "learning_rate": 2.040259123749537e-05, + "loss": 1.9211, + "step": 20457500 + }, + { + "epoch": 59.22, + "learning_rate": 2.040186758984809e-05, + "loss": 1.9412, + "step": 20458000 + }, + { + "epoch": 59.22, + "learning_rate": 2.0401143942200813e-05, + "loss": 1.9791, + "step": 20458500 + }, + { + "epoch": 59.22, + "learning_rate": 2.040042029455354e-05, + "loss": 1.933, + "step": 20459000 + }, + { + "epoch": 59.22, + "learning_rate": 2.0399696646906265e-05, + "loss": 1.9135, + "step": 20459500 + }, + { + "epoch": 59.22, + "learning_rate": 2.0398972999258987e-05, + "loss": 1.9473, + "step": 20460000 + }, + { + "epoch": 59.22, + "learning_rate": 2.0398250798907002e-05, + "loss": 1.9434, + "step": 20460500 + }, + { + "epoch": 59.23, + "learning_rate": 2.0397527151259728e-05, + "loss": 1.9512, + "step": 20461000 + }, + { + "epoch": 59.23, + "learning_rate": 2.039680350361245e-05, + "loss": 1.9558, + "step": 20461500 + }, + { + "epoch": 59.23, + "learning_rate": 2.0396079855965173e-05, + "loss": 1.9429, + "step": 20462000 + }, + { + "epoch": 59.23, + "learning_rate": 2.0395357655613188e-05, + "loss": 1.9577, + "step": 20462500 + }, + { + "epoch": 59.23, + "learning_rate": 2.039463545526121e-05, + "loss": 1.9545, + "step": 20463000 + }, + { + "epoch": 59.23, + "learning_rate": 2.0393911807613933e-05, + "loss": 1.9705, + "step": 20463500 + }, + { + "epoch": 59.23, + "learning_rate": 2.0393188159966655e-05, + "loss": 1.9492, + "step": 20464000 + }, + { + "epoch": 59.24, + "learning_rate": 2.0392464512319377e-05, + "loss": 1.9353, + "step": 20464500 + }, + { + "epoch": 59.24, + "learning_rate": 2.0391740864672103e-05, + "loss": 1.9307, + "step": 20465000 + }, + { + "epoch": 59.24, + "learning_rate": 2.0391017217024825e-05, + "loss": 1.9201, + "step": 20465500 + }, + { + "epoch": 59.24, + "learning_rate": 2.0390293569377547e-05, + "loss": 1.933, + "step": 20466000 + }, + { + "epoch": 59.24, + "learning_rate": 2.0389569921730273e-05, + "loss": 1.9583, + "step": 20466500 + }, + { + "epoch": 59.24, + "learning_rate": 2.0388846274082995e-05, + "loss": 1.9454, + "step": 20467000 + }, + { + "epoch": 59.25, + "learning_rate": 2.0388122626435717e-05, + "loss": 1.9306, + "step": 20467500 + }, + { + "epoch": 59.25, + "learning_rate": 2.038739897878844e-05, + "loss": 1.9425, + "step": 20468000 + }, + { + "epoch": 59.25, + "learning_rate": 2.0386675331141165e-05, + "loss": 1.9672, + "step": 20468500 + }, + { + "epoch": 59.25, + "learning_rate": 2.0385954578084477e-05, + "loss": 1.9618, + "step": 20469000 + }, + { + "epoch": 59.25, + "learning_rate": 2.03852309304372e-05, + "loss": 1.9238, + "step": 20469500 + }, + { + "epoch": 59.25, + "learning_rate": 2.0384507282789922e-05, + "loss": 1.9581, + "step": 20470000 + }, + { + "epoch": 59.25, + "learning_rate": 2.038378508243794e-05, + "loss": 1.964, + "step": 20470500 + }, + { + "epoch": 59.26, + "learning_rate": 2.0383061434790667e-05, + "loss": 1.9465, + "step": 20471000 + }, + { + "epoch": 59.26, + "learning_rate": 2.038233778714339e-05, + "loss": 1.9204, + "step": 20471500 + }, + { + "epoch": 59.26, + "learning_rate": 2.038161413949611e-05, + "loss": 1.9324, + "step": 20472000 + }, + { + "epoch": 59.26, + "learning_rate": 2.0380890491848833e-05, + "loss": 1.9447, + "step": 20472500 + }, + { + "epoch": 59.26, + "learning_rate": 2.0380166844201555e-05, + "loss": 1.98, + "step": 20473000 + }, + { + "epoch": 59.26, + "learning_rate": 2.0379443196554278e-05, + "loss": 1.933, + "step": 20473500 + }, + { + "epoch": 59.26, + "learning_rate": 2.0378720996202297e-05, + "loss": 1.9362, + "step": 20474000 + }, + { + "epoch": 59.27, + "learning_rate": 2.0377997348555022e-05, + "loss": 1.9437, + "step": 20474500 + }, + { + "epoch": 59.27, + "learning_rate": 2.0377273700907744e-05, + "loss": 1.9417, + "step": 20475000 + }, + { + "epoch": 59.27, + "learning_rate": 2.0376550053260467e-05, + "loss": 1.9497, + "step": 20475500 + }, + { + "epoch": 59.27, + "learning_rate": 2.0375826405613192e-05, + "loss": 1.9452, + "step": 20476000 + }, + { + "epoch": 59.27, + "learning_rate": 2.0375102757965915e-05, + "loss": 1.9509, + "step": 20476500 + }, + { + "epoch": 59.27, + "learning_rate": 2.0374379110318637e-05, + "loss": 1.9432, + "step": 20477000 + }, + { + "epoch": 59.27, + "learning_rate": 2.0373656909966656e-05, + "loss": 1.9689, + "step": 20477500 + }, + { + "epoch": 59.28, + "learning_rate": 2.0372934709614675e-05, + "loss": 1.9451, + "step": 20478000 + }, + { + "epoch": 59.28, + "learning_rate": 2.0372211061967397e-05, + "loss": 1.9514, + "step": 20478500 + }, + { + "epoch": 59.28, + "learning_rate": 2.037148741432012e-05, + "loss": 1.9524, + "step": 20479000 + }, + { + "epoch": 59.28, + "learning_rate": 2.037076376667284e-05, + "loss": 1.9481, + "step": 20479500 + }, + { + "epoch": 59.28, + "learning_rate": 2.0370040119025567e-05, + "loss": 1.9593, + "step": 20480000 + }, + { + "epoch": 59.28, + "learning_rate": 2.036931647137829e-05, + "loss": 1.9291, + "step": 20480500 + }, + { + "epoch": 59.28, + "learning_rate": 2.036859282373101e-05, + "loss": 1.9405, + "step": 20481000 + }, + { + "epoch": 59.29, + "learning_rate": 2.036787062337903e-05, + "loss": 1.9469, + "step": 20481500 + }, + { + "epoch": 59.29, + "learning_rate": 2.0367146975731756e-05, + "loss": 1.9578, + "step": 20482000 + }, + { + "epoch": 59.29, + "learning_rate": 2.0366423328084478e-05, + "loss": 1.941, + "step": 20482500 + }, + { + "epoch": 59.29, + "learning_rate": 2.03656996804372e-05, + "loss": 1.9395, + "step": 20483000 + }, + { + "epoch": 59.29, + "learning_rate": 2.0364976032789923e-05, + "loss": 1.9275, + "step": 20483500 + }, + { + "epoch": 59.29, + "learning_rate": 2.0364252385142645e-05, + "loss": 1.9682, + "step": 20484000 + }, + { + "epoch": 59.29, + "learning_rate": 2.0363528737495367e-05, + "loss": 1.9437, + "step": 20484500 + }, + { + "epoch": 59.3, + "learning_rate": 2.0362805089848093e-05, + "loss": 1.9202, + "step": 20485000 + }, + { + "epoch": 59.3, + "learning_rate": 2.036208144220082e-05, + "loss": 1.9527, + "step": 20485500 + }, + { + "epoch": 59.3, + "learning_rate": 2.0361359241848834e-05, + "loss": 1.9678, + "step": 20486000 + }, + { + "epoch": 59.3, + "learning_rate": 2.0360635594201556e-05, + "loss": 1.9614, + "step": 20486500 + }, + { + "epoch": 59.3, + "learning_rate": 2.0359911946554282e-05, + "loss": 1.9431, + "step": 20487000 + }, + { + "epoch": 59.3, + "learning_rate": 2.0359188298907004e-05, + "loss": 1.9387, + "step": 20487500 + }, + { + "epoch": 59.3, + "learning_rate": 2.0358464651259726e-05, + "loss": 1.9369, + "step": 20488000 + }, + { + "epoch": 59.31, + "learning_rate": 2.035774100361245e-05, + "loss": 1.9777, + "step": 20488500 + }, + { + "epoch": 59.31, + "learning_rate": 2.0357017355965174e-05, + "loss": 1.9364, + "step": 20489000 + }, + { + "epoch": 59.31, + "learning_rate": 2.0356293708317896e-05, + "loss": 1.9056, + "step": 20489500 + }, + { + "epoch": 59.31, + "learning_rate": 2.035557006067062e-05, + "loss": 1.9532, + "step": 20490000 + }, + { + "epoch": 59.31, + "learning_rate": 2.0354846413023344e-05, + "loss": 1.9651, + "step": 20490500 + }, + { + "epoch": 59.31, + "learning_rate": 2.0354122765376066e-05, + "loss": 1.9147, + "step": 20491000 + }, + { + "epoch": 59.31, + "learning_rate": 2.0353400565024082e-05, + "loss": 1.9729, + "step": 20491500 + }, + { + "epoch": 59.32, + "learning_rate": 2.0352676917376808e-05, + "loss": 1.9477, + "step": 20492000 + }, + { + "epoch": 59.32, + "learning_rate": 2.0351953269729533e-05, + "loss": 1.9687, + "step": 20492500 + }, + { + "epoch": 59.32, + "learning_rate": 2.0351229622082256e-05, + "loss": 1.9777, + "step": 20493000 + }, + { + "epoch": 59.32, + "learning_rate": 2.0350505974434978e-05, + "loss": 1.9366, + "step": 20493500 + }, + { + "epoch": 59.32, + "learning_rate": 2.03497823267877e-05, + "loss": 1.9433, + "step": 20494000 + }, + { + "epoch": 59.32, + "learning_rate": 2.0349058679140422e-05, + "loss": 1.9351, + "step": 20494500 + }, + { + "epoch": 59.32, + "learning_rate": 2.0348335031493144e-05, + "loss": 1.947, + "step": 20495000 + }, + { + "epoch": 59.33, + "learning_rate": 2.034761138384587e-05, + "loss": 1.9532, + "step": 20495500 + }, + { + "epoch": 59.33, + "learning_rate": 2.0346887736198596e-05, + "loss": 1.9546, + "step": 20496000 + }, + { + "epoch": 59.33, + "learning_rate": 2.034616553584661e-05, + "loss": 1.9563, + "step": 20496500 + }, + { + "epoch": 59.33, + "learning_rate": 2.0345441888199333e-05, + "loss": 1.9512, + "step": 20497000 + }, + { + "epoch": 59.33, + "learning_rate": 2.034471824055206e-05, + "loss": 1.9614, + "step": 20497500 + }, + { + "epoch": 59.33, + "learning_rate": 2.034399459290478e-05, + "loss": 1.9432, + "step": 20498000 + }, + { + "epoch": 59.33, + "learning_rate": 2.0343272392552797e-05, + "loss": 1.9458, + "step": 20498500 + }, + { + "epoch": 59.34, + "learning_rate": 2.034254874490552e-05, + "loss": 1.9296, + "step": 20499000 + }, + { + "epoch": 59.34, + "learning_rate": 2.0341825097258245e-05, + "loss": 1.9303, + "step": 20499500 + }, + { + "epoch": 59.34, + "learning_rate": 2.034110144961097e-05, + "loss": 1.9532, + "step": 20500000 + }, + { + "epoch": 59.34, + "learning_rate": 2.0340377801963693e-05, + "loss": 1.9562, + "step": 20500500 + }, + { + "epoch": 59.34, + "learning_rate": 2.0339654154316415e-05, + "loss": 1.9557, + "step": 20501000 + }, + { + "epoch": 59.34, + "learning_rate": 2.0338933401259727e-05, + "loss": 1.9248, + "step": 20501500 + }, + { + "epoch": 59.34, + "learning_rate": 2.033820975361245e-05, + "loss": 1.9494, + "step": 20502000 + }, + { + "epoch": 59.35, + "learning_rate": 2.033748610596517e-05, + "loss": 1.9483, + "step": 20502500 + }, + { + "epoch": 59.35, + "learning_rate": 2.0336762458317897e-05, + "loss": 1.9541, + "step": 20503000 + }, + { + "epoch": 59.35, + "learning_rate": 2.0336040257965913e-05, + "loss": 1.9324, + "step": 20503500 + }, + { + "epoch": 59.35, + "learning_rate": 2.033531661031864e-05, + "loss": 1.9499, + "step": 20504000 + }, + { + "epoch": 59.35, + "learning_rate": 2.033459296267136e-05, + "loss": 1.9553, + "step": 20504500 + }, + { + "epoch": 59.35, + "learning_rate": 2.0333869315024083e-05, + "loss": 1.9577, + "step": 20505000 + }, + { + "epoch": 59.36, + "learning_rate": 2.033314566737681e-05, + "loss": 1.9534, + "step": 20505500 + }, + { + "epoch": 59.36, + "learning_rate": 2.0332423467024824e-05, + "loss": 1.9615, + "step": 20506000 + }, + { + "epoch": 59.36, + "learning_rate": 2.0331699819377546e-05, + "loss": 1.9406, + "step": 20506500 + }, + { + "epoch": 59.36, + "learning_rate": 2.0330976171730272e-05, + "loss": 1.9378, + "step": 20507000 + }, + { + "epoch": 59.36, + "learning_rate": 2.0330253971378287e-05, + "loss": 1.9337, + "step": 20507500 + }, + { + "epoch": 59.36, + "learning_rate": 2.0329530323731013e-05, + "loss": 1.9549, + "step": 20508000 + }, + { + "epoch": 59.36, + "learning_rate": 2.0328806676083735e-05, + "loss": 1.9737, + "step": 20508500 + }, + { + "epoch": 59.37, + "learning_rate": 2.032808302843646e-05, + "loss": 1.9561, + "step": 20509000 + }, + { + "epoch": 59.37, + "learning_rate": 2.0327359380789183e-05, + "loss": 1.9456, + "step": 20509500 + }, + { + "epoch": 59.37, + "learning_rate": 2.0326635733141905e-05, + "loss": 1.971, + "step": 20510000 + }, + { + "epoch": 59.37, + "learning_rate": 2.0325912085494628e-05, + "loss": 1.9451, + "step": 20510500 + }, + { + "epoch": 59.37, + "learning_rate": 2.0325189885142647e-05, + "loss": 1.9742, + "step": 20511000 + }, + { + "epoch": 59.37, + "learning_rate": 2.0324466237495372e-05, + "loss": 1.9315, + "step": 20511500 + }, + { + "epoch": 59.37, + "learning_rate": 2.0323742589848094e-05, + "loss": 1.9636, + "step": 20512000 + }, + { + "epoch": 59.38, + "learning_rate": 2.0323018942200817e-05, + "loss": 1.9387, + "step": 20512500 + }, + { + "epoch": 59.38, + "learning_rate": 2.032229529455354e-05, + "loss": 1.958, + "step": 20513000 + }, + { + "epoch": 59.38, + "learning_rate": 2.032157164690626e-05, + "loss": 1.9696, + "step": 20513500 + }, + { + "epoch": 59.38, + "learning_rate": 2.0320847999258987e-05, + "loss": 1.9375, + "step": 20514000 + }, + { + "epoch": 59.38, + "learning_rate": 2.032012435161171e-05, + "loss": 1.9312, + "step": 20514500 + }, + { + "epoch": 59.38, + "learning_rate": 2.0319400703964435e-05, + "loss": 1.9653, + "step": 20515000 + }, + { + "epoch": 59.38, + "learning_rate": 2.0318677056317157e-05, + "loss": 1.9503, + "step": 20515500 + }, + { + "epoch": 59.39, + "learning_rate": 2.0317954855965172e-05, + "loss": 1.9782, + "step": 20516000 + }, + { + "epoch": 59.39, + "learning_rate": 2.0317231208317898e-05, + "loss": 1.9573, + "step": 20516500 + }, + { + "epoch": 59.39, + "learning_rate": 2.031650756067062e-05, + "loss": 1.9232, + "step": 20517000 + }, + { + "epoch": 59.39, + "learning_rate": 2.0315783913023342e-05, + "loss": 1.9356, + "step": 20517500 + }, + { + "epoch": 59.39, + "learning_rate": 2.0315060265376065e-05, + "loss": 1.9466, + "step": 20518000 + }, + { + "epoch": 59.39, + "learning_rate": 2.0314336617728787e-05, + "loss": 1.9541, + "step": 20518500 + }, + { + "epoch": 59.39, + "learning_rate": 2.0313612970081513e-05, + "loss": 1.9341, + "step": 20519000 + }, + { + "epoch": 59.4, + "learning_rate": 2.0312889322434235e-05, + "loss": 1.9301, + "step": 20519500 + }, + { + "epoch": 59.4, + "learning_rate": 2.031216567478696e-05, + "loss": 1.9586, + "step": 20520000 + }, + { + "epoch": 59.4, + "learning_rate": 2.0311443474434976e-05, + "loss": 1.9546, + "step": 20520500 + }, + { + "epoch": 59.4, + "learning_rate": 2.0310719826787698e-05, + "loss": 1.9527, + "step": 20521000 + }, + { + "epoch": 59.4, + "learning_rate": 2.0309997626435717e-05, + "loss": 1.9447, + "step": 20521500 + }, + { + "epoch": 59.4, + "learning_rate": 2.030927397878844e-05, + "loss": 1.9156, + "step": 20522000 + }, + { + "epoch": 59.4, + "learning_rate": 2.030855177843646e-05, + "loss": 1.9533, + "step": 20522500 + }, + { + "epoch": 59.41, + "learning_rate": 2.0307828130789184e-05, + "loss": 1.9427, + "step": 20523000 + }, + { + "epoch": 59.41, + "learning_rate": 2.03071059304372e-05, + "loss": 1.958, + "step": 20523500 + }, + { + "epoch": 59.41, + "learning_rate": 2.0306382282789925e-05, + "loss": 1.9728, + "step": 20524000 + }, + { + "epoch": 59.41, + "learning_rate": 2.0305658635142647e-05, + "loss": 1.9553, + "step": 20524500 + }, + { + "epoch": 59.41, + "learning_rate": 2.0304936434790663e-05, + "loss": 1.9558, + "step": 20525000 + }, + { + "epoch": 59.41, + "learning_rate": 2.0304212787143385e-05, + "loss": 1.9477, + "step": 20525500 + }, + { + "epoch": 59.41, + "learning_rate": 2.0303490586791404e-05, + "loss": 1.9424, + "step": 20526000 + }, + { + "epoch": 59.42, + "learning_rate": 2.0302766939144126e-05, + "loss": 1.9548, + "step": 20526500 + }, + { + "epoch": 59.42, + "learning_rate": 2.0302043291496852e-05, + "loss": 1.9645, + "step": 20527000 + }, + { + "epoch": 59.42, + "learning_rate": 2.0301319643849574e-05, + "loss": 1.957, + "step": 20527500 + }, + { + "epoch": 59.42, + "learning_rate": 2.03005959962023e-05, + "loss": 1.9518, + "step": 20528000 + }, + { + "epoch": 59.42, + "learning_rate": 2.0299872348555022e-05, + "loss": 1.941, + "step": 20528500 + }, + { + "epoch": 59.42, + "learning_rate": 2.0299148700907744e-05, + "loss": 1.9133, + "step": 20529000 + }, + { + "epoch": 59.42, + "learning_rate": 2.0298425053260466e-05, + "loss": 1.9206, + "step": 20529500 + }, + { + "epoch": 59.43, + "learning_rate": 2.029770140561319e-05, + "loss": 1.9586, + "step": 20530000 + }, + { + "epoch": 59.43, + "learning_rate": 2.0296977757965914e-05, + "loss": 1.9389, + "step": 20530500 + }, + { + "epoch": 59.43, + "learning_rate": 2.0296254110318637e-05, + "loss": 1.9578, + "step": 20531000 + }, + { + "epoch": 59.43, + "learning_rate": 2.0295530462671362e-05, + "loss": 1.9651, + "step": 20531500 + }, + { + "epoch": 59.43, + "learning_rate": 2.0294806815024084e-05, + "loss": 1.9768, + "step": 20532000 + }, + { + "epoch": 59.43, + "learning_rate": 2.0294083167376807e-05, + "loss": 1.9836, + "step": 20532500 + }, + { + "epoch": 59.43, + "learning_rate": 2.029335951972953e-05, + "loss": 1.9611, + "step": 20533000 + }, + { + "epoch": 59.44, + "learning_rate": 2.029263587208225e-05, + "loss": 1.9409, + "step": 20533500 + }, + { + "epoch": 59.44, + "learning_rate": 2.0291912224434977e-05, + "loss": 1.9391, + "step": 20534000 + }, + { + "epoch": 59.44, + "learning_rate": 2.0291188576787702e-05, + "loss": 1.9385, + "step": 20534500 + }, + { + "epoch": 59.44, + "learning_rate": 2.0290464929140425e-05, + "loss": 1.9554, + "step": 20535000 + }, + { + "epoch": 59.44, + "learning_rate": 2.028974272878844e-05, + "loss": 1.9316, + "step": 20535500 + }, + { + "epoch": 59.44, + "learning_rate": 2.0289019081141162e-05, + "loss": 1.9516, + "step": 20536000 + }, + { + "epoch": 59.44, + "learning_rate": 2.0288295433493888e-05, + "loss": 1.9271, + "step": 20536500 + }, + { + "epoch": 59.45, + "learning_rate": 2.028757178584661e-05, + "loss": 1.9506, + "step": 20537000 + }, + { + "epoch": 59.45, + "learning_rate": 2.0286848138199336e-05, + "loss": 1.949, + "step": 20537500 + }, + { + "epoch": 59.45, + "learning_rate": 2.0286124490552058e-05, + "loss": 1.9364, + "step": 20538000 + }, + { + "epoch": 59.45, + "learning_rate": 2.028540084290478e-05, + "loss": 1.9353, + "step": 20538500 + }, + { + "epoch": 59.45, + "learning_rate": 2.02846786425528e-05, + "loss": 1.9636, + "step": 20539000 + }, + { + "epoch": 59.45, + "learning_rate": 2.028395499490552e-05, + "loss": 1.9422, + "step": 20539500 + }, + { + "epoch": 59.45, + "learning_rate": 2.0283231347258244e-05, + "loss": 1.9517, + "step": 20540000 + }, + { + "epoch": 59.46, + "learning_rate": 2.0282507699610966e-05, + "loss": 1.9646, + "step": 20540500 + }, + { + "epoch": 59.46, + "learning_rate": 2.028178405196369e-05, + "loss": 1.9292, + "step": 20541000 + }, + { + "epoch": 59.46, + "learning_rate": 2.028106185161171e-05, + "loss": 1.9621, + "step": 20541500 + }, + { + "epoch": 59.46, + "learning_rate": 2.0280338203964433e-05, + "loss": 1.9508, + "step": 20542000 + }, + { + "epoch": 59.46, + "learning_rate": 2.0279614556317155e-05, + "loss": 1.9544, + "step": 20542500 + }, + { + "epoch": 59.46, + "learning_rate": 2.0278890908669877e-05, + "loss": 1.9425, + "step": 20543000 + }, + { + "epoch": 59.47, + "learning_rate": 2.0278167261022603e-05, + "loss": 1.9665, + "step": 20543500 + }, + { + "epoch": 59.47, + "learning_rate": 2.0277443613375325e-05, + "loss": 1.9559, + "step": 20544000 + }, + { + "epoch": 59.47, + "learning_rate": 2.0276719965728047e-05, + "loss": 1.9508, + "step": 20544500 + }, + { + "epoch": 59.47, + "learning_rate": 2.0275996318080773e-05, + "loss": 1.9318, + "step": 20545000 + }, + { + "epoch": 59.47, + "learning_rate": 2.0275272670433495e-05, + "loss": 1.9516, + "step": 20545500 + }, + { + "epoch": 59.47, + "learning_rate": 2.0274549022786217e-05, + "loss": 1.9628, + "step": 20546000 + }, + { + "epoch": 59.47, + "learning_rate": 2.027382537513894e-05, + "loss": 1.9444, + "step": 20546500 + }, + { + "epoch": 59.48, + "learning_rate": 2.0273101727491665e-05, + "loss": 1.9552, + "step": 20547000 + }, + { + "epoch": 59.48, + "learning_rate": 2.0272378079844388e-05, + "loss": 1.9265, + "step": 20547500 + }, + { + "epoch": 59.48, + "learning_rate": 2.0271655879492403e-05, + "loss": 1.9705, + "step": 20548000 + }, + { + "epoch": 59.48, + "learning_rate": 2.0270933679140422e-05, + "loss": 1.9423, + "step": 20548500 + }, + { + "epoch": 59.48, + "learning_rate": 2.0270210031493148e-05, + "loss": 1.9668, + "step": 20549000 + }, + { + "epoch": 59.48, + "learning_rate": 2.026948638384587e-05, + "loss": 1.9466, + "step": 20549500 + }, + { + "epoch": 59.48, + "learning_rate": 2.0268762736198592e-05, + "loss": 1.9365, + "step": 20550000 + }, + { + "epoch": 59.49, + "learning_rate": 2.0268039088551318e-05, + "loss": 1.9578, + "step": 20550500 + }, + { + "epoch": 59.49, + "learning_rate": 2.026731544090404e-05, + "loss": 1.9354, + "step": 20551000 + }, + { + "epoch": 59.49, + "learning_rate": 2.0266591793256762e-05, + "loss": 1.9372, + "step": 20551500 + }, + { + "epoch": 59.49, + "learning_rate": 2.0265868145609488e-05, + "loss": 1.934, + "step": 20552000 + }, + { + "epoch": 59.49, + "learning_rate": 2.0265145945257503e-05, + "loss": 1.946, + "step": 20552500 + }, + { + "epoch": 59.49, + "learning_rate": 2.026442229761023e-05, + "loss": 1.984, + "step": 20553000 + }, + { + "epoch": 59.49, + "learning_rate": 2.026369864996295e-05, + "loss": 1.9425, + "step": 20553500 + }, + { + "epoch": 59.5, + "learning_rate": 2.0262976449610967e-05, + "loss": 1.9714, + "step": 20554000 + }, + { + "epoch": 59.5, + "learning_rate": 2.0262252801963692e-05, + "loss": 1.9412, + "step": 20554500 + }, + { + "epoch": 59.5, + "learning_rate": 2.0261529154316415e-05, + "loss": 1.9533, + "step": 20555000 + }, + { + "epoch": 59.5, + "learning_rate": 2.026080695396443e-05, + "loss": 1.9223, + "step": 20555500 + }, + { + "epoch": 59.5, + "learning_rate": 2.0260083306317152e-05, + "loss": 1.9411, + "step": 20556000 + }, + { + "epoch": 59.5, + "learning_rate": 2.0259359658669878e-05, + "loss": 1.945, + "step": 20556500 + }, + { + "epoch": 59.5, + "learning_rate": 2.0258636011022604e-05, + "loss": 1.9559, + "step": 20557000 + }, + { + "epoch": 59.51, + "learning_rate": 2.0257912363375326e-05, + "loss": 1.9162, + "step": 20557500 + }, + { + "epoch": 59.51, + "learning_rate": 2.0257188715728048e-05, + "loss": 1.9515, + "step": 20558000 + }, + { + "epoch": 59.51, + "learning_rate": 2.0256466515376067e-05, + "loss": 1.9467, + "step": 20558500 + }, + { + "epoch": 59.51, + "learning_rate": 2.025574286772879e-05, + "loss": 1.9441, + "step": 20559000 + }, + { + "epoch": 59.51, + "learning_rate": 2.025501922008151e-05, + "loss": 1.9548, + "step": 20559500 + }, + { + "epoch": 59.51, + "learning_rate": 2.0254295572434237e-05, + "loss": 1.9503, + "step": 20560000 + }, + { + "epoch": 59.51, + "learning_rate": 2.025357192478696e-05, + "loss": 1.9464, + "step": 20560500 + }, + { + "epoch": 59.52, + "learning_rate": 2.025284827713968e-05, + "loss": 1.9472, + "step": 20561000 + }, + { + "epoch": 59.52, + "learning_rate": 2.02521260767877e-05, + "loss": 1.9851, + "step": 20561500 + }, + { + "epoch": 59.52, + "learning_rate": 2.0251402429140423e-05, + "loss": 1.9359, + "step": 20562000 + }, + { + "epoch": 59.52, + "learning_rate": 2.0250678781493145e-05, + "loss": 1.938, + "step": 20562500 + }, + { + "epoch": 59.52, + "learning_rate": 2.0249955133845867e-05, + "loss": 1.9395, + "step": 20563000 + }, + { + "epoch": 59.52, + "learning_rate": 2.0249231486198593e-05, + "loss": 1.9541, + "step": 20563500 + }, + { + "epoch": 59.52, + "learning_rate": 2.024850783855132e-05, + "loss": 1.9584, + "step": 20564000 + }, + { + "epoch": 59.53, + "learning_rate": 2.024778419090404e-05, + "loss": 1.9466, + "step": 20564500 + }, + { + "epoch": 59.53, + "learning_rate": 2.0247060543256763e-05, + "loss": 1.9667, + "step": 20565000 + }, + { + "epoch": 59.53, + "learning_rate": 2.0246336895609485e-05, + "loss": 1.9445, + "step": 20565500 + }, + { + "epoch": 59.53, + "learning_rate": 2.0245613247962207e-05, + "loss": 1.9666, + "step": 20566000 + }, + { + "epoch": 59.53, + "learning_rate": 2.024488960031493e-05, + "loss": 1.9563, + "step": 20566500 + }, + { + "epoch": 59.53, + "learning_rate": 2.0244165952667655e-05, + "loss": 1.9497, + "step": 20567000 + }, + { + "epoch": 59.53, + "learning_rate": 2.024344230502038e-05, + "loss": 1.9447, + "step": 20567500 + }, + { + "epoch": 59.54, + "learning_rate": 2.0242721551963693e-05, + "loss": 1.9324, + "step": 20568000 + }, + { + "epoch": 59.54, + "learning_rate": 2.0241997904316415e-05, + "loss": 1.9541, + "step": 20568500 + }, + { + "epoch": 59.54, + "learning_rate": 2.0241274256669138e-05, + "loss": 1.9654, + "step": 20569000 + }, + { + "epoch": 59.54, + "learning_rate": 2.024055060902186e-05, + "loss": 1.9329, + "step": 20569500 + }, + { + "epoch": 59.54, + "learning_rate": 2.0239826961374582e-05, + "loss": 1.9426, + "step": 20570000 + }, + { + "epoch": 59.54, + "learning_rate": 2.0239103313727308e-05, + "loss": 1.9552, + "step": 20570500 + }, + { + "epoch": 59.54, + "learning_rate": 2.0238379666080033e-05, + "loss": 1.9342, + "step": 20571000 + }, + { + "epoch": 59.55, + "learning_rate": 2.023765746572805e-05, + "loss": 1.9628, + "step": 20571500 + }, + { + "epoch": 59.55, + "learning_rate": 2.023693381808077e-05, + "loss": 1.9506, + "step": 20572000 + }, + { + "epoch": 59.55, + "learning_rate": 2.023621161772879e-05, + "loss": 1.9486, + "step": 20572500 + }, + { + "epoch": 59.55, + "learning_rate": 2.0235487970081512e-05, + "loss": 1.9623, + "step": 20573000 + }, + { + "epoch": 59.55, + "learning_rate": 2.0234764322434235e-05, + "loss": 1.9083, + "step": 20573500 + }, + { + "epoch": 59.55, + "learning_rate": 2.0234040674786957e-05, + "loss": 1.9569, + "step": 20574000 + }, + { + "epoch": 59.55, + "learning_rate": 2.0233317027139682e-05, + "loss": 1.9516, + "step": 20574500 + }, + { + "epoch": 59.56, + "learning_rate": 2.0232593379492408e-05, + "loss": 1.9538, + "step": 20575000 + }, + { + "epoch": 59.56, + "learning_rate": 2.023186973184513e-05, + "loss": 1.9467, + "step": 20575500 + }, + { + "epoch": 59.56, + "learning_rate": 2.0231146084197853e-05, + "loss": 1.9619, + "step": 20576000 + }, + { + "epoch": 59.56, + "learning_rate": 2.023042388384587e-05, + "loss": 1.9571, + "step": 20576500 + }, + { + "epoch": 59.56, + "learning_rate": 2.0229700236198594e-05, + "loss": 1.9489, + "step": 20577000 + }, + { + "epoch": 59.56, + "learning_rate": 2.0228976588551316e-05, + "loss": 1.9581, + "step": 20577500 + }, + { + "epoch": 59.56, + "learning_rate": 2.0228252940904038e-05, + "loss": 1.9274, + "step": 20578000 + }, + { + "epoch": 59.57, + "learning_rate": 2.0227529293256764e-05, + "loss": 1.9256, + "step": 20578500 + }, + { + "epoch": 59.57, + "learning_rate": 2.0226807092904783e-05, + "loss": 1.9495, + "step": 20579000 + }, + { + "epoch": 59.57, + "learning_rate": 2.0226083445257505e-05, + "loss": 1.9727, + "step": 20579500 + }, + { + "epoch": 59.57, + "learning_rate": 2.0225359797610227e-05, + "loss": 1.9563, + "step": 20580000 + }, + { + "epoch": 59.57, + "learning_rate": 2.022463614996295e-05, + "loss": 1.9613, + "step": 20580500 + }, + { + "epoch": 59.57, + "learning_rate": 2.022391250231567e-05, + "loss": 1.9656, + "step": 20581000 + }, + { + "epoch": 59.58, + "learning_rate": 2.0223188854668397e-05, + "loss": 1.9658, + "step": 20581500 + }, + { + "epoch": 59.58, + "learning_rate": 2.0222465207021123e-05, + "loss": 1.9689, + "step": 20582000 + }, + { + "epoch": 59.58, + "learning_rate": 2.0221741559373845e-05, + "loss": 1.94, + "step": 20582500 + }, + { + "epoch": 59.58, + "learning_rate": 2.0221017911726567e-05, + "loss": 1.9239, + "step": 20583000 + }, + { + "epoch": 59.58, + "learning_rate": 2.0220295711374583e-05, + "loss": 1.9377, + "step": 20583500 + }, + { + "epoch": 59.58, + "learning_rate": 2.021957206372731e-05, + "loss": 1.9321, + "step": 20584000 + }, + { + "epoch": 59.58, + "learning_rate": 2.021884841608003e-05, + "loss": 1.9648, + "step": 20584500 + }, + { + "epoch": 59.59, + "learning_rate": 2.0218124768432753e-05, + "loss": 1.9519, + "step": 20585000 + }, + { + "epoch": 59.59, + "learning_rate": 2.0217401120785475e-05, + "loss": 1.9405, + "step": 20585500 + }, + { + "epoch": 59.59, + "learning_rate": 2.02166774731382e-05, + "loss": 1.9228, + "step": 20586000 + }, + { + "epoch": 59.59, + "learning_rate": 2.0215953825490923e-05, + "loss": 1.9665, + "step": 20586500 + }, + { + "epoch": 59.59, + "learning_rate": 2.0215231625138942e-05, + "loss": 1.9481, + "step": 20587000 + }, + { + "epoch": 59.59, + "learning_rate": 2.0214507977491664e-05, + "loss": 1.9434, + "step": 20587500 + }, + { + "epoch": 59.59, + "learning_rate": 2.0213784329844387e-05, + "loss": 1.9494, + "step": 20588000 + }, + { + "epoch": 59.6, + "learning_rate": 2.021306068219711e-05, + "loss": 1.947, + "step": 20588500 + }, + { + "epoch": 59.6, + "learning_rate": 2.0212337034549834e-05, + "loss": 1.9655, + "step": 20589000 + }, + { + "epoch": 59.6, + "learning_rate": 2.0211616281493147e-05, + "loss": 1.966, + "step": 20589500 + }, + { + "epoch": 59.6, + "learning_rate": 2.0210892633845872e-05, + "loss": 1.9741, + "step": 20590000 + }, + { + "epoch": 59.6, + "learning_rate": 2.0210168986198595e-05, + "loss": 1.9504, + "step": 20590500 + }, + { + "epoch": 59.6, + "learning_rate": 2.0209445338551317e-05, + "loss": 1.9574, + "step": 20591000 + }, + { + "epoch": 59.6, + "learning_rate": 2.020872169090404e-05, + "loss": 1.96, + "step": 20591500 + }, + { + "epoch": 59.61, + "learning_rate": 2.020799804325676e-05, + "loss": 1.9413, + "step": 20592000 + }, + { + "epoch": 59.61, + "learning_rate": 2.0207274395609483e-05, + "loss": 1.9664, + "step": 20592500 + }, + { + "epoch": 59.61, + "learning_rate": 2.020655074796221e-05, + "loss": 1.9282, + "step": 20593000 + }, + { + "epoch": 59.61, + "learning_rate": 2.0205827100314935e-05, + "loss": 1.9522, + "step": 20593500 + }, + { + "epoch": 59.61, + "learning_rate": 2.0205103452667657e-05, + "loss": 1.9455, + "step": 20594000 + }, + { + "epoch": 59.61, + "learning_rate": 2.0204381252315672e-05, + "loss": 1.9316, + "step": 20594500 + }, + { + "epoch": 59.61, + "learning_rate": 2.0203657604668398e-05, + "loss": 1.9308, + "step": 20595000 + }, + { + "epoch": 59.62, + "learning_rate": 2.0202935404316414e-05, + "loss": 1.956, + "step": 20595500 + }, + { + "epoch": 59.62, + "learning_rate": 2.0202211756669136e-05, + "loss": 1.9358, + "step": 20596000 + }, + { + "epoch": 59.62, + "learning_rate": 2.020148810902186e-05, + "loss": 1.9492, + "step": 20596500 + }, + { + "epoch": 59.62, + "learning_rate": 2.0200764461374584e-05, + "loss": 1.9447, + "step": 20597000 + }, + { + "epoch": 59.62, + "learning_rate": 2.020004081372731e-05, + "loss": 1.9515, + "step": 20597500 + }, + { + "epoch": 59.62, + "learning_rate": 2.019931716608003e-05, + "loss": 1.9871, + "step": 20598000 + }, + { + "epoch": 59.62, + "learning_rate": 2.0198593518432754e-05, + "loss": 1.9468, + "step": 20598500 + }, + { + "epoch": 59.63, + "learning_rate": 2.0197869870785476e-05, + "loss": 1.9477, + "step": 20599000 + }, + { + "epoch": 59.63, + "learning_rate": 2.0197146223138198e-05, + "loss": 1.9516, + "step": 20599500 + }, + { + "epoch": 59.63, + "learning_rate": 2.0196422575490924e-05, + "loss": 1.9661, + "step": 20600000 + }, + { + "epoch": 59.63, + "learning_rate": 2.019569892784365e-05, + "loss": 1.9385, + "step": 20600500 + }, + { + "epoch": 59.63, + "learning_rate": 2.0194975280196372e-05, + "loss": 1.9522, + "step": 20601000 + }, + { + "epoch": 59.63, + "learning_rate": 2.0194253079844387e-05, + "loss": 1.9369, + "step": 20601500 + }, + { + "epoch": 59.63, + "learning_rate": 2.0193529432197113e-05, + "loss": 1.9437, + "step": 20602000 + }, + { + "epoch": 59.64, + "learning_rate": 2.0192805784549835e-05, + "loss": 1.945, + "step": 20602500 + }, + { + "epoch": 59.64, + "learning_rate": 2.0192082136902557e-05, + "loss": 1.954, + "step": 20603000 + }, + { + "epoch": 59.64, + "learning_rate": 2.0191359936550573e-05, + "loss": 1.9449, + "step": 20603500 + }, + { + "epoch": 59.64, + "learning_rate": 2.0190637736198592e-05, + "loss": 1.9439, + "step": 20604000 + }, + { + "epoch": 59.64, + "learning_rate": 2.0189914088551314e-05, + "loss": 1.96, + "step": 20604500 + }, + { + "epoch": 59.64, + "learning_rate": 2.018919044090404e-05, + "loss": 1.9476, + "step": 20605000 + }, + { + "epoch": 59.64, + "learning_rate": 2.0188466793256762e-05, + "loss": 1.9387, + "step": 20605500 + }, + { + "epoch": 59.65, + "learning_rate": 2.018774459290478e-05, + "loss": 1.9846, + "step": 20606000 + }, + { + "epoch": 59.65, + "learning_rate": 2.0187020945257503e-05, + "loss": 1.9464, + "step": 20606500 + }, + { + "epoch": 59.65, + "learning_rate": 2.0186298744905522e-05, + "loss": 1.9401, + "step": 20607000 + }, + { + "epoch": 59.65, + "learning_rate": 2.0185575097258244e-05, + "loss": 1.945, + "step": 20607500 + }, + { + "epoch": 59.65, + "learning_rate": 2.0184851449610967e-05, + "loss": 1.9407, + "step": 20608000 + }, + { + "epoch": 59.65, + "learning_rate": 2.0184127801963692e-05, + "loss": 1.9394, + "step": 20608500 + }, + { + "epoch": 59.65, + "learning_rate": 2.0183404154316414e-05, + "loss": 1.9665, + "step": 20609000 + }, + { + "epoch": 59.66, + "learning_rate": 2.0182680506669137e-05, + "loss": 1.9667, + "step": 20609500 + }, + { + "epoch": 59.66, + "learning_rate": 2.0181958306317156e-05, + "loss": 1.9514, + "step": 20610000 + }, + { + "epoch": 59.66, + "learning_rate": 2.0181234658669878e-05, + "loss": 1.9599, + "step": 20610500 + }, + { + "epoch": 59.66, + "learning_rate": 2.01805110110226e-05, + "loss": 1.9336, + "step": 20611000 + }, + { + "epoch": 59.66, + "learning_rate": 2.0179787363375326e-05, + "loss": 1.9738, + "step": 20611500 + }, + { + "epoch": 59.66, + "learning_rate": 2.017906516302334e-05, + "loss": 1.9252, + "step": 20612000 + }, + { + "epoch": 59.66, + "learning_rate": 2.0178341515376067e-05, + "loss": 1.9707, + "step": 20612500 + }, + { + "epoch": 59.67, + "learning_rate": 2.017761786772879e-05, + "loss": 1.9543, + "step": 20613000 + }, + { + "epoch": 59.67, + "learning_rate": 2.0176894220081515e-05, + "loss": 1.9663, + "step": 20613500 + }, + { + "epoch": 59.67, + "learning_rate": 2.0176170572434237e-05, + "loss": 1.9505, + "step": 20614000 + }, + { + "epoch": 59.67, + "learning_rate": 2.017544692478696e-05, + "loss": 1.9496, + "step": 20614500 + }, + { + "epoch": 59.67, + "learning_rate": 2.017472327713968e-05, + "loss": 1.9274, + "step": 20615000 + }, + { + "epoch": 59.67, + "learning_rate": 2.0173999629492404e-05, + "loss": 1.9641, + "step": 20615500 + }, + { + "epoch": 59.67, + "learning_rate": 2.017327598184513e-05, + "loss": 1.9625, + "step": 20616000 + }, + { + "epoch": 59.68, + "learning_rate": 2.0172553781493148e-05, + "loss": 1.9283, + "step": 20616500 + }, + { + "epoch": 59.68, + "learning_rate": 2.017183013384587e-05, + "loss": 1.943, + "step": 20617000 + }, + { + "epoch": 59.68, + "learning_rate": 2.0171106486198593e-05, + "loss": 1.9582, + "step": 20617500 + }, + { + "epoch": 59.68, + "learning_rate": 2.0170382838551315e-05, + "loss": 1.9588, + "step": 20618000 + }, + { + "epoch": 59.68, + "learning_rate": 2.016965919090404e-05, + "loss": 1.9454, + "step": 20618500 + }, + { + "epoch": 59.68, + "learning_rate": 2.0168936990552056e-05, + "loss": 1.9606, + "step": 20619000 + }, + { + "epoch": 59.69, + "learning_rate": 2.016821334290478e-05, + "loss": 1.9326, + "step": 20619500 + }, + { + "epoch": 59.69, + "learning_rate": 2.0167489695257504e-05, + "loss": 1.9697, + "step": 20620000 + }, + { + "epoch": 59.69, + "learning_rate": 2.0166767494905523e-05, + "loss": 1.9448, + "step": 20620500 + }, + { + "epoch": 59.69, + "learning_rate": 2.0166043847258245e-05, + "loss": 1.9455, + "step": 20621000 + }, + { + "epoch": 59.69, + "learning_rate": 2.0165320199610967e-05, + "loss": 1.9369, + "step": 20621500 + }, + { + "epoch": 59.69, + "learning_rate": 2.016459655196369e-05, + "loss": 1.9742, + "step": 20622000 + }, + { + "epoch": 59.69, + "learning_rate": 2.0163872904316415e-05, + "loss": 1.9497, + "step": 20622500 + }, + { + "epoch": 59.7, + "learning_rate": 2.016315070396443e-05, + "loss": 1.9515, + "step": 20623000 + }, + { + "epoch": 59.7, + "learning_rate": 2.0162427056317153e-05, + "loss": 1.9499, + "step": 20623500 + }, + { + "epoch": 59.7, + "learning_rate": 2.016170340866988e-05, + "loss": 1.9559, + "step": 20624000 + }, + { + "epoch": 59.7, + "learning_rate": 2.01609797610226e-05, + "loss": 1.946, + "step": 20624500 + }, + { + "epoch": 59.7, + "learning_rate": 2.0160256113375327e-05, + "loss": 1.9554, + "step": 20625000 + }, + { + "epoch": 59.7, + "learning_rate": 2.015953246572805e-05, + "loss": 1.9548, + "step": 20625500 + }, + { + "epoch": 59.7, + "learning_rate": 2.015880881808077e-05, + "loss": 1.9496, + "step": 20626000 + }, + { + "epoch": 59.71, + "learning_rate": 2.0158085170433493e-05, + "loss": 1.9417, + "step": 20626500 + }, + { + "epoch": 59.71, + "learning_rate": 2.0157361522786215e-05, + "loss": 1.947, + "step": 20627000 + }, + { + "epoch": 59.71, + "learning_rate": 2.015663787513894e-05, + "loss": 1.9593, + "step": 20627500 + }, + { + "epoch": 59.71, + "learning_rate": 2.015591567478696e-05, + "loss": 1.9458, + "step": 20628000 + }, + { + "epoch": 59.71, + "learning_rate": 2.0155192027139682e-05, + "loss": 1.9486, + "step": 20628500 + }, + { + "epoch": 59.71, + "learning_rate": 2.0154468379492404e-05, + "loss": 1.9267, + "step": 20629000 + }, + { + "epoch": 59.71, + "learning_rate": 2.0153744731845127e-05, + "loss": 1.9343, + "step": 20629500 + }, + { + "epoch": 59.72, + "learning_rate": 2.0153021084197852e-05, + "loss": 1.9698, + "step": 20630000 + }, + { + "epoch": 59.72, + "learning_rate": 2.0152297436550575e-05, + "loss": 1.9537, + "step": 20630500 + }, + { + "epoch": 59.72, + "learning_rate": 2.0151575236198594e-05, + "loss": 1.9466, + "step": 20631000 + }, + { + "epoch": 59.72, + "learning_rate": 2.0150853035846612e-05, + "loss": 1.952, + "step": 20631500 + }, + { + "epoch": 59.72, + "learning_rate": 2.0150129388199335e-05, + "loss": 1.9538, + "step": 20632000 + }, + { + "epoch": 59.72, + "learning_rate": 2.0149405740552057e-05, + "loss": 1.9351, + "step": 20632500 + }, + { + "epoch": 59.72, + "learning_rate": 2.014868209290478e-05, + "loss": 1.937, + "step": 20633000 + }, + { + "epoch": 59.73, + "learning_rate": 2.0147959892552798e-05, + "loss": 1.9595, + "step": 20633500 + }, + { + "epoch": 59.73, + "learning_rate": 2.014723624490552e-05, + "loss": 1.9668, + "step": 20634000 + }, + { + "epoch": 59.73, + "learning_rate": 2.0146512597258243e-05, + "loss": 1.9445, + "step": 20634500 + }, + { + "epoch": 59.73, + "learning_rate": 2.0145788949610968e-05, + "loss": 1.9489, + "step": 20635000 + }, + { + "epoch": 59.73, + "learning_rate": 2.014506530196369e-05, + "loss": 1.9397, + "step": 20635500 + }, + { + "epoch": 59.73, + "learning_rate": 2.0144341654316416e-05, + "loss": 1.9729, + "step": 20636000 + }, + { + "epoch": 59.73, + "learning_rate": 2.0143618006669138e-05, + "loss": 1.9648, + "step": 20636500 + }, + { + "epoch": 59.74, + "learning_rate": 2.014289435902186e-05, + "loss": 1.9198, + "step": 20637000 + }, + { + "epoch": 59.74, + "learning_rate": 2.0142170711374583e-05, + "loss": 1.9571, + "step": 20637500 + }, + { + "epoch": 59.74, + "learning_rate": 2.0141447063727305e-05, + "loss": 1.9518, + "step": 20638000 + }, + { + "epoch": 59.74, + "learning_rate": 2.0140726310670617e-05, + "loss": 1.9611, + "step": 20638500 + }, + { + "epoch": 59.74, + "learning_rate": 2.0140002663023343e-05, + "loss": 1.9593, + "step": 20639000 + }, + { + "epoch": 59.74, + "learning_rate": 2.013927901537607e-05, + "loss": 1.9559, + "step": 20639500 + }, + { + "epoch": 59.74, + "learning_rate": 2.013855536772879e-05, + "loss": 1.9301, + "step": 20640000 + }, + { + "epoch": 59.75, + "learning_rate": 2.0137831720081513e-05, + "loss": 1.9509, + "step": 20640500 + }, + { + "epoch": 59.75, + "learning_rate": 2.0137108072434235e-05, + "loss": 1.9661, + "step": 20641000 + }, + { + "epoch": 59.75, + "learning_rate": 2.0136384424786957e-05, + "loss": 1.9335, + "step": 20641500 + }, + { + "epoch": 59.75, + "learning_rate": 2.013566077713968e-05, + "loss": 1.9461, + "step": 20642000 + }, + { + "epoch": 59.75, + "learning_rate": 2.0134938576787702e-05, + "loss": 1.9225, + "step": 20642500 + }, + { + "epoch": 59.75, + "learning_rate": 2.0134214929140424e-05, + "loss": 1.975, + "step": 20643000 + }, + { + "epoch": 59.75, + "learning_rate": 2.0133491281493146e-05, + "loss": 1.9437, + "step": 20643500 + }, + { + "epoch": 59.76, + "learning_rate": 2.013276763384587e-05, + "loss": 1.9781, + "step": 20644000 + }, + { + "epoch": 59.76, + "learning_rate": 2.0132045433493888e-05, + "loss": 1.9537, + "step": 20644500 + }, + { + "epoch": 59.76, + "learning_rate": 2.013132178584661e-05, + "loss": 1.9583, + "step": 20645000 + }, + { + "epoch": 59.76, + "learning_rate": 2.0130598138199332e-05, + "loss": 1.9323, + "step": 20645500 + }, + { + "epoch": 59.76, + "learning_rate": 2.0129874490552058e-05, + "loss": 1.943, + "step": 20646000 + }, + { + "epoch": 59.76, + "learning_rate": 2.012915084290478e-05, + "loss": 1.9458, + "step": 20646500 + }, + { + "epoch": 59.76, + "learning_rate": 2.0128427195257506e-05, + "loss": 1.9454, + "step": 20647000 + }, + { + "epoch": 59.77, + "learning_rate": 2.0127703547610228e-05, + "loss": 1.9624, + "step": 20647500 + }, + { + "epoch": 59.77, + "learning_rate": 2.0126981347258243e-05, + "loss": 1.9599, + "step": 20648000 + }, + { + "epoch": 59.77, + "learning_rate": 2.012625769961097e-05, + "loss": 1.963, + "step": 20648500 + }, + { + "epoch": 59.77, + "learning_rate": 2.012553405196369e-05, + "loss": 1.9369, + "step": 20649000 + }, + { + "epoch": 59.77, + "learning_rate": 2.0124810404316413e-05, + "loss": 1.9693, + "step": 20649500 + }, + { + "epoch": 59.77, + "learning_rate": 2.0124088203964432e-05, + "loss": 1.9504, + "step": 20650000 + }, + { + "epoch": 59.77, + "learning_rate": 2.0123364556317158e-05, + "loss": 1.9289, + "step": 20650500 + }, + { + "epoch": 59.78, + "learning_rate": 2.012264090866988e-05, + "loss": 1.9845, + "step": 20651000 + }, + { + "epoch": 59.78, + "learning_rate": 2.0121917261022602e-05, + "loss": 1.9596, + "step": 20651500 + }, + { + "epoch": 59.78, + "learning_rate": 2.0121195060670618e-05, + "loss": 1.9693, + "step": 20652000 + }, + { + "epoch": 59.78, + "learning_rate": 2.0120471413023344e-05, + "loss": 1.9552, + "step": 20652500 + }, + { + "epoch": 59.78, + "learning_rate": 2.0119747765376066e-05, + "loss": 1.9358, + "step": 20653000 + }, + { + "epoch": 59.78, + "learning_rate": 2.0119024117728788e-05, + "loss": 1.969, + "step": 20653500 + }, + { + "epoch": 59.78, + "learning_rate": 2.0118300470081514e-05, + "loss": 1.9546, + "step": 20654000 + }, + { + "epoch": 59.79, + "learning_rate": 2.0117576822434236e-05, + "loss": 1.9655, + "step": 20654500 + }, + { + "epoch": 59.79, + "learning_rate": 2.0116853174786958e-05, + "loss": 1.9562, + "step": 20655000 + }, + { + "epoch": 59.79, + "learning_rate": 2.011613242173027e-05, + "loss": 1.9439, + "step": 20655500 + }, + { + "epoch": 59.79, + "learning_rate": 2.0115408774082996e-05, + "loss": 1.9527, + "step": 20656000 + }, + { + "epoch": 59.79, + "learning_rate": 2.011468512643572e-05, + "loss": 1.9407, + "step": 20656500 + }, + { + "epoch": 59.79, + "learning_rate": 2.011396147878844e-05, + "loss": 1.9507, + "step": 20657000 + }, + { + "epoch": 59.8, + "learning_rate": 2.0113237831141166e-05, + "loss": 1.9919, + "step": 20657500 + }, + { + "epoch": 59.8, + "learning_rate": 2.011251418349389e-05, + "loss": 1.9433, + "step": 20658000 + }, + { + "epoch": 59.8, + "learning_rate": 2.011179053584661e-05, + "loss": 1.9494, + "step": 20658500 + }, + { + "epoch": 59.8, + "learning_rate": 2.0111066888199333e-05, + "loss": 1.9409, + "step": 20659000 + }, + { + "epoch": 59.8, + "learning_rate": 2.0110344687847352e-05, + "loss": 1.9681, + "step": 20659500 + }, + { + "epoch": 59.8, + "learning_rate": 2.0109621040200074e-05, + "loss": 1.961, + "step": 20660000 + }, + { + "epoch": 59.8, + "learning_rate": 2.0108897392552796e-05, + "loss": 1.9714, + "step": 20660500 + }, + { + "epoch": 59.81, + "learning_rate": 2.010817374490552e-05, + "loss": 1.9501, + "step": 20661000 + }, + { + "epoch": 59.81, + "learning_rate": 2.0107450097258244e-05, + "loss": 1.9594, + "step": 20661500 + }, + { + "epoch": 59.81, + "learning_rate": 2.010672644961097e-05, + "loss": 1.9507, + "step": 20662000 + }, + { + "epoch": 59.81, + "learning_rate": 2.0106002801963692e-05, + "loss": 1.9688, + "step": 20662500 + }, + { + "epoch": 59.81, + "learning_rate": 2.0105279154316414e-05, + "loss": 1.9384, + "step": 20663000 + }, + { + "epoch": 59.81, + "learning_rate": 2.0104555506669136e-05, + "loss": 1.9603, + "step": 20663500 + }, + { + "epoch": 59.81, + "learning_rate": 2.010383185902186e-05, + "loss": 1.9597, + "step": 20664000 + }, + { + "epoch": 59.82, + "learning_rate": 2.0103108211374584e-05, + "loss": 1.9579, + "step": 20664500 + }, + { + "epoch": 59.82, + "learning_rate": 2.010238456372731e-05, + "loss": 1.9348, + "step": 20665000 + }, + { + "epoch": 59.82, + "learning_rate": 2.0101660916080032e-05, + "loss": 1.9548, + "step": 20665500 + }, + { + "epoch": 59.82, + "learning_rate": 2.0100940163023344e-05, + "loss": 1.9396, + "step": 20666000 + }, + { + "epoch": 59.82, + "learning_rate": 2.0100216515376067e-05, + "loss": 1.9473, + "step": 20666500 + }, + { + "epoch": 59.82, + "learning_rate": 2.009949286772879e-05, + "loss": 1.9235, + "step": 20667000 + }, + { + "epoch": 59.82, + "learning_rate": 2.009876922008151e-05, + "loss": 1.9351, + "step": 20667500 + }, + { + "epoch": 59.83, + "learning_rate": 2.0098045572434233e-05, + "loss": 1.9532, + "step": 20668000 + }, + { + "epoch": 59.83, + "learning_rate": 2.009732192478696e-05, + "loss": 1.9355, + "step": 20668500 + }, + { + "epoch": 59.83, + "learning_rate": 2.0096598277139685e-05, + "loss": 1.9463, + "step": 20669000 + }, + { + "epoch": 59.83, + "learning_rate": 2.0095874629492407e-05, + "loss": 1.9415, + "step": 20669500 + }, + { + "epoch": 59.83, + "learning_rate": 2.009515098184513e-05, + "loss": 1.9559, + "step": 20670000 + }, + { + "epoch": 59.83, + "learning_rate": 2.0094428781493148e-05, + "loss": 1.9456, + "step": 20670500 + }, + { + "epoch": 59.83, + "learning_rate": 2.009370513384587e-05, + "loss": 1.9329, + "step": 20671000 + }, + { + "epoch": 59.84, + "learning_rate": 2.0092981486198593e-05, + "loss": 1.9582, + "step": 20671500 + }, + { + "epoch": 59.84, + "learning_rate": 2.0092257838551315e-05, + "loss": 1.9603, + "step": 20672000 + }, + { + "epoch": 59.84, + "learning_rate": 2.009153419090404e-05, + "loss": 1.961, + "step": 20672500 + }, + { + "epoch": 59.84, + "learning_rate": 2.0090810543256763e-05, + "loss": 1.9667, + "step": 20673000 + }, + { + "epoch": 59.84, + "learning_rate": 2.0090086895609485e-05, + "loss": 1.9401, + "step": 20673500 + }, + { + "epoch": 59.84, + "learning_rate": 2.008936324796221e-05, + "loss": 1.9495, + "step": 20674000 + }, + { + "epoch": 59.84, + "learning_rate": 2.0088642494905523e-05, + "loss": 1.9491, + "step": 20674500 + }, + { + "epoch": 59.85, + "learning_rate": 2.0087918847258245e-05, + "loss": 1.9539, + "step": 20675000 + }, + { + "epoch": 59.85, + "learning_rate": 2.0087195199610967e-05, + "loss": 1.9453, + "step": 20675500 + }, + { + "epoch": 59.85, + "learning_rate": 2.0086471551963693e-05, + "loss": 1.9535, + "step": 20676000 + }, + { + "epoch": 59.85, + "learning_rate": 2.0085747904316415e-05, + "loss": 1.9518, + "step": 20676500 + }, + { + "epoch": 59.85, + "learning_rate": 2.0085024256669137e-05, + "loss": 1.9448, + "step": 20677000 + }, + { + "epoch": 59.85, + "learning_rate": 2.008430060902186e-05, + "loss": 1.9464, + "step": 20677500 + }, + { + "epoch": 59.85, + "learning_rate": 2.0083576961374585e-05, + "loss": 1.928, + "step": 20678000 + }, + { + "epoch": 59.86, + "learning_rate": 2.0082853313727307e-05, + "loss": 1.9618, + "step": 20678500 + }, + { + "epoch": 59.86, + "learning_rate": 2.008212966608003e-05, + "loss": 1.9514, + "step": 20679000 + }, + { + "epoch": 59.86, + "learning_rate": 2.0081406018432755e-05, + "loss": 1.937, + "step": 20679500 + }, + { + "epoch": 59.86, + "learning_rate": 2.0080682370785477e-05, + "loss": 1.9655, + "step": 20680000 + }, + { + "epoch": 59.86, + "learning_rate": 2.00799587231382e-05, + "loss": 1.971, + "step": 20680500 + }, + { + "epoch": 59.86, + "learning_rate": 2.0079235075490925e-05, + "loss": 1.967, + "step": 20681000 + }, + { + "epoch": 59.86, + "learning_rate": 2.0078511427843648e-05, + "loss": 1.9432, + "step": 20681500 + }, + { + "epoch": 59.87, + "learning_rate": 2.0077789227491663e-05, + "loss": 1.9525, + "step": 20682000 + }, + { + "epoch": 59.87, + "learning_rate": 2.0077065579844385e-05, + "loss": 1.9527, + "step": 20682500 + }, + { + "epoch": 59.87, + "learning_rate": 2.007634193219711e-05, + "loss": 1.9439, + "step": 20683000 + }, + { + "epoch": 59.87, + "learning_rate": 2.0075618284549837e-05, + "loss": 1.9528, + "step": 20683500 + }, + { + "epoch": 59.87, + "learning_rate": 2.007489463690256e-05, + "loss": 1.954, + "step": 20684000 + }, + { + "epoch": 59.87, + "learning_rate": 2.007417388384587e-05, + "loss": 1.9453, + "step": 20684500 + }, + { + "epoch": 59.87, + "learning_rate": 2.0073450236198593e-05, + "loss": 1.9615, + "step": 20685000 + }, + { + "epoch": 59.88, + "learning_rate": 2.0072726588551316e-05, + "loss": 1.9513, + "step": 20685500 + }, + { + "epoch": 59.88, + "learning_rate": 2.0072002940904038e-05, + "loss": 1.9454, + "step": 20686000 + }, + { + "epoch": 59.88, + "learning_rate": 2.0071279293256763e-05, + "loss": 1.9666, + "step": 20686500 + }, + { + "epoch": 59.88, + "learning_rate": 2.0070555645609486e-05, + "loss": 1.9807, + "step": 20687000 + }, + { + "epoch": 59.88, + "learning_rate": 2.006983199796221e-05, + "loss": 1.9623, + "step": 20687500 + }, + { + "epoch": 59.88, + "learning_rate": 2.0069108350314933e-05, + "loss": 1.9615, + "step": 20688000 + }, + { + "epoch": 59.88, + "learning_rate": 2.0068384702667656e-05, + "loss": 1.943, + "step": 20688500 + }, + { + "epoch": 59.89, + "learning_rate": 2.0067662502315675e-05, + "loss": 1.9625, + "step": 20689000 + }, + { + "epoch": 59.89, + "learning_rate": 2.0066938854668397e-05, + "loss": 1.9714, + "step": 20689500 + }, + { + "epoch": 59.89, + "learning_rate": 2.0066216654316412e-05, + "loss": 1.9529, + "step": 20690000 + }, + { + "epoch": 59.89, + "learning_rate": 2.0065493006669138e-05, + "loss": 1.97, + "step": 20690500 + }, + { + "epoch": 59.89, + "learning_rate": 2.0064769359021864e-05, + "loss": 1.9474, + "step": 20691000 + }, + { + "epoch": 59.89, + "learning_rate": 2.0064045711374586e-05, + "loss": 1.9559, + "step": 20691500 + }, + { + "epoch": 59.89, + "learning_rate": 2.00633235110226e-05, + "loss": 1.9475, + "step": 20692000 + }, + { + "epoch": 59.9, + "learning_rate": 2.0062599863375324e-05, + "loss": 1.945, + "step": 20692500 + }, + { + "epoch": 59.9, + "learning_rate": 2.006187621572805e-05, + "loss": 1.9201, + "step": 20693000 + }, + { + "epoch": 59.9, + "learning_rate": 2.006115256808077e-05, + "loss": 1.9639, + "step": 20693500 + }, + { + "epoch": 59.9, + "learning_rate": 2.0060428920433494e-05, + "loss": 1.9436, + "step": 20694000 + }, + { + "epoch": 59.9, + "learning_rate": 2.0059705272786216e-05, + "loss": 1.9433, + "step": 20694500 + }, + { + "epoch": 59.9, + "learning_rate": 2.005898162513894e-05, + "loss": 1.9626, + "step": 20695000 + }, + { + "epoch": 59.9, + "learning_rate": 2.0058257977491664e-05, + "loss": 1.9389, + "step": 20695500 + }, + { + "epoch": 59.91, + "learning_rate": 2.0057537224434976e-05, + "loss": 1.9289, + "step": 20696000 + }, + { + "epoch": 59.91, + "learning_rate": 2.0056813576787702e-05, + "loss": 1.9339, + "step": 20696500 + }, + { + "epoch": 59.91, + "learning_rate": 2.0056089929140424e-05, + "loss": 1.9442, + "step": 20697000 + }, + { + "epoch": 59.91, + "learning_rate": 2.005536772878844e-05, + "loss": 1.9571, + "step": 20697500 + }, + { + "epoch": 59.91, + "learning_rate": 2.0054644081141162e-05, + "loss": 1.9665, + "step": 20698000 + }, + { + "epoch": 59.91, + "learning_rate": 2.005392188078918e-05, + "loss": 1.9718, + "step": 20698500 + }, + { + "epoch": 59.92, + "learning_rate": 2.0053198233141906e-05, + "loss": 1.9536, + "step": 20699000 + }, + { + "epoch": 59.92, + "learning_rate": 2.005247458549463e-05, + "loss": 1.9543, + "step": 20699500 + }, + { + "epoch": 59.92, + "learning_rate": 2.005175093784735e-05, + "loss": 1.9296, + "step": 20700000 + }, + { + "epoch": 59.92, + "learning_rate": 2.0051027290200076e-05, + "loss": 1.9748, + "step": 20700500 + }, + { + "epoch": 59.92, + "learning_rate": 2.00503036425528e-05, + "loss": 1.9721, + "step": 20701000 + }, + { + "epoch": 59.92, + "learning_rate": 2.004957999490552e-05, + "loss": 1.9636, + "step": 20701500 + }, + { + "epoch": 59.92, + "learning_rate": 2.0048856347258243e-05, + "loss": 1.96, + "step": 20702000 + }, + { + "epoch": 59.93, + "learning_rate": 2.004813269961097e-05, + "loss": 1.9533, + "step": 20702500 + }, + { + "epoch": 59.93, + "learning_rate": 2.004740905196369e-05, + "loss": 1.9652, + "step": 20703000 + }, + { + "epoch": 59.93, + "learning_rate": 2.0046685404316413e-05, + "loss": 1.9618, + "step": 20703500 + }, + { + "epoch": 59.93, + "learning_rate": 2.004596175666914e-05, + "loss": 1.9753, + "step": 20704000 + }, + { + "epoch": 59.93, + "learning_rate": 2.0045239556317154e-05, + "loss": 1.9354, + "step": 20704500 + }, + { + "epoch": 59.93, + "learning_rate": 2.0044515908669877e-05, + "loss": 1.9505, + "step": 20705000 + }, + { + "epoch": 59.93, + "learning_rate": 2.0043792261022602e-05, + "loss": 1.941, + "step": 20705500 + }, + { + "epoch": 59.94, + "learning_rate": 2.0043068613375328e-05, + "loss": 1.9578, + "step": 20706000 + }, + { + "epoch": 59.94, + "learning_rate": 2.0042346413023343e-05, + "loss": 1.9441, + "step": 20706500 + }, + { + "epoch": 59.94, + "learning_rate": 2.0041622765376066e-05, + "loss": 1.943, + "step": 20707000 + }, + { + "epoch": 59.94, + "learning_rate": 2.004089911772879e-05, + "loss": 1.9537, + "step": 20707500 + }, + { + "epoch": 59.94, + "learning_rate": 2.0040175470081514e-05, + "loss": 1.9686, + "step": 20708000 + }, + { + "epoch": 59.94, + "learning_rate": 2.0039451822434236e-05, + "loss": 1.9394, + "step": 20708500 + }, + { + "epoch": 59.94, + "learning_rate": 2.0038728174786958e-05, + "loss": 1.9649, + "step": 20709000 + }, + { + "epoch": 59.95, + "learning_rate": 2.003800452713968e-05, + "loss": 1.9465, + "step": 20709500 + }, + { + "epoch": 59.95, + "learning_rate": 2.0037280879492406e-05, + "loss": 1.9609, + "step": 20710000 + }, + { + "epoch": 59.95, + "learning_rate": 2.0036557231845128e-05, + "loss": 1.9933, + "step": 20710500 + }, + { + "epoch": 59.95, + "learning_rate": 2.0035835031493147e-05, + "loss": 1.9574, + "step": 20711000 + }, + { + "epoch": 59.95, + "learning_rate": 2.003511138384587e-05, + "loss": 1.9435, + "step": 20711500 + }, + { + "epoch": 59.95, + "learning_rate": 2.003438773619859e-05, + "loss": 1.9296, + "step": 20712000 + }, + { + "epoch": 59.95, + "learning_rate": 2.0033664088551317e-05, + "loss": 1.9225, + "step": 20712500 + }, + { + "epoch": 59.96, + "learning_rate": 2.003294044090404e-05, + "loss": 1.9645, + "step": 20713000 + }, + { + "epoch": 59.96, + "learning_rate": 2.0032216793256765e-05, + "loss": 1.9424, + "step": 20713500 + }, + { + "epoch": 59.96, + "learning_rate": 2.0031493145609487e-05, + "loss": 1.9376, + "step": 20714000 + }, + { + "epoch": 59.96, + "learning_rate": 2.0030770945257503e-05, + "loss": 1.9374, + "step": 20714500 + }, + { + "epoch": 59.96, + "learning_rate": 2.003004729761023e-05, + "loss": 1.946, + "step": 20715000 + }, + { + "epoch": 59.96, + "learning_rate": 2.002932364996295e-05, + "loss": 1.9781, + "step": 20715500 + }, + { + "epoch": 59.96, + "learning_rate": 2.0028600002315673e-05, + "loss": 1.9449, + "step": 20716000 + }, + { + "epoch": 59.97, + "learning_rate": 2.0027876354668395e-05, + "loss": 1.9667, + "step": 20716500 + }, + { + "epoch": 59.97, + "learning_rate": 2.002715270702112e-05, + "loss": 1.9635, + "step": 20717000 + }, + { + "epoch": 59.97, + "learning_rate": 2.0026429059373843e-05, + "loss": 1.9665, + "step": 20717500 + }, + { + "epoch": 59.97, + "learning_rate": 2.002570541172657e-05, + "loss": 1.9566, + "step": 20718000 + }, + { + "epoch": 59.97, + "learning_rate": 2.002498176407929e-05, + "loss": 1.972, + "step": 20718500 + }, + { + "epoch": 59.97, + "learning_rate": 2.0024258116432013e-05, + "loss": 1.9879, + "step": 20719000 + }, + { + "epoch": 59.97, + "learning_rate": 2.0023534468784735e-05, + "loss": 1.9781, + "step": 20719500 + }, + { + "epoch": 59.98, + "learning_rate": 2.0022812268432754e-05, + "loss": 1.9594, + "step": 20720000 + }, + { + "epoch": 59.98, + "learning_rate": 2.0022088620785476e-05, + "loss": 1.9592, + "step": 20720500 + }, + { + "epoch": 59.98, + "learning_rate": 2.0021366420433495e-05, + "loss": 1.9537, + "step": 20721000 + }, + { + "epoch": 59.98, + "learning_rate": 2.0020642772786218e-05, + "loss": 1.9344, + "step": 20721500 + }, + { + "epoch": 59.98, + "learning_rate": 2.0019919125138943e-05, + "loss": 1.9856, + "step": 20722000 + }, + { + "epoch": 59.98, + "learning_rate": 2.0019195477491665e-05, + "loss": 1.9469, + "step": 20722500 + }, + { + "epoch": 59.98, + "learning_rate": 2.0018471829844388e-05, + "loss": 1.9389, + "step": 20723000 + }, + { + "epoch": 59.99, + "learning_rate": 2.0017749629492403e-05, + "loss": 1.9476, + "step": 20723500 + }, + { + "epoch": 59.99, + "learning_rate": 2.001702598184513e-05, + "loss": 1.971, + "step": 20724000 + }, + { + "epoch": 59.99, + "learning_rate": 2.0016302334197855e-05, + "loss": 1.9543, + "step": 20724500 + }, + { + "epoch": 59.99, + "learning_rate": 2.0015578686550577e-05, + "loss": 1.9651, + "step": 20725000 + }, + { + "epoch": 59.99, + "learning_rate": 2.00148550389033e-05, + "loss": 1.9549, + "step": 20725500 + }, + { + "epoch": 59.99, + "learning_rate": 2.001413139125602e-05, + "loss": 1.9458, + "step": 20726000 + }, + { + "epoch": 59.99, + "learning_rate": 2.0013407743608743e-05, + "loss": 1.9514, + "step": 20726500 + }, + { + "epoch": 60.0, + "learning_rate": 2.001268409596147e-05, + "loss": 1.9333, + "step": 20727000 + }, + { + "epoch": 60.0, + "learning_rate": 2.001196044831419e-05, + "loss": 1.9379, + "step": 20727500 + }, + { + "epoch": 60.0, + "learning_rate": 2.0011236800666917e-05, + "loss": 1.9627, + "step": 20728000 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.6774629093801515, + "eval_accuracy_mlm": 0.6439839317329467, + "eval_accuracy_nsp": 0.8569477480945769, + "eval_loss": 2.1707332134246826, + "eval_runtime": 331.3276, + "eval_samples_per_second": 1317.083, + "eval_steps_per_second": 54.879, + "step": 20728320 } ], "max_steps": 34547200, "num_train_epochs": 100, - "total_flos": 2.366578797409368e+19, + "total_flos": 2.8398376322525532e+19, "trial_name": null, "trial_params": null }