diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 30.0, - "global_step": 10364160, + "epoch": 40.0, + "global_step": 13818880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -124704,11 +124704,41575 @@ "eval_samples_per_second": 1316.115, "eval_steps_per_second": 54.839, "step": 10364160 + }, + { + "epoch": 30.0, + "learning_rate": 3.500492804047796e-05, + "loss": 2.1213, + "step": 10364500 + }, + { + "epoch": 30.0, + "learning_rate": 3.500420439283068e-05, + "loss": 2.1103, + "step": 10365000 + }, + { + "epoch": 30.0, + "learning_rate": 3.50034821924787e-05, + "loss": 2.1067, + "step": 10365500 + }, + { + "epoch": 30.01, + "learning_rate": 3.500276143942201e-05, + "loss": 2.0826, + "step": 10366000 + }, + { + "epoch": 30.01, + "learning_rate": 3.5002037791774736e-05, + "loss": 2.0969, + "step": 10366500 + }, + { + "epoch": 30.01, + "learning_rate": 3.500131414412746e-05, + "loss": 2.0776, + "step": 10367000 + }, + { + "epoch": 30.01, + "learning_rate": 3.500059049648018e-05, + "loss": 2.0948, + "step": 10367500 + }, + { + "epoch": 30.01, + "learning_rate": 3.49998668488329e-05, + "loss": 2.0853, + "step": 10368000 + }, + { + "epoch": 30.01, + "learning_rate": 3.4999143201185625e-05, + "loss": 2.0972, + "step": 10368500 + }, + { + "epoch": 30.01, + "learning_rate": 3.499841955353835e-05, + "loss": 2.0717, + "step": 10369000 + }, + { + "epoch": 30.02, + "learning_rate": 3.499769590589107e-05, + "loss": 2.0894, + "step": 10369500 + }, + { + "epoch": 30.02, + "learning_rate": 3.499697225824379e-05, + "loss": 2.0816, + "step": 10370000 + }, + { + "epoch": 30.02, + "learning_rate": 3.499624861059652e-05, + "loss": 2.0836, + "step": 10370500 + }, + { + "epoch": 30.02, + "learning_rate": 3.499552496294924e-05, + "loss": 2.0927, + "step": 10371000 + }, + { + "epoch": 30.02, + "learning_rate": 3.4994801315301965e-05, + "loss": 2.1031, + "step": 10371500 + }, + { + "epoch": 30.02, + "learning_rate": 3.499407766765469e-05, + "loss": 2.1029, + "step": 10372000 + }, + { + "epoch": 30.02, + "learning_rate": 3.499335546730271e-05, + "loss": 2.1119, + "step": 10372500 + }, + { + "epoch": 30.03, + "learning_rate": 3.499263181965543e-05, + "loss": 2.0929, + "step": 10373000 + }, + { + "epoch": 30.03, + "learning_rate": 3.4991908172008154e-05, + "loss": 2.0962, + "step": 10373500 + }, + { + "epoch": 30.03, + "learning_rate": 3.4991184524360877e-05, + "loss": 2.0846, + "step": 10374000 + }, + { + "epoch": 30.03, + "learning_rate": 3.499046232400889e-05, + "loss": 2.1082, + "step": 10374500 + }, + { + "epoch": 30.03, + "learning_rate": 3.4989738676361614e-05, + "loss": 2.1004, + "step": 10375000 + }, + { + "epoch": 30.03, + "learning_rate": 3.4989015028714337e-05, + "loss": 2.0722, + "step": 10375500 + }, + { + "epoch": 30.03, + "learning_rate": 3.498829138106706e-05, + "loss": 2.0787, + "step": 10376000 + }, + { + "epoch": 30.04, + "learning_rate": 3.498756773341979e-05, + "loss": 2.1036, + "step": 10376500 + }, + { + "epoch": 30.04, + "learning_rate": 3.49868455330678e-05, + "loss": 2.1034, + "step": 10377000 + }, + { + "epoch": 30.04, + "learning_rate": 3.498612333271582e-05, + "loss": 2.0852, + "step": 10377500 + }, + { + "epoch": 30.04, + "learning_rate": 3.498539968506854e-05, + "loss": 2.1011, + "step": 10378000 + }, + { + "epoch": 30.04, + "learning_rate": 3.498467603742127e-05, + "loss": 2.0806, + "step": 10378500 + }, + { + "epoch": 30.04, + "learning_rate": 3.498395238977399e-05, + "loss": 2.0907, + "step": 10379000 + }, + { + "epoch": 30.04, + "learning_rate": 3.4983228742126715e-05, + "loss": 2.0767, + "step": 10379500 + }, + { + "epoch": 30.05, + "learning_rate": 3.498250509447944e-05, + "loss": 2.0895, + "step": 10380000 + }, + { + "epoch": 30.05, + "learning_rate": 3.4981781446832166e-05, + "loss": 2.1007, + "step": 10380500 + }, + { + "epoch": 30.05, + "learning_rate": 3.498105779918489e-05, + "loss": 2.089, + "step": 10381000 + }, + { + "epoch": 30.05, + "learning_rate": 3.498033415153761e-05, + "loss": 2.0967, + "step": 10381500 + }, + { + "epoch": 30.05, + "learning_rate": 3.4979611951185626e-05, + "loss": 2.065, + "step": 10382000 + }, + { + "epoch": 30.05, + "learning_rate": 3.497888830353835e-05, + "loss": 2.0805, + "step": 10382500 + }, + { + "epoch": 30.05, + "learning_rate": 3.497816465589107e-05, + "loss": 2.0907, + "step": 10383000 + }, + { + "epoch": 30.06, + "learning_rate": 3.497744100824379e-05, + "loss": 2.0715, + "step": 10383500 + }, + { + "epoch": 30.06, + "learning_rate": 3.4976717360596515e-05, + "loss": 2.0941, + "step": 10384000 + }, + { + "epoch": 30.06, + "learning_rate": 3.497599516024454e-05, + "loss": 2.0916, + "step": 10384500 + }, + { + "epoch": 30.06, + "learning_rate": 3.497527151259726e-05, + "loss": 2.1088, + "step": 10385000 + }, + { + "epoch": 30.06, + "learning_rate": 3.497454786494999e-05, + "loss": 2.097, + "step": 10385500 + }, + { + "epoch": 30.06, + "learning_rate": 3.497382421730271e-05, + "loss": 2.1012, + "step": 10386000 + }, + { + "epoch": 30.06, + "learning_rate": 3.497310056965543e-05, + "loss": 2.0997, + "step": 10386500 + }, + { + "epoch": 30.07, + "learning_rate": 3.497237836930345e-05, + "loss": 2.1141, + "step": 10387000 + }, + { + "epoch": 30.07, + "learning_rate": 3.497165472165617e-05, + "loss": 2.0816, + "step": 10387500 + }, + { + "epoch": 30.07, + "learning_rate": 3.497093107400889e-05, + "loss": 2.1172, + "step": 10388000 + }, + { + "epoch": 30.07, + "learning_rate": 3.4970207426361615e-05, + "loss": 2.1078, + "step": 10388500 + }, + { + "epoch": 30.07, + "learning_rate": 3.496948377871434e-05, + "loss": 2.0878, + "step": 10389000 + }, + { + "epoch": 30.07, + "learning_rate": 3.4968760131067066e-05, + "loss": 2.0848, + "step": 10389500 + }, + { + "epoch": 30.07, + "learning_rate": 3.496803793071508e-05, + "loss": 2.1086, + "step": 10390000 + }, + { + "epoch": 30.08, + "learning_rate": 3.4967314283067804e-05, + "loss": 2.1157, + "step": 10390500 + }, + { + "epoch": 30.08, + "learning_rate": 3.4966590635420526e-05, + "loss": 2.0851, + "step": 10391000 + }, + { + "epoch": 30.08, + "learning_rate": 3.496586698777325e-05, + "loss": 2.0881, + "step": 10391500 + }, + { + "epoch": 30.08, + "learning_rate": 3.496514334012597e-05, + "loss": 2.0887, + "step": 10392000 + }, + { + "epoch": 30.08, + "learning_rate": 3.4964421139773986e-05, + "loss": 2.0919, + "step": 10392500 + }, + { + "epoch": 30.08, + "learning_rate": 3.4963697492126715e-05, + "loss": 2.097, + "step": 10393000 + }, + { + "epoch": 30.08, + "learning_rate": 3.496297384447944e-05, + "loss": 2.0882, + "step": 10393500 + }, + { + "epoch": 30.09, + "learning_rate": 3.496225019683217e-05, + "loss": 2.1106, + "step": 10394000 + }, + { + "epoch": 30.09, + "learning_rate": 3.4961529443775476e-05, + "loss": 2.0996, + "step": 10394500 + }, + { + "epoch": 30.09, + "learning_rate": 3.49608057961282e-05, + "loss": 2.0901, + "step": 10395000 + }, + { + "epoch": 30.09, + "learning_rate": 3.496008214848092e-05, + "loss": 2.0748, + "step": 10395500 + }, + { + "epoch": 30.09, + "learning_rate": 3.495935850083364e-05, + "loss": 2.1166, + "step": 10396000 + }, + { + "epoch": 30.09, + "learning_rate": 3.4958634853186364e-05, + "loss": 2.0924, + "step": 10396500 + }, + { + "epoch": 30.1, + "learning_rate": 3.495791120553909e-05, + "loss": 2.0888, + "step": 10397000 + }, + { + "epoch": 30.1, + "learning_rate": 3.4957187557891816e-05, + "loss": 2.1078, + "step": 10397500 + }, + { + "epoch": 30.1, + "learning_rate": 3.495646391024454e-05, + "loss": 2.0979, + "step": 10398000 + }, + { + "epoch": 30.1, + "learning_rate": 3.495574026259726e-05, + "loss": 2.1075, + "step": 10398500 + }, + { + "epoch": 30.1, + "learning_rate": 3.495501661494998e-05, + "loss": 2.0849, + "step": 10399000 + }, + { + "epoch": 30.1, + "learning_rate": 3.4954292967302705e-05, + "loss": 2.0983, + "step": 10399500 + }, + { + "epoch": 30.1, + "learning_rate": 3.495356931965543e-05, + "loss": 2.1227, + "step": 10400000 + }, + { + "epoch": 30.11, + "learning_rate": 3.4952845672008156e-05, + "loss": 2.102, + "step": 10400500 + }, + { + "epoch": 30.11, + "learning_rate": 3.495212202436088e-05, + "loss": 2.1033, + "step": 10401000 + }, + { + "epoch": 30.11, + "learning_rate": 3.49513983767136e-05, + "loss": 2.0828, + "step": 10401500 + }, + { + "epoch": 30.11, + "learning_rate": 3.495067472906632e-05, + "loss": 2.0795, + "step": 10402000 + }, + { + "epoch": 30.11, + "learning_rate": 3.494995252871434e-05, + "loss": 2.0932, + "step": 10402500 + }, + { + "epoch": 30.11, + "learning_rate": 3.494922888106707e-05, + "loss": 2.0807, + "step": 10403000 + }, + { + "epoch": 30.11, + "learning_rate": 3.494850523341979e-05, + "loss": 2.1094, + "step": 10403500 + }, + { + "epoch": 30.12, + "learning_rate": 3.494778158577251e-05, + "loss": 2.0948, + "step": 10404000 + }, + { + "epoch": 30.12, + "learning_rate": 3.494705938542053e-05, + "loss": 2.0983, + "step": 10404500 + }, + { + "epoch": 30.12, + "learning_rate": 3.494633573777325e-05, + "loss": 2.1104, + "step": 10405000 + }, + { + "epoch": 30.12, + "learning_rate": 3.494561209012597e-05, + "loss": 2.077, + "step": 10405500 + }, + { + "epoch": 30.12, + "learning_rate": 3.4944888442478694e-05, + "loss": 2.0896, + "step": 10406000 + }, + { + "epoch": 30.12, + "learning_rate": 3.4944164794831416e-05, + "loss": 2.1105, + "step": 10406500 + }, + { + "epoch": 30.12, + "learning_rate": 3.494344114718414e-05, + "loss": 2.0885, + "step": 10407000 + }, + { + "epoch": 30.13, + "learning_rate": 3.494271749953687e-05, + "loss": 2.101, + "step": 10407500 + }, + { + "epoch": 30.13, + "learning_rate": 3.494199385188959e-05, + "loss": 2.0973, + "step": 10408000 + }, + { + "epoch": 30.13, + "learning_rate": 3.494127165153761e-05, + "loss": 2.0951, + "step": 10408500 + }, + { + "epoch": 30.13, + "learning_rate": 3.4940548003890334e-05, + "loss": 2.0993, + "step": 10409000 + }, + { + "epoch": 30.13, + "learning_rate": 3.4939824356243056e-05, + "loss": 2.1071, + "step": 10409500 + }, + { + "epoch": 30.13, + "learning_rate": 3.493910070859578e-05, + "loss": 2.1001, + "step": 10410000 + }, + { + "epoch": 30.13, + "learning_rate": 3.49383770609485e-05, + "loss": 2.1157, + "step": 10410500 + }, + { + "epoch": 30.14, + "learning_rate": 3.493765341330122e-05, + "loss": 2.0842, + "step": 10411000 + }, + { + "epoch": 30.14, + "learning_rate": 3.4936929765653945e-05, + "loss": 2.1065, + "step": 10411500 + }, + { + "epoch": 30.14, + "learning_rate": 3.493620611800667e-05, + "loss": 2.0963, + "step": 10412000 + }, + { + "epoch": 30.14, + "learning_rate": 3.493548247035939e-05, + "loss": 2.1174, + "step": 10412500 + }, + { + "epoch": 30.14, + "learning_rate": 3.493476027000741e-05, + "loss": 2.0945, + "step": 10413000 + }, + { + "epoch": 30.14, + "learning_rate": 3.4934036622360134e-05, + "loss": 2.0761, + "step": 10413500 + }, + { + "epoch": 30.14, + "learning_rate": 3.4933312974712857e-05, + "loss": 2.0952, + "step": 10414000 + }, + { + "epoch": 30.15, + "learning_rate": 3.493258932706558e-05, + "loss": 2.096, + "step": 10414500 + }, + { + "epoch": 30.15, + "learning_rate": 3.493186567941831e-05, + "loss": 2.0889, + "step": 10415000 + }, + { + "epoch": 30.15, + "learning_rate": 3.4931143479066323e-05, + "loss": 2.0994, + "step": 10415500 + }, + { + "epoch": 30.15, + "learning_rate": 3.4930421278714346e-05, + "loss": 2.0818, + "step": 10416000 + }, + { + "epoch": 30.15, + "learning_rate": 3.492969763106707e-05, + "loss": 2.0852, + "step": 10416500 + }, + { + "epoch": 30.15, + "learning_rate": 3.492897398341979e-05, + "loss": 2.1043, + "step": 10417000 + }, + { + "epoch": 30.15, + "learning_rate": 3.4928251783067806e-05, + "loss": 2.1028, + "step": 10417500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492752813542053e-05, + "loss": 2.0783, + "step": 10418000 + }, + { + "epoch": 30.16, + "learning_rate": 3.492680448777325e-05, + "loss": 2.0817, + "step": 10418500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492608084012597e-05, + "loss": 2.1086, + "step": 10419000 + }, + { + "epoch": 30.16, + "learning_rate": 3.492536008706929e-05, + "loss": 2.0912, + "step": 10419500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492463643942201e-05, + "loss": 2.0897, + "step": 10420000 + }, + { + "epoch": 30.16, + "learning_rate": 3.492391279177473e-05, + "loss": 2.1039, + "step": 10420500 + }, + { + "epoch": 30.16, + "learning_rate": 3.4923189144127455e-05, + "loss": 2.1048, + "step": 10421000 + }, + { + "epoch": 30.17, + "learning_rate": 3.492246549648018e-05, + "loss": 2.0901, + "step": 10421500 + }, + { + "epoch": 30.17, + "learning_rate": 3.49217418488329e-05, + "loss": 2.0923, + "step": 10422000 + }, + { + "epoch": 30.17, + "learning_rate": 3.492101820118562e-05, + "loss": 2.0534, + "step": 10422500 + }, + { + "epoch": 30.17, + "learning_rate": 3.492029455353835e-05, + "loss": 2.098, + "step": 10423000 + }, + { + "epoch": 30.17, + "learning_rate": 3.4919572353186366e-05, + "loss": 2.0857, + "step": 10423500 + }, + { + "epoch": 30.17, + "learning_rate": 3.4918848705539095e-05, + "loss": 2.0862, + "step": 10424000 + }, + { + "epoch": 30.17, + "learning_rate": 3.491812505789182e-05, + "loss": 2.0934, + "step": 10424500 + }, + { + "epoch": 30.18, + "learning_rate": 3.491740141024454e-05, + "loss": 2.112, + "step": 10425000 + }, + { + "epoch": 30.18, + "learning_rate": 3.491667776259726e-05, + "loss": 2.0991, + "step": 10425500 + }, + { + "epoch": 30.18, + "learning_rate": 3.4915954114949984e-05, + "loss": 2.099, + "step": 10426000 + }, + { + "epoch": 30.18, + "learning_rate": 3.4915230467302706e-05, + "loss": 2.0994, + "step": 10426500 + }, + { + "epoch": 30.18, + "learning_rate": 3.491450826695072e-05, + "loss": 2.1091, + "step": 10427000 + }, + { + "epoch": 30.18, + "learning_rate": 3.4913784619303444e-05, + "loss": 2.1217, + "step": 10427500 + }, + { + "epoch": 30.18, + "learning_rate": 3.4913060971656166e-05, + "loss": 2.102, + "step": 10428000 + }, + { + "epoch": 30.19, + "learning_rate": 3.4912337324008895e-05, + "loss": 2.1201, + "step": 10428500 + }, + { + "epoch": 30.19, + "learning_rate": 3.491161367636162e-05, + "loss": 2.097, + "step": 10429000 + }, + { + "epoch": 30.19, + "learning_rate": 3.491089147600963e-05, + "loss": 2.1024, + "step": 10429500 + }, + { + "epoch": 30.19, + "learning_rate": 3.4910167828362355e-05, + "loss": 2.0938, + "step": 10430000 + }, + { + "epoch": 30.19, + "learning_rate": 3.4909444180715084e-05, + "loss": 2.094, + "step": 10430500 + }, + { + "epoch": 30.19, + "learning_rate": 3.4908720533067807e-05, + "loss": 2.1125, + "step": 10431000 + }, + { + "epoch": 30.19, + "learning_rate": 3.490799688542053e-05, + "loss": 2.1181, + "step": 10431500 + }, + { + "epoch": 30.2, + "learning_rate": 3.490727323777325e-05, + "loss": 2.0849, + "step": 10432000 + }, + { + "epoch": 30.2, + "learning_rate": 3.490655103742127e-05, + "loss": 2.0887, + "step": 10432500 + }, + { + "epoch": 30.2, + "learning_rate": 3.4905827389773996e-05, + "loss": 2.084, + "step": 10433000 + }, + { + "epoch": 30.2, + "learning_rate": 3.490510374212672e-05, + "loss": 2.0454, + "step": 10433500 + }, + { + "epoch": 30.2, + "learning_rate": 3.490438154177473e-05, + "loss": 2.1311, + "step": 10434000 + }, + { + "epoch": 30.2, + "learning_rate": 3.4903657894127456e-05, + "loss": 2.118, + "step": 10434500 + }, + { + "epoch": 30.21, + "learning_rate": 3.490293424648018e-05, + "loss": 2.0895, + "step": 10435000 + }, + { + "epoch": 30.21, + "learning_rate": 3.49022105988329e-05, + "loss": 2.0945, + "step": 10435500 + }, + { + "epoch": 30.21, + "learning_rate": 3.490148695118562e-05, + "loss": 2.0768, + "step": 10436000 + }, + { + "epoch": 30.21, + "learning_rate": 3.490076619812894e-05, + "loss": 2.1068, + "step": 10436500 + }, + { + "epoch": 30.21, + "learning_rate": 3.490004255048166e-05, + "loss": 2.1096, + "step": 10437000 + }, + { + "epoch": 30.21, + "learning_rate": 3.489931890283438e-05, + "loss": 2.1142, + "step": 10437500 + }, + { + "epoch": 30.21, + "learning_rate": 3.4898595255187105e-05, + "loss": 2.1015, + "step": 10438000 + }, + { + "epoch": 30.22, + "learning_rate": 3.489787305483513e-05, + "loss": 2.0993, + "step": 10438500 + }, + { + "epoch": 30.22, + "learning_rate": 3.489714940718785e-05, + "loss": 2.0991, + "step": 10439000 + }, + { + "epoch": 30.22, + "learning_rate": 3.489642575954057e-05, + "loss": 2.0732, + "step": 10439500 + }, + { + "epoch": 30.22, + "learning_rate": 3.4895702111893294e-05, + "loss": 2.091, + "step": 10440000 + }, + { + "epoch": 30.22, + "learning_rate": 3.489497846424602e-05, + "loss": 2.0875, + "step": 10440500 + }, + { + "epoch": 30.22, + "learning_rate": 3.4894254816598745e-05, + "loss": 2.0889, + "step": 10441000 + }, + { + "epoch": 30.22, + "learning_rate": 3.489353116895147e-05, + "loss": 2.1032, + "step": 10441500 + }, + { + "epoch": 30.23, + "learning_rate": 3.489280752130419e-05, + "loss": 2.0874, + "step": 10442000 + }, + { + "epoch": 30.23, + "learning_rate": 3.489208387365691e-05, + "loss": 2.0921, + "step": 10442500 + }, + { + "epoch": 30.23, + "learning_rate": 3.4891360226009634e-05, + "loss": 2.0944, + "step": 10443000 + }, + { + "epoch": 30.23, + "learning_rate": 3.4890636578362356e-05, + "loss": 2.0889, + "step": 10443500 + }, + { + "epoch": 30.23, + "learning_rate": 3.488991293071508e-05, + "loss": 2.1125, + "step": 10444000 + }, + { + "epoch": 30.23, + "learning_rate": 3.48891892830678e-05, + "loss": 2.1107, + "step": 10444500 + }, + { + "epoch": 30.23, + "learning_rate": 3.4888468530011116e-05, + "loss": 2.135, + "step": 10445000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488774488236384e-05, + "loss": 2.1006, + "step": 10445500 + }, + { + "epoch": 30.24, + "learning_rate": 3.488702268201186e-05, + "loss": 2.0979, + "step": 10446000 + }, + { + "epoch": 30.24, + "learning_rate": 3.4886300481659876e-05, + "loss": 2.1041, + "step": 10446500 + }, + { + "epoch": 30.24, + "learning_rate": 3.48855768340126e-05, + "loss": 2.1155, + "step": 10447000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488485318636532e-05, + "loss": 2.1172, + "step": 10447500 + }, + { + "epoch": 30.24, + "learning_rate": 3.488412953871805e-05, + "loss": 2.0934, + "step": 10448000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488340589107077e-05, + "loss": 2.1162, + "step": 10448500 + }, + { + "epoch": 30.25, + "learning_rate": 3.4882682243423494e-05, + "loss": 2.1004, + "step": 10449000 + }, + { + "epoch": 30.25, + "learning_rate": 3.4881958595776217e-05, + "loss": 2.1148, + "step": 10449500 + }, + { + "epoch": 30.25, + "learning_rate": 3.488123494812894e-05, + "loss": 2.101, + "step": 10450000 + }, + { + "epoch": 30.25, + "learning_rate": 3.488051130048166e-05, + "loss": 2.0839, + "step": 10450500 + }, + { + "epoch": 30.25, + "learning_rate": 3.487978765283438e-05, + "loss": 2.1271, + "step": 10451000 + }, + { + "epoch": 30.25, + "learning_rate": 3.4879064005187105e-05, + "loss": 2.1042, + "step": 10451500 + }, + { + "epoch": 30.25, + "learning_rate": 3.487834035753983e-05, + "loss": 2.0907, + "step": 10452000 + }, + { + "epoch": 30.26, + "learning_rate": 3.487761815718785e-05, + "loss": 2.0969, + "step": 10452500 + }, + { + "epoch": 30.26, + "learning_rate": 3.487689450954057e-05, + "loss": 2.0737, + "step": 10453000 + }, + { + "epoch": 30.26, + "learning_rate": 3.48761708618933e-05, + "loss": 2.0969, + "step": 10453500 + }, + { + "epoch": 30.26, + "learning_rate": 3.4875447214246023e-05, + "loss": 2.0856, + "step": 10454000 + }, + { + "epoch": 30.26, + "learning_rate": 3.4874723566598746e-05, + "loss": 2.1022, + "step": 10454500 + }, + { + "epoch": 30.26, + "learning_rate": 3.487399991895147e-05, + "loss": 2.0988, + "step": 10455000 + }, + { + "epoch": 30.26, + "learning_rate": 3.487327627130419e-05, + "loss": 2.1063, + "step": 10455500 + }, + { + "epoch": 30.27, + "learning_rate": 3.487255262365691e-05, + "loss": 2.0833, + "step": 10456000 + }, + { + "epoch": 30.27, + "learning_rate": 3.4871828976009635e-05, + "loss": 2.1004, + "step": 10456500 + }, + { + "epoch": 30.27, + "learning_rate": 3.487110532836236e-05, + "loss": 2.1153, + "step": 10457000 + }, + { + "epoch": 30.27, + "learning_rate": 3.487038168071508e-05, + "loss": 2.0634, + "step": 10457500 + }, + { + "epoch": 30.27, + "learning_rate": 3.48696580330678e-05, + "loss": 2.0819, + "step": 10458000 + }, + { + "epoch": 30.27, + "learning_rate": 3.4868934385420524e-05, + "loss": 2.0988, + "step": 10458500 + }, + { + "epoch": 30.27, + "learning_rate": 3.4868210737773246e-05, + "loss": 2.0891, + "step": 10459000 + }, + { + "epoch": 30.28, + "learning_rate": 3.4867487090125975e-05, + "loss": 2.1027, + "step": 10459500 + }, + { + "epoch": 30.28, + "learning_rate": 3.48667634424787e-05, + "loss": 2.089, + "step": 10460000 + }, + { + "epoch": 30.28, + "learning_rate": 3.4866039794831426e-05, + "loss": 2.0912, + "step": 10460500 + }, + { + "epoch": 30.28, + "learning_rate": 3.486531759447944e-05, + "loss": 2.0854, + "step": 10461000 + }, + { + "epoch": 30.28, + "learning_rate": 3.4864593946832164e-05, + "loss": 2.0956, + "step": 10461500 + }, + { + "epoch": 30.28, + "learning_rate": 3.486387174648018e-05, + "loss": 2.106, + "step": 10462000 + }, + { + "epoch": 30.28, + "learning_rate": 3.48631480988329e-05, + "loss": 2.0862, + "step": 10462500 + }, + { + "epoch": 30.29, + "learning_rate": 3.4862424451185624e-05, + "loss": 2.1113, + "step": 10463000 + }, + { + "epoch": 30.29, + "learning_rate": 3.486170080353835e-05, + "loss": 2.1022, + "step": 10463500 + }, + { + "epoch": 30.29, + "learning_rate": 3.4860977155891075e-05, + "loss": 2.0718, + "step": 10464000 + }, + { + "epoch": 30.29, + "learning_rate": 3.48602535082438e-05, + "loss": 2.1125, + "step": 10464500 + }, + { + "epoch": 30.29, + "learning_rate": 3.485952986059652e-05, + "loss": 2.117, + "step": 10465000 + }, + { + "epoch": 30.29, + "learning_rate": 3.485880621294924e-05, + "loss": 2.0922, + "step": 10465500 + }, + { + "epoch": 30.29, + "learning_rate": 3.4858082565301964e-05, + "loss": 2.1053, + "step": 10466000 + }, + { + "epoch": 30.3, + "learning_rate": 3.485736036494998e-05, + "loss": 2.0858, + "step": 10466500 + }, + { + "epoch": 30.3, + "learning_rate": 3.48566367173027e-05, + "loss": 2.098, + "step": 10467000 + }, + { + "epoch": 30.3, + "learning_rate": 3.4855913069655424e-05, + "loss": 2.0941, + "step": 10467500 + }, + { + "epoch": 30.3, + "learning_rate": 3.485518942200815e-05, + "loss": 2.1012, + "step": 10468000 + }, + { + "epoch": 30.3, + "learning_rate": 3.4854465774360875e-05, + "loss": 2.1077, + "step": 10468500 + }, + { + "epoch": 30.3, + "learning_rate": 3.4853742126713604e-05, + "loss": 2.0932, + "step": 10469000 + }, + { + "epoch": 30.3, + "learning_rate": 3.4853018479066327e-05, + "loss": 2.0795, + "step": 10469500 + }, + { + "epoch": 30.31, + "learning_rate": 3.485229483141905e-05, + "loss": 2.099, + "step": 10470000 + }, + { + "epoch": 30.31, + "learning_rate": 3.485157118377177e-05, + "loss": 2.1142, + "step": 10470500 + }, + { + "epoch": 30.31, + "learning_rate": 3.485084753612449e-05, + "loss": 2.089, + "step": 10471000 + }, + { + "epoch": 30.31, + "learning_rate": 3.4850123888477216e-05, + "loss": 2.1337, + "step": 10471500 + }, + { + "epoch": 30.31, + "learning_rate": 3.484940168812523e-05, + "loss": 2.1182, + "step": 10472000 + }, + { + "epoch": 30.31, + "learning_rate": 3.484867804047795e-05, + "loss": 2.0872, + "step": 10472500 + }, + { + "epoch": 30.32, + "learning_rate": 3.4847955840125976e-05, + "loss": 2.0921, + "step": 10473000 + }, + { + "epoch": 30.32, + "learning_rate": 3.484723363977399e-05, + "loss": 2.103, + "step": 10473500 + }, + { + "epoch": 30.32, + "learning_rate": 3.4846509992126713e-05, + "loss": 2.1102, + "step": 10474000 + }, + { + "epoch": 30.32, + "learning_rate": 3.4845786344479436e-05, + "loss": 2.1045, + "step": 10474500 + }, + { + "epoch": 30.32, + "learning_rate": 3.484506269683216e-05, + "loss": 2.1252, + "step": 10475000 + }, + { + "epoch": 30.32, + "learning_rate": 3.484433904918489e-05, + "loss": 2.1201, + "step": 10475500 + }, + { + "epoch": 30.32, + "learning_rate": 3.484361540153761e-05, + "loss": 2.0907, + "step": 10476000 + }, + { + "epoch": 30.33, + "learning_rate": 3.484289175389033e-05, + "loss": 2.0748, + "step": 10476500 + }, + { + "epoch": 30.33, + "learning_rate": 3.4842169553538354e-05, + "loss": 2.126, + "step": 10477000 + }, + { + "epoch": 30.33, + "learning_rate": 3.4841445905891076e-05, + "loss": 2.0855, + "step": 10477500 + }, + { + "epoch": 30.33, + "learning_rate": 3.48407222582438e-05, + "loss": 2.0942, + "step": 10478000 + }, + { + "epoch": 30.33, + "learning_rate": 3.483999861059652e-05, + "loss": 2.1122, + "step": 10478500 + }, + { + "epoch": 30.33, + "learning_rate": 3.483927496294924e-05, + "loss": 2.0802, + "step": 10479000 + }, + { + "epoch": 30.33, + "learning_rate": 3.4838551315301965e-05, + "loss": 2.1032, + "step": 10479500 + }, + { + "epoch": 30.34, + "learning_rate": 3.483782766765469e-05, + "loss": 2.0793, + "step": 10480000 + }, + { + "epoch": 30.34, + "learning_rate": 3.483710402000741e-05, + "loss": 2.0851, + "step": 10480500 + }, + { + "epoch": 30.34, + "learning_rate": 3.483638037236013e-05, + "loss": 2.108, + "step": 10481000 + }, + { + "epoch": 30.34, + "learning_rate": 3.483565961930345e-05, + "loss": 2.1147, + "step": 10481500 + }, + { + "epoch": 30.34, + "learning_rate": 3.483493741895146e-05, + "loss": 2.1067, + "step": 10482000 + }, + { + "epoch": 30.34, + "learning_rate": 3.4834213771304185e-05, + "loss": 2.1116, + "step": 10482500 + }, + { + "epoch": 30.34, + "learning_rate": 3.48334915709522e-05, + "loss": 2.0866, + "step": 10483000 + }, + { + "epoch": 30.35, + "learning_rate": 3.483276792330493e-05, + "loss": 2.1022, + "step": 10483500 + }, + { + "epoch": 30.35, + "learning_rate": 3.483204427565765e-05, + "loss": 2.1074, + "step": 10484000 + }, + { + "epoch": 30.35, + "learning_rate": 3.483132062801038e-05, + "loss": 2.0932, + "step": 10484500 + }, + { + "epoch": 30.35, + "learning_rate": 3.48305969803631e-05, + "loss": 2.1031, + "step": 10485000 + }, + { + "epoch": 30.35, + "learning_rate": 3.4829873332715825e-05, + "loss": 2.0863, + "step": 10485500 + }, + { + "epoch": 30.35, + "learning_rate": 3.482914968506855e-05, + "loss": 2.0826, + "step": 10486000 + }, + { + "epoch": 30.35, + "learning_rate": 3.482842603742127e-05, + "loss": 2.1108, + "step": 10486500 + }, + { + "epoch": 30.36, + "learning_rate": 3.482770238977399e-05, + "loss": 2.1278, + "step": 10487000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482698018942201e-05, + "loss": 2.1054, + "step": 10487500 + }, + { + "epoch": 30.36, + "learning_rate": 3.482625654177473e-05, + "loss": 2.1043, + "step": 10488000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482553289412745e-05, + "loss": 2.0743, + "step": 10488500 + }, + { + "epoch": 30.36, + "learning_rate": 3.482480924648018e-05, + "loss": 2.1235, + "step": 10489000 + }, + { + "epoch": 30.36, + "learning_rate": 3.4824087046128197e-05, + "loss": 2.0896, + "step": 10489500 + }, + { + "epoch": 30.36, + "learning_rate": 3.482336339848092e-05, + "loss": 2.0856, + "step": 10490000 + }, + { + "epoch": 30.37, + "learning_rate": 3.482263975083364e-05, + "loss": 2.1063, + "step": 10490500 + }, + { + "epoch": 30.37, + "learning_rate": 3.482191610318637e-05, + "loss": 2.1214, + "step": 10491000 + }, + { + "epoch": 30.37, + "learning_rate": 3.482119245553909e-05, + "loss": 2.1202, + "step": 10491500 + }, + { + "epoch": 30.37, + "learning_rate": 3.4820468807891815e-05, + "loss": 2.098, + "step": 10492000 + }, + { + "epoch": 30.37, + "learning_rate": 3.481974516024454e-05, + "loss": 2.0809, + "step": 10492500 + }, + { + "epoch": 30.37, + "learning_rate": 3.481902295989255e-05, + "loss": 2.0923, + "step": 10493000 + }, + { + "epoch": 30.37, + "learning_rate": 3.4818300759540575e-05, + "loss": 2.082, + "step": 10493500 + }, + { + "epoch": 30.38, + "learning_rate": 3.48175771118933e-05, + "loss": 2.096, + "step": 10494000 + }, + { + "epoch": 30.38, + "learning_rate": 3.481685346424602e-05, + "loss": 2.1176, + "step": 10494500 + }, + { + "epoch": 30.38, + "learning_rate": 3.481612981659874e-05, + "loss": 2.1086, + "step": 10495000 + }, + { + "epoch": 30.38, + "learning_rate": 3.4815406168951464e-05, + "loss": 2.1244, + "step": 10495500 + }, + { + "epoch": 30.38, + "learning_rate": 3.4814682521304186e-05, + "loss": 2.0735, + "step": 10496000 + }, + { + "epoch": 30.38, + "learning_rate": 3.481395887365691e-05, + "loss": 2.1281, + "step": 10496500 + }, + { + "epoch": 30.38, + "learning_rate": 3.481323522600963e-05, + "loss": 2.0921, + "step": 10497000 + }, + { + "epoch": 30.39, + "learning_rate": 3.481251302565765e-05, + "loss": 2.1079, + "step": 10497500 + }, + { + "epoch": 30.39, + "learning_rate": 3.4811789378010375e-05, + "loss": 2.1079, + "step": 10498000 + }, + { + "epoch": 30.39, + "learning_rate": 3.4811065730363104e-05, + "loss": 2.1152, + "step": 10498500 + }, + { + "epoch": 30.39, + "learning_rate": 3.4810342082715826e-05, + "loss": 2.1016, + "step": 10499000 + }, + { + "epoch": 30.39, + "learning_rate": 3.480961843506855e-05, + "loss": 2.1077, + "step": 10499500 + }, + { + "epoch": 30.39, + "learning_rate": 3.480889478742127e-05, + "loss": 2.1101, + "step": 10500000 + }, + { + "epoch": 30.39, + "learning_rate": 3.480817113977399e-05, + "loss": 2.0933, + "step": 10500500 + }, + { + "epoch": 30.4, + "learning_rate": 3.4807447492126715e-05, + "loss": 2.1145, + "step": 10501000 + }, + { + "epoch": 30.4, + "learning_rate": 3.480672384447944e-05, + "loss": 2.0921, + "step": 10501500 + }, + { + "epoch": 30.4, + "learning_rate": 3.480600019683216e-05, + "loss": 2.1182, + "step": 10502000 + }, + { + "epoch": 30.4, + "learning_rate": 3.480527654918488e-05, + "loss": 2.1088, + "step": 10502500 + }, + { + "epoch": 30.4, + "learning_rate": 3.4804552901537604e-05, + "loss": 2.1015, + "step": 10503000 + }, + { + "epoch": 30.4, + "learning_rate": 3.480382925389033e-05, + "loss": 2.1247, + "step": 10503500 + }, + { + "epoch": 30.4, + "learning_rate": 3.480310705353835e-05, + "loss": 2.0996, + "step": 10504000 + }, + { + "epoch": 30.41, + "learning_rate": 3.480238340589107e-05, + "loss": 2.1378, + "step": 10504500 + }, + { + "epoch": 30.41, + "learning_rate": 3.480165975824379e-05, + "loss": 2.1126, + "step": 10505000 + }, + { + "epoch": 30.41, + "learning_rate": 3.4800937557891815e-05, + "loss": 2.0946, + "step": 10505500 + }, + { + "epoch": 30.41, + "learning_rate": 3.480021391024454e-05, + "loss": 2.1013, + "step": 10506000 + }, + { + "epoch": 30.41, + "learning_rate": 3.479949026259726e-05, + "loss": 2.1005, + "step": 10506500 + }, + { + "epoch": 30.41, + "learning_rate": 3.479876661494998e-05, + "loss": 2.0826, + "step": 10507000 + }, + { + "epoch": 30.41, + "learning_rate": 3.4798042967302704e-05, + "loss": 2.1098, + "step": 10507500 + }, + { + "epoch": 30.42, + "learning_rate": 3.479731931965543e-05, + "loss": 2.121, + "step": 10508000 + }, + { + "epoch": 30.42, + "learning_rate": 3.4796595672008155e-05, + "loss": 2.1286, + "step": 10508500 + }, + { + "epoch": 30.42, + "learning_rate": 3.479587202436088e-05, + "loss": 2.0955, + "step": 10509000 + }, + { + "epoch": 30.42, + "learning_rate": 3.47951483767136e-05, + "loss": 2.1016, + "step": 10509500 + }, + { + "epoch": 30.42, + "learning_rate": 3.4794426176361616e-05, + "loss": 2.1107, + "step": 10510000 + }, + { + "epoch": 30.42, + "learning_rate": 3.479370252871434e-05, + "loss": 2.0876, + "step": 10510500 + }, + { + "epoch": 30.43, + "learning_rate": 3.479297888106706e-05, + "loss": 2.0942, + "step": 10511000 + }, + { + "epoch": 30.43, + "learning_rate": 3.479225523341978e-05, + "loss": 2.1061, + "step": 10511500 + }, + { + "epoch": 30.43, + "learning_rate": 3.4791531585772504e-05, + "loss": 2.1005, + "step": 10512000 + }, + { + "epoch": 30.43, + "learning_rate": 3.4790807938125233e-05, + "loss": 2.1172, + "step": 10512500 + }, + { + "epoch": 30.43, + "learning_rate": 3.4790084290477956e-05, + "loss": 2.0962, + "step": 10513000 + }, + { + "epoch": 30.43, + "learning_rate": 3.4789360642830685e-05, + "loss": 2.0949, + "step": 10513500 + }, + { + "epoch": 30.43, + "learning_rate": 3.478863699518341e-05, + "loss": 2.0859, + "step": 10514000 + }, + { + "epoch": 30.44, + "learning_rate": 3.478791334753613e-05, + "loss": 2.0992, + "step": 10514500 + }, + { + "epoch": 30.44, + "learning_rate": 3.478718969988885e-05, + "loss": 2.0895, + "step": 10515000 + }, + { + "epoch": 30.44, + "learning_rate": 3.478646749953687e-05, + "loss": 2.1089, + "step": 10515500 + }, + { + "epoch": 30.44, + "learning_rate": 3.478574385188959e-05, + "loss": 2.1144, + "step": 10516000 + }, + { + "epoch": 30.44, + "learning_rate": 3.478502020424231e-05, + "loss": 2.101, + "step": 10516500 + }, + { + "epoch": 30.44, + "learning_rate": 3.4784296556595034e-05, + "loss": 2.1223, + "step": 10517000 + }, + { + "epoch": 30.44, + "learning_rate": 3.4783572908947756e-05, + "loss": 2.1121, + "step": 10517500 + }, + { + "epoch": 30.45, + "learning_rate": 3.4782849261300485e-05, + "loss": 2.1003, + "step": 10518000 + }, + { + "epoch": 30.45, + "learning_rate": 3.478212561365321e-05, + "loss": 2.0867, + "step": 10518500 + }, + { + "epoch": 30.45, + "learning_rate": 3.478140196600593e-05, + "loss": 2.1225, + "step": 10519000 + }, + { + "epoch": 30.45, + "learning_rate": 3.4780679765653945e-05, + "loss": 2.1012, + "step": 10519500 + }, + { + "epoch": 30.45, + "learning_rate": 3.4779956118006674e-05, + "loss": 2.085, + "step": 10520000 + }, + { + "epoch": 30.45, + "learning_rate": 3.4779232470359396e-05, + "loss": 2.1006, + "step": 10520500 + }, + { + "epoch": 30.45, + "learning_rate": 3.477850882271212e-05, + "loss": 2.1184, + "step": 10521000 + }, + { + "epoch": 30.46, + "learning_rate": 3.4777786622360134e-05, + "loss": 2.0995, + "step": 10521500 + }, + { + "epoch": 30.46, + "learning_rate": 3.4777062974712856e-05, + "loss": 2.0993, + "step": 10522000 + }, + { + "epoch": 30.46, + "learning_rate": 3.4776339327065585e-05, + "loss": 2.1256, + "step": 10522500 + }, + { + "epoch": 30.46, + "learning_rate": 3.477561567941831e-05, + "loss": 2.1224, + "step": 10523000 + }, + { + "epoch": 30.46, + "learning_rate": 3.477489347906632e-05, + "loss": 2.1225, + "step": 10523500 + }, + { + "epoch": 30.46, + "learning_rate": 3.4774169831419045e-05, + "loss": 2.1086, + "step": 10524000 + }, + { + "epoch": 30.46, + "learning_rate": 3.477344618377177e-05, + "loss": 2.0947, + "step": 10524500 + }, + { + "epoch": 30.47, + "learning_rate": 3.477272253612449e-05, + "loss": 2.0866, + "step": 10525000 + }, + { + "epoch": 30.47, + "learning_rate": 3.477200033577251e-05, + "loss": 2.1321, + "step": 10525500 + }, + { + "epoch": 30.47, + "learning_rate": 3.4771276688125234e-05, + "loss": 2.1213, + "step": 10526000 + }, + { + "epoch": 30.47, + "learning_rate": 3.4770553040477956e-05, + "loss": 2.0891, + "step": 10526500 + }, + { + "epoch": 30.47, + "learning_rate": 3.476982939283068e-05, + "loss": 2.1095, + "step": 10527000 + }, + { + "epoch": 30.47, + "learning_rate": 3.476910574518341e-05, + "loss": 2.0887, + "step": 10527500 + }, + { + "epoch": 30.47, + "learning_rate": 3.476838209753613e-05, + "loss": 2.1086, + "step": 10528000 + }, + { + "epoch": 30.48, + "learning_rate": 3.476765844988885e-05, + "loss": 2.1144, + "step": 10528500 + }, + { + "epoch": 30.48, + "learning_rate": 3.4766934802241574e-05, + "loss": 2.0794, + "step": 10529000 + }, + { + "epoch": 30.48, + "learning_rate": 3.47662111545943e-05, + "loss": 2.1258, + "step": 10529500 + }, + { + "epoch": 30.48, + "learning_rate": 3.476548895424231e-05, + "loss": 2.1027, + "step": 10530000 + }, + { + "epoch": 30.48, + "learning_rate": 3.4764765306595034e-05, + "loss": 2.0945, + "step": 10530500 + }, + { + "epoch": 30.48, + "learning_rate": 3.476404310624306e-05, + "loss": 2.1142, + "step": 10531000 + }, + { + "epoch": 30.48, + "learning_rate": 3.476331945859578e-05, + "loss": 2.0891, + "step": 10531500 + }, + { + "epoch": 30.49, + "learning_rate": 3.47625958109485e-05, + "loss": 2.0964, + "step": 10532000 + }, + { + "epoch": 30.49, + "learning_rate": 3.4761872163301223e-05, + "loss": 2.1022, + "step": 10532500 + }, + { + "epoch": 30.49, + "learning_rate": 3.476114996294924e-05, + "loss": 2.0993, + "step": 10533000 + }, + { + "epoch": 30.49, + "learning_rate": 3.476042631530196e-05, + "loss": 2.0903, + "step": 10533500 + }, + { + "epoch": 30.49, + "learning_rate": 3.4759702667654684e-05, + "loss": 2.1145, + "step": 10534000 + }, + { + "epoch": 30.49, + "learning_rate": 3.475897902000741e-05, + "loss": 2.1025, + "step": 10534500 + }, + { + "epoch": 30.49, + "learning_rate": 3.4758255372360135e-05, + "loss": 2.0829, + "step": 10535000 + }, + { + "epoch": 30.5, + "learning_rate": 3.4757531724712864e-05, + "loss": 2.0857, + "step": 10535500 + }, + { + "epoch": 30.5, + "learning_rate": 3.4756808077065586e-05, + "loss": 2.0852, + "step": 10536000 + }, + { + "epoch": 30.5, + "learning_rate": 3.475608442941831e-05, + "loss": 2.1172, + "step": 10536500 + }, + { + "epoch": 30.5, + "learning_rate": 3.475536078177103e-05, + "loss": 2.088, + "step": 10537000 + }, + { + "epoch": 30.5, + "learning_rate": 3.475463713412375e-05, + "loss": 2.1164, + "step": 10537500 + }, + { + "epoch": 30.5, + "learning_rate": 3.4753913486476475e-05, + "loss": 2.108, + "step": 10538000 + }, + { + "epoch": 30.5, + "learning_rate": 3.47531898388292e-05, + "loss": 2.1157, + "step": 10538500 + }, + { + "epoch": 30.51, + "learning_rate": 3.475246619118192e-05, + "loss": 2.0807, + "step": 10539000 + }, + { + "epoch": 30.51, + "learning_rate": 3.475174254353464e-05, + "loss": 2.0747, + "step": 10539500 + }, + { + "epoch": 30.51, + "learning_rate": 3.4751018895887364e-05, + "loss": 2.1153, + "step": 10540000 + }, + { + "epoch": 30.51, + "learning_rate": 3.4750295248240086e-05, + "loss": 2.1244, + "step": 10540500 + }, + { + "epoch": 30.51, + "learning_rate": 3.474957304788811e-05, + "loss": 2.0827, + "step": 10541000 + }, + { + "epoch": 30.51, + "learning_rate": 3.474884940024083e-05, + "loss": 2.0851, + "step": 10541500 + }, + { + "epoch": 30.51, + "learning_rate": 3.474812575259355e-05, + "loss": 2.0935, + "step": 10542000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474740210494628e-05, + "loss": 2.1113, + "step": 10542500 + }, + { + "epoch": 30.52, + "learning_rate": 3.4746678457299004e-05, + "loss": 2.1156, + "step": 10543000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474595770424231e-05, + "loss": 2.1006, + "step": 10543500 + }, + { + "epoch": 30.52, + "learning_rate": 3.4745234056595035e-05, + "loss": 2.0893, + "step": 10544000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474451185624306e-05, + "loss": 2.108, + "step": 10544500 + }, + { + "epoch": 30.52, + "learning_rate": 3.474378820859578e-05, + "loss": 2.1079, + "step": 10545000 + }, + { + "epoch": 30.52, + "learning_rate": 3.47430645609485e-05, + "loss": 2.1155, + "step": 10545500 + }, + { + "epoch": 30.53, + "learning_rate": 3.4742340913301224e-05, + "loss": 2.1003, + "step": 10546000 + }, + { + "epoch": 30.53, + "learning_rate": 3.4741617265653947e-05, + "loss": 2.1159, + "step": 10546500 + }, + { + "epoch": 30.53, + "learning_rate": 3.474089506530196e-05, + "loss": 2.1369, + "step": 10547000 + }, + { + "epoch": 30.53, + "learning_rate": 3.474017141765469e-05, + "loss": 2.0783, + "step": 10547500 + }, + { + "epoch": 30.53, + "learning_rate": 3.473944777000741e-05, + "loss": 2.1329, + "step": 10548000 + }, + { + "epoch": 30.53, + "learning_rate": 3.4738724122360136e-05, + "loss": 2.1144, + "step": 10548500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473800047471286e-05, + "loss": 2.1095, + "step": 10549000 + }, + { + "epoch": 30.54, + "learning_rate": 3.473727682706558e-05, + "loss": 2.1005, + "step": 10549500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473655317941831e-05, + "loss": 2.1084, + "step": 10550000 + }, + { + "epoch": 30.54, + "learning_rate": 3.4735830979066325e-05, + "loss": 2.0824, + "step": 10550500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473511022600964e-05, + "loss": 2.1026, + "step": 10551000 + }, + { + "epoch": 30.54, + "learning_rate": 3.473438657836236e-05, + "loss": 2.0889, + "step": 10551500 + }, + { + "epoch": 30.54, + "learning_rate": 3.4733662930715085e-05, + "loss": 2.1154, + "step": 10552000 + }, + { + "epoch": 30.55, + "learning_rate": 3.473293928306781e-05, + "loss": 2.0945, + "step": 10552500 + }, + { + "epoch": 30.55, + "learning_rate": 3.473221563542053e-05, + "loss": 2.1108, + "step": 10553000 + }, + { + "epoch": 30.55, + "learning_rate": 3.473149198777325e-05, + "loss": 2.0928, + "step": 10553500 + }, + { + "epoch": 30.55, + "learning_rate": 3.4730768340125974e-05, + "loss": 2.1428, + "step": 10554000 + }, + { + "epoch": 30.55, + "learning_rate": 3.4730044692478696e-05, + "loss": 2.103, + "step": 10554500 + }, + { + "epoch": 30.55, + "learning_rate": 3.472932104483142e-05, + "loss": 2.1319, + "step": 10555000 + }, + { + "epoch": 30.55, + "learning_rate": 3.472859739718414e-05, + "loss": 2.1045, + "step": 10555500 + }, + { + "epoch": 30.56, + "learning_rate": 3.472787374953686e-05, + "loss": 2.1106, + "step": 10556000 + }, + { + "epoch": 30.56, + "learning_rate": 3.4727151549184885e-05, + "loss": 2.1103, + "step": 10556500 + }, + { + "epoch": 30.56, + "learning_rate": 3.472642790153761e-05, + "loss": 2.1271, + "step": 10557000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472570425389033e-05, + "loss": 2.1034, + "step": 10557500 + }, + { + "epoch": 30.56, + "learning_rate": 3.472498060624306e-05, + "loss": 2.088, + "step": 10558000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472425695859578e-05, + "loss": 2.1319, + "step": 10558500 + }, + { + "epoch": 30.56, + "learning_rate": 3.47235333109485e-05, + "loss": 2.1103, + "step": 10559000 + }, + { + "epoch": 30.57, + "learning_rate": 3.472281111059652e-05, + "loss": 2.1127, + "step": 10559500 + }, + { + "epoch": 30.57, + "learning_rate": 3.472208746294924e-05, + "loss": 2.1161, + "step": 10560000 + }, + { + "epoch": 30.57, + "learning_rate": 3.472136381530196e-05, + "loss": 2.1091, + "step": 10560500 + }, + { + "epoch": 30.57, + "learning_rate": 3.472064016765469e-05, + "loss": 2.0902, + "step": 10561000 + }, + { + "epoch": 30.57, + "learning_rate": 3.4719916520007414e-05, + "loss": 2.104, + "step": 10561500 + }, + { + "epoch": 30.57, + "learning_rate": 3.4719192872360136e-05, + "loss": 2.1171, + "step": 10562000 + }, + { + "epoch": 30.57, + "learning_rate": 3.471846922471286e-05, + "loss": 2.0987, + "step": 10562500 + }, + { + "epoch": 30.58, + "learning_rate": 3.4717747024360874e-05, + "loss": 2.097, + "step": 10563000 + }, + { + "epoch": 30.58, + "learning_rate": 3.4717023376713596e-05, + "loss": 2.0914, + "step": 10563500 + }, + { + "epoch": 30.58, + "learning_rate": 3.471629972906632e-05, + "loss": 2.113, + "step": 10564000 + }, + { + "epoch": 30.58, + "learning_rate": 3.471557608141904e-05, + "loss": 2.1005, + "step": 10564500 + }, + { + "epoch": 30.58, + "learning_rate": 3.471485243377176e-05, + "loss": 2.1241, + "step": 10565000 + }, + { + "epoch": 30.58, + "learning_rate": 3.471412878612449e-05, + "loss": 2.0977, + "step": 10565500 + }, + { + "epoch": 30.58, + "learning_rate": 3.4713405138477214e-05, + "loss": 2.0874, + "step": 10566000 + }, + { + "epoch": 30.59, + "learning_rate": 3.471268149082994e-05, + "loss": 2.0743, + "step": 10566500 + }, + { + "epoch": 30.59, + "learning_rate": 3.4711957843182666e-05, + "loss": 2.079, + "step": 10567000 + }, + { + "epoch": 30.59, + "learning_rate": 3.471123564283068e-05, + "loss": 2.1048, + "step": 10567500 + }, + { + "epoch": 30.59, + "learning_rate": 3.47105119951834e-05, + "loss": 2.0906, + "step": 10568000 + }, + { + "epoch": 30.59, + "learning_rate": 3.470978979483142e-05, + "loss": 2.0914, + "step": 10568500 + }, + { + "epoch": 30.59, + "learning_rate": 3.470906614718414e-05, + "loss": 2.1006, + "step": 10569000 + }, + { + "epoch": 30.59, + "learning_rate": 3.470834249953686e-05, + "loss": 2.1233, + "step": 10569500 + }, + { + "epoch": 30.6, + "learning_rate": 3.470761885188959e-05, + "loss": 2.093, + "step": 10570000 + }, + { + "epoch": 30.6, + "learning_rate": 3.4706895204242315e-05, + "loss": 2.1286, + "step": 10570500 + }, + { + "epoch": 30.6, + "learning_rate": 3.470617155659504e-05, + "loss": 2.1026, + "step": 10571000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470544790894776e-05, + "loss": 2.0967, + "step": 10571500 + }, + { + "epoch": 30.6, + "learning_rate": 3.470472426130048e-05, + "loss": 2.0947, + "step": 10572000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470400061365321e-05, + "loss": 2.107, + "step": 10572500 + }, + { + "epoch": 30.6, + "learning_rate": 3.470327696600593e-05, + "loss": 2.0974, + "step": 10573000 + }, + { + "epoch": 30.61, + "learning_rate": 3.4702553318358655e-05, + "loss": 2.0987, + "step": 10573500 + }, + { + "epoch": 30.61, + "learning_rate": 3.470183111800667e-05, + "loss": 2.1058, + "step": 10574000 + }, + { + "epoch": 30.61, + "learning_rate": 3.470110747035939e-05, + "loss": 2.0925, + "step": 10574500 + }, + { + "epoch": 30.61, + "learning_rate": 3.4700383822712115e-05, + "loss": 2.1235, + "step": 10575000 + }, + { + "epoch": 30.61, + "learning_rate": 3.4699660175064844e-05, + "loss": 2.0879, + "step": 10575500 + }, + { + "epoch": 30.61, + "learning_rate": 3.4698936527417566e-05, + "loss": 2.1142, + "step": 10576000 + }, + { + "epoch": 30.61, + "learning_rate": 3.469821287977029e-05, + "loss": 2.0795, + "step": 10576500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469748923212301e-05, + "loss": 2.1155, + "step": 10577000 + }, + { + "epoch": 30.62, + "learning_rate": 3.469676558447573e-05, + "loss": 2.1153, + "step": 10577500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469604338412375e-05, + "loss": 2.1202, + "step": 10578000 + }, + { + "epoch": 30.62, + "learning_rate": 3.469532118377177e-05, + "loss": 2.1204, + "step": 10578500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469459753612449e-05, + "loss": 2.1243, + "step": 10579000 + }, + { + "epoch": 30.62, + "learning_rate": 3.4693873888477215e-05, + "loss": 2.112, + "step": 10579500 + }, + { + "epoch": 30.62, + "learning_rate": 3.4693150240829944e-05, + "loss": 2.1113, + "step": 10580000 + }, + { + "epoch": 30.63, + "learning_rate": 3.4692426593182666e-05, + "loss": 2.0985, + "step": 10580500 + }, + { + "epoch": 30.63, + "learning_rate": 3.469170294553539e-05, + "loss": 2.1281, + "step": 10581000 + }, + { + "epoch": 30.63, + "learning_rate": 3.469097929788811e-05, + "loss": 2.0971, + "step": 10581500 + }, + { + "epoch": 30.63, + "learning_rate": 3.469025565024083e-05, + "loss": 2.1048, + "step": 10582000 + }, + { + "epoch": 30.63, + "learning_rate": 3.4689532002593555e-05, + "loss": 2.1022, + "step": 10582500 + }, + { + "epoch": 30.63, + "learning_rate": 3.468880835494628e-05, + "loss": 2.1156, + "step": 10583000 + }, + { + "epoch": 30.63, + "learning_rate": 3.4688084707299e-05, + "loss": 2.0974, + "step": 10583500 + }, + { + "epoch": 30.64, + "learning_rate": 3.468736105965172e-05, + "loss": 2.1066, + "step": 10584000 + }, + { + "epoch": 30.64, + "learning_rate": 3.4686637412004444e-05, + "loss": 2.0973, + "step": 10584500 + }, + { + "epoch": 30.64, + "learning_rate": 3.4685915211652467e-05, + "loss": 2.1149, + "step": 10585000 + }, + { + "epoch": 30.64, + "learning_rate": 3.468519301130048e-05, + "loss": 2.0976, + "step": 10585500 + }, + { + "epoch": 30.64, + "learning_rate": 3.4684469363653204e-05, + "loss": 2.1073, + "step": 10586000 + }, + { + "epoch": 30.64, + "learning_rate": 3.4683745716005927e-05, + "loss": 2.1206, + "step": 10586500 + }, + { + "epoch": 30.65, + "learning_rate": 3.468302206835865e-05, + "loss": 2.1014, + "step": 10587000 + }, + { + "epoch": 30.65, + "learning_rate": 3.468229842071138e-05, + "loss": 2.1026, + "step": 10587500 + }, + { + "epoch": 30.65, + "learning_rate": 3.46815747730641e-05, + "loss": 2.0665, + "step": 10588000 + }, + { + "epoch": 30.65, + "learning_rate": 3.468085112541682e-05, + "loss": 2.0805, + "step": 10588500 + }, + { + "epoch": 30.65, + "learning_rate": 3.4680127477769545e-05, + "loss": 2.1147, + "step": 10589000 + }, + { + "epoch": 30.65, + "learning_rate": 3.467940383012227e-05, + "loss": 2.0928, + "step": 10589500 + }, + { + "epoch": 30.65, + "learning_rate": 3.467868162977029e-05, + "loss": 2.1106, + "step": 10590000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467795798212301e-05, + "loss": 2.099, + "step": 10590500 + }, + { + "epoch": 30.66, + "learning_rate": 3.4677234334475734e-05, + "loss": 2.1048, + "step": 10591000 + }, + { + "epoch": 30.66, + "learning_rate": 3.4676510686828456e-05, + "loss": 2.0968, + "step": 10591500 + }, + { + "epoch": 30.66, + "learning_rate": 3.467578848647647e-05, + "loss": 2.1245, + "step": 10592000 + }, + { + "epoch": 30.66, + "learning_rate": 3.4675066286124494e-05, + "loss": 2.107, + "step": 10592500 + }, + { + "epoch": 30.66, + "learning_rate": 3.4674342638477216e-05, + "loss": 2.0908, + "step": 10593000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467362043812523e-05, + "loss": 2.0927, + "step": 10593500 + }, + { + "epoch": 30.67, + "learning_rate": 3.4672896790477954e-05, + "loss": 2.1311, + "step": 10594000 + }, + { + "epoch": 30.67, + "learning_rate": 3.4672173142830676e-05, + "loss": 2.1037, + "step": 10594500 + }, + { + "epoch": 30.67, + "learning_rate": 3.4671449495183405e-05, + "loss": 2.123, + "step": 10595000 + }, + { + "epoch": 30.67, + "learning_rate": 3.467072729483142e-05, + "loss": 2.1299, + "step": 10595500 + }, + { + "epoch": 30.67, + "learning_rate": 3.467000364718414e-05, + "loss": 2.0969, + "step": 10596000 + }, + { + "epoch": 30.67, + "learning_rate": 3.466927999953687e-05, + "loss": 2.1337, + "step": 10596500 + }, + { + "epoch": 30.67, + "learning_rate": 3.4668556351889594e-05, + "loss": 2.1021, + "step": 10597000 + }, + { + "epoch": 30.68, + "learning_rate": 3.4667832704242316e-05, + "loss": 2.1204, + "step": 10597500 + }, + { + "epoch": 30.68, + "learning_rate": 3.466710905659504e-05, + "loss": 2.1056, + "step": 10598000 + }, + { + "epoch": 30.68, + "learning_rate": 3.466638540894776e-05, + "loss": 2.1403, + "step": 10598500 + }, + { + "epoch": 30.68, + "learning_rate": 3.466566176130048e-05, + "loss": 2.1092, + "step": 10599000 + }, + { + "epoch": 30.68, + "learning_rate": 3.46649395609485e-05, + "loss": 2.1259, + "step": 10599500 + }, + { + "epoch": 30.68, + "learning_rate": 3.466421591330122e-05, + "loss": 2.1083, + "step": 10600000 + }, + { + "epoch": 30.68, + "learning_rate": 3.466349226565394e-05, + "loss": 2.1092, + "step": 10600500 + }, + { + "epoch": 30.69, + "learning_rate": 3.466276861800667e-05, + "loss": 2.1048, + "step": 10601000 + }, + { + "epoch": 30.69, + "learning_rate": 3.4662044970359394e-05, + "loss": 2.1048, + "step": 10601500 + }, + { + "epoch": 30.69, + "learning_rate": 3.466132277000741e-05, + "loss": 2.1221, + "step": 10602000 + }, + { + "epoch": 30.69, + "learning_rate": 3.466059912236013e-05, + "loss": 2.081, + "step": 10602500 + }, + { + "epoch": 30.69, + "learning_rate": 3.465987547471286e-05, + "loss": 2.1052, + "step": 10603000 + }, + { + "epoch": 30.69, + "learning_rate": 3.465915182706558e-05, + "loss": 2.107, + "step": 10603500 + }, + { + "epoch": 30.69, + "learning_rate": 3.4658428179418305e-05, + "loss": 2.0856, + "step": 10604000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465770453177103e-05, + "loss": 2.115, + "step": 10604500 + }, + { + "epoch": 30.7, + "learning_rate": 3.465698088412375e-05, + "loss": 2.1242, + "step": 10605000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465625723647647e-05, + "loss": 2.1285, + "step": 10605500 + }, + { + "epoch": 30.7, + "learning_rate": 3.4655535036124494e-05, + "loss": 2.1191, + "step": 10606000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465481138847722e-05, + "loss": 2.1342, + "step": 10606500 + }, + { + "epoch": 30.7, + "learning_rate": 3.465408774082994e-05, + "loss": 2.0741, + "step": 10607000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465336409318266e-05, + "loss": 2.1249, + "step": 10607500 + }, + { + "epoch": 30.71, + "learning_rate": 3.4652640445535383e-05, + "loss": 2.1406, + "step": 10608000 + }, + { + "epoch": 30.71, + "learning_rate": 3.4651916797888106e-05, + "loss": 2.1068, + "step": 10608500 + }, + { + "epoch": 30.71, + "learning_rate": 3.465119315024083e-05, + "loss": 2.112, + "step": 10609000 + }, + { + "epoch": 30.71, + "learning_rate": 3.465047094988885e-05, + "loss": 2.0934, + "step": 10609500 + }, + { + "epoch": 30.71, + "learning_rate": 3.464974730224157e-05, + "loss": 2.1061, + "step": 10610000 + }, + { + "epoch": 30.71, + "learning_rate": 3.46490236545943e-05, + "loss": 2.0968, + "step": 10610500 + }, + { + "epoch": 30.71, + "learning_rate": 3.464830145424232e-05, + "loss": 2.1406, + "step": 10611000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464757780659504e-05, + "loss": 2.1136, + "step": 10611500 + }, + { + "epoch": 30.72, + "learning_rate": 3.464685415894776e-05, + "loss": 2.1141, + "step": 10612000 + }, + { + "epoch": 30.72, + "learning_rate": 3.4646130511300484e-05, + "loss": 2.125, + "step": 10612500 + }, + { + "epoch": 30.72, + "learning_rate": 3.46454083109485e-05, + "loss": 2.094, + "step": 10613000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464468466330122e-05, + "loss": 2.1236, + "step": 10613500 + }, + { + "epoch": 30.72, + "learning_rate": 3.464396101565395e-05, + "loss": 2.1084, + "step": 10614000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464323736800667e-05, + "loss": 2.108, + "step": 10614500 + }, + { + "epoch": 30.73, + "learning_rate": 3.4642513720359395e-05, + "loss": 2.1089, + "step": 10615000 + }, + { + "epoch": 30.73, + "learning_rate": 3.464179007271212e-05, + "loss": 2.126, + "step": 10615500 + }, + { + "epoch": 30.73, + "learning_rate": 3.464106642506484e-05, + "loss": 2.0948, + "step": 10616000 + }, + { + "epoch": 30.73, + "learning_rate": 3.464034277741756e-05, + "loss": 2.1059, + "step": 10616500 + }, + { + "epoch": 30.73, + "learning_rate": 3.4639619129770284e-05, + "loss": 2.1235, + "step": 10617000 + }, + { + "epoch": 30.73, + "learning_rate": 3.4638896929418306e-05, + "loss": 2.0988, + "step": 10617500 + }, + { + "epoch": 30.73, + "learning_rate": 3.463817472906632e-05, + "loss": 2.1331, + "step": 10618000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463745108141905e-05, + "loss": 2.1103, + "step": 10618500 + }, + { + "epoch": 30.74, + "learning_rate": 3.4636728881067066e-05, + "loss": 2.1302, + "step": 10619000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463600523341979e-05, + "loss": 2.1352, + "step": 10619500 + }, + { + "epoch": 30.74, + "learning_rate": 3.463528158577251e-05, + "loss": 2.134, + "step": 10620000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463455793812523e-05, + "loss": 2.0998, + "step": 10620500 + }, + { + "epoch": 30.74, + "learning_rate": 3.4633834290477955e-05, + "loss": 2.0873, + "step": 10621000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463311064283068e-05, + "loss": 2.1095, + "step": 10621500 + }, + { + "epoch": 30.75, + "learning_rate": 3.46323869951834e-05, + "loss": 2.0785, + "step": 10622000 + }, + { + "epoch": 30.75, + "learning_rate": 3.463166334753612e-05, + "loss": 2.1043, + "step": 10622500 + }, + { + "epoch": 30.75, + "learning_rate": 3.4630941147184144e-05, + "loss": 2.1015, + "step": 10623000 + }, + { + "epoch": 30.75, + "learning_rate": 3.4630217499536867e-05, + "loss": 2.1199, + "step": 10623500 + }, + { + "epoch": 30.75, + "learning_rate": 3.462949385188959e-05, + "loss": 2.0892, + "step": 10624000 + }, + { + "epoch": 30.75, + "learning_rate": 3.462877020424231e-05, + "loss": 2.1227, + "step": 10624500 + }, + { + "epoch": 30.76, + "learning_rate": 3.462804655659504e-05, + "loss": 2.1063, + "step": 10625000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462732290894776e-05, + "loss": 2.1008, + "step": 10625500 + }, + { + "epoch": 30.76, + "learning_rate": 3.4626599261300484e-05, + "loss": 2.1084, + "step": 10626000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462587561365321e-05, + "loss": 2.082, + "step": 10626500 + }, + { + "epoch": 30.76, + "learning_rate": 3.462515341330122e-05, + "loss": 2.1205, + "step": 10627000 + }, + { + "epoch": 30.76, + "learning_rate": 3.4624431212949245e-05, + "loss": 2.0917, + "step": 10627500 + }, + { + "epoch": 30.76, + "learning_rate": 3.462370756530197e-05, + "loss": 2.0926, + "step": 10628000 + }, + { + "epoch": 30.77, + "learning_rate": 3.462298391765469e-05, + "loss": 2.1074, + "step": 10628500 + }, + { + "epoch": 30.77, + "learning_rate": 3.462226027000741e-05, + "loss": 2.1121, + "step": 10629000 + }, + { + "epoch": 30.77, + "learning_rate": 3.4621536622360134e-05, + "loss": 2.0819, + "step": 10629500 + }, + { + "epoch": 30.77, + "learning_rate": 3.4620812974712856e-05, + "loss": 2.1125, + "step": 10630000 + }, + { + "epoch": 30.77, + "learning_rate": 3.462008932706558e-05, + "loss": 2.1056, + "step": 10630500 + }, + { + "epoch": 30.77, + "learning_rate": 3.46193656794183e-05, + "loss": 2.1028, + "step": 10631000 + }, + { + "epoch": 30.77, + "learning_rate": 3.461864203177102e-05, + "loss": 2.1043, + "step": 10631500 + }, + { + "epoch": 30.78, + "learning_rate": 3.461791838412375e-05, + "loss": 2.0865, + "step": 10632000 + }, + { + "epoch": 30.78, + "learning_rate": 3.461719618377177e-05, + "loss": 2.1143, + "step": 10632500 + }, + { + "epoch": 30.78, + "learning_rate": 3.4616472536124496e-05, + "loss": 2.0973, + "step": 10633000 + }, + { + "epoch": 30.78, + "learning_rate": 3.461574888847722e-05, + "loss": 2.1167, + "step": 10633500 + }, + { + "epoch": 30.78, + "learning_rate": 3.461502524082994e-05, + "loss": 2.0848, + "step": 10634000 + }, + { + "epoch": 30.78, + "learning_rate": 3.461430159318266e-05, + "loss": 2.1045, + "step": 10634500 + }, + { + "epoch": 30.78, + "learning_rate": 3.4613577945535385e-05, + "loss": 2.1214, + "step": 10635000 + }, + { + "epoch": 30.79, + "learning_rate": 3.461285429788811e-05, + "loss": 2.1033, + "step": 10635500 + }, + { + "epoch": 30.79, + "learning_rate": 3.461213065024083e-05, + "loss": 2.1035, + "step": 10636000 + }, + { + "epoch": 30.79, + "learning_rate": 3.461140844988885e-05, + "loss": 2.0896, + "step": 10636500 + }, + { + "epoch": 30.79, + "learning_rate": 3.4610684802241574e-05, + "loss": 2.1127, + "step": 10637000 + }, + { + "epoch": 30.79, + "learning_rate": 3.4609961154594296e-05, + "loss": 2.1077, + "step": 10637500 + }, + { + "epoch": 30.79, + "learning_rate": 3.460923750694702e-05, + "loss": 2.1448, + "step": 10638000 + }, + { + "epoch": 30.79, + "learning_rate": 3.4608515306595034e-05, + "loss": 2.1095, + "step": 10638500 + }, + { + "epoch": 30.8, + "learning_rate": 3.4607791658947756e-05, + "loss": 2.0712, + "step": 10639000 + }, + { + "epoch": 30.8, + "learning_rate": 3.460706801130048e-05, + "loss": 2.1032, + "step": 10639500 + }, + { + "epoch": 30.8, + "learning_rate": 3.460634436365321e-05, + "loss": 2.1296, + "step": 10640000 + }, + { + "epoch": 30.8, + "learning_rate": 3.460562071600593e-05, + "loss": 2.1002, + "step": 10640500 + }, + { + "epoch": 30.8, + "learning_rate": 3.460489851565395e-05, + "loss": 2.1017, + "step": 10641000 + }, + { + "epoch": 30.8, + "learning_rate": 3.4604174868006674e-05, + "loss": 2.1172, + "step": 10641500 + }, + { + "epoch": 30.8, + "learning_rate": 3.4603451220359397e-05, + "loss": 2.1023, + "step": 10642000 + }, + { + "epoch": 30.81, + "learning_rate": 3.460272902000741e-05, + "loss": 2.1137, + "step": 10642500 + }, + { + "epoch": 30.81, + "learning_rate": 3.4602005372360134e-05, + "loss": 2.1161, + "step": 10643000 + }, + { + "epoch": 30.81, + "learning_rate": 3.460128317200815e-05, + "loss": 2.1348, + "step": 10643500 + }, + { + "epoch": 30.81, + "learning_rate": 3.460055952436088e-05, + "loss": 2.1076, + "step": 10644000 + }, + { + "epoch": 30.81, + "learning_rate": 3.45998358767136e-05, + "loss": 2.1262, + "step": 10644500 + }, + { + "epoch": 30.81, + "learning_rate": 3.459911222906632e-05, + "loss": 2.0831, + "step": 10645000 + }, + { + "epoch": 30.81, + "learning_rate": 3.4598388581419046e-05, + "loss": 2.1251, + "step": 10645500 + }, + { + "epoch": 30.82, + "learning_rate": 3.459766493377177e-05, + "loss": 2.1163, + "step": 10646000 + }, + { + "epoch": 30.82, + "learning_rate": 3.459694128612449e-05, + "loss": 2.1153, + "step": 10646500 + }, + { + "epoch": 30.82, + "learning_rate": 3.459621763847721e-05, + "loss": 2.0995, + "step": 10647000 + }, + { + "epoch": 30.82, + "learning_rate": 3.459549399082994e-05, + "loss": 2.1295, + "step": 10647500 + }, + { + "epoch": 30.82, + "learning_rate": 3.4594770343182664e-05, + "loss": 2.0819, + "step": 10648000 + }, + { + "epoch": 30.82, + "learning_rate": 3.4594046695535386e-05, + "loss": 2.1078, + "step": 10648500 + }, + { + "epoch": 30.82, + "learning_rate": 3.459332304788811e-05, + "loss": 2.1318, + "step": 10649000 + }, + { + "epoch": 30.83, + "learning_rate": 3.459259940024083e-05, + "loss": 2.1269, + "step": 10649500 + }, + { + "epoch": 30.83, + "learning_rate": 3.459187719988885e-05, + "loss": 2.1018, + "step": 10650000 + }, + { + "epoch": 30.83, + "learning_rate": 3.4591153552241575e-05, + "loss": 2.1007, + "step": 10650500 + }, + { + "epoch": 30.83, + "learning_rate": 3.45904299045943e-05, + "loss": 2.1133, + "step": 10651000 + }, + { + "epoch": 30.83, + "learning_rate": 3.458970625694702e-05, + "loss": 2.1134, + "step": 10651500 + }, + { + "epoch": 30.83, + "learning_rate": 3.4588984056595035e-05, + "loss": 2.1079, + "step": 10652000 + }, + { + "epoch": 30.83, + "learning_rate": 3.458826185624306e-05, + "loss": 2.0908, + "step": 10652500 + }, + { + "epoch": 30.84, + "learning_rate": 3.458753820859578e-05, + "loss": 2.0961, + "step": 10653000 + }, + { + "epoch": 30.84, + "learning_rate": 3.45868145609485e-05, + "loss": 2.1317, + "step": 10653500 + }, + { + "epoch": 30.84, + "learning_rate": 3.4586090913301224e-05, + "loss": 2.0974, + "step": 10654000 + }, + { + "epoch": 30.84, + "learning_rate": 3.4585367265653946e-05, + "loss": 2.1093, + "step": 10654500 + }, + { + "epoch": 30.84, + "learning_rate": 3.4584643618006675e-05, + "loss": 2.0993, + "step": 10655000 + }, + { + "epoch": 30.84, + "learning_rate": 3.45839199703594e-05, + "loss": 2.1435, + "step": 10655500 + }, + { + "epoch": 30.84, + "learning_rate": 3.458319777000741e-05, + "loss": 2.101, + "step": 10656000 + }, + { + "epoch": 30.85, + "learning_rate": 3.458247556965543e-05, + "loss": 2.0932, + "step": 10656500 + }, + { + "epoch": 30.85, + "learning_rate": 3.458175192200816e-05, + "loss": 2.1, + "step": 10657000 + }, + { + "epoch": 30.85, + "learning_rate": 3.458102827436088e-05, + "loss": 2.0907, + "step": 10657500 + }, + { + "epoch": 30.85, + "learning_rate": 3.45803046267136e-05, + "loss": 2.115, + "step": 10658000 + }, + { + "epoch": 30.85, + "learning_rate": 3.4579580979066324e-05, + "loss": 2.1094, + "step": 10658500 + }, + { + "epoch": 30.85, + "learning_rate": 3.4578857331419046e-05, + "loss": 2.1002, + "step": 10659000 + }, + { + "epoch": 30.85, + "learning_rate": 3.457813368377177e-05, + "loss": 2.0962, + "step": 10659500 + }, + { + "epoch": 30.86, + "learning_rate": 3.457741003612449e-05, + "loss": 2.095, + "step": 10660000 + }, + { + "epoch": 30.86, + "learning_rate": 3.457668638847721e-05, + "loss": 2.1143, + "step": 10660500 + }, + { + "epoch": 30.86, + "learning_rate": 3.4575962740829935e-05, + "loss": 2.1077, + "step": 10661000 + }, + { + "epoch": 30.86, + "learning_rate": 3.457523909318266e-05, + "loss": 2.1018, + "step": 10661500 + }, + { + "epoch": 30.86, + "learning_rate": 3.457451689283068e-05, + "loss": 2.087, + "step": 10662000 + }, + { + "epoch": 30.86, + "learning_rate": 3.4573794692478695e-05, + "loss": 2.1108, + "step": 10662500 + }, + { + "epoch": 30.87, + "learning_rate": 3.4573071044831424e-05, + "loss": 2.1193, + "step": 10663000 + }, + { + "epoch": 30.87, + "learning_rate": 3.457234739718415e-05, + "loss": 2.1091, + "step": 10663500 + }, + { + "epoch": 30.87, + "learning_rate": 3.457162374953687e-05, + "loss": 2.1125, + "step": 10664000 + }, + { + "epoch": 30.87, + "learning_rate": 3.457090010188959e-05, + "loss": 2.0936, + "step": 10664500 + }, + { + "epoch": 30.87, + "learning_rate": 3.4570176454242313e-05, + "loss": 2.1075, + "step": 10665000 + }, + { + "epoch": 30.87, + "learning_rate": 3.4569452806595036e-05, + "loss": 2.1165, + "step": 10665500 + }, + { + "epoch": 30.87, + "learning_rate": 3.456872915894776e-05, + "loss": 2.1046, + "step": 10666000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456800551130048e-05, + "loss": 2.1345, + "step": 10666500 + }, + { + "epoch": 30.88, + "learning_rate": 3.456728186365321e-05, + "loss": 2.132, + "step": 10667000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456655821600593e-05, + "loss": 2.1084, + "step": 10667500 + }, + { + "epoch": 30.88, + "learning_rate": 3.456583601565395e-05, + "loss": 2.0939, + "step": 10668000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456511236800667e-05, + "loss": 2.099, + "step": 10668500 + }, + { + "epoch": 30.88, + "learning_rate": 3.456438872035939e-05, + "loss": 2.103, + "step": 10669000 + }, + { + "epoch": 30.88, + "learning_rate": 3.4563665072712114e-05, + "loss": 2.1018, + "step": 10669500 + }, + { + "epoch": 30.89, + "learning_rate": 3.456294142506484e-05, + "loss": 2.1097, + "step": 10670000 + }, + { + "epoch": 30.89, + "learning_rate": 3.456221922471286e-05, + "loss": 2.1088, + "step": 10670500 + }, + { + "epoch": 30.89, + "learning_rate": 3.456149557706558e-05, + "loss": 2.1186, + "step": 10671000 + }, + { + "epoch": 30.89, + "learning_rate": 3.456077192941831e-05, + "loss": 2.1183, + "step": 10671500 + }, + { + "epoch": 30.89, + "learning_rate": 3.456004828177103e-05, + "loss": 2.0985, + "step": 10672000 + }, + { + "epoch": 30.89, + "learning_rate": 3.4559324634123754e-05, + "loss": 2.0993, + "step": 10672500 + }, + { + "epoch": 30.89, + "learning_rate": 3.4558600986476476e-05, + "loss": 2.1145, + "step": 10673000 + }, + { + "epoch": 30.9, + "learning_rate": 3.45578773388292e-05, + "loss": 2.1013, + "step": 10673500 + }, + { + "epoch": 30.9, + "learning_rate": 3.455715369118192e-05, + "loss": 2.1034, + "step": 10674000 + }, + { + "epoch": 30.9, + "learning_rate": 3.4556431490829936e-05, + "loss": 2.0987, + "step": 10674500 + }, + { + "epoch": 30.9, + "learning_rate": 3.455570784318266e-05, + "loss": 2.1138, + "step": 10675000 + }, + { + "epoch": 30.9, + "learning_rate": 3.455498419553538e-05, + "loss": 2.0997, + "step": 10675500 + }, + { + "epoch": 30.9, + "learning_rate": 3.455426054788811e-05, + "loss": 2.1045, + "step": 10676000 + }, + { + "epoch": 30.9, + "learning_rate": 3.455353690024083e-05, + "loss": 2.1221, + "step": 10676500 + }, + { + "epoch": 30.91, + "learning_rate": 3.4552813252593554e-05, + "loss": 2.1028, + "step": 10677000 + }, + { + "epoch": 30.91, + "learning_rate": 3.455208960494628e-05, + "loss": 2.0933, + "step": 10677500 + }, + { + "epoch": 30.91, + "learning_rate": 3.4551365957299005e-05, + "loss": 2.0702, + "step": 10678000 + }, + { + "epoch": 30.91, + "learning_rate": 3.455064230965173e-05, + "loss": 2.123, + "step": 10678500 + }, + { + "epoch": 30.91, + "learning_rate": 3.454991866200445e-05, + "loss": 2.1145, + "step": 10679000 + }, + { + "epoch": 30.91, + "learning_rate": 3.454919501435717e-05, + "loss": 2.1137, + "step": 10679500 + }, + { + "epoch": 30.91, + "learning_rate": 3.4548471366709894e-05, + "loss": 2.1112, + "step": 10680000 + }, + { + "epoch": 30.92, + "learning_rate": 3.454775061365321e-05, + "loss": 2.0965, + "step": 10680500 + }, + { + "epoch": 30.92, + "learning_rate": 3.454702696600593e-05, + "loss": 2.1289, + "step": 10681000 + }, + { + "epoch": 30.92, + "learning_rate": 3.4546303318358654e-05, + "loss": 2.0981, + "step": 10681500 + }, + { + "epoch": 30.92, + "learning_rate": 3.4545579670711377e-05, + "loss": 2.1094, + "step": 10682000 + }, + { + "epoch": 30.92, + "learning_rate": 3.45448560230641e-05, + "loss": 2.1064, + "step": 10682500 + }, + { + "epoch": 30.92, + "learning_rate": 3.4544133822712114e-05, + "loss": 2.0846, + "step": 10683000 + }, + { + "epoch": 30.92, + "learning_rate": 3.454341017506484e-05, + "loss": 2.145, + "step": 10683500 + }, + { + "epoch": 30.93, + "learning_rate": 3.454268652741756e-05, + "loss": 2.1017, + "step": 10684000 + }, + { + "epoch": 30.93, + "learning_rate": 3.454196287977028e-05, + "loss": 2.1485, + "step": 10684500 + }, + { + "epoch": 30.93, + "learning_rate": 3.454123923212301e-05, + "loss": 2.0905, + "step": 10685000 + }, + { + "epoch": 30.93, + "learning_rate": 3.454051703177103e-05, + "loss": 2.1336, + "step": 10685500 + }, + { + "epoch": 30.93, + "learning_rate": 3.4539793384123755e-05, + "loss": 2.0857, + "step": 10686000 + }, + { + "epoch": 30.93, + "learning_rate": 3.453907118377177e-05, + "loss": 2.1163, + "step": 10686500 + }, + { + "epoch": 30.93, + "learning_rate": 3.453834753612449e-05, + "loss": 2.1486, + "step": 10687000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453762533577251e-05, + "loss": 2.108, + "step": 10687500 + }, + { + "epoch": 30.94, + "learning_rate": 3.453690168812524e-05, + "loss": 2.1128, + "step": 10688000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453617804047796e-05, + "loss": 2.0781, + "step": 10688500 + }, + { + "epoch": 30.94, + "learning_rate": 3.453545439283068e-05, + "loss": 2.0972, + "step": 10689000 + }, + { + "epoch": 30.94, + "learning_rate": 3.4534730745183404e-05, + "loss": 2.1034, + "step": 10689500 + }, + { + "epoch": 30.94, + "learning_rate": 3.4534007097536126e-05, + "loss": 2.1139, + "step": 10690000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453328344988885e-05, + "loss": 2.1235, + "step": 10690500 + }, + { + "epoch": 30.95, + "learning_rate": 3.453255980224157e-05, + "loss": 2.113, + "step": 10691000 + }, + { + "epoch": 30.95, + "learning_rate": 3.453183615459429e-05, + "loss": 2.109, + "step": 10691500 + }, + { + "epoch": 30.95, + "learning_rate": 3.4531112506947015e-05, + "loss": 2.1155, + "step": 10692000 + }, + { + "epoch": 30.95, + "learning_rate": 3.4530388859299744e-05, + "loss": 2.1178, + "step": 10692500 + }, + { + "epoch": 30.95, + "learning_rate": 3.4529665211652466e-05, + "loss": 2.0883, + "step": 10693000 + }, + { + "epoch": 30.95, + "learning_rate": 3.452894301130049e-05, + "loss": 2.0839, + "step": 10693500 + }, + { + "epoch": 30.95, + "learning_rate": 3.452821936365321e-05, + "loss": 2.109, + "step": 10694000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452749571600593e-05, + "loss": 2.0958, + "step": 10694500 + }, + { + "epoch": 30.96, + "learning_rate": 3.452677351565395e-05, + "loss": 2.0982, + "step": 10695000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452604986800667e-05, + "loss": 2.1025, + "step": 10695500 + }, + { + "epoch": 30.96, + "learning_rate": 3.4525327667654686e-05, + "loss": 2.0891, + "step": 10696000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452460402000741e-05, + "loss": 2.0991, + "step": 10696500 + }, + { + "epoch": 30.96, + "learning_rate": 3.452388181965543e-05, + "loss": 2.1349, + "step": 10697000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452315817200815e-05, + "loss": 2.1267, + "step": 10697500 + }, + { + "epoch": 30.97, + "learning_rate": 3.4522434524360875e-05, + "loss": 2.1275, + "step": 10698000 + }, + { + "epoch": 30.97, + "learning_rate": 3.45217108767136e-05, + "loss": 2.1063, + "step": 10698500 + }, + { + "epoch": 30.97, + "learning_rate": 3.452098722906632e-05, + "loss": 2.1058, + "step": 10699000 + }, + { + "epoch": 30.97, + "learning_rate": 3.452026358141904e-05, + "loss": 2.1145, + "step": 10699500 + }, + { + "epoch": 30.97, + "learning_rate": 3.4519539933771764e-05, + "loss": 2.1079, + "step": 10700000 + }, + { + "epoch": 30.97, + "learning_rate": 3.451881628612449e-05, + "loss": 2.1016, + "step": 10700500 + }, + { + "epoch": 30.98, + "learning_rate": 3.4518092638477215e-05, + "loss": 2.0703, + "step": 10701000 + }, + { + "epoch": 30.98, + "learning_rate": 3.451736899082994e-05, + "loss": 2.1236, + "step": 10701500 + }, + { + "epoch": 30.98, + "learning_rate": 3.451664534318266e-05, + "loss": 2.1111, + "step": 10702000 + }, + { + "epoch": 30.98, + "learning_rate": 3.451592314283068e-05, + "loss": 2.1126, + "step": 10702500 + }, + { + "epoch": 30.98, + "learning_rate": 3.45152009424787e-05, + "loss": 2.1096, + "step": 10703000 + }, + { + "epoch": 30.98, + "learning_rate": 3.451447729483142e-05, + "loss": 2.1062, + "step": 10703500 + }, + { + "epoch": 30.98, + "learning_rate": 3.451375364718414e-05, + "loss": 2.0982, + "step": 10704000 + }, + { + "epoch": 30.99, + "learning_rate": 3.4513029999536865e-05, + "loss": 2.0822, + "step": 10704500 + }, + { + "epoch": 30.99, + "learning_rate": 3.451230635188959e-05, + "loss": 2.1035, + "step": 10705000 + }, + { + "epoch": 30.99, + "learning_rate": 3.451158415153761e-05, + "loss": 2.1218, + "step": 10705500 + }, + { + "epoch": 30.99, + "learning_rate": 3.451086050389033e-05, + "loss": 2.1137, + "step": 10706000 + }, + { + "epoch": 30.99, + "learning_rate": 3.4510136856243054e-05, + "loss": 2.1128, + "step": 10706500 + }, + { + "epoch": 30.99, + "learning_rate": 3.4509413208595776e-05, + "loss": 2.1017, + "step": 10707000 + }, + { + "epoch": 30.99, + "learning_rate": 3.45086895609485e-05, + "loss": 2.0891, + "step": 10707500 + }, + { + "epoch": 31.0, + "learning_rate": 3.450796591330123e-05, + "loss": 2.1031, + "step": 10708000 + }, + { + "epoch": 31.0, + "learning_rate": 3.450724226565395e-05, + "loss": 2.0928, + "step": 10708500 + }, + { + "epoch": 31.0, + "learning_rate": 3.4506520065301965e-05, + "loss": 2.1086, + "step": 10709000 + }, + { + "epoch": 31.0, + "learning_rate": 3.450579641765469e-05, + "loss": 2.0887, + "step": 10709500 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.66679647998167, + "eval_accuracy_mlm": 0.6318043767726271, + "eval_accuracy_nsp": 0.8543835045120605, + "eval_loss": 2.1734020709991455, + "eval_runtime": 331.1869, + "eval_samples_per_second": 1317.643, + "eval_steps_per_second": 54.903, + "step": 10709632 + }, + { + "epoch": 31.0, + "learning_rate": 3.4505072770007416e-05, + "loss": 2.0638, + "step": 10710000 + }, + { + "epoch": 31.0, + "learning_rate": 3.450434912236014e-05, + "loss": 2.075, + "step": 10710500 + }, + { + "epoch": 31.0, + "learning_rate": 3.450362547471286e-05, + "loss": 2.0787, + "step": 10711000 + }, + { + "epoch": 31.01, + "learning_rate": 3.450290182706558e-05, + "loss": 2.0977, + "step": 10711500 + }, + { + "epoch": 31.01, + "learning_rate": 3.4502178179418305e-05, + "loss": 2.0766, + "step": 10712000 + }, + { + "epoch": 31.01, + "learning_rate": 3.450145453177103e-05, + "loss": 2.071, + "step": 10712500 + }, + { + "epoch": 31.01, + "learning_rate": 3.450073088412375e-05, + "loss": 2.0904, + "step": 10713000 + }, + { + "epoch": 31.01, + "learning_rate": 3.450000723647647e-05, + "loss": 2.0882, + "step": 10713500 + }, + { + "epoch": 31.01, + "learning_rate": 3.449928648341979e-05, + "loss": 2.0788, + "step": 10714000 + }, + { + "epoch": 31.01, + "learning_rate": 3.449856283577251e-05, + "loss": 2.1167, + "step": 10714500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449783918812523e-05, + "loss": 2.0884, + "step": 10715000 + }, + { + "epoch": 31.02, + "learning_rate": 3.449711554047796e-05, + "loss": 2.0807, + "step": 10715500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449639189283068e-05, + "loss": 2.0777, + "step": 10716000 + }, + { + "epoch": 31.02, + "learning_rate": 3.4495668245183405e-05, + "loss": 2.0795, + "step": 10716500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449494459753613e-05, + "loss": 2.0772, + "step": 10717000 + }, + { + "epoch": 31.02, + "learning_rate": 3.449422094988885e-05, + "loss": 2.0969, + "step": 10717500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449349730224157e-05, + "loss": 2.1089, + "step": 10718000 + }, + { + "epoch": 31.03, + "learning_rate": 3.4492773654594294e-05, + "loss": 2.0837, + "step": 10718500 + }, + { + "epoch": 31.03, + "learning_rate": 3.4492050006947016e-05, + "loss": 2.0876, + "step": 10719000 + }, + { + "epoch": 31.03, + "learning_rate": 3.449132635929974e-05, + "loss": 2.0689, + "step": 10719500 + }, + { + "epoch": 31.03, + "learning_rate": 3.449060415894776e-05, + "loss": 2.0878, + "step": 10720000 + }, + { + "epoch": 31.03, + "learning_rate": 3.448988051130048e-05, + "loss": 2.0833, + "step": 10720500 + }, + { + "epoch": 31.03, + "learning_rate": 3.4489156863653206e-05, + "loss": 2.12, + "step": 10721000 + }, + { + "epoch": 31.03, + "learning_rate": 3.448843321600593e-05, + "loss": 2.0591, + "step": 10721500 + }, + { + "epoch": 31.04, + "learning_rate": 3.448770956835865e-05, + "loss": 2.0979, + "step": 10722000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448698736800667e-05, + "loss": 2.0614, + "step": 10722500 + }, + { + "epoch": 31.04, + "learning_rate": 3.4486263720359395e-05, + "loss": 2.0765, + "step": 10723000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448554152000742e-05, + "loss": 2.0922, + "step": 10723500 + }, + { + "epoch": 31.04, + "learning_rate": 3.448481787236014e-05, + "loss": 2.0879, + "step": 10724000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448409422471286e-05, + "loss": 2.0897, + "step": 10724500 + }, + { + "epoch": 31.04, + "learning_rate": 3.4483370577065584e-05, + "loss": 2.1274, + "step": 10725000 + }, + { + "epoch": 31.05, + "learning_rate": 3.4482646929418306e-05, + "loss": 2.068, + "step": 10725500 + }, + { + "epoch": 31.05, + "learning_rate": 3.448192472906632e-05, + "loss": 2.0966, + "step": 10726000 + }, + { + "epoch": 31.05, + "learning_rate": 3.448120252871434e-05, + "loss": 2.1057, + "step": 10726500 + }, + { + "epoch": 31.05, + "learning_rate": 3.4480478881067066e-05, + "loss": 2.0975, + "step": 10727000 + }, + { + "epoch": 31.05, + "learning_rate": 3.447975523341979e-05, + "loss": 2.1057, + "step": 10727500 + }, + { + "epoch": 31.05, + "learning_rate": 3.447903158577251e-05, + "loss": 2.0921, + "step": 10728000 + }, + { + "epoch": 31.05, + "learning_rate": 3.447830793812523e-05, + "loss": 2.071, + "step": 10728500 + }, + { + "epoch": 31.06, + "learning_rate": 3.4477584290477955e-05, + "loss": 2.086, + "step": 10729000 + }, + { + "epoch": 31.06, + "learning_rate": 3.447686064283068e-05, + "loss": 2.0777, + "step": 10729500 + }, + { + "epoch": 31.06, + "learning_rate": 3.4476136995183406e-05, + "loss": 2.1007, + "step": 10730000 + }, + { + "epoch": 31.06, + "learning_rate": 3.447541334753613e-05, + "loss": 2.0912, + "step": 10730500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447468969988885e-05, + "loss": 2.0892, + "step": 10731000 + }, + { + "epoch": 31.06, + "learning_rate": 3.447396605224157e-05, + "loss": 2.0664, + "step": 10731500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447324385188959e-05, + "loss": 2.1078, + "step": 10732000 + }, + { + "epoch": 31.07, + "learning_rate": 3.447252020424232e-05, + "loss": 2.0718, + "step": 10732500 + }, + { + "epoch": 31.07, + "learning_rate": 3.447179655659504e-05, + "loss": 2.1038, + "step": 10733000 + }, + { + "epoch": 31.07, + "learning_rate": 3.447107290894776e-05, + "loss": 2.0838, + "step": 10733500 + }, + { + "epoch": 31.07, + "learning_rate": 3.447035070859578e-05, + "loss": 2.1088, + "step": 10734000 + }, + { + "epoch": 31.07, + "learning_rate": 3.44696270609485e-05, + "loss": 2.0947, + "step": 10734500 + }, + { + "epoch": 31.07, + "learning_rate": 3.446890341330122e-05, + "loss": 2.0899, + "step": 10735000 + }, + { + "epoch": 31.07, + "learning_rate": 3.4468179765653944e-05, + "loss": 2.0798, + "step": 10735500 + }, + { + "epoch": 31.08, + "learning_rate": 3.4467456118006666e-05, + "loss": 2.1049, + "step": 10736000 + }, + { + "epoch": 31.08, + "learning_rate": 3.446673391765469e-05, + "loss": 2.073, + "step": 10736500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446601027000741e-05, + "loss": 2.0686, + "step": 10737000 + }, + { + "epoch": 31.08, + "learning_rate": 3.446528662236013e-05, + "loss": 2.0981, + "step": 10737500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446456297471286e-05, + "loss": 2.0835, + "step": 10738000 + }, + { + "epoch": 31.08, + "learning_rate": 3.4463839327065584e-05, + "loss": 2.0774, + "step": 10738500 + }, + { + "epoch": 31.09, + "learning_rate": 3.4463115679418307e-05, + "loss": 2.0779, + "step": 10739000 + }, + { + "epoch": 31.09, + "learning_rate": 3.446239203177103e-05, + "loss": 2.0736, + "step": 10739500 + }, + { + "epoch": 31.09, + "learning_rate": 3.446166838412375e-05, + "loss": 2.0892, + "step": 10740000 + }, + { + "epoch": 31.09, + "learning_rate": 3.446094618377177e-05, + "loss": 2.0819, + "step": 10740500 + }, + { + "epoch": 31.09, + "learning_rate": 3.4460222536124496e-05, + "loss": 2.0993, + "step": 10741000 + }, + { + "epoch": 31.09, + "learning_rate": 3.445950033577251e-05, + "loss": 2.0968, + "step": 10741500 + }, + { + "epoch": 31.09, + "learning_rate": 3.4458776688125233e-05, + "loss": 2.0609, + "step": 10742000 + }, + { + "epoch": 31.1, + "learning_rate": 3.4458053040477956e-05, + "loss": 2.0779, + "step": 10742500 + }, + { + "epoch": 31.1, + "learning_rate": 3.445732939283068e-05, + "loss": 2.1065, + "step": 10743000 + }, + { + "epoch": 31.1, + "learning_rate": 3.44566057451834e-05, + "loss": 2.1202, + "step": 10743500 + }, + { + "epoch": 31.1, + "learning_rate": 3.445588209753612e-05, + "loss": 2.0803, + "step": 10744000 + }, + { + "epoch": 31.1, + "learning_rate": 3.4455158449888845e-05, + "loss": 2.092, + "step": 10744500 + }, + { + "epoch": 31.1, + "learning_rate": 3.4454434802241574e-05, + "loss": 2.1197, + "step": 10745000 + }, + { + "epoch": 31.1, + "learning_rate": 3.4453711154594296e-05, + "loss": 2.1096, + "step": 10745500 + }, + { + "epoch": 31.11, + "learning_rate": 3.445298750694702e-05, + "loss": 2.0993, + "step": 10746000 + }, + { + "epoch": 31.11, + "learning_rate": 3.445226385929975e-05, + "loss": 2.0928, + "step": 10746500 + }, + { + "epoch": 31.11, + "learning_rate": 3.445154021165247e-05, + "loss": 2.1068, + "step": 10747000 + }, + { + "epoch": 31.11, + "learning_rate": 3.445081656400519e-05, + "loss": 2.1097, + "step": 10747500 + }, + { + "epoch": 31.11, + "learning_rate": 3.4450092916357914e-05, + "loss": 2.098, + "step": 10748000 + }, + { + "epoch": 31.11, + "learning_rate": 3.4449369268710636e-05, + "loss": 2.0802, + "step": 10748500 + }, + { + "epoch": 31.11, + "learning_rate": 3.444864706835865e-05, + "loss": 2.0878, + "step": 10749000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444792486800667e-05, + "loss": 2.0926, + "step": 10749500 + }, + { + "epoch": 31.12, + "learning_rate": 3.4447201220359396e-05, + "loss": 2.0935, + "step": 10750000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444647757271212e-05, + "loss": 2.095, + "step": 10750500 + }, + { + "epoch": 31.12, + "learning_rate": 3.444575392506484e-05, + "loss": 2.1155, + "step": 10751000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444503027741756e-05, + "loss": 2.1019, + "step": 10751500 + }, + { + "epoch": 31.12, + "learning_rate": 3.444430807706558e-05, + "loss": 2.0898, + "step": 10752000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444358442941831e-05, + "loss": 2.0855, + "step": 10752500 + }, + { + "epoch": 31.13, + "learning_rate": 3.444286078177103e-05, + "loss": 2.1153, + "step": 10753000 + }, + { + "epoch": 31.13, + "learning_rate": 3.444213713412375e-05, + "loss": 2.1018, + "step": 10753500 + }, + { + "epoch": 31.13, + "learning_rate": 3.444141493377177e-05, + "loss": 2.1045, + "step": 10754000 + }, + { + "epoch": 31.13, + "learning_rate": 3.4440691286124496e-05, + "loss": 2.0989, + "step": 10754500 + }, + { + "epoch": 31.13, + "learning_rate": 3.443996763847722e-05, + "loss": 2.0887, + "step": 10755000 + }, + { + "epoch": 31.13, + "learning_rate": 3.443924688542053e-05, + "loss": 2.0815, + "step": 10755500 + }, + { + "epoch": 31.13, + "learning_rate": 3.443852323777325e-05, + "loss": 2.0994, + "step": 10756000 + }, + { + "epoch": 31.14, + "learning_rate": 3.443779959012597e-05, + "loss": 2.0949, + "step": 10756500 + }, + { + "epoch": 31.14, + "learning_rate": 3.4437075942478694e-05, + "loss": 2.1008, + "step": 10757000 + }, + { + "epoch": 31.14, + "learning_rate": 3.443635229483142e-05, + "loss": 2.0858, + "step": 10757500 + }, + { + "epoch": 31.14, + "learning_rate": 3.4435628647184145e-05, + "loss": 2.0785, + "step": 10758000 + }, + { + "epoch": 31.14, + "learning_rate": 3.443490499953687e-05, + "loss": 2.0872, + "step": 10758500 + }, + { + "epoch": 31.14, + "learning_rate": 3.443418135188959e-05, + "loss": 2.102, + "step": 10759000 + }, + { + "epoch": 31.14, + "learning_rate": 3.443345770424231e-05, + "loss": 2.0882, + "step": 10759500 + }, + { + "epoch": 31.15, + "learning_rate": 3.443273405659504e-05, + "loss": 2.0813, + "step": 10760000 + }, + { + "epoch": 31.15, + "learning_rate": 3.4432010408947763e-05, + "loss": 2.1039, + "step": 10760500 + }, + { + "epoch": 31.15, + "learning_rate": 3.4431286761300486e-05, + "loss": 2.0822, + "step": 10761000 + }, + { + "epoch": 31.15, + "learning_rate": 3.443056311365321e-05, + "loss": 2.1055, + "step": 10761500 + }, + { + "epoch": 31.15, + "learning_rate": 3.4429840913301223e-05, + "loss": 2.0929, + "step": 10762000 + }, + { + "epoch": 31.15, + "learning_rate": 3.4429117265653946e-05, + "loss": 2.0828, + "step": 10762500 + }, + { + "epoch": 31.15, + "learning_rate": 3.442839361800667e-05, + "loss": 2.0714, + "step": 10763000 + }, + { + "epoch": 31.16, + "learning_rate": 3.44276699703594e-05, + "loss": 2.1033, + "step": 10763500 + }, + { + "epoch": 31.16, + "learning_rate": 3.442694632271212e-05, + "loss": 2.0889, + "step": 10764000 + }, + { + "epoch": 31.16, + "learning_rate": 3.442622267506484e-05, + "loss": 2.096, + "step": 10764500 + }, + { + "epoch": 31.16, + "learning_rate": 3.4425499027417564e-05, + "loss": 2.0881, + "step": 10765000 + }, + { + "epoch": 31.16, + "learning_rate": 3.4424775379770286e-05, + "loss": 2.0817, + "step": 10765500 + }, + { + "epoch": 31.16, + "learning_rate": 3.442405173212301e-05, + "loss": 2.0926, + "step": 10766000 + }, + { + "epoch": 31.16, + "learning_rate": 3.4423329531771024e-05, + "loss": 2.0763, + "step": 10766500 + }, + { + "epoch": 31.17, + "learning_rate": 3.4422605884123746e-05, + "loss": 2.0771, + "step": 10767000 + }, + { + "epoch": 31.17, + "learning_rate": 3.4421882236476475e-05, + "loss": 2.1136, + "step": 10767500 + }, + { + "epoch": 31.17, + "learning_rate": 3.44211585888292e-05, + "loss": 2.0859, + "step": 10768000 + }, + { + "epoch": 31.17, + "learning_rate": 3.442043494118192e-05, + "loss": 2.1017, + "step": 10768500 + }, + { + "epoch": 31.17, + "learning_rate": 3.441971274082994e-05, + "loss": 2.0705, + "step": 10769000 + }, + { + "epoch": 31.17, + "learning_rate": 3.4418989093182664e-05, + "loss": 2.083, + "step": 10769500 + }, + { + "epoch": 31.17, + "learning_rate": 3.441826689283068e-05, + "loss": 2.0818, + "step": 10770000 + }, + { + "epoch": 31.18, + "learning_rate": 3.44175432451834e-05, + "loss": 2.088, + "step": 10770500 + }, + { + "epoch": 31.18, + "learning_rate": 3.4416819597536124e-05, + "loss": 2.0957, + "step": 10771000 + }, + { + "epoch": 31.18, + "learning_rate": 3.4416095949888846e-05, + "loss": 2.0825, + "step": 10771500 + }, + { + "epoch": 31.18, + "learning_rate": 3.4415372302241575e-05, + "loss": 2.095, + "step": 10772000 + }, + { + "epoch": 31.18, + "learning_rate": 3.44146486545943e-05, + "loss": 2.0682, + "step": 10772500 + }, + { + "epoch": 31.18, + "learning_rate": 3.441392645424231e-05, + "loss": 2.086, + "step": 10773000 + }, + { + "epoch": 31.18, + "learning_rate": 3.4413202806595035e-05, + "loss": 2.1061, + "step": 10773500 + }, + { + "epoch": 31.19, + "learning_rate": 3.441247915894776e-05, + "loss": 2.1071, + "step": 10774000 + }, + { + "epoch": 31.19, + "learning_rate": 3.441175695859577e-05, + "loss": 2.1062, + "step": 10774500 + }, + { + "epoch": 31.19, + "learning_rate": 3.4411033310948495e-05, + "loss": 2.1015, + "step": 10775000 + }, + { + "epoch": 31.19, + "learning_rate": 3.4410309663301224e-05, + "loss": 2.1257, + "step": 10775500 + }, + { + "epoch": 31.19, + "learning_rate": 3.4409586015653946e-05, + "loss": 2.1254, + "step": 10776000 + }, + { + "epoch": 31.19, + "learning_rate": 3.4408862368006675e-05, + "loss": 2.0726, + "step": 10776500 + }, + { + "epoch": 31.2, + "learning_rate": 3.44081387203594e-05, + "loss": 2.0901, + "step": 10777000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440741652000741e-05, + "loss": 2.1135, + "step": 10777500 + }, + { + "epoch": 31.2, + "learning_rate": 3.4406692872360136e-05, + "loss": 2.0998, + "step": 10778000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440596922471286e-05, + "loss": 2.0861, + "step": 10778500 + }, + { + "epoch": 31.2, + "learning_rate": 3.440524557706558e-05, + "loss": 2.0851, + "step": 10779000 + }, + { + "epoch": 31.2, + "learning_rate": 3.44045219294183e-05, + "loss": 2.1217, + "step": 10779500 + }, + { + "epoch": 31.2, + "learning_rate": 3.4403798281771024e-05, + "loss": 2.0857, + "step": 10780000 + }, + { + "epoch": 31.21, + "learning_rate": 3.440307463412375e-05, + "loss": 2.098, + "step": 10780500 + }, + { + "epoch": 31.21, + "learning_rate": 3.4402350986476476e-05, + "loss": 2.0865, + "step": 10781000 + }, + { + "epoch": 31.21, + "learning_rate": 3.44016273388292e-05, + "loss": 2.1236, + "step": 10781500 + }, + { + "epoch": 31.21, + "learning_rate": 3.440090369118192e-05, + "loss": 2.0979, + "step": 10782000 + }, + { + "epoch": 31.21, + "learning_rate": 3.440018004353465e-05, + "loss": 2.0919, + "step": 10782500 + }, + { + "epoch": 31.21, + "learning_rate": 3.439945639588737e-05, + "loss": 2.0959, + "step": 10783000 + }, + { + "epoch": 31.21, + "learning_rate": 3.439873419553539e-05, + "loss": 2.1093, + "step": 10783500 + }, + { + "epoch": 31.22, + "learning_rate": 3.439801054788811e-05, + "loss": 2.1078, + "step": 10784000 + }, + { + "epoch": 31.22, + "learning_rate": 3.439728690024083e-05, + "loss": 2.0734, + "step": 10784500 + }, + { + "epoch": 31.22, + "learning_rate": 3.4396563252593554e-05, + "loss": 2.1036, + "step": 10785000 + }, + { + "epoch": 31.22, + "learning_rate": 3.4395839604946276e-05, + "loss": 2.1082, + "step": 10785500 + }, + { + "epoch": 31.22, + "learning_rate": 3.4395115957299e-05, + "loss": 2.1076, + "step": 10786000 + }, + { + "epoch": 31.22, + "learning_rate": 3.439439230965173e-05, + "loss": 2.0776, + "step": 10786500 + }, + { + "epoch": 31.22, + "learning_rate": 3.439366866200445e-05, + "loss": 2.1019, + "step": 10787000 + }, + { + "epoch": 31.23, + "learning_rate": 3.439294501435717e-05, + "loss": 2.0676, + "step": 10787500 + }, + { + "epoch": 31.23, + "learning_rate": 3.439222281400519e-05, + "loss": 2.103, + "step": 10788000 + }, + { + "epoch": 31.23, + "learning_rate": 3.439149916635791e-05, + "loss": 2.0785, + "step": 10788500 + }, + { + "epoch": 31.23, + "learning_rate": 3.439077551871063e-05, + "loss": 2.0874, + "step": 10789000 + }, + { + "epoch": 31.23, + "learning_rate": 3.4390051871063354e-05, + "loss": 2.0858, + "step": 10789500 + }, + { + "epoch": 31.23, + "learning_rate": 3.438932822341608e-05, + "loss": 2.0758, + "step": 10790000 + }, + { + "epoch": 31.23, + "learning_rate": 3.43886060230641e-05, + "loss": 2.0788, + "step": 10790500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438788237541683e-05, + "loss": 2.0967, + "step": 10791000 + }, + { + "epoch": 31.24, + "learning_rate": 3.438715872776955e-05, + "loss": 2.1101, + "step": 10791500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438643508012227e-05, + "loss": 2.1064, + "step": 10792000 + }, + { + "epoch": 31.24, + "learning_rate": 3.438571432706558e-05, + "loss": 2.0906, + "step": 10792500 + }, + { + "epoch": 31.24, + "learning_rate": 3.43849906794183e-05, + "loss": 2.1126, + "step": 10793000 + }, + { + "epoch": 31.24, + "learning_rate": 3.4384267031771025e-05, + "loss": 2.1057, + "step": 10793500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438354338412375e-05, + "loss": 2.1015, + "step": 10794000 + }, + { + "epoch": 31.25, + "learning_rate": 3.4382819736476476e-05, + "loss": 2.0912, + "step": 10794500 + }, + { + "epoch": 31.25, + "learning_rate": 3.43820960888292e-05, + "loss": 2.0883, + "step": 10795000 + }, + { + "epoch": 31.25, + "learning_rate": 3.438137244118192e-05, + "loss": 2.0857, + "step": 10795500 + }, + { + "epoch": 31.25, + "learning_rate": 3.438064879353464e-05, + "loss": 2.0828, + "step": 10796000 + }, + { + "epoch": 31.25, + "learning_rate": 3.4379925145887365e-05, + "loss": 2.0939, + "step": 10796500 + }, + { + "epoch": 31.25, + "learning_rate": 3.437920149824009e-05, + "loss": 2.1193, + "step": 10797000 + }, + { + "epoch": 31.25, + "learning_rate": 3.437847785059282e-05, + "loss": 2.0771, + "step": 10797500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437775420294554e-05, + "loss": 2.1036, + "step": 10798000 + }, + { + "epoch": 31.26, + "learning_rate": 3.437703055529826e-05, + "loss": 2.119, + "step": 10798500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437630835494628e-05, + "loss": 2.0783, + "step": 10799000 + }, + { + "epoch": 31.26, + "learning_rate": 3.43755861545943e-05, + "loss": 2.1315, + "step": 10799500 + }, + { + "epoch": 31.26, + "learning_rate": 3.4374863954242315e-05, + "loss": 2.0895, + "step": 10800000 + }, + { + "epoch": 31.26, + "learning_rate": 3.437414030659504e-05, + "loss": 2.095, + "step": 10800500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437341665894776e-05, + "loss": 2.0913, + "step": 10801000 + }, + { + "epoch": 31.27, + "learning_rate": 3.437269301130048e-05, + "loss": 2.0927, + "step": 10801500 + }, + { + "epoch": 31.27, + "learning_rate": 3.4371969363653204e-05, + "loss": 2.0789, + "step": 10802000 + }, + { + "epoch": 31.27, + "learning_rate": 3.4371245716005926e-05, + "loss": 2.1138, + "step": 10802500 + }, + { + "epoch": 31.27, + "learning_rate": 3.4370522068358655e-05, + "loss": 2.0896, + "step": 10803000 + }, + { + "epoch": 31.27, + "learning_rate": 3.436979842071138e-05, + "loss": 2.1014, + "step": 10803500 + }, + { + "epoch": 31.27, + "learning_rate": 3.436907622035939e-05, + "loss": 2.1004, + "step": 10804000 + }, + { + "epoch": 31.27, + "learning_rate": 3.4368352572712115e-05, + "loss": 2.0893, + "step": 10804500 + }, + { + "epoch": 31.28, + "learning_rate": 3.4367628925064844e-05, + "loss": 2.1175, + "step": 10805000 + }, + { + "epoch": 31.28, + "learning_rate": 3.436690672471286e-05, + "loss": 2.1006, + "step": 10805500 + }, + { + "epoch": 31.28, + "learning_rate": 3.436618307706558e-05, + "loss": 2.0857, + "step": 10806000 + }, + { + "epoch": 31.28, + "learning_rate": 3.4365459429418304e-05, + "loss": 2.1031, + "step": 10806500 + }, + { + "epoch": 31.28, + "learning_rate": 3.4364735781771026e-05, + "loss": 2.0897, + "step": 10807000 + }, + { + "epoch": 31.28, + "learning_rate": 3.4364012134123755e-05, + "loss": 2.0926, + "step": 10807500 + }, + { + "epoch": 31.28, + "learning_rate": 3.436328848647648e-05, + "loss": 2.0875, + "step": 10808000 + }, + { + "epoch": 31.29, + "learning_rate": 3.43625648388292e-05, + "loss": 2.0746, + "step": 10808500 + }, + { + "epoch": 31.29, + "learning_rate": 3.4361842638477215e-05, + "loss": 2.0896, + "step": 10809000 + }, + { + "epoch": 31.29, + "learning_rate": 3.436111899082994e-05, + "loss": 2.0824, + "step": 10809500 + }, + { + "epoch": 31.29, + "learning_rate": 3.436039534318266e-05, + "loss": 2.1134, + "step": 10810000 + }, + { + "epoch": 31.29, + "learning_rate": 3.435967169553538e-05, + "loss": 2.1017, + "step": 10810500 + }, + { + "epoch": 31.29, + "learning_rate": 3.4358948047888104e-05, + "loss": 2.0751, + "step": 10811000 + }, + { + "epoch": 31.29, + "learning_rate": 3.4358224400240826e-05, + "loss": 2.0961, + "step": 10811500 + }, + { + "epoch": 31.3, + "learning_rate": 3.4357500752593555e-05, + "loss": 2.1102, + "step": 10812000 + }, + { + "epoch": 31.3, + "learning_rate": 3.435677710494628e-05, + "loss": 2.0795, + "step": 10812500 + }, + { + "epoch": 31.3, + "learning_rate": 3.4356053457299007e-05, + "loss": 2.0851, + "step": 10813000 + }, + { + "epoch": 31.3, + "learning_rate": 3.435532980965173e-05, + "loss": 2.0845, + "step": 10813500 + }, + { + "epoch": 31.3, + "learning_rate": 3.435460616200445e-05, + "loss": 2.0845, + "step": 10814000 + }, + { + "epoch": 31.3, + "learning_rate": 3.435388251435717e-05, + "loss": 2.0991, + "step": 10814500 + }, + { + "epoch": 31.3, + "learning_rate": 3.435316031400519e-05, + "loss": 2.1168, + "step": 10815000 + }, + { + "epoch": 31.31, + "learning_rate": 3.435243666635791e-05, + "loss": 2.098, + "step": 10815500 + }, + { + "epoch": 31.31, + "learning_rate": 3.4351714466005927e-05, + "loss": 2.1273, + "step": 10816000 + }, + { + "epoch": 31.31, + "learning_rate": 3.435099226565395e-05, + "loss": 2.1069, + "step": 10816500 + }, + { + "epoch": 31.31, + "learning_rate": 3.435026861800667e-05, + "loss": 2.09, + "step": 10817000 + }, + { + "epoch": 31.31, + "learning_rate": 3.434954497035939e-05, + "loss": 2.1015, + "step": 10817500 + }, + { + "epoch": 31.31, + "learning_rate": 3.4348821322712116e-05, + "loss": 2.0699, + "step": 10818000 + }, + { + "epoch": 31.32, + "learning_rate": 3.434809767506484e-05, + "loss": 2.0949, + "step": 10818500 + }, + { + "epoch": 31.32, + "learning_rate": 3.434737547471285e-05, + "loss": 2.1007, + "step": 10819000 + }, + { + "epoch": 31.32, + "learning_rate": 3.434665182706558e-05, + "loss": 2.0969, + "step": 10819500 + }, + { + "epoch": 31.32, + "learning_rate": 3.4345928179418305e-05, + "loss": 2.0821, + "step": 10820000 + }, + { + "epoch": 31.32, + "learning_rate": 3.4345204531771034e-05, + "loss": 2.108, + "step": 10820500 + }, + { + "epoch": 31.32, + "learning_rate": 3.4344480884123756e-05, + "loss": 2.1132, + "step": 10821000 + }, + { + "epoch": 31.32, + "learning_rate": 3.434375723647648e-05, + "loss": 2.1031, + "step": 10821500 + }, + { + "epoch": 31.33, + "learning_rate": 3.43430335888292e-05, + "loss": 2.0709, + "step": 10822000 + }, + { + "epoch": 31.33, + "learning_rate": 3.434230994118192e-05, + "loss": 2.0799, + "step": 10822500 + }, + { + "epoch": 31.33, + "learning_rate": 3.4341586293534645e-05, + "loss": 2.1011, + "step": 10823000 + }, + { + "epoch": 31.33, + "learning_rate": 3.434086409318266e-05, + "loss": 2.1128, + "step": 10823500 + }, + { + "epoch": 31.33, + "learning_rate": 3.434014044553538e-05, + "loss": 2.0982, + "step": 10824000 + }, + { + "epoch": 31.33, + "learning_rate": 3.4339416797888105e-05, + "loss": 2.1034, + "step": 10824500 + }, + { + "epoch": 31.33, + "learning_rate": 3.4338693150240834e-05, + "loss": 2.0936, + "step": 10825000 + }, + { + "epoch": 31.34, + "learning_rate": 3.4337969502593556e-05, + "loss": 2.1077, + "step": 10825500 + }, + { + "epoch": 31.34, + "learning_rate": 3.433724730224157e-05, + "loss": 2.1107, + "step": 10826000 + }, + { + "epoch": 31.34, + "learning_rate": 3.4336523654594294e-05, + "loss": 2.1106, + "step": 10826500 + }, + { + "epoch": 31.34, + "learning_rate": 3.4335800006947016e-05, + "loss": 2.1106, + "step": 10827000 + }, + { + "epoch": 31.34, + "learning_rate": 3.4335076359299745e-05, + "loss": 2.0784, + "step": 10827500 + }, + { + "epoch": 31.34, + "learning_rate": 3.433435415894776e-05, + "loss": 2.1091, + "step": 10828000 + }, + { + "epoch": 31.34, + "learning_rate": 3.433363051130048e-05, + "loss": 2.1297, + "step": 10828500 + }, + { + "epoch": 31.35, + "learning_rate": 3.4332908310948505e-05, + "loss": 2.1143, + "step": 10829000 + }, + { + "epoch": 31.35, + "learning_rate": 3.433218466330123e-05, + "loss": 2.0899, + "step": 10829500 + }, + { + "epoch": 31.35, + "learning_rate": 3.433146101565395e-05, + "loss": 2.1223, + "step": 10830000 + }, + { + "epoch": 31.35, + "learning_rate": 3.433073736800667e-05, + "loss": 2.0839, + "step": 10830500 + }, + { + "epoch": 31.35, + "learning_rate": 3.4330013720359394e-05, + "loss": 2.0801, + "step": 10831000 + }, + { + "epoch": 31.35, + "learning_rate": 3.432929152000741e-05, + "loss": 2.0864, + "step": 10831500 + }, + { + "epoch": 31.35, + "learning_rate": 3.432856787236013e-05, + "loss": 2.0825, + "step": 10832000 + }, + { + "epoch": 31.36, + "learning_rate": 3.4327845672008154e-05, + "loss": 2.1311, + "step": 10832500 + }, + { + "epoch": 31.36, + "learning_rate": 3.4327122024360876e-05, + "loss": 2.0945, + "step": 10833000 + }, + { + "epoch": 31.36, + "learning_rate": 3.432639982400889e-05, + "loss": 2.0941, + "step": 10833500 + }, + { + "epoch": 31.36, + "learning_rate": 3.4325676176361614e-05, + "loss": 2.1079, + "step": 10834000 + }, + { + "epoch": 31.36, + "learning_rate": 3.4324952528714337e-05, + "loss": 2.055, + "step": 10834500 + }, + { + "epoch": 31.36, + "learning_rate": 3.432422888106706e-05, + "loss": 2.1063, + "step": 10835000 + }, + { + "epoch": 31.36, + "learning_rate": 3.432350523341979e-05, + "loss": 2.0908, + "step": 10835500 + }, + { + "epoch": 31.37, + "learning_rate": 3.432278158577251e-05, + "loss": 2.1023, + "step": 10836000 + }, + { + "epoch": 31.37, + "learning_rate": 3.432205793812523e-05, + "loss": 2.0847, + "step": 10836500 + }, + { + "epoch": 31.37, + "learning_rate": 3.4321334290477954e-05, + "loss": 2.0946, + "step": 10837000 + }, + { + "epoch": 31.37, + "learning_rate": 3.4320610642830683e-05, + "loss": 2.0747, + "step": 10837500 + }, + { + "epoch": 31.37, + "learning_rate": 3.4319886995183406e-05, + "loss": 2.1078, + "step": 10838000 + }, + { + "epoch": 31.37, + "learning_rate": 3.431916334753613e-05, + "loss": 2.0835, + "step": 10838500 + }, + { + "epoch": 31.37, + "learning_rate": 3.431843969988885e-05, + "loss": 2.1133, + "step": 10839000 + }, + { + "epoch": 31.38, + "learning_rate": 3.431771605224157e-05, + "loss": 2.1159, + "step": 10839500 + }, + { + "epoch": 31.38, + "learning_rate": 3.4316992404594295e-05, + "loss": 2.0959, + "step": 10840000 + }, + { + "epoch": 31.38, + "learning_rate": 3.431627020424231e-05, + "loss": 2.1028, + "step": 10840500 + }, + { + "epoch": 31.38, + "learning_rate": 3.431554655659503e-05, + "loss": 2.0914, + "step": 10841000 + }, + { + "epoch": 31.38, + "learning_rate": 3.4314822908947755e-05, + "loss": 2.0882, + "step": 10841500 + }, + { + "epoch": 31.38, + "learning_rate": 3.4314099261300484e-05, + "loss": 2.1068, + "step": 10842000 + }, + { + "epoch": 31.38, + "learning_rate": 3.4313375613653206e-05, + "loss": 2.1187, + "step": 10842500 + }, + { + "epoch": 31.39, + "learning_rate": 3.4312651966005935e-05, + "loss": 2.0559, + "step": 10843000 + }, + { + "epoch": 31.39, + "learning_rate": 3.431192831835866e-05, + "loss": 2.0938, + "step": 10843500 + }, + { + "epoch": 31.39, + "learning_rate": 3.431120611800667e-05, + "loss": 2.1121, + "step": 10844000 + }, + { + "epoch": 31.39, + "learning_rate": 3.4310482470359395e-05, + "loss": 2.1055, + "step": 10844500 + }, + { + "epoch": 31.39, + "learning_rate": 3.430975882271212e-05, + "loss": 2.0949, + "step": 10845000 + }, + { + "epoch": 31.39, + "learning_rate": 3.430903517506484e-05, + "loss": 2.1139, + "step": 10845500 + }, + { + "epoch": 31.39, + "learning_rate": 3.430831152741756e-05, + "loss": 2.1157, + "step": 10846000 + }, + { + "epoch": 31.4, + "learning_rate": 3.4307587879770284e-05, + "loss": 2.1047, + "step": 10846500 + }, + { + "epoch": 31.4, + "learning_rate": 3.4306865679418306e-05, + "loss": 2.0747, + "step": 10847000 + }, + { + "epoch": 31.4, + "learning_rate": 3.430614347906632e-05, + "loss": 2.1091, + "step": 10847500 + }, + { + "epoch": 31.4, + "learning_rate": 3.4305419831419044e-05, + "loss": 2.0779, + "step": 10848000 + }, + { + "epoch": 31.4, + "learning_rate": 3.4304696183771766e-05, + "loss": 2.0986, + "step": 10848500 + }, + { + "epoch": 31.4, + "learning_rate": 3.430397253612449e-05, + "loss": 2.0735, + "step": 10849000 + }, + { + "epoch": 31.4, + "learning_rate": 3.430325033577251e-05, + "loss": 2.1085, + "step": 10849500 + }, + { + "epoch": 31.41, + "learning_rate": 3.430252668812523e-05, + "loss": 2.0823, + "step": 10850000 + }, + { + "epoch": 31.41, + "learning_rate": 3.430180304047796e-05, + "loss": 2.0913, + "step": 10850500 + }, + { + "epoch": 31.41, + "learning_rate": 3.4301079392830684e-05, + "loss": 2.0983, + "step": 10851000 + }, + { + "epoch": 31.41, + "learning_rate": 3.4300355745183406e-05, + "loss": 2.0837, + "step": 10851500 + }, + { + "epoch": 31.41, + "learning_rate": 3.429963209753613e-05, + "loss": 2.0772, + "step": 10852000 + }, + { + "epoch": 31.41, + "learning_rate": 3.429890844988885e-05, + "loss": 2.1056, + "step": 10852500 + }, + { + "epoch": 31.41, + "learning_rate": 3.429818480224157e-05, + "loss": 2.0794, + "step": 10853000 + }, + { + "epoch": 31.42, + "learning_rate": 3.429746260188959e-05, + "loss": 2.0961, + "step": 10853500 + }, + { + "epoch": 31.42, + "learning_rate": 3.429673895424231e-05, + "loss": 2.0784, + "step": 10854000 + }, + { + "epoch": 31.42, + "learning_rate": 3.429601530659503e-05, + "loss": 2.1127, + "step": 10854500 + }, + { + "epoch": 31.42, + "learning_rate": 3.4295293106243056e-05, + "loss": 2.0786, + "step": 10855000 + }, + { + "epoch": 31.42, + "learning_rate": 3.429456945859578e-05, + "loss": 2.0987, + "step": 10855500 + }, + { + "epoch": 31.42, + "learning_rate": 3.42938458109485e-05, + "loss": 2.0833, + "step": 10856000 + }, + { + "epoch": 31.43, + "learning_rate": 3.429312216330122e-05, + "loss": 2.1303, + "step": 10856500 + }, + { + "epoch": 31.43, + "learning_rate": 3.4292398515653944e-05, + "loss": 2.1016, + "step": 10857000 + }, + { + "epoch": 31.43, + "learning_rate": 3.4291674868006673e-05, + "loss": 2.0897, + "step": 10857500 + }, + { + "epoch": 31.43, + "learning_rate": 3.4290951220359396e-05, + "loss": 2.0975, + "step": 10858000 + }, + { + "epoch": 31.43, + "learning_rate": 3.429022902000741e-05, + "loss": 2.0983, + "step": 10858500 + }, + { + "epoch": 31.43, + "learning_rate": 3.4289505372360134e-05, + "loss": 2.1078, + "step": 10859000 + }, + { + "epoch": 31.43, + "learning_rate": 3.428878172471286e-05, + "loss": 2.0993, + "step": 10859500 + }, + { + "epoch": 31.44, + "learning_rate": 3.4288058077065585e-05, + "loss": 2.1001, + "step": 10860000 + }, + { + "epoch": 31.44, + "learning_rate": 3.428733442941831e-05, + "loss": 2.0968, + "step": 10860500 + }, + { + "epoch": 31.44, + "learning_rate": 3.428661078177103e-05, + "loss": 2.1032, + "step": 10861000 + }, + { + "epoch": 31.44, + "learning_rate": 3.428588713412375e-05, + "loss": 2.0934, + "step": 10861500 + }, + { + "epoch": 31.44, + "learning_rate": 3.4285163486476474e-05, + "loss": 2.096, + "step": 10862000 + }, + { + "epoch": 31.44, + "learning_rate": 3.4284439838829196e-05, + "loss": 2.0808, + "step": 10862500 + }, + { + "epoch": 31.44, + "learning_rate": 3.428371619118192e-05, + "loss": 2.0996, + "step": 10863000 + }, + { + "epoch": 31.45, + "learning_rate": 3.428299254353464e-05, + "loss": 2.0976, + "step": 10863500 + }, + { + "epoch": 31.45, + "learning_rate": 3.428226889588736e-05, + "loss": 2.0983, + "step": 10864000 + }, + { + "epoch": 31.45, + "learning_rate": 3.4281545248240085e-05, + "loss": 2.1039, + "step": 10864500 + }, + { + "epoch": 31.45, + "learning_rate": 3.4280821600592814e-05, + "loss": 2.0984, + "step": 10865000 + }, + { + "epoch": 31.45, + "learning_rate": 3.4280097952945536e-05, + "loss": 2.0732, + "step": 10865500 + }, + { + "epoch": 31.45, + "learning_rate": 3.427937575259356e-05, + "loss": 2.0955, + "step": 10866000 + }, + { + "epoch": 31.45, + "learning_rate": 3.427865210494628e-05, + "loss": 2.0859, + "step": 10866500 + }, + { + "epoch": 31.46, + "learning_rate": 3.4277928457299e-05, + "loss": 2.1031, + "step": 10867000 + }, + { + "epoch": 31.46, + "learning_rate": 3.4277204809651725e-05, + "loss": 2.1112, + "step": 10867500 + }, + { + "epoch": 31.46, + "learning_rate": 3.427648260929974e-05, + "loss": 2.1046, + "step": 10868000 + }, + { + "epoch": 31.46, + "learning_rate": 3.427575896165246e-05, + "loss": 2.1204, + "step": 10868500 + }, + { + "epoch": 31.46, + "learning_rate": 3.4275035314005185e-05, + "loss": 2.0923, + "step": 10869000 + }, + { + "epoch": 31.46, + "learning_rate": 3.427431311365321e-05, + "loss": 2.1067, + "step": 10869500 + }, + { + "epoch": 31.46, + "learning_rate": 3.427358946600593e-05, + "loss": 2.0891, + "step": 10870000 + }, + { + "epoch": 31.47, + "learning_rate": 3.427286581835865e-05, + "loss": 2.1113, + "step": 10870500 + }, + { + "epoch": 31.47, + "learning_rate": 3.4272142170711374e-05, + "loss": 2.0872, + "step": 10871000 + }, + { + "epoch": 31.47, + "learning_rate": 3.4271418523064096e-05, + "loss": 2.0944, + "step": 10871500 + }, + { + "epoch": 31.47, + "learning_rate": 3.427069632271211e-05, + "loss": 2.0745, + "step": 10872000 + }, + { + "epoch": 31.47, + "learning_rate": 3.426997267506484e-05, + "loss": 2.0872, + "step": 10872500 + }, + { + "epoch": 31.47, + "learning_rate": 3.426924902741756e-05, + "loss": 2.0983, + "step": 10873000 + }, + { + "epoch": 31.47, + "learning_rate": 3.4268525379770285e-05, + "loss": 2.1221, + "step": 10873500 + }, + { + "epoch": 31.48, + "learning_rate": 3.4267801732123014e-05, + "loss": 2.1001, + "step": 10874000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426707808447574e-05, + "loss": 2.0905, + "step": 10874500 + }, + { + "epoch": 31.48, + "learning_rate": 3.426635588412375e-05, + "loss": 2.087, + "step": 10875000 + }, + { + "epoch": 31.48, + "learning_rate": 3.4265632236476474e-05, + "loss": 2.1269, + "step": 10875500 + }, + { + "epoch": 31.48, + "learning_rate": 3.42649085888292e-05, + "loss": 2.1054, + "step": 10876000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426418494118192e-05, + "loss": 2.0945, + "step": 10876500 + }, + { + "epoch": 31.48, + "learning_rate": 3.426346274082994e-05, + "loss": 2.0911, + "step": 10877000 + }, + { + "epoch": 31.49, + "learning_rate": 3.4262739093182664e-05, + "loss": 2.0731, + "step": 10877500 + }, + { + "epoch": 31.49, + "learning_rate": 3.4262015445535386e-05, + "loss": 2.096, + "step": 10878000 + }, + { + "epoch": 31.49, + "learning_rate": 3.426129179788811e-05, + "loss": 2.094, + "step": 10878500 + }, + { + "epoch": 31.49, + "learning_rate": 3.426056815024083e-05, + "loss": 2.0833, + "step": 10879000 + }, + { + "epoch": 31.49, + "learning_rate": 3.425984450259355e-05, + "loss": 2.1098, + "step": 10879500 + }, + { + "epoch": 31.49, + "learning_rate": 3.425912085494628e-05, + "loss": 2.1133, + "step": 10880000 + }, + { + "epoch": 31.49, + "learning_rate": 3.4258397207299004e-05, + "loss": 2.1041, + "step": 10880500 + }, + { + "epoch": 31.5, + "learning_rate": 3.4257673559651726e-05, + "loss": 2.1054, + "step": 10881000 + }, + { + "epoch": 31.5, + "learning_rate": 3.425695135929974e-05, + "loss": 2.0711, + "step": 10881500 + }, + { + "epoch": 31.5, + "learning_rate": 3.4256227711652464e-05, + "loss": 2.0978, + "step": 10882000 + }, + { + "epoch": 31.5, + "learning_rate": 3.4255505511300486e-05, + "loss": 2.0898, + "step": 10882500 + }, + { + "epoch": 31.5, + "learning_rate": 3.425478186365321e-05, + "loss": 2.1039, + "step": 10883000 + }, + { + "epoch": 31.5, + "learning_rate": 3.425405821600593e-05, + "loss": 2.068, + "step": 10883500 + }, + { + "epoch": 31.5, + "learning_rate": 3.425333456835865e-05, + "loss": 2.0865, + "step": 10884000 + }, + { + "epoch": 31.51, + "learning_rate": 3.4252610920711375e-05, + "loss": 2.0796, + "step": 10884500 + }, + { + "epoch": 31.51, + "learning_rate": 3.42518872730641e-05, + "loss": 2.084, + "step": 10885000 + }, + { + "epoch": 31.51, + "learning_rate": 3.425116362541682e-05, + "loss": 2.0894, + "step": 10885500 + }, + { + "epoch": 31.51, + "learning_rate": 3.425043997776954e-05, + "loss": 2.0892, + "step": 10886000 + }, + { + "epoch": 31.51, + "learning_rate": 3.4249716330122264e-05, + "loss": 2.1096, + "step": 10886500 + }, + { + "epoch": 31.51, + "learning_rate": 3.424899268247499e-05, + "loss": 2.0915, + "step": 10887000 + }, + { + "epoch": 31.51, + "learning_rate": 3.4248270482123015e-05, + "loss": 2.0995, + "step": 10887500 + }, + { + "epoch": 31.52, + "learning_rate": 3.424754683447574e-05, + "loss": 2.11, + "step": 10888000 + }, + { + "epoch": 31.52, + "learning_rate": 3.424682318682846e-05, + "loss": 2.0688, + "step": 10888500 + }, + { + "epoch": 31.52, + "learning_rate": 3.424609953918118e-05, + "loss": 2.093, + "step": 10889000 + }, + { + "epoch": 31.52, + "learning_rate": 3.4245375891533904e-05, + "loss": 2.1091, + "step": 10889500 + }, + { + "epoch": 31.52, + "learning_rate": 3.4244652243886626e-05, + "loss": 2.1233, + "step": 10890000 + }, + { + "epoch": 31.52, + "learning_rate": 3.424392859623935e-05, + "loss": 2.0905, + "step": 10890500 + }, + { + "epoch": 31.52, + "learning_rate": 3.424320494859207e-05, + "loss": 2.0978, + "step": 10891000 + }, + { + "epoch": 31.53, + "learning_rate": 3.424248130094479e-05, + "loss": 2.0914, + "step": 10891500 + }, + { + "epoch": 31.53, + "learning_rate": 3.4241757653297515e-05, + "loss": 2.0883, + "step": 10892000 + }, + { + "epoch": 31.53, + "learning_rate": 3.4241034005650244e-05, + "loss": 2.0893, + "step": 10892500 + }, + { + "epoch": 31.53, + "learning_rate": 3.424031325259355e-05, + "loss": 2.1158, + "step": 10893000 + }, + { + "epoch": 31.53, + "learning_rate": 3.4239589604946275e-05, + "loss": 2.09, + "step": 10893500 + }, + { + "epoch": 31.53, + "learning_rate": 3.4238865957299e-05, + "loss": 2.1035, + "step": 10894000 + }, + { + "epoch": 31.54, + "learning_rate": 3.423814375694701e-05, + "loss": 2.0978, + "step": 10894500 + }, + { + "epoch": 31.54, + "learning_rate": 3.423742010929974e-05, + "loss": 2.1041, + "step": 10895000 + }, + { + "epoch": 31.54, + "learning_rate": 3.4236696461652465e-05, + "loss": 2.1077, + "step": 10895500 + }, + { + "epoch": 31.54, + "learning_rate": 3.4235972814005194e-05, + "loss": 2.0975, + "step": 10896000 + }, + { + "epoch": 31.54, + "learning_rate": 3.4235249166357916e-05, + "loss": 2.0728, + "step": 10896500 + }, + { + "epoch": 31.54, + "learning_rate": 3.423452551871064e-05, + "loss": 2.0961, + "step": 10897000 + }, + { + "epoch": 31.54, + "learning_rate": 3.423380187106336e-05, + "loss": 2.0974, + "step": 10897500 + }, + { + "epoch": 31.55, + "learning_rate": 3.423307822341608e-05, + "loss": 2.0854, + "step": 10898000 + }, + { + "epoch": 31.55, + "learning_rate": 3.4232354575768805e-05, + "loss": 2.0985, + "step": 10898500 + }, + { + "epoch": 31.55, + "learning_rate": 3.423163092812153e-05, + "loss": 2.0783, + "step": 10899000 + }, + { + "epoch": 31.55, + "learning_rate": 3.423090728047425e-05, + "loss": 2.1012, + "step": 10899500 + }, + { + "epoch": 31.55, + "learning_rate": 3.423018363282697e-05, + "loss": 2.0991, + "step": 10900000 + }, + { + "epoch": 31.55, + "learning_rate": 3.4229459985179694e-05, + "loss": 2.0948, + "step": 10900500 + }, + { + "epoch": 31.55, + "learning_rate": 3.4228736337532416e-05, + "loss": 2.0835, + "step": 10901000 + }, + { + "epoch": 31.56, + "learning_rate": 3.4228012689885145e-05, + "loss": 2.1164, + "step": 10901500 + }, + { + "epoch": 31.56, + "learning_rate": 3.422728904223787e-05, + "loss": 2.0969, + "step": 10902000 + }, + { + "epoch": 31.56, + "learning_rate": 3.4226565394590596e-05, + "loss": 2.0923, + "step": 10902500 + }, + { + "epoch": 31.56, + "learning_rate": 3.422584319423861e-05, + "loss": 2.0715, + "step": 10903000 + }, + { + "epoch": 31.56, + "learning_rate": 3.4225119546591334e-05, + "loss": 2.1102, + "step": 10903500 + }, + { + "epoch": 31.56, + "learning_rate": 3.4224395898944056e-05, + "loss": 2.1148, + "step": 10904000 + }, + { + "epoch": 31.56, + "learning_rate": 3.422367369859207e-05, + "loss": 2.1002, + "step": 10904500 + }, + { + "epoch": 31.57, + "learning_rate": 3.4222950050944794e-05, + "loss": 2.1048, + "step": 10905000 + }, + { + "epoch": 31.57, + "learning_rate": 3.4222226403297516e-05, + "loss": 2.1031, + "step": 10905500 + }, + { + "epoch": 31.57, + "learning_rate": 3.422150420294554e-05, + "loss": 2.0951, + "step": 10906000 + }, + { + "epoch": 31.57, + "learning_rate": 3.422078055529826e-05, + "loss": 2.0905, + "step": 10906500 + }, + { + "epoch": 31.57, + "learning_rate": 3.4220058354946276e-05, + "loss": 2.1179, + "step": 10907000 + }, + { + "epoch": 31.57, + "learning_rate": 3.4219334707299e-05, + "loss": 2.1117, + "step": 10907500 + }, + { + "epoch": 31.57, + "learning_rate": 3.421861105965172e-05, + "loss": 2.0828, + "step": 10908000 + }, + { + "epoch": 31.58, + "learning_rate": 3.421788741200444e-05, + "loss": 2.1007, + "step": 10908500 + }, + { + "epoch": 31.58, + "learning_rate": 3.4217163764357165e-05, + "loss": 2.1162, + "step": 10909000 + }, + { + "epoch": 31.58, + "learning_rate": 3.4216440116709894e-05, + "loss": 2.1099, + "step": 10909500 + }, + { + "epoch": 31.58, + "learning_rate": 3.4215716469062616e-05, + "loss": 2.1086, + "step": 10910000 + }, + { + "epoch": 31.58, + "learning_rate": 3.4214992821415345e-05, + "loss": 2.0836, + "step": 10910500 + }, + { + "epoch": 31.58, + "learning_rate": 3.421426917376807e-05, + "loss": 2.0936, + "step": 10911000 + }, + { + "epoch": 31.58, + "learning_rate": 3.421354552612079e-05, + "loss": 2.0824, + "step": 10911500 + }, + { + "epoch": 31.59, + "learning_rate": 3.421282187847351e-05, + "loss": 2.0822, + "step": 10912000 + }, + { + "epoch": 31.59, + "learning_rate": 3.4212098230826234e-05, + "loss": 2.1023, + "step": 10912500 + }, + { + "epoch": 31.59, + "learning_rate": 3.421137458317896e-05, + "loss": 2.0839, + "step": 10913000 + }, + { + "epoch": 31.59, + "learning_rate": 3.421065093553168e-05, + "loss": 2.1395, + "step": 10913500 + }, + { + "epoch": 31.59, + "learning_rate": 3.42099272878844e-05, + "loss": 2.1137, + "step": 10914000 + }, + { + "epoch": 31.59, + "learning_rate": 3.420920364023712e-05, + "loss": 2.1097, + "step": 10914500 + }, + { + "epoch": 31.59, + "learning_rate": 3.4208479992589846e-05, + "loss": 2.1132, + "step": 10915000 + }, + { + "epoch": 31.6, + "learning_rate": 3.420775634494257e-05, + "loss": 2.1244, + "step": 10915500 + }, + { + "epoch": 31.6, + "learning_rate": 3.42070326972953e-05, + "loss": 2.0953, + "step": 10916000 + }, + { + "epoch": 31.6, + "learning_rate": 3.420630904964802e-05, + "loss": 2.0949, + "step": 10916500 + }, + { + "epoch": 31.6, + "learning_rate": 3.420558540200075e-05, + "loss": 2.1194, + "step": 10917000 + }, + { + "epoch": 31.6, + "learning_rate": 3.4204863201648764e-05, + "loss": 2.0949, + "step": 10917500 + }, + { + "epoch": 31.6, + "learning_rate": 3.4204139554001486e-05, + "loss": 2.1254, + "step": 10918000 + }, + { + "epoch": 31.6, + "learning_rate": 3.42034173536495e-05, + "loss": 2.0932, + "step": 10918500 + }, + { + "epoch": 31.61, + "learning_rate": 3.4202693706002224e-05, + "loss": 2.0915, + "step": 10919000 + }, + { + "epoch": 31.61, + "learning_rate": 3.4201970058354946e-05, + "loss": 2.0977, + "step": 10919500 + }, + { + "epoch": 31.61, + "learning_rate": 3.420124785800297e-05, + "loss": 2.1169, + "step": 10920000 + }, + { + "epoch": 31.61, + "learning_rate": 3.4200525657650984e-05, + "loss": 2.1015, + "step": 10920500 + }, + { + "epoch": 31.61, + "learning_rate": 3.4199802010003706e-05, + "loss": 2.1039, + "step": 10921000 + }, + { + "epoch": 31.61, + "learning_rate": 3.419907836235643e-05, + "loss": 2.0963, + "step": 10921500 + }, + { + "epoch": 31.61, + "learning_rate": 3.419835471470915e-05, + "loss": 2.0729, + "step": 10922000 + }, + { + "epoch": 31.62, + "learning_rate": 3.419763106706187e-05, + "loss": 2.1114, + "step": 10922500 + }, + { + "epoch": 31.62, + "learning_rate": 3.4196907419414595e-05, + "loss": 2.1, + "step": 10923000 + }, + { + "epoch": 31.62, + "learning_rate": 3.4196183771767324e-05, + "loss": 2.0952, + "step": 10923500 + }, + { + "epoch": 31.62, + "learning_rate": 3.4195460124120046e-05, + "loss": 2.1091, + "step": 10924000 + }, + { + "epoch": 31.62, + "learning_rate": 3.419473792376807e-05, + "loss": 2.1133, + "step": 10924500 + }, + { + "epoch": 31.62, + "learning_rate": 3.4194015723416084e-05, + "loss": 2.0939, + "step": 10925000 + }, + { + "epoch": 31.62, + "learning_rate": 3.4193292075768806e-05, + "loss": 2.098, + "step": 10925500 + }, + { + "epoch": 31.63, + "learning_rate": 3.419256842812153e-05, + "loss": 2.0894, + "step": 10926000 + }, + { + "epoch": 31.63, + "learning_rate": 3.419184478047425e-05, + "loss": 2.0987, + "step": 10926500 + }, + { + "epoch": 31.63, + "learning_rate": 3.419112113282697e-05, + "loss": 2.0984, + "step": 10927000 + }, + { + "epoch": 31.63, + "learning_rate": 3.4190397485179695e-05, + "loss": 2.0829, + "step": 10927500 + }, + { + "epoch": 31.63, + "learning_rate": 3.4189673837532424e-05, + "loss": 2.0904, + "step": 10928000 + }, + { + "epoch": 31.63, + "learning_rate": 3.4188950189885146e-05, + "loss": 2.1089, + "step": 10928500 + }, + { + "epoch": 31.63, + "learning_rate": 3.418822654223787e-05, + "loss": 2.0907, + "step": 10929000 + }, + { + "epoch": 31.64, + "learning_rate": 3.418750289459059e-05, + "loss": 2.0969, + "step": 10929500 + }, + { + "epoch": 31.64, + "learning_rate": 3.418677924694331e-05, + "loss": 2.0721, + "step": 10930000 + }, + { + "epoch": 31.64, + "learning_rate": 3.4186055599296035e-05, + "loss": 2.0954, + "step": 10930500 + }, + { + "epoch": 31.64, + "learning_rate": 3.418533195164876e-05, + "loss": 2.1008, + "step": 10931000 + }, + { + "epoch": 31.64, + "learning_rate": 3.418460830400149e-05, + "loss": 2.0946, + "step": 10931500 + }, + { + "epoch": 31.64, + "learning_rate": 3.41838861036495e-05, + "loss": 2.1146, + "step": 10932000 + }, + { + "epoch": 31.65, + "learning_rate": 3.4183162456002224e-05, + "loss": 2.0996, + "step": 10932500 + }, + { + "epoch": 31.65, + "learning_rate": 3.418243880835495e-05, + "loss": 2.1003, + "step": 10933000 + }, + { + "epoch": 31.65, + "learning_rate": 3.4181715160707676e-05, + "loss": 2.0839, + "step": 10933500 + }, + { + "epoch": 31.65, + "learning_rate": 3.41809915130604e-05, + "loss": 2.0881, + "step": 10934000 + }, + { + "epoch": 31.65, + "learning_rate": 3.418026786541312e-05, + "loss": 2.1211, + "step": 10934500 + }, + { + "epoch": 31.65, + "learning_rate": 3.417954421776584e-05, + "loss": 2.082, + "step": 10935000 + }, + { + "epoch": 31.65, + "learning_rate": 3.417882201741386e-05, + "loss": 2.1041, + "step": 10935500 + }, + { + "epoch": 31.66, + "learning_rate": 3.4178099817061873e-05, + "loss": 2.0947, + "step": 10936000 + }, + { + "epoch": 31.66, + "learning_rate": 3.4177376169414596e-05, + "loss": 2.0974, + "step": 10936500 + }, + { + "epoch": 31.66, + "learning_rate": 3.4176652521767325e-05, + "loss": 2.112, + "step": 10937000 + }, + { + "epoch": 31.66, + "learning_rate": 3.417592887412005e-05, + "loss": 2.1184, + "step": 10937500 + }, + { + "epoch": 31.66, + "learning_rate": 3.417520522647277e-05, + "loss": 2.1018, + "step": 10938000 + }, + { + "epoch": 31.66, + "learning_rate": 3.4174483026120785e-05, + "loss": 2.1195, + "step": 10938500 + }, + { + "epoch": 31.66, + "learning_rate": 3.417375937847351e-05, + "loss": 2.1054, + "step": 10939000 + }, + { + "epoch": 31.67, + "learning_rate": 3.4173035730826236e-05, + "loss": 2.0946, + "step": 10939500 + }, + { + "epoch": 31.67, + "learning_rate": 3.417231353047425e-05, + "loss": 2.1097, + "step": 10940000 + }, + { + "epoch": 31.67, + "learning_rate": 3.4171589882826974e-05, + "loss": 2.096, + "step": 10940500 + }, + { + "epoch": 31.67, + "learning_rate": 3.4170866235179696e-05, + "loss": 2.1072, + "step": 10941000 + }, + { + "epoch": 31.67, + "learning_rate": 3.4170142587532425e-05, + "loss": 2.1039, + "step": 10941500 + }, + { + "epoch": 31.67, + "learning_rate": 3.416941893988515e-05, + "loss": 2.089, + "step": 10942000 + }, + { + "epoch": 31.67, + "learning_rate": 3.416869529223787e-05, + "loss": 2.0889, + "step": 10942500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416797164459059e-05, + "loss": 2.0931, + "step": 10943000 + }, + { + "epoch": 31.68, + "learning_rate": 3.4167247996943314e-05, + "loss": 2.1067, + "step": 10943500 + }, + { + "epoch": 31.68, + "learning_rate": 3.4166524349296036e-05, + "loss": 2.1005, + "step": 10944000 + }, + { + "epoch": 31.68, + "learning_rate": 3.416580070164876e-05, + "loss": 2.0979, + "step": 10944500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416507705400148e-05, + "loss": 2.1032, + "step": 10945000 + }, + { + "epoch": 31.68, + "learning_rate": 3.4164356300944796e-05, + "loss": 2.0937, + "step": 10945500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416363265329752e-05, + "loss": 2.1034, + "step": 10946000 + }, + { + "epoch": 31.69, + "learning_rate": 3.416290900565024e-05, + "loss": 2.134, + "step": 10946500 + }, + { + "epoch": 31.69, + "learning_rate": 3.416218535800297e-05, + "loss": 2.0973, + "step": 10947000 + }, + { + "epoch": 31.69, + "learning_rate": 3.416146171035569e-05, + "loss": 2.0949, + "step": 10947500 + }, + { + "epoch": 31.69, + "learning_rate": 3.4160738062708414e-05, + "loss": 2.0857, + "step": 10948000 + }, + { + "epoch": 31.69, + "learning_rate": 3.4160014415061136e-05, + "loss": 2.1004, + "step": 10948500 + }, + { + "epoch": 31.69, + "learning_rate": 3.415929076741386e-05, + "loss": 2.1149, + "step": 10949000 + }, + { + "epoch": 31.69, + "learning_rate": 3.415856711976658e-05, + "loss": 2.0953, + "step": 10949500 + }, + { + "epoch": 31.7, + "learning_rate": 3.41578434721193e-05, + "loss": 2.1168, + "step": 10950000 + }, + { + "epoch": 31.7, + "learning_rate": 3.4157121271767326e-05, + "loss": 2.1018, + "step": 10950500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415639762412005e-05, + "loss": 2.1135, + "step": 10951000 + }, + { + "epoch": 31.7, + "learning_rate": 3.415567397647277e-05, + "loss": 2.1014, + "step": 10951500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415495032882549e-05, + "loss": 2.0938, + "step": 10952000 + }, + { + "epoch": 31.7, + "learning_rate": 3.4154226681178214e-05, + "loss": 2.1251, + "step": 10952500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415350303353094e-05, + "loss": 2.0813, + "step": 10953000 + }, + { + "epoch": 31.71, + "learning_rate": 3.415277938588366e-05, + "loss": 2.1092, + "step": 10953500 + }, + { + "epoch": 31.71, + "learning_rate": 3.415205573823639e-05, + "loss": 2.0988, + "step": 10954000 + }, + { + "epoch": 31.71, + "learning_rate": 3.415133209058911e-05, + "loss": 2.0968, + "step": 10954500 + }, + { + "epoch": 31.71, + "learning_rate": 3.415060844294183e-05, + "loss": 2.0729, + "step": 10955000 + }, + { + "epoch": 31.71, + "learning_rate": 3.4149886242589855e-05, + "loss": 2.1287, + "step": 10955500 + }, + { + "epoch": 31.71, + "learning_rate": 3.414916404223787e-05, + "loss": 2.0745, + "step": 10956000 + }, + { + "epoch": 31.71, + "learning_rate": 3.4148441841885886e-05, + "loss": 2.1094, + "step": 10956500 + }, + { + "epoch": 31.72, + "learning_rate": 3.414771819423861e-05, + "loss": 2.124, + "step": 10957000 + }, + { + "epoch": 31.72, + "learning_rate": 3.414699454659133e-05, + "loss": 2.1024, + "step": 10957500 + }, + { + "epoch": 31.72, + "learning_rate": 3.414627089894405e-05, + "loss": 2.0991, + "step": 10958000 + }, + { + "epoch": 31.72, + "learning_rate": 3.4145547251296775e-05, + "loss": 2.1159, + "step": 10958500 + }, + { + "epoch": 31.72, + "learning_rate": 3.4144823603649504e-05, + "loss": 2.1087, + "step": 10959000 + }, + { + "epoch": 31.72, + "learning_rate": 3.4144099956002226e-05, + "loss": 2.0965, + "step": 10959500 + }, + { + "epoch": 31.72, + "learning_rate": 3.414337775565024e-05, + "loss": 2.1053, + "step": 10960000 + }, + { + "epoch": 31.73, + "learning_rate": 3.4142654108002964e-05, + "loss": 2.0802, + "step": 10960500 + }, + { + "epoch": 31.73, + "learning_rate": 3.4141930460355686e-05, + "loss": 2.1218, + "step": 10961000 + }, + { + "epoch": 31.73, + "learning_rate": 3.414120681270841e-05, + "loss": 2.0974, + "step": 10961500 + }, + { + "epoch": 31.73, + "learning_rate": 3.414048316506114e-05, + "loss": 2.1067, + "step": 10962000 + }, + { + "epoch": 31.73, + "learning_rate": 3.413975951741386e-05, + "loss": 2.0764, + "step": 10962500 + }, + { + "epoch": 31.73, + "learning_rate": 3.4139037317061875e-05, + "loss": 2.1022, + "step": 10963000 + }, + { + "epoch": 31.73, + "learning_rate": 3.4138313669414604e-05, + "loss": 2.1155, + "step": 10963500 + }, + { + "epoch": 31.74, + "learning_rate": 3.4137590021767326e-05, + "loss": 2.104, + "step": 10964000 + }, + { + "epoch": 31.74, + "learning_rate": 3.413686637412005e-05, + "loss": 2.1135, + "step": 10964500 + }, + { + "epoch": 31.74, + "learning_rate": 3.413614272647277e-05, + "loss": 2.0734, + "step": 10965000 + }, + { + "epoch": 31.74, + "learning_rate": 3.413541907882549e-05, + "loss": 2.0836, + "step": 10965500 + }, + { + "epoch": 31.74, + "learning_rate": 3.4134695431178215e-05, + "loss": 2.085, + "step": 10966000 + }, + { + "epoch": 31.74, + "learning_rate": 3.413397178353094e-05, + "loss": 2.0995, + "step": 10966500 + }, + { + "epoch": 31.74, + "learning_rate": 3.413324813588366e-05, + "loss": 2.0803, + "step": 10967000 + }, + { + "epoch": 31.75, + "learning_rate": 3.4132525935531675e-05, + "loss": 2.1066, + "step": 10967500 + }, + { + "epoch": 31.75, + "learning_rate": 3.4131802287884404e-05, + "loss": 2.1115, + "step": 10968000 + }, + { + "epoch": 31.75, + "learning_rate": 3.4131078640237127e-05, + "loss": 2.0802, + "step": 10968500 + }, + { + "epoch": 31.75, + "learning_rate": 3.4130354992589856e-05, + "loss": 2.0897, + "step": 10969000 + }, + { + "epoch": 31.75, + "learning_rate": 3.412963279223787e-05, + "loss": 2.0987, + "step": 10969500 + }, + { + "epoch": 31.75, + "learning_rate": 3.412890914459059e-05, + "loss": 2.0865, + "step": 10970000 + }, + { + "epoch": 31.76, + "learning_rate": 3.4128185496943316e-05, + "loss": 2.073, + "step": 10970500 + }, + { + "epoch": 31.76, + "learning_rate": 3.412746184929604e-05, + "loss": 2.0925, + "step": 10971000 + }, + { + "epoch": 31.76, + "learning_rate": 3.412673820164876e-05, + "loss": 2.0883, + "step": 10971500 + }, + { + "epoch": 31.76, + "learning_rate": 3.4126016001296776e-05, + "loss": 2.1024, + "step": 10972000 + }, + { + "epoch": 31.76, + "learning_rate": 3.4125292353649505e-05, + "loss": 2.1092, + "step": 10972500 + }, + { + "epoch": 31.76, + "learning_rate": 3.412456870600223e-05, + "loss": 2.0778, + "step": 10973000 + }, + { + "epoch": 31.76, + "learning_rate": 3.412384505835495e-05, + "loss": 2.0979, + "step": 10973500 + }, + { + "epoch": 31.77, + "learning_rate": 3.412312141070767e-05, + "loss": 2.0916, + "step": 10974000 + }, + { + "epoch": 31.77, + "learning_rate": 3.4122397763060394e-05, + "loss": 2.1182, + "step": 10974500 + }, + { + "epoch": 31.77, + "learning_rate": 3.4121674115413116e-05, + "loss": 2.1062, + "step": 10975000 + }, + { + "epoch": 31.77, + "learning_rate": 3.412095191506113e-05, + "loss": 2.0791, + "step": 10975500 + }, + { + "epoch": 31.77, + "learning_rate": 3.4120228267413854e-05, + "loss": 2.1213, + "step": 10976000 + }, + { + "epoch": 31.77, + "learning_rate": 3.4119504619766576e-05, + "loss": 2.1095, + "step": 10976500 + }, + { + "epoch": 31.77, + "learning_rate": 3.4118780972119305e-05, + "loss": 2.0942, + "step": 10977000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411805732447203e-05, + "loss": 2.0898, + "step": 10977500 + }, + { + "epoch": 31.78, + "learning_rate": 3.4117333676824756e-05, + "loss": 2.1023, + "step": 10978000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411661002917748e-05, + "loss": 2.0885, + "step": 10978500 + }, + { + "epoch": 31.78, + "learning_rate": 3.41158863815302e-05, + "loss": 2.1062, + "step": 10979000 + }, + { + "epoch": 31.78, + "learning_rate": 3.4115164181178216e-05, + "loss": 2.0876, + "step": 10979500 + }, + { + "epoch": 31.78, + "learning_rate": 3.411444053353094e-05, + "loss": 2.1045, + "step": 10980000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411371688588366e-05, + "loss": 2.1202, + "step": 10980500 + }, + { + "epoch": 31.79, + "learning_rate": 3.411299323823638e-05, + "loss": 2.0842, + "step": 10981000 + }, + { + "epoch": 31.79, + "learning_rate": 3.4112269590589105e-05, + "loss": 2.1279, + "step": 10981500 + }, + { + "epoch": 31.79, + "learning_rate": 3.411154739023713e-05, + "loss": 2.1003, + "step": 10982000 + }, + { + "epoch": 31.79, + "learning_rate": 3.411082374258985e-05, + "loss": 2.112, + "step": 10982500 + }, + { + "epoch": 31.79, + "learning_rate": 3.411010009494257e-05, + "loss": 2.0932, + "step": 10983000 + }, + { + "epoch": 31.79, + "learning_rate": 3.410937789459059e-05, + "loss": 2.0937, + "step": 10983500 + }, + { + "epoch": 31.79, + "learning_rate": 3.410865424694331e-05, + "loss": 2.0832, + "step": 10984000 + }, + { + "epoch": 31.8, + "learning_rate": 3.410793059929604e-05, + "loss": 2.0915, + "step": 10984500 + }, + { + "epoch": 31.8, + "learning_rate": 3.410720695164876e-05, + "loss": 2.1044, + "step": 10985000 + }, + { + "epoch": 31.8, + "learning_rate": 3.410648330400148e-05, + "loss": 2.1193, + "step": 10985500 + }, + { + "epoch": 31.8, + "learning_rate": 3.4105761103649505e-05, + "loss": 2.0999, + "step": 10986000 + }, + { + "epoch": 31.8, + "learning_rate": 3.410503745600223e-05, + "loss": 2.1316, + "step": 10986500 + }, + { + "epoch": 31.8, + "learning_rate": 3.410431380835495e-05, + "loss": 2.0909, + "step": 10987000 + }, + { + "epoch": 31.8, + "learning_rate": 3.410359016070767e-05, + "loss": 2.103, + "step": 10987500 + }, + { + "epoch": 31.81, + "learning_rate": 3.4102866513060394e-05, + "loss": 2.0997, + "step": 10988000 + }, + { + "epoch": 31.81, + "learning_rate": 3.410214431270841e-05, + "loss": 2.123, + "step": 10988500 + }, + { + "epoch": 31.81, + "learning_rate": 3.410142066506113e-05, + "loss": 2.1264, + "step": 10989000 + }, + { + "epoch": 31.81, + "learning_rate": 3.4100697017413854e-05, + "loss": 2.0962, + "step": 10989500 + }, + { + "epoch": 31.81, + "learning_rate": 3.409997336976658e-05, + "loss": 2.1111, + "step": 10990000 + }, + { + "epoch": 31.81, + "learning_rate": 3.4099249722119306e-05, + "loss": 2.1121, + "step": 10990500 + }, + { + "epoch": 31.81, + "learning_rate": 3.409852752176732e-05, + "loss": 2.0882, + "step": 10991000 + }, + { + "epoch": 31.82, + "learning_rate": 3.409780387412004e-05, + "loss": 2.1222, + "step": 10991500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409708022647277e-05, + "loss": 2.1022, + "step": 10992000 + }, + { + "epoch": 31.82, + "learning_rate": 3.4096356578825495e-05, + "loss": 2.1035, + "step": 10992500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409563293117822e-05, + "loss": 2.1012, + "step": 10993000 + }, + { + "epoch": 31.82, + "learning_rate": 3.409490928353094e-05, + "loss": 2.0714, + "step": 10993500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409418563588366e-05, + "loss": 2.0741, + "step": 10994000 + }, + { + "epoch": 31.82, + "learning_rate": 3.4093461988236384e-05, + "loss": 2.0775, + "step": 10994500 + }, + { + "epoch": 31.83, + "learning_rate": 3.4092739787884406e-05, + "loss": 2.1194, + "step": 10995000 + }, + { + "epoch": 31.83, + "learning_rate": 3.409201758753242e-05, + "loss": 2.0808, + "step": 10995500 + }, + { + "epoch": 31.83, + "learning_rate": 3.4091293939885144e-05, + "loss": 2.1009, + "step": 10996000 + }, + { + "epoch": 31.83, + "learning_rate": 3.4090570292237866e-05, + "loss": 2.1154, + "step": 10996500 + }, + { + "epoch": 31.83, + "learning_rate": 3.408984664459059e-05, + "loss": 2.1056, + "step": 10997000 + }, + { + "epoch": 31.83, + "learning_rate": 3.408912299694331e-05, + "loss": 2.106, + "step": 10997500 + }, + { + "epoch": 31.83, + "learning_rate": 3.408840079659133e-05, + "loss": 2.0857, + "step": 10998000 + }, + { + "epoch": 31.84, + "learning_rate": 3.408767859623935e-05, + "loss": 2.1097, + "step": 10998500 + }, + { + "epoch": 31.84, + "learning_rate": 3.408695494859207e-05, + "loss": 2.0926, + "step": 10999000 + }, + { + "epoch": 31.84, + "learning_rate": 3.40862313009448e-05, + "loss": 2.0938, + "step": 10999500 + }, + { + "epoch": 31.84, + "learning_rate": 3.408550765329752e-05, + "loss": 2.0924, + "step": 11000000 + }, + { + "epoch": 31.84, + "learning_rate": 3.408478545294554e-05, + "loss": 2.0892, + "step": 11000500 + }, + { + "epoch": 31.84, + "learning_rate": 3.408406180529826e-05, + "loss": 2.1199, + "step": 11001000 + }, + { + "epoch": 31.84, + "learning_rate": 3.408333815765098e-05, + "loss": 2.0798, + "step": 11001500 + }, + { + "epoch": 31.85, + "learning_rate": 3.408261451000371e-05, + "loss": 2.1098, + "step": 11002000 + }, + { + "epoch": 31.85, + "learning_rate": 3.408189086235643e-05, + "loss": 2.0651, + "step": 11002500 + }, + { + "epoch": 31.85, + "learning_rate": 3.4081167214709155e-05, + "loss": 2.0822, + "step": 11003000 + }, + { + "epoch": 31.85, + "learning_rate": 3.408044356706188e-05, + "loss": 2.0953, + "step": 11003500 + }, + { + "epoch": 31.85, + "learning_rate": 3.40797199194146e-05, + "loss": 2.0927, + "step": 11004000 + }, + { + "epoch": 31.85, + "learning_rate": 3.407899627176732e-05, + "loss": 2.1134, + "step": 11004500 + }, + { + "epoch": 31.85, + "learning_rate": 3.4078272624120044e-05, + "loss": 2.0864, + "step": 11005000 + }, + { + "epoch": 31.86, + "learning_rate": 3.4077548976472766e-05, + "loss": 2.0868, + "step": 11005500 + }, + { + "epoch": 31.86, + "learning_rate": 3.407682532882549e-05, + "loss": 2.1155, + "step": 11006000 + }, + { + "epoch": 31.86, + "learning_rate": 3.407610168117822e-05, + "loss": 2.1002, + "step": 11006500 + }, + { + "epoch": 31.86, + "learning_rate": 3.407537803353094e-05, + "loss": 2.1142, + "step": 11007000 + }, + { + "epoch": 31.86, + "learning_rate": 3.407465583317896e-05, + "loss": 2.0898, + "step": 11007500 + }, + { + "epoch": 31.86, + "learning_rate": 3.4073932185531684e-05, + "loss": 2.1062, + "step": 11008000 + }, + { + "epoch": 31.87, + "learning_rate": 3.407320853788441e-05, + "loss": 2.0961, + "step": 11008500 + }, + { + "epoch": 31.87, + "learning_rate": 3.407248489023713e-05, + "loss": 2.1142, + "step": 11009000 + }, + { + "epoch": 31.87, + "learning_rate": 3.407176124258985e-05, + "loss": 2.0889, + "step": 11009500 + }, + { + "epoch": 31.87, + "learning_rate": 3.407103759494257e-05, + "loss": 2.0843, + "step": 11010000 + }, + { + "epoch": 31.87, + "learning_rate": 3.4070313947295296e-05, + "loss": 2.0904, + "step": 11010500 + }, + { + "epoch": 31.87, + "learning_rate": 3.406959319423861e-05, + "loss": 2.0881, + "step": 11011000 + }, + { + "epoch": 31.87, + "learning_rate": 3.4068869546591333e-05, + "loss": 2.0938, + "step": 11011500 + }, + { + "epoch": 31.88, + "learning_rate": 3.4068145898944056e-05, + "loss": 2.0809, + "step": 11012000 + }, + { + "epoch": 31.88, + "learning_rate": 3.406742225129678e-05, + "loss": 2.0756, + "step": 11012500 + }, + { + "epoch": 31.88, + "learning_rate": 3.4066700050944794e-05, + "loss": 2.1003, + "step": 11013000 + }, + { + "epoch": 31.88, + "learning_rate": 3.4065976403297516e-05, + "loss": 2.1023, + "step": 11013500 + }, + { + "epoch": 31.88, + "learning_rate": 3.406525275565024e-05, + "loss": 2.1022, + "step": 11014000 + }, + { + "epoch": 31.88, + "learning_rate": 3.406452910800297e-05, + "loss": 2.1127, + "step": 11014500 + }, + { + "epoch": 31.88, + "learning_rate": 3.406380546035569e-05, + "loss": 2.1, + "step": 11015000 + }, + { + "epoch": 31.89, + "learning_rate": 3.406308326000371e-05, + "loss": 2.0899, + "step": 11015500 + }, + { + "epoch": 31.89, + "learning_rate": 3.4062359612356434e-05, + "loss": 2.1049, + "step": 11016000 + }, + { + "epoch": 31.89, + "learning_rate": 3.4061635964709156e-05, + "loss": 2.123, + "step": 11016500 + }, + { + "epoch": 31.89, + "learning_rate": 3.406091231706188e-05, + "loss": 2.1166, + "step": 11017000 + }, + { + "epoch": 31.89, + "learning_rate": 3.40601886694146e-05, + "loss": 2.1091, + "step": 11017500 + }, + { + "epoch": 31.89, + "learning_rate": 3.405946502176732e-05, + "loss": 2.1262, + "step": 11018000 + }, + { + "epoch": 31.89, + "learning_rate": 3.4058741374120045e-05, + "loss": 2.1127, + "step": 11018500 + }, + { + "epoch": 31.9, + "learning_rate": 3.405801772647277e-05, + "loss": 2.1186, + "step": 11019000 + }, + { + "epoch": 31.9, + "learning_rate": 3.405729552612078e-05, + "loss": 2.1037, + "step": 11019500 + }, + { + "epoch": 31.9, + "learning_rate": 3.405657187847351e-05, + "loss": 2.0914, + "step": 11020000 + }, + { + "epoch": 31.9, + "learning_rate": 3.4055848230826234e-05, + "loss": 2.1114, + "step": 11020500 + }, + { + "epoch": 31.9, + "learning_rate": 3.4055124583178956e-05, + "loss": 2.1115, + "step": 11021000 + }, + { + "epoch": 31.9, + "learning_rate": 3.405440093553168e-05, + "loss": 2.1175, + "step": 11021500 + }, + { + "epoch": 31.9, + "learning_rate": 3.405367728788441e-05, + "loss": 2.0948, + "step": 11022000 + }, + { + "epoch": 31.91, + "learning_rate": 3.405295508753242e-05, + "loss": 2.0919, + "step": 11022500 + }, + { + "epoch": 31.91, + "learning_rate": 3.405223288718044e-05, + "loss": 2.1249, + "step": 11023000 + }, + { + "epoch": 31.91, + "learning_rate": 3.405150923953316e-05, + "loss": 2.0901, + "step": 11023500 + }, + { + "epoch": 31.91, + "learning_rate": 3.405078559188589e-05, + "loss": 2.083, + "step": 11024000 + }, + { + "epoch": 31.91, + "learning_rate": 3.405006194423861e-05, + "loss": 2.0982, + "step": 11024500 + }, + { + "epoch": 31.91, + "learning_rate": 3.404933974388663e-05, + "loss": 2.1035, + "step": 11025000 + }, + { + "epoch": 31.91, + "learning_rate": 3.404861754353464e-05, + "loss": 2.0904, + "step": 11025500 + }, + { + "epoch": 31.92, + "learning_rate": 3.4047893895887365e-05, + "loss": 2.0962, + "step": 11026000 + }, + { + "epoch": 31.92, + "learning_rate": 3.404717024824009e-05, + "loss": 2.1016, + "step": 11026500 + }, + { + "epoch": 31.92, + "learning_rate": 3.404644660059281e-05, + "loss": 2.1133, + "step": 11027000 + }, + { + "epoch": 31.92, + "learning_rate": 3.404572295294554e-05, + "loss": 2.1082, + "step": 11027500 + }, + { + "epoch": 31.92, + "learning_rate": 3.404499930529826e-05, + "loss": 2.1004, + "step": 11028000 + }, + { + "epoch": 31.92, + "learning_rate": 3.404427565765098e-05, + "loss": 2.0961, + "step": 11028500 + }, + { + "epoch": 31.92, + "learning_rate": 3.4043552010003706e-05, + "loss": 2.1086, + "step": 11029000 + }, + { + "epoch": 31.93, + "learning_rate": 3.4042828362356435e-05, + "loss": 2.1021, + "step": 11029500 + }, + { + "epoch": 31.93, + "learning_rate": 3.404210471470916e-05, + "loss": 2.1187, + "step": 11030000 + }, + { + "epoch": 31.93, + "learning_rate": 3.404138106706188e-05, + "loss": 2.0988, + "step": 11030500 + }, + { + "epoch": 31.93, + "learning_rate": 3.40406574194146e-05, + "loss": 2.1115, + "step": 11031000 + }, + { + "epoch": 31.93, + "learning_rate": 3.4039933771767324e-05, + "loss": 2.0857, + "step": 11031500 + }, + { + "epoch": 31.93, + "learning_rate": 3.4039210124120046e-05, + "loss": 2.1237, + "step": 11032000 + }, + { + "epoch": 31.93, + "learning_rate": 3.403848792376806e-05, + "loss": 2.1112, + "step": 11032500 + }, + { + "epoch": 31.94, + "learning_rate": 3.403776427612079e-05, + "loss": 2.1057, + "step": 11033000 + }, + { + "epoch": 31.94, + "learning_rate": 3.403704062847351e-05, + "loss": 2.0866, + "step": 11033500 + }, + { + "epoch": 31.94, + "learning_rate": 3.4036316980826235e-05, + "loss": 2.1, + "step": 11034000 + }, + { + "epoch": 31.94, + "learning_rate": 3.403559333317896e-05, + "loss": 2.0885, + "step": 11034500 + }, + { + "epoch": 31.94, + "learning_rate": 3.403486968553168e-05, + "loss": 2.0845, + "step": 11035000 + }, + { + "epoch": 31.94, + "learning_rate": 3.40341460378844e-05, + "loss": 2.0994, + "step": 11035500 + }, + { + "epoch": 31.94, + "learning_rate": 3.4033422390237124e-05, + "loss": 2.0806, + "step": 11036000 + }, + { + "epoch": 31.95, + "learning_rate": 3.403270018988514e-05, + "loss": 2.1238, + "step": 11036500 + }, + { + "epoch": 31.95, + "learning_rate": 3.403197654223787e-05, + "loss": 2.1197, + "step": 11037000 + }, + { + "epoch": 31.95, + "learning_rate": 3.403125289459059e-05, + "loss": 2.1138, + "step": 11037500 + }, + { + "epoch": 31.95, + "learning_rate": 3.403052924694331e-05, + "loss": 2.1471, + "step": 11038000 + }, + { + "epoch": 31.95, + "learning_rate": 3.4029807046591335e-05, + "loss": 2.0917, + "step": 11038500 + }, + { + "epoch": 31.95, + "learning_rate": 3.402908339894406e-05, + "loss": 2.0972, + "step": 11039000 + }, + { + "epoch": 31.95, + "learning_rate": 3.402836119859207e-05, + "loss": 2.095, + "step": 11039500 + }, + { + "epoch": 31.96, + "learning_rate": 3.4027637550944795e-05, + "loss": 2.1102, + "step": 11040000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402691390329752e-05, + "loss": 2.1143, + "step": 11040500 + }, + { + "epoch": 31.96, + "learning_rate": 3.402619025565024e-05, + "loss": 2.0817, + "step": 11041000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402546660800296e-05, + "loss": 2.0853, + "step": 11041500 + }, + { + "epoch": 31.96, + "learning_rate": 3.402474296035569e-05, + "loss": 2.1062, + "step": 11042000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402401931270841e-05, + "loss": 2.092, + "step": 11042500 + }, + { + "epoch": 31.96, + "learning_rate": 3.4023295665061135e-05, + "loss": 2.1142, + "step": 11043000 + }, + { + "epoch": 31.97, + "learning_rate": 3.402257201741386e-05, + "loss": 2.0706, + "step": 11043500 + }, + { + "epoch": 31.97, + "learning_rate": 3.402184836976658e-05, + "loss": 2.1074, + "step": 11044000 + }, + { + "epoch": 31.97, + "learning_rate": 3.402112472211931e-05, + "loss": 2.0848, + "step": 11044500 + }, + { + "epoch": 31.97, + "learning_rate": 3.402040107447203e-05, + "loss": 2.0974, + "step": 11045000 + }, + { + "epoch": 31.97, + "learning_rate": 3.401967742682475e-05, + "loss": 2.0988, + "step": 11045500 + }, + { + "epoch": 31.97, + "learning_rate": 3.4018953779177475e-05, + "loss": 2.0857, + "step": 11046000 + }, + { + "epoch": 31.98, + "learning_rate": 3.40182301315302e-05, + "loss": 2.1264, + "step": 11046500 + }, + { + "epoch": 31.98, + "learning_rate": 3.401750937847351e-05, + "loss": 2.0891, + "step": 11047000 + }, + { + "epoch": 31.98, + "learning_rate": 3.401678717812153e-05, + "loss": 2.1024, + "step": 11047500 + }, + { + "epoch": 31.98, + "learning_rate": 3.401606353047425e-05, + "loss": 2.0933, + "step": 11048000 + }, + { + "epoch": 31.98, + "learning_rate": 3.401533988282697e-05, + "loss": 2.1329, + "step": 11048500 + }, + { + "epoch": 31.98, + "learning_rate": 3.4014616235179696e-05, + "loss": 2.0897, + "step": 11049000 + }, + { + "epoch": 31.98, + "learning_rate": 3.401389258753242e-05, + "loss": 2.1008, + "step": 11049500 + }, + { + "epoch": 31.99, + "learning_rate": 3.401317038718044e-05, + "loss": 2.1168, + "step": 11050000 + }, + { + "epoch": 31.99, + "learning_rate": 3.401244673953316e-05, + "loss": 2.0799, + "step": 11050500 + }, + { + "epoch": 31.99, + "learning_rate": 3.4011723091885885e-05, + "loss": 2.1242, + "step": 11051000 + }, + { + "epoch": 31.99, + "learning_rate": 3.401099944423861e-05, + "loss": 2.1073, + "step": 11051500 + }, + { + "epoch": 31.99, + "learning_rate": 3.4010275796591336e-05, + "loss": 2.108, + "step": 11052000 + }, + { + "epoch": 31.99, + "learning_rate": 3.400955214894406e-05, + "loss": 2.0969, + "step": 11052500 + }, + { + "epoch": 31.99, + "learning_rate": 3.400882850129678e-05, + "loss": 2.1011, + "step": 11053000 + }, + { + "epoch": 32.0, + "learning_rate": 3.40081048536495e-05, + "loss": 2.1123, + "step": 11053500 + }, + { + "epoch": 32.0, + "learning_rate": 3.4007381206002225e-05, + "loss": 2.1004, + "step": 11054000 + }, + { + "epoch": 32.0, + "learning_rate": 3.400665755835495e-05, + "loss": 2.1224, + "step": 11054500 + }, + { + "epoch": 32.0, + "learning_rate": 3.400593391070767e-05, + "loss": 2.0872, + "step": 11055000 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.6675262658987756, + "eval_accuracy_mlm": 0.6324042771749461, + "eval_accuracy_nsp": 0.8559600903787015, + "eval_loss": 2.1762564182281494, + "eval_runtime": 331.7013, + "eval_samples_per_second": 1315.599, + "eval_steps_per_second": 54.817, + "step": 11055104 + }, + { + "epoch": 32.0, + "learning_rate": 3.400521171035569e-05, + "loss": 2.0846, + "step": 11055500 + }, + { + "epoch": 32.0, + "learning_rate": 3.4004488062708414e-05, + "loss": 2.06, + "step": 11056000 + }, + { + "epoch": 32.0, + "learning_rate": 3.400376586235643e-05, + "loss": 2.0646, + "step": 11056500 + }, + { + "epoch": 32.01, + "learning_rate": 3.400304221470915e-05, + "loss": 2.0927, + "step": 11057000 + }, + { + "epoch": 32.01, + "learning_rate": 3.4002318567061874e-05, + "loss": 2.0445, + "step": 11057500 + }, + { + "epoch": 32.01, + "learning_rate": 3.4001594919414596e-05, + "loss": 2.0489, + "step": 11058000 + }, + { + "epoch": 32.01, + "learning_rate": 3.400087271906262e-05, + "loss": 2.0669, + "step": 11058500 + }, + { + "epoch": 32.01, + "learning_rate": 3.400014907141534e-05, + "loss": 2.0876, + "step": 11059000 + }, + { + "epoch": 32.01, + "learning_rate": 3.399942542376807e-05, + "loss": 2.099, + "step": 11059500 + }, + { + "epoch": 32.01, + "learning_rate": 3.399870177612079e-05, + "loss": 2.0681, + "step": 11060000 + }, + { + "epoch": 32.02, + "learning_rate": 3.3997978128473514e-05, + "loss": 2.076, + "step": 11060500 + }, + { + "epoch": 32.02, + "learning_rate": 3.3997254480826236e-05, + "loss": 2.0616, + "step": 11061000 + }, + { + "epoch": 32.02, + "learning_rate": 3.399653083317896e-05, + "loss": 2.0765, + "step": 11061500 + }, + { + "epoch": 32.02, + "learning_rate": 3.399580718553168e-05, + "loss": 2.0781, + "step": 11062000 + }, + { + "epoch": 32.02, + "learning_rate": 3.39950835378844e-05, + "loss": 2.0885, + "step": 11062500 + }, + { + "epoch": 32.02, + "learning_rate": 3.3994359890237125e-05, + "loss": 2.0804, + "step": 11063000 + }, + { + "epoch": 32.02, + "learning_rate": 3.399363624258985e-05, + "loss": 2.0707, + "step": 11063500 + }, + { + "epoch": 32.03, + "learning_rate": 3.399291259494257e-05, + "loss": 2.0815, + "step": 11064000 + }, + { + "epoch": 32.03, + "learning_rate": 3.399218894729529e-05, + "loss": 2.0873, + "step": 11064500 + }, + { + "epoch": 32.03, + "learning_rate": 3.3991465299648014e-05, + "loss": 2.0757, + "step": 11065000 + }, + { + "epoch": 32.03, + "learning_rate": 3.399074165200074e-05, + "loss": 2.0726, + "step": 11065500 + }, + { + "epoch": 32.03, + "learning_rate": 3.399001945164876e-05, + "loss": 2.0832, + "step": 11066000 + }, + { + "epoch": 32.03, + "learning_rate": 3.398929580400149e-05, + "loss": 2.087, + "step": 11066500 + }, + { + "epoch": 32.03, + "learning_rate": 3.398857215635421e-05, + "loss": 2.0782, + "step": 11067000 + }, + { + "epoch": 32.04, + "learning_rate": 3.398784850870693e-05, + "loss": 2.0785, + "step": 11067500 + }, + { + "epoch": 32.04, + "learning_rate": 3.3987124861059655e-05, + "loss": 2.0597, + "step": 11068000 + }, + { + "epoch": 32.04, + "learning_rate": 3.398640121341238e-05, + "loss": 2.0627, + "step": 11068500 + }, + { + "epoch": 32.04, + "learning_rate": 3.398567901306039e-05, + "loss": 2.0933, + "step": 11069000 + }, + { + "epoch": 32.04, + "learning_rate": 3.398495536541312e-05, + "loss": 2.0995, + "step": 11069500 + }, + { + "epoch": 32.04, + "learning_rate": 3.3984231717765844e-05, + "loss": 2.0712, + "step": 11070000 + }, + { + "epoch": 32.04, + "learning_rate": 3.398350951741386e-05, + "loss": 2.0735, + "step": 11070500 + }, + { + "epoch": 32.05, + "learning_rate": 3.398278586976658e-05, + "loss": 2.0865, + "step": 11071000 + }, + { + "epoch": 32.05, + "learning_rate": 3.3982062222119304e-05, + "loss": 2.0783, + "step": 11071500 + }, + { + "epoch": 32.05, + "learning_rate": 3.3981338574472026e-05, + "loss": 2.0867, + "step": 11072000 + }, + { + "epoch": 32.05, + "learning_rate": 3.398061492682475e-05, + "loss": 2.0735, + "step": 11072500 + }, + { + "epoch": 32.05, + "learning_rate": 3.397989272647277e-05, + "loss": 2.107, + "step": 11073000 + }, + { + "epoch": 32.05, + "learning_rate": 3.397916907882549e-05, + "loss": 2.0847, + "step": 11073500 + }, + { + "epoch": 32.05, + "learning_rate": 3.397844543117822e-05, + "loss": 2.0888, + "step": 11074000 + }, + { + "epoch": 32.06, + "learning_rate": 3.3977721783530944e-05, + "loss": 2.0778, + "step": 11074500 + }, + { + "epoch": 32.06, + "learning_rate": 3.3976998135883666e-05, + "loss": 2.0654, + "step": 11075000 + }, + { + "epoch": 32.06, + "learning_rate": 3.397627448823639e-05, + "loss": 2.0775, + "step": 11075500 + }, + { + "epoch": 32.06, + "learning_rate": 3.397555084058911e-05, + "loss": 2.0729, + "step": 11076000 + }, + { + "epoch": 32.06, + "learning_rate": 3.397482719294183e-05, + "loss": 2.0838, + "step": 11076500 + }, + { + "epoch": 32.06, + "learning_rate": 3.3974103545294555e-05, + "loss": 2.1062, + "step": 11077000 + }, + { + "epoch": 32.06, + "learning_rate": 3.397337989764728e-05, + "loss": 2.0761, + "step": 11077500 + }, + { + "epoch": 32.07, + "learning_rate": 3.397265625e-05, + "loss": 2.0998, + "step": 11078000 + }, + { + "epoch": 32.07, + "learning_rate": 3.397193260235272e-05, + "loss": 2.0944, + "step": 11078500 + }, + { + "epoch": 32.07, + "learning_rate": 3.3971210402000744e-05, + "loss": 2.1006, + "step": 11079000 + }, + { + "epoch": 32.07, + "learning_rate": 3.3970486754353466e-05, + "loss": 2.1022, + "step": 11079500 + }, + { + "epoch": 32.07, + "learning_rate": 3.396976310670619e-05, + "loss": 2.088, + "step": 11080000 + }, + { + "epoch": 32.07, + "learning_rate": 3.396903945905891e-05, + "loss": 2.0724, + "step": 11080500 + }, + { + "epoch": 32.07, + "learning_rate": 3.396831581141163e-05, + "loss": 2.1133, + "step": 11081000 + }, + { + "epoch": 32.08, + "learning_rate": 3.3967593611059655e-05, + "loss": 2.0864, + "step": 11081500 + }, + { + "epoch": 32.08, + "learning_rate": 3.396686996341238e-05, + "loss": 2.0808, + "step": 11082000 + }, + { + "epoch": 32.08, + "learning_rate": 3.39661463157651e-05, + "loss": 2.0805, + "step": 11082500 + }, + { + "epoch": 32.08, + "learning_rate": 3.396542266811782e-05, + "loss": 2.0874, + "step": 11083000 + }, + { + "epoch": 32.08, + "learning_rate": 3.3964699020470544e-05, + "loss": 2.077, + "step": 11083500 + }, + { + "epoch": 32.08, + "learning_rate": 3.396397537282327e-05, + "loss": 2.0846, + "step": 11084000 + }, + { + "epoch": 32.09, + "learning_rate": 3.3963251725175995e-05, + "loss": 2.1005, + "step": 11084500 + }, + { + "epoch": 32.09, + "learning_rate": 3.396252807752872e-05, + "loss": 2.0891, + "step": 11085000 + }, + { + "epoch": 32.09, + "learning_rate": 3.396180442988144e-05, + "loss": 2.0949, + "step": 11085500 + }, + { + "epoch": 32.09, + "learning_rate": 3.396108078223416e-05, + "loss": 2.0748, + "step": 11086000 + }, + { + "epoch": 32.09, + "learning_rate": 3.396035858188218e-05, + "loss": 2.0744, + "step": 11086500 + }, + { + "epoch": 32.09, + "learning_rate": 3.39596349342349e-05, + "loss": 2.0722, + "step": 11087000 + }, + { + "epoch": 32.09, + "learning_rate": 3.395891273388292e-05, + "loss": 2.0697, + "step": 11087500 + }, + { + "epoch": 32.1, + "learning_rate": 3.3958189086235645e-05, + "loss": 2.0847, + "step": 11088000 + }, + { + "epoch": 32.1, + "learning_rate": 3.395746688588366e-05, + "loss": 2.0969, + "step": 11088500 + }, + { + "epoch": 32.1, + "learning_rate": 3.395674323823639e-05, + "loss": 2.0733, + "step": 11089000 + }, + { + "epoch": 32.1, + "learning_rate": 3.395601959058911e-05, + "loss": 2.0939, + "step": 11089500 + }, + { + "epoch": 32.1, + "learning_rate": 3.3955295942941834e-05, + "loss": 2.1077, + "step": 11090000 + }, + { + "epoch": 32.1, + "learning_rate": 3.3954572295294556e-05, + "loss": 2.1059, + "step": 11090500 + }, + { + "epoch": 32.1, + "learning_rate": 3.395384864764728e-05, + "loss": 2.0811, + "step": 11091000 + }, + { + "epoch": 32.11, + "learning_rate": 3.3953125e-05, + "loss": 2.0783, + "step": 11091500 + }, + { + "epoch": 32.11, + "learning_rate": 3.395240135235272e-05, + "loss": 2.085, + "step": 11092000 + }, + { + "epoch": 32.11, + "learning_rate": 3.3951677704705445e-05, + "loss": 2.0935, + "step": 11092500 + }, + { + "epoch": 32.11, + "learning_rate": 3.3950954057058174e-05, + "loss": 2.1206, + "step": 11093000 + }, + { + "epoch": 32.11, + "learning_rate": 3.3950230409410896e-05, + "loss": 2.0831, + "step": 11093500 + }, + { + "epoch": 32.11, + "learning_rate": 3.394950820905891e-05, + "loss": 2.0824, + "step": 11094000 + }, + { + "epoch": 32.11, + "learning_rate": 3.3948784561411634e-05, + "loss": 2.0653, + "step": 11094500 + }, + { + "epoch": 32.12, + "learning_rate": 3.3948060913764356e-05, + "loss": 2.0849, + "step": 11095000 + }, + { + "epoch": 32.12, + "learning_rate": 3.394733871341237e-05, + "loss": 2.0787, + "step": 11095500 + }, + { + "epoch": 32.12, + "learning_rate": 3.39466150657651e-05, + "loss": 2.0752, + "step": 11096000 + }, + { + "epoch": 32.12, + "learning_rate": 3.394589141811782e-05, + "loss": 2.0753, + "step": 11096500 + }, + { + "epoch": 32.12, + "learning_rate": 3.3945167770470545e-05, + "loss": 2.0889, + "step": 11097000 + }, + { + "epoch": 32.12, + "learning_rate": 3.3944444122823274e-05, + "loss": 2.0537, + "step": 11097500 + }, + { + "epoch": 32.12, + "learning_rate": 3.3943720475175996e-05, + "loss": 2.1074, + "step": 11098000 + }, + { + "epoch": 32.13, + "learning_rate": 3.394299682752872e-05, + "loss": 2.0719, + "step": 11098500 + }, + { + "epoch": 32.13, + "learning_rate": 3.394227317988144e-05, + "loss": 2.089, + "step": 11099000 + }, + { + "epoch": 32.13, + "learning_rate": 3.394154953223416e-05, + "loss": 2.0875, + "step": 11099500 + }, + { + "epoch": 32.13, + "learning_rate": 3.394082733188218e-05, + "loss": 2.0743, + "step": 11100000 + }, + { + "epoch": 32.13, + "learning_rate": 3.39401036842349e-05, + "loss": 2.0679, + "step": 11100500 + }, + { + "epoch": 32.13, + "learning_rate": 3.393938003658762e-05, + "loss": 2.0899, + "step": 11101000 + }, + { + "epoch": 32.13, + "learning_rate": 3.3938656388940345e-05, + "loss": 2.0655, + "step": 11101500 + }, + { + "epoch": 32.14, + "learning_rate": 3.3937932741293074e-05, + "loss": 2.0916, + "step": 11102000 + }, + { + "epoch": 32.14, + "learning_rate": 3.3937209093645796e-05, + "loss": 2.0972, + "step": 11102500 + }, + { + "epoch": 32.14, + "learning_rate": 3.393648544599852e-05, + "loss": 2.1027, + "step": 11103000 + }, + { + "epoch": 32.14, + "learning_rate": 3.393576179835125e-05, + "loss": 2.0755, + "step": 11103500 + }, + { + "epoch": 32.14, + "learning_rate": 3.393503959799926e-05, + "loss": 2.0763, + "step": 11104000 + }, + { + "epoch": 32.14, + "learning_rate": 3.3934315950351986e-05, + "loss": 2.0968, + "step": 11104500 + }, + { + "epoch": 32.14, + "learning_rate": 3.393359230270471e-05, + "loss": 2.092, + "step": 11105000 + }, + { + "epoch": 32.15, + "learning_rate": 3.393286865505743e-05, + "loss": 2.1001, + "step": 11105500 + }, + { + "epoch": 32.15, + "learning_rate": 3.393214645470545e-05, + "loss": 2.0733, + "step": 11106000 + }, + { + "epoch": 32.15, + "learning_rate": 3.3931422807058175e-05, + "loss": 2.1094, + "step": 11106500 + }, + { + "epoch": 32.15, + "learning_rate": 3.39306991594109e-05, + "loss": 2.083, + "step": 11107000 + }, + { + "epoch": 32.15, + "learning_rate": 3.392997551176362e-05, + "loss": 2.0913, + "step": 11107500 + }, + { + "epoch": 32.15, + "learning_rate": 3.392925186411634e-05, + "loss": 2.0907, + "step": 11108000 + }, + { + "epoch": 32.15, + "learning_rate": 3.392852966376436e-05, + "loss": 2.07, + "step": 11108500 + }, + { + "epoch": 32.16, + "learning_rate": 3.392780601611708e-05, + "loss": 2.0948, + "step": 11109000 + }, + { + "epoch": 32.16, + "learning_rate": 3.39270823684698e-05, + "loss": 2.0947, + "step": 11109500 + }, + { + "epoch": 32.16, + "learning_rate": 3.3926358720822524e-05, + "loss": 2.1131, + "step": 11110000 + }, + { + "epoch": 32.16, + "learning_rate": 3.392563507317525e-05, + "loss": 2.0729, + "step": 11110500 + }, + { + "epoch": 32.16, + "learning_rate": 3.3924911425527975e-05, + "loss": 2.0791, + "step": 11111000 + }, + { + "epoch": 32.16, + "learning_rate": 3.3924187777880704e-05, + "loss": 2.1045, + "step": 11111500 + }, + { + "epoch": 32.16, + "learning_rate": 3.392346557752872e-05, + "loss": 2.1057, + "step": 11112000 + }, + { + "epoch": 32.17, + "learning_rate": 3.392274771906262e-05, + "loss": 2.0983, + "step": 11112500 + }, + { + "epoch": 32.17, + "learning_rate": 3.3922024071415344e-05, + "loss": 2.0947, + "step": 11113000 + }, + { + "epoch": 32.17, + "learning_rate": 3.3921300423768066e-05, + "loss": 2.0808, + "step": 11113500 + }, + { + "epoch": 32.17, + "learning_rate": 3.392057677612079e-05, + "loss": 2.1142, + "step": 11114000 + }, + { + "epoch": 32.17, + "learning_rate": 3.391985312847351e-05, + "loss": 2.081, + "step": 11114500 + }, + { + "epoch": 32.17, + "learning_rate": 3.391912948082623e-05, + "loss": 2.091, + "step": 11115000 + }, + { + "epoch": 32.17, + "learning_rate": 3.3918405833178955e-05, + "loss": 2.0748, + "step": 11115500 + }, + { + "epoch": 32.18, + "learning_rate": 3.391768218553168e-05, + "loss": 2.0833, + "step": 11116000 + }, + { + "epoch": 32.18, + "learning_rate": 3.39169585378844e-05, + "loss": 2.0951, + "step": 11116500 + }, + { + "epoch": 32.18, + "learning_rate": 3.391623489023713e-05, + "loss": 2.0947, + "step": 11117000 + }, + { + "epoch": 32.18, + "learning_rate": 3.391551124258985e-05, + "loss": 2.0941, + "step": 11117500 + }, + { + "epoch": 32.18, + "learning_rate": 3.391478759494257e-05, + "loss": 2.0664, + "step": 11118000 + }, + { + "epoch": 32.18, + "learning_rate": 3.3914063947295295e-05, + "loss": 2.084, + "step": 11118500 + }, + { + "epoch": 32.18, + "learning_rate": 3.3913340299648024e-05, + "loss": 2.0732, + "step": 11119000 + }, + { + "epoch": 32.19, + "learning_rate": 3.3912616652000746e-05, + "loss": 2.0726, + "step": 11119500 + }, + { + "epoch": 32.19, + "learning_rate": 3.391189300435347e-05, + "loss": 2.0911, + "step": 11120000 + }, + { + "epoch": 32.19, + "learning_rate": 3.391116935670619e-05, + "loss": 2.1006, + "step": 11120500 + }, + { + "epoch": 32.19, + "learning_rate": 3.391044570905891e-05, + "loss": 2.0632, + "step": 11121000 + }, + { + "epoch": 32.19, + "learning_rate": 3.3909722061411635e-05, + "loss": 2.1067, + "step": 11121500 + }, + { + "epoch": 32.19, + "learning_rate": 3.390899841376436e-05, + "loss": 2.0869, + "step": 11122000 + }, + { + "epoch": 32.2, + "learning_rate": 3.390827476611708e-05, + "loss": 2.1037, + "step": 11122500 + }, + { + "epoch": 32.2, + "learning_rate": 3.39075511184698e-05, + "loss": 2.0864, + "step": 11123000 + }, + { + "epoch": 32.2, + "learning_rate": 3.3906827470822524e-05, + "loss": 2.078, + "step": 11123500 + }, + { + "epoch": 32.2, + "learning_rate": 3.390610382317525e-05, + "loss": 2.0904, + "step": 11124000 + }, + { + "epoch": 32.2, + "learning_rate": 3.3905380175527976e-05, + "loss": 2.0825, + "step": 11124500 + }, + { + "epoch": 32.2, + "learning_rate": 3.39046565278807e-05, + "loss": 2.0744, + "step": 11125000 + }, + { + "epoch": 32.2, + "learning_rate": 3.390393432752871e-05, + "loss": 2.0716, + "step": 11125500 + }, + { + "epoch": 32.21, + "learning_rate": 3.390321067988144e-05, + "loss": 2.0882, + "step": 11126000 + }, + { + "epoch": 32.21, + "learning_rate": 3.3902487032234165e-05, + "loss": 2.0557, + "step": 11126500 + }, + { + "epoch": 32.21, + "learning_rate": 3.390176338458689e-05, + "loss": 2.0811, + "step": 11127000 + }, + { + "epoch": 32.21, + "learning_rate": 3.390103973693961e-05, + "loss": 2.0752, + "step": 11127500 + }, + { + "epoch": 32.21, + "learning_rate": 3.390031608929233e-05, + "loss": 2.069, + "step": 11128000 + }, + { + "epoch": 32.21, + "learning_rate": 3.3899592441645054e-05, + "loss": 2.1071, + "step": 11128500 + }, + { + "epoch": 32.21, + "learning_rate": 3.3898868793997776e-05, + "loss": 2.0685, + "step": 11129000 + }, + { + "epoch": 32.22, + "learning_rate": 3.3898145146350505e-05, + "loss": 2.0597, + "step": 11129500 + }, + { + "epoch": 32.22, + "learning_rate": 3.389742294599852e-05, + "loss": 2.0599, + "step": 11130000 + }, + { + "epoch": 32.22, + "learning_rate": 3.389669929835124e-05, + "loss": 2.0682, + "step": 11130500 + }, + { + "epoch": 32.22, + "learning_rate": 3.3895975650703965e-05, + "loss": 2.076, + "step": 11131000 + }, + { + "epoch": 32.22, + "learning_rate": 3.389525200305669e-05, + "loss": 2.0684, + "step": 11131500 + }, + { + "epoch": 32.22, + "learning_rate": 3.389452835540941e-05, + "loss": 2.052, + "step": 11132000 + }, + { + "epoch": 32.22, + "learning_rate": 3.3893806155057425e-05, + "loss": 2.0908, + "step": 11132500 + }, + { + "epoch": 32.23, + "learning_rate": 3.3893082507410154e-05, + "loss": 2.0819, + "step": 11133000 + }, + { + "epoch": 32.23, + "learning_rate": 3.3892358859762876e-05, + "loss": 2.1044, + "step": 11133500 + }, + { + "epoch": 32.23, + "learning_rate": 3.38916366594109e-05, + "loss": 2.0985, + "step": 11134000 + }, + { + "epoch": 32.23, + "learning_rate": 3.389091301176362e-05, + "loss": 2.047, + "step": 11134500 + }, + { + "epoch": 32.23, + "learning_rate": 3.389018936411634e-05, + "loss": 2.0924, + "step": 11135000 + }, + { + "epoch": 32.23, + "learning_rate": 3.3889465716469065e-05, + "loss": 2.0566, + "step": 11135500 + }, + { + "epoch": 32.23, + "learning_rate": 3.388874206882179e-05, + "loss": 2.0814, + "step": 11136000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388801842117451e-05, + "loss": 2.0559, + "step": 11136500 + }, + { + "epoch": 32.24, + "learning_rate": 3.388729477352723e-05, + "loss": 2.0783, + "step": 11137000 + }, + { + "epoch": 32.24, + "learning_rate": 3.3886572573175254e-05, + "loss": 2.0934, + "step": 11137500 + }, + { + "epoch": 32.24, + "learning_rate": 3.388585037282327e-05, + "loss": 2.0781, + "step": 11138000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388512672517599e-05, + "loss": 2.0807, + "step": 11138500 + }, + { + "epoch": 32.24, + "learning_rate": 3.3884403077528714e-05, + "loss": 2.1042, + "step": 11139000 + }, + { + "epoch": 32.24, + "learning_rate": 3.3883679429881436e-05, + "loss": 2.0761, + "step": 11139500 + }, + { + "epoch": 32.25, + "learning_rate": 3.388295578223416e-05, + "loss": 2.0859, + "step": 11140000 + }, + { + "epoch": 32.25, + "learning_rate": 3.388223213458688e-05, + "loss": 2.1075, + "step": 11140500 + }, + { + "epoch": 32.25, + "learning_rate": 3.388150848693961e-05, + "loss": 2.1082, + "step": 11141000 + }, + { + "epoch": 32.25, + "learning_rate": 3.388078628658763e-05, + "loss": 2.0654, + "step": 11141500 + }, + { + "epoch": 32.25, + "learning_rate": 3.3880062638940354e-05, + "loss": 2.0954, + "step": 11142000 + }, + { + "epoch": 32.25, + "learning_rate": 3.387933899129308e-05, + "loss": 2.0707, + "step": 11142500 + }, + { + "epoch": 32.25, + "learning_rate": 3.38786153436458e-05, + "loss": 2.0878, + "step": 11143000 + }, + { + "epoch": 32.26, + "learning_rate": 3.387789169599852e-05, + "loss": 2.0615, + "step": 11143500 + }, + { + "epoch": 32.26, + "learning_rate": 3.387716804835124e-05, + "loss": 2.1077, + "step": 11144000 + }, + { + "epoch": 32.26, + "learning_rate": 3.3876444400703966e-05, + "loss": 2.0768, + "step": 11144500 + }, + { + "epoch": 32.26, + "learning_rate": 3.387572075305669e-05, + "loss": 2.0653, + "step": 11145000 + }, + { + "epoch": 32.26, + "learning_rate": 3.387499710540941e-05, + "loss": 2.0721, + "step": 11145500 + }, + { + "epoch": 32.26, + "learning_rate": 3.387427345776213e-05, + "loss": 2.0938, + "step": 11146000 + }, + { + "epoch": 32.26, + "learning_rate": 3.3873551257410155e-05, + "loss": 2.0851, + "step": 11146500 + }, + { + "epoch": 32.27, + "learning_rate": 3.387282760976288e-05, + "loss": 2.0681, + "step": 11147000 + }, + { + "epoch": 32.27, + "learning_rate": 3.38721039621156e-05, + "loss": 2.0779, + "step": 11147500 + }, + { + "epoch": 32.27, + "learning_rate": 3.387138031446832e-05, + "loss": 2.0749, + "step": 11148000 + }, + { + "epoch": 32.27, + "learning_rate": 3.387065666682105e-05, + "loss": 2.0871, + "step": 11148500 + }, + { + "epoch": 32.27, + "learning_rate": 3.386993301917377e-05, + "loss": 2.0828, + "step": 11149000 + }, + { + "epoch": 32.27, + "learning_rate": 3.3869209371526495e-05, + "loss": 2.0798, + "step": 11149500 + }, + { + "epoch": 32.27, + "learning_rate": 3.386848717117451e-05, + "loss": 2.0705, + "step": 11150000 + }, + { + "epoch": 32.28, + "learning_rate": 3.386776352352723e-05, + "loss": 2.095, + "step": 11150500 + }, + { + "epoch": 32.28, + "learning_rate": 3.3867039875879955e-05, + "loss": 2.0804, + "step": 11151000 + }, + { + "epoch": 32.28, + "learning_rate": 3.3866316228232684e-05, + "loss": 2.0664, + "step": 11151500 + }, + { + "epoch": 32.28, + "learning_rate": 3.386559547517599e-05, + "loss": 2.0525, + "step": 11152000 + }, + { + "epoch": 32.28, + "learning_rate": 3.3864871827528715e-05, + "loss": 2.0952, + "step": 11152500 + }, + { + "epoch": 32.28, + "learning_rate": 3.386414817988144e-05, + "loss": 2.081, + "step": 11153000 + }, + { + "epoch": 32.28, + "learning_rate": 3.386342453223416e-05, + "loss": 2.0729, + "step": 11153500 + }, + { + "epoch": 32.29, + "learning_rate": 3.386270088458688e-05, + "loss": 2.0992, + "step": 11154000 + }, + { + "epoch": 32.29, + "learning_rate": 3.3861977236939604e-05, + "loss": 2.0812, + "step": 11154500 + }, + { + "epoch": 32.29, + "learning_rate": 3.386125648388292e-05, + "loss": 2.0853, + "step": 11155000 + }, + { + "epoch": 32.29, + "learning_rate": 3.386053283623564e-05, + "loss": 2.0781, + "step": 11155500 + }, + { + "epoch": 32.29, + "learning_rate": 3.3859809188588364e-05, + "loss": 2.0671, + "step": 11156000 + }, + { + "epoch": 32.29, + "learning_rate": 3.385908554094109e-05, + "loss": 2.0945, + "step": 11156500 + }, + { + "epoch": 32.29, + "learning_rate": 3.3858361893293815e-05, + "loss": 2.0844, + "step": 11157000 + }, + { + "epoch": 32.3, + "learning_rate": 3.385763969294183e-05, + "loss": 2.0966, + "step": 11157500 + }, + { + "epoch": 32.3, + "learning_rate": 3.385691604529456e-05, + "loss": 2.0792, + "step": 11158000 + }, + { + "epoch": 32.3, + "learning_rate": 3.385619239764728e-05, + "loss": 2.0907, + "step": 11158500 + }, + { + "epoch": 32.3, + "learning_rate": 3.3855468750000004e-05, + "loss": 2.0776, + "step": 11159000 + }, + { + "epoch": 32.3, + "learning_rate": 3.3854745102352726e-05, + "loss": 2.0798, + "step": 11159500 + }, + { + "epoch": 32.3, + "learning_rate": 3.385402145470545e-05, + "loss": 2.0812, + "step": 11160000 + }, + { + "epoch": 32.31, + "learning_rate": 3.385329780705817e-05, + "loss": 2.094, + "step": 11160500 + }, + { + "epoch": 32.31, + "learning_rate": 3.385257415941089e-05, + "loss": 2.0936, + "step": 11161000 + }, + { + "epoch": 32.31, + "learning_rate": 3.3851850511763615e-05, + "loss": 2.09, + "step": 11161500 + }, + { + "epoch": 32.31, + "learning_rate": 3.385112686411634e-05, + "loss": 2.0672, + "step": 11162000 + }, + { + "epoch": 32.31, + "learning_rate": 3.385040466376436e-05, + "loss": 2.0852, + "step": 11162500 + }, + { + "epoch": 32.31, + "learning_rate": 3.384968101611708e-05, + "loss": 2.0829, + "step": 11163000 + }, + { + "epoch": 32.31, + "learning_rate": 3.38489588157651e-05, + "loss": 2.0884, + "step": 11163500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384823516811783e-05, + "loss": 2.0782, + "step": 11164000 + }, + { + "epoch": 32.32, + "learning_rate": 3.384751152047055e-05, + "loss": 2.0956, + "step": 11164500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384678787282327e-05, + "loss": 2.084, + "step": 11165000 + }, + { + "epoch": 32.32, + "learning_rate": 3.3846064225175993e-05, + "loss": 2.08, + "step": 11165500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384534347211931e-05, + "loss": 2.0759, + "step": 11166000 + }, + { + "epoch": 32.32, + "learning_rate": 3.384461982447203e-05, + "loss": 2.1057, + "step": 11166500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384389762412005e-05, + "loss": 2.1112, + "step": 11167000 + }, + { + "epoch": 32.33, + "learning_rate": 3.384317397647277e-05, + "loss": 2.0893, + "step": 11167500 + }, + { + "epoch": 32.33, + "learning_rate": 3.384245032882549e-05, + "loss": 2.1149, + "step": 11168000 + }, + { + "epoch": 32.33, + "learning_rate": 3.3841726681178214e-05, + "loss": 2.0912, + "step": 11168500 + }, + { + "epoch": 32.33, + "learning_rate": 3.3841003033530936e-05, + "loss": 2.0752, + "step": 11169000 + }, + { + "epoch": 32.33, + "learning_rate": 3.384027938588366e-05, + "loss": 2.0858, + "step": 11169500 + }, + { + "epoch": 32.33, + "learning_rate": 3.383955573823638e-05, + "loss": 2.089, + "step": 11170000 + }, + { + "epoch": 32.33, + "learning_rate": 3.383883209058911e-05, + "loss": 2.0805, + "step": 11170500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383810844294183e-05, + "loss": 2.0929, + "step": 11171000 + }, + { + "epoch": 32.34, + "learning_rate": 3.383738479529456e-05, + "loss": 2.0887, + "step": 11171500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383666114764728e-05, + "loss": 2.1026, + "step": 11172000 + }, + { + "epoch": 32.34, + "learning_rate": 3.3835937500000005e-05, + "loss": 2.0894, + "step": 11172500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383521385235273e-05, + "loss": 2.0812, + "step": 11173000 + }, + { + "epoch": 32.34, + "learning_rate": 3.383449165200074e-05, + "loss": 2.0684, + "step": 11173500 + }, + { + "epoch": 32.34, + "learning_rate": 3.3833768004353465e-05, + "loss": 2.0899, + "step": 11174000 + }, + { + "epoch": 32.35, + "learning_rate": 3.383304435670619e-05, + "loss": 2.064, + "step": 11174500 + }, + { + "epoch": 32.35, + "learning_rate": 3.383232070905891e-05, + "loss": 2.079, + "step": 11175000 + }, + { + "epoch": 32.35, + "learning_rate": 3.383159706141163e-05, + "loss": 2.0886, + "step": 11175500 + }, + { + "epoch": 32.35, + "learning_rate": 3.383087341376436e-05, + "loss": 2.1082, + "step": 11176000 + }, + { + "epoch": 32.35, + "learning_rate": 3.383014976611708e-05, + "loss": 2.0656, + "step": 11176500 + }, + { + "epoch": 32.35, + "learning_rate": 3.3829426118469805e-05, + "loss": 2.0977, + "step": 11177000 + }, + { + "epoch": 32.35, + "learning_rate": 3.382870247082253e-05, + "loss": 2.0813, + "step": 11177500 + }, + { + "epoch": 32.36, + "learning_rate": 3.382797882317525e-05, + "loss": 2.1031, + "step": 11178000 + }, + { + "epoch": 32.36, + "learning_rate": 3.3827256622823265e-05, + "loss": 2.0898, + "step": 11178500 + }, + { + "epoch": 32.36, + "learning_rate": 3.3826532975175994e-05, + "loss": 2.0997, + "step": 11179000 + }, + { + "epoch": 32.36, + "learning_rate": 3.3825809327528717e-05, + "loss": 2.0862, + "step": 11179500 + }, + { + "epoch": 32.36, + "learning_rate": 3.382508567988144e-05, + "loss": 2.1061, + "step": 11180000 + }, + { + "epoch": 32.36, + "learning_rate": 3.382436203223416e-05, + "loss": 2.0902, + "step": 11180500 + }, + { + "epoch": 32.36, + "learning_rate": 3.382363838458688e-05, + "loss": 2.0885, + "step": 11181000 + }, + { + "epoch": 32.37, + "learning_rate": 3.3822916184234906e-05, + "loss": 2.0977, + "step": 11181500 + }, + { + "epoch": 32.37, + "learning_rate": 3.382219253658763e-05, + "loss": 2.0862, + "step": 11182000 + }, + { + "epoch": 32.37, + "learning_rate": 3.382146888894035e-05, + "loss": 2.0839, + "step": 11182500 + }, + { + "epoch": 32.37, + "learning_rate": 3.382074524129307e-05, + "loss": 2.0732, + "step": 11183000 + }, + { + "epoch": 32.37, + "learning_rate": 3.3820021593645794e-05, + "loss": 2.079, + "step": 11183500 + }, + { + "epoch": 32.37, + "learning_rate": 3.381929794599852e-05, + "loss": 2.1104, + "step": 11184000 + }, + { + "epoch": 32.37, + "learning_rate": 3.381857429835124e-05, + "loss": 2.0937, + "step": 11184500 + }, + { + "epoch": 32.38, + "learning_rate": 3.381785065070396e-05, + "loss": 2.0813, + "step": 11185000 + }, + { + "epoch": 32.38, + "learning_rate": 3.3817127003056683e-05, + "loss": 2.0905, + "step": 11185500 + }, + { + "epoch": 32.38, + "learning_rate": 3.381640335540941e-05, + "loss": 2.1026, + "step": 11186000 + }, + { + "epoch": 32.38, + "learning_rate": 3.3815679707762135e-05, + "loss": 2.0749, + "step": 11186500 + }, + { + "epoch": 32.38, + "learning_rate": 3.381495750741016e-05, + "loss": 2.1167, + "step": 11187000 + }, + { + "epoch": 32.38, + "learning_rate": 3.381423385976288e-05, + "loss": 2.0801, + "step": 11187500 + }, + { + "epoch": 32.38, + "learning_rate": 3.38135102121156e-05, + "loss": 2.0857, + "step": 11188000 + }, + { + "epoch": 32.39, + "learning_rate": 3.3812786564468324e-05, + "loss": 2.1024, + "step": 11188500 + }, + { + "epoch": 32.39, + "learning_rate": 3.3812062916821046e-05, + "loss": 2.0947, + "step": 11189000 + }, + { + "epoch": 32.39, + "learning_rate": 3.381133926917377e-05, + "loss": 2.0816, + "step": 11189500 + }, + { + "epoch": 32.39, + "learning_rate": 3.381061562152649e-05, + "loss": 2.0785, + "step": 11190000 + }, + { + "epoch": 32.39, + "learning_rate": 3.380989197387921e-05, + "loss": 2.0753, + "step": 11190500 + }, + { + "epoch": 32.39, + "learning_rate": 3.3809168326231935e-05, + "loss": 2.0976, + "step": 11191000 + }, + { + "epoch": 32.39, + "learning_rate": 3.3808444678584664e-05, + "loss": 2.0756, + "step": 11191500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380772247823268e-05, + "loss": 2.0869, + "step": 11192000 + }, + { + "epoch": 32.4, + "learning_rate": 3.38069988305854e-05, + "loss": 2.0933, + "step": 11192500 + }, + { + "epoch": 32.4, + "learning_rate": 3.3806275182938124e-05, + "loss": 2.09, + "step": 11193000 + }, + { + "epoch": 32.4, + "learning_rate": 3.380555442988144e-05, + "loss": 2.0881, + "step": 11193500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380483078223416e-05, + "loss": 2.1068, + "step": 11194000 + }, + { + "epoch": 32.4, + "learning_rate": 3.380410713458689e-05, + "loss": 2.0821, + "step": 11194500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380338348693961e-05, + "loss": 2.1017, + "step": 11195000 + }, + { + "epoch": 32.41, + "learning_rate": 3.3802659839292335e-05, + "loss": 2.0759, + "step": 11195500 + }, + { + "epoch": 32.41, + "learning_rate": 3.380193619164506e-05, + "loss": 2.072, + "step": 11196000 + }, + { + "epoch": 32.41, + "learning_rate": 3.380121254399778e-05, + "loss": 2.1036, + "step": 11196500 + }, + { + "epoch": 32.41, + "learning_rate": 3.38004888963505e-05, + "loss": 2.1077, + "step": 11197000 + }, + { + "epoch": 32.41, + "learning_rate": 3.3799765248703224e-05, + "loss": 2.0965, + "step": 11197500 + }, + { + "epoch": 32.41, + "learning_rate": 3.3799041601055946e-05, + "loss": 2.0795, + "step": 11198000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379831795340867e-05, + "loss": 2.0776, + "step": 11198500 + }, + { + "epoch": 32.42, + "learning_rate": 3.379759430576139e-05, + "loss": 2.0629, + "step": 11199000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379687065811411e-05, + "loss": 2.0885, + "step": 11199500 + }, + { + "epoch": 32.42, + "learning_rate": 3.379614990505743e-05, + "loss": 2.0822, + "step": 11200000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379542625741015e-05, + "loss": 2.0746, + "step": 11200500 + }, + { + "epoch": 32.42, + "learning_rate": 3.379470260976288e-05, + "loss": 2.0891, + "step": 11201000 + }, + { + "epoch": 32.42, + "learning_rate": 3.37939789621156e-05, + "loss": 2.0929, + "step": 11201500 + }, + { + "epoch": 32.43, + "learning_rate": 3.3793255314468324e-05, + "loss": 2.0688, + "step": 11202000 + }, + { + "epoch": 32.43, + "learning_rate": 3.379253166682105e-05, + "loss": 2.0744, + "step": 11202500 + }, + { + "epoch": 32.43, + "learning_rate": 3.379180801917377e-05, + "loss": 2.1042, + "step": 11203000 + }, + { + "epoch": 32.43, + "learning_rate": 3.379108437152649e-05, + "loss": 2.0733, + "step": 11203500 + }, + { + "epoch": 32.43, + "learning_rate": 3.3790362171174514e-05, + "loss": 2.0711, + "step": 11204000 + }, + { + "epoch": 32.43, + "learning_rate": 3.378963997082253e-05, + "loss": 2.0879, + "step": 11204500 + }, + { + "epoch": 32.43, + "learning_rate": 3.378891632317525e-05, + "loss": 2.0799, + "step": 11205000 + }, + { + "epoch": 32.44, + "learning_rate": 3.3788192675527974e-05, + "loss": 2.0805, + "step": 11205500 + }, + { + "epoch": 32.44, + "learning_rate": 3.3787469027880696e-05, + "loss": 2.0987, + "step": 11206000 + }, + { + "epoch": 32.44, + "learning_rate": 3.378674682752871e-05, + "loss": 2.1066, + "step": 11206500 + }, + { + "epoch": 32.44, + "learning_rate": 3.378602317988144e-05, + "loss": 2.0684, + "step": 11207000 + }, + { + "epoch": 32.44, + "learning_rate": 3.3785300979529456e-05, + "loss": 2.0823, + "step": 11207500 + }, + { + "epoch": 32.44, + "learning_rate": 3.378457733188218e-05, + "loss": 2.1032, + "step": 11208000 + }, + { + "epoch": 32.44, + "learning_rate": 3.37838536842349e-05, + "loss": 2.0986, + "step": 11208500 + }, + { + "epoch": 32.45, + "learning_rate": 3.378313003658763e-05, + "loss": 2.0898, + "step": 11209000 + }, + { + "epoch": 32.45, + "learning_rate": 3.378240638894035e-05, + "loss": 2.0918, + "step": 11209500 + }, + { + "epoch": 32.45, + "learning_rate": 3.3781682741293074e-05, + "loss": 2.0964, + "step": 11210000 + }, + { + "epoch": 32.45, + "learning_rate": 3.3780959093645796e-05, + "loss": 2.0627, + "step": 11210500 + }, + { + "epoch": 32.45, + "learning_rate": 3.378023544599852e-05, + "loss": 2.1028, + "step": 11211000 + }, + { + "epoch": 32.45, + "learning_rate": 3.377951179835124e-05, + "loss": 2.0859, + "step": 11211500 + }, + { + "epoch": 32.45, + "learning_rate": 3.377878815070396e-05, + "loss": 2.0824, + "step": 11212000 + }, + { + "epoch": 32.46, + "learning_rate": 3.377806450305669e-05, + "loss": 2.0965, + "step": 11212500 + }, + { + "epoch": 32.46, + "learning_rate": 3.3777340855409414e-05, + "loss": 2.0746, + "step": 11213000 + }, + { + "epoch": 32.46, + "learning_rate": 3.3776617207762136e-05, + "loss": 2.0992, + "step": 11213500 + }, + { + "epoch": 32.46, + "learning_rate": 3.377589500741015e-05, + "loss": 2.1114, + "step": 11214000 + }, + { + "epoch": 32.46, + "learning_rate": 3.3775171359762874e-05, + "loss": 2.1001, + "step": 11214500 + }, + { + "epoch": 32.46, + "learning_rate": 3.377444915941089e-05, + "loss": 2.0893, + "step": 11215000 + }, + { + "epoch": 32.46, + "learning_rate": 3.377372551176362e-05, + "loss": 2.0945, + "step": 11215500 + }, + { + "epoch": 32.47, + "learning_rate": 3.377300186411634e-05, + "loss": 2.0785, + "step": 11216000 + }, + { + "epoch": 32.47, + "learning_rate": 3.377227821646907e-05, + "loss": 2.0726, + "step": 11216500 + }, + { + "epoch": 32.47, + "learning_rate": 3.377155456882179e-05, + "loss": 2.0993, + "step": 11217000 + }, + { + "epoch": 32.47, + "learning_rate": 3.377083236846981e-05, + "loss": 2.1135, + "step": 11217500 + }, + { + "epoch": 32.47, + "learning_rate": 3.377010872082253e-05, + "loss": 2.0802, + "step": 11218000 + }, + { + "epoch": 32.47, + "learning_rate": 3.376938507317525e-05, + "loss": 2.1042, + "step": 11218500 + }, + { + "epoch": 32.47, + "learning_rate": 3.3768661425527974e-05, + "loss": 2.0902, + "step": 11219000 + }, + { + "epoch": 32.48, + "learning_rate": 3.3767937777880697e-05, + "loss": 2.0698, + "step": 11219500 + }, + { + "epoch": 32.48, + "learning_rate": 3.376721557752872e-05, + "loss": 2.0865, + "step": 11220000 + }, + { + "epoch": 32.48, + "learning_rate": 3.376649192988144e-05, + "loss": 2.0949, + "step": 11220500 + }, + { + "epoch": 32.48, + "learning_rate": 3.376576828223416e-05, + "loss": 2.094, + "step": 11221000 + }, + { + "epoch": 32.48, + "learning_rate": 3.3765044634586886e-05, + "loss": 2.1094, + "step": 11221500 + }, + { + "epoch": 32.48, + "learning_rate": 3.37643224342349e-05, + "loss": 2.0853, + "step": 11222000 + }, + { + "epoch": 32.48, + "learning_rate": 3.3763598786587623e-05, + "loss": 2.1028, + "step": 11222500 + }, + { + "epoch": 32.49, + "learning_rate": 3.3762875138940346e-05, + "loss": 2.0865, + "step": 11223000 + }, + { + "epoch": 32.49, + "learning_rate": 3.3762151491293075e-05, + "loss": 2.0944, + "step": 11223500 + }, + { + "epoch": 32.49, + "learning_rate": 3.37614278436458e-05, + "loss": 2.0765, + "step": 11224000 + }, + { + "epoch": 32.49, + "learning_rate": 3.376070419599852e-05, + "loss": 2.0894, + "step": 11224500 + }, + { + "epoch": 32.49, + "learning_rate": 3.375998054835124e-05, + "loss": 2.0821, + "step": 11225000 + }, + { + "epoch": 32.49, + "learning_rate": 3.375925690070397e-05, + "loss": 2.093, + "step": 11225500 + }, + { + "epoch": 32.49, + "learning_rate": 3.375853325305669e-05, + "loss": 2.084, + "step": 11226000 + }, + { + "epoch": 32.5, + "learning_rate": 3.375781105270471e-05, + "loss": 2.0619, + "step": 11226500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375708740505743e-05, + "loss": 2.101, + "step": 11227000 + }, + { + "epoch": 32.5, + "learning_rate": 3.375636375741015e-05, + "loss": 2.1095, + "step": 11227500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375564155705817e-05, + "loss": 2.0583, + "step": 11228000 + }, + { + "epoch": 32.5, + "learning_rate": 3.375491790941089e-05, + "loss": 2.0993, + "step": 11228500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375419426176362e-05, + "loss": 2.1048, + "step": 11229000 + }, + { + "epoch": 32.5, + "learning_rate": 3.375347061411634e-05, + "loss": 2.1003, + "step": 11229500 + }, + { + "epoch": 32.51, + "learning_rate": 3.3752746966469064e-05, + "loss": 2.0843, + "step": 11230000 + }, + { + "epoch": 32.51, + "learning_rate": 3.3752023318821786e-05, + "loss": 2.0868, + "step": 11230500 + }, + { + "epoch": 32.51, + "learning_rate": 3.375130111846981e-05, + "loss": 2.0789, + "step": 11231000 + }, + { + "epoch": 32.51, + "learning_rate": 3.375057747082253e-05, + "loss": 2.084, + "step": 11231500 + }, + { + "epoch": 32.51, + "learning_rate": 3.374985382317525e-05, + "loss": 2.0965, + "step": 11232000 + }, + { + "epoch": 32.51, + "learning_rate": 3.3749130175527975e-05, + "loss": 2.1112, + "step": 11232500 + }, + { + "epoch": 32.51, + "learning_rate": 3.37484065278807e-05, + "loss": 2.0792, + "step": 11233000 + }, + { + "epoch": 32.52, + "learning_rate": 3.374768288023342e-05, + "loss": 2.0668, + "step": 11233500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374695923258614e-05, + "loss": 2.083, + "step": 11234000 + }, + { + "epoch": 32.52, + "learning_rate": 3.374623558493887e-05, + "loss": 2.0988, + "step": 11234500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374551193729159e-05, + "loss": 2.1102, + "step": 11235000 + }, + { + "epoch": 32.52, + "learning_rate": 3.3744788289644315e-05, + "loss": 2.09, + "step": 11235500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374406464199704e-05, + "loss": 2.1049, + "step": 11236000 + }, + { + "epoch": 32.53, + "learning_rate": 3.374334244164505e-05, + "loss": 2.097, + "step": 11236500 + }, + { + "epoch": 32.53, + "learning_rate": 3.374262024129307e-05, + "loss": 2.0907, + "step": 11237000 + }, + { + "epoch": 32.53, + "learning_rate": 3.374189659364579e-05, + "loss": 2.102, + "step": 11237500 + }, + { + "epoch": 32.53, + "learning_rate": 3.374117294599852e-05, + "loss": 2.0797, + "step": 11238000 + }, + { + "epoch": 32.53, + "learning_rate": 3.374044929835124e-05, + "loss": 2.093, + "step": 11238500 + }, + { + "epoch": 32.53, + "learning_rate": 3.373972565070397e-05, + "loss": 2.0878, + "step": 11239000 + }, + { + "epoch": 32.53, + "learning_rate": 3.373900200305669e-05, + "loss": 2.092, + "step": 11239500 + }, + { + "epoch": 32.54, + "learning_rate": 3.373827980270471e-05, + "loss": 2.0813, + "step": 11240000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373755615505743e-05, + "loss": 2.0877, + "step": 11240500 + }, + { + "epoch": 32.54, + "learning_rate": 3.373683395470545e-05, + "loss": 2.106, + "step": 11241000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373611030705817e-05, + "loss": 2.0805, + "step": 11241500 + }, + { + "epoch": 32.54, + "learning_rate": 3.37353866594109e-05, + "loss": 2.0982, + "step": 11242000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373466301176362e-05, + "loss": 2.088, + "step": 11242500 + }, + { + "epoch": 32.54, + "learning_rate": 3.3733940811411636e-05, + "loss": 2.1118, + "step": 11243000 + }, + { + "epoch": 32.55, + "learning_rate": 3.373321716376436e-05, + "loss": 2.0854, + "step": 11243500 + }, + { + "epoch": 32.55, + "learning_rate": 3.373249351611708e-05, + "loss": 2.12, + "step": 11244000 + }, + { + "epoch": 32.55, + "learning_rate": 3.37317698684698e-05, + "loss": 2.102, + "step": 11244500 + }, + { + "epoch": 32.55, + "learning_rate": 3.3731046220822525e-05, + "loss": 2.085, + "step": 11245000 + }, + { + "epoch": 32.55, + "learning_rate": 3.373032257317525e-05, + "loss": 2.0741, + "step": 11245500 + }, + { + "epoch": 32.55, + "learning_rate": 3.3729598925527976e-05, + "loss": 2.1125, + "step": 11246000 + }, + { + "epoch": 32.55, + "learning_rate": 3.37288752778807e-05, + "loss": 2.0879, + "step": 11246500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372815307752872e-05, + "loss": 2.0923, + "step": 11247000 + }, + { + "epoch": 32.56, + "learning_rate": 3.372742942988144e-05, + "loss": 2.0812, + "step": 11247500 + }, + { + "epoch": 32.56, + "learning_rate": 3.3726705782234165e-05, + "loss": 2.103, + "step": 11248000 + }, + { + "epoch": 32.56, + "learning_rate": 3.372598213458689e-05, + "loss": 2.0896, + "step": 11248500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372525848693961e-05, + "loss": 2.112, + "step": 11249000 + }, + { + "epoch": 32.56, + "learning_rate": 3.372453483929233e-05, + "loss": 2.1155, + "step": 11249500 + }, + { + "epoch": 32.56, + "learning_rate": 3.3723811191645054e-05, + "loss": 2.1146, + "step": 11250000 + }, + { + "epoch": 32.57, + "learning_rate": 3.3723087543997776e-05, + "loss": 2.1001, + "step": 11250500 + }, + { + "epoch": 32.57, + "learning_rate": 3.37223638963505e-05, + "loss": 2.1008, + "step": 11251000 + }, + { + "epoch": 32.57, + "learning_rate": 3.372164024870322e-05, + "loss": 2.1154, + "step": 11251500 + }, + { + "epoch": 32.57, + "learning_rate": 3.372091804835124e-05, + "loss": 2.073, + "step": 11252000 + }, + { + "epoch": 32.57, + "learning_rate": 3.3720194400703965e-05, + "loss": 2.073, + "step": 11252500 + }, + { + "epoch": 32.57, + "learning_rate": 3.371947075305669e-05, + "loss": 2.0811, + "step": 11253000 + }, + { + "epoch": 32.57, + "learning_rate": 3.3718747105409416e-05, + "loss": 2.1081, + "step": 11253500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371802345776214e-05, + "loss": 2.1001, + "step": 11254000 + }, + { + "epoch": 32.58, + "learning_rate": 3.371729981011486e-05, + "loss": 2.0837, + "step": 11254500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371657616246758e-05, + "loss": 2.0763, + "step": 11255000 + }, + { + "epoch": 32.58, + "learning_rate": 3.37158539621156e-05, + "loss": 2.0844, + "step": 11255500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371513031446832e-05, + "loss": 2.1067, + "step": 11256000 + }, + { + "epoch": 32.58, + "learning_rate": 3.371440666682105e-05, + "loss": 2.0847, + "step": 11256500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371368301917377e-05, + "loss": 2.1134, + "step": 11257000 + }, + { + "epoch": 32.59, + "learning_rate": 3.371296081882179e-05, + "loss": 2.0919, + "step": 11257500 + }, + { + "epoch": 32.59, + "learning_rate": 3.371223717117451e-05, + "loss": 2.0988, + "step": 11258000 + }, + { + "epoch": 32.59, + "learning_rate": 3.371151352352723e-05, + "loss": 2.0723, + "step": 11258500 + }, + { + "epoch": 32.59, + "learning_rate": 3.3710789875879954e-05, + "loss": 2.1005, + "step": 11259000 + }, + { + "epoch": 32.59, + "learning_rate": 3.371006622823268e-05, + "loss": 2.0693, + "step": 11259500 + }, + { + "epoch": 32.59, + "learning_rate": 3.37093425805854e-05, + "loss": 2.0951, + "step": 11260000 + }, + { + "epoch": 32.59, + "learning_rate": 3.370862038023342e-05, + "loss": 2.0944, + "step": 11260500 + }, + { + "epoch": 32.6, + "learning_rate": 3.3707898179881444e-05, + "loss": 2.115, + "step": 11261000 + }, + { + "epoch": 32.6, + "learning_rate": 3.3707174532234166e-05, + "loss": 2.071, + "step": 11261500 + }, + { + "epoch": 32.6, + "learning_rate": 3.370645088458689e-05, + "loss": 2.0731, + "step": 11262000 + }, + { + "epoch": 32.6, + "learning_rate": 3.370572723693961e-05, + "loss": 2.0894, + "step": 11262500 + }, + { + "epoch": 32.6, + "learning_rate": 3.370500358929233e-05, + "loss": 2.1161, + "step": 11263000 + }, + { + "epoch": 32.6, + "learning_rate": 3.3704279941645055e-05, + "loss": 2.0792, + "step": 11263500 + }, + { + "epoch": 32.6, + "learning_rate": 3.370355629399778e-05, + "loss": 2.1103, + "step": 11264000 + }, + { + "epoch": 32.61, + "learning_rate": 3.37028326463505e-05, + "loss": 2.0601, + "step": 11264500 + }, + { + "epoch": 32.61, + "learning_rate": 3.370210899870322e-05, + "loss": 2.0947, + "step": 11265000 + }, + { + "epoch": 32.61, + "learning_rate": 3.370138535105595e-05, + "loss": 2.0628, + "step": 11265500 + }, + { + "epoch": 32.61, + "learning_rate": 3.370066170340867e-05, + "loss": 2.0947, + "step": 11266000 + }, + { + "epoch": 32.61, + "learning_rate": 3.3699938055761395e-05, + "loss": 2.0868, + "step": 11266500 + }, + { + "epoch": 32.61, + "learning_rate": 3.3699217302704704e-05, + "loss": 2.0957, + "step": 11267000 + }, + { + "epoch": 32.61, + "learning_rate": 3.3698493655057426e-05, + "loss": 2.1232, + "step": 11267500 + }, + { + "epoch": 32.62, + "learning_rate": 3.369777145470545e-05, + "loss": 2.0901, + "step": 11268000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369704780705817e-05, + "loss": 2.1072, + "step": 11268500 + }, + { + "epoch": 32.62, + "learning_rate": 3.36963241594109e-05, + "loss": 2.084, + "step": 11269000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369560051176362e-05, + "loss": 2.0901, + "step": 11269500 + }, + { + "epoch": 32.62, + "learning_rate": 3.3694876864116344e-05, + "loss": 2.1101, + "step": 11270000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369415466376436e-05, + "loss": 2.0799, + "step": 11270500 + }, + { + "epoch": 32.62, + "learning_rate": 3.369343101611708e-05, + "loss": 2.1083, + "step": 11271000 + }, + { + "epoch": 32.63, + "learning_rate": 3.3692707368469804e-05, + "loss": 2.0771, + "step": 11271500 + }, + { + "epoch": 32.63, + "learning_rate": 3.3691985168117826e-05, + "loss": 2.1, + "step": 11272000 + }, + { + "epoch": 32.63, + "learning_rate": 3.369126152047055e-05, + "loss": 2.0937, + "step": 11272500 + }, + { + "epoch": 32.63, + "learning_rate": 3.369053787282327e-05, + "loss": 2.0851, + "step": 11273000 + }, + { + "epoch": 32.63, + "learning_rate": 3.368981422517599e-05, + "loss": 2.0988, + "step": 11273500 + }, + { + "epoch": 32.63, + "learning_rate": 3.368909202482401e-05, + "loss": 2.1007, + "step": 11274000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368836837717673e-05, + "loss": 2.1077, + "step": 11274500 + }, + { + "epoch": 32.64, + "learning_rate": 3.368764472952945e-05, + "loss": 2.0934, + "step": 11275000 + }, + { + "epoch": 32.64, + "learning_rate": 3.3686921081882175e-05, + "loss": 2.0773, + "step": 11275500 + }, + { + "epoch": 32.64, + "learning_rate": 3.36861974342349e-05, + "loss": 2.0915, + "step": 11276000 + }, + { + "epoch": 32.64, + "learning_rate": 3.3685473786587627e-05, + "loss": 2.108, + "step": 11276500 + }, + { + "epoch": 32.64, + "learning_rate": 3.368475013894035e-05, + "loss": 2.0966, + "step": 11277000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368402649129308e-05, + "loss": 2.0984, + "step": 11277500 + }, + { + "epoch": 32.65, + "learning_rate": 3.36833028436458e-05, + "loss": 2.0917, + "step": 11278000 + }, + { + "epoch": 32.65, + "learning_rate": 3.368257919599852e-05, + "loss": 2.0909, + "step": 11278500 + }, + { + "epoch": 32.65, + "learning_rate": 3.3681855548351245e-05, + "loss": 2.1118, + "step": 11279000 + }, + { + "epoch": 32.65, + "learning_rate": 3.368113190070397e-05, + "loss": 2.1053, + "step": 11279500 + }, + { + "epoch": 32.65, + "learning_rate": 3.368040825305669e-05, + "loss": 2.1065, + "step": 11280000 + }, + { + "epoch": 32.65, + "learning_rate": 3.367968460540941e-05, + "loss": 2.1134, + "step": 11280500 + }, + { + "epoch": 32.65, + "learning_rate": 3.3678960957762133e-05, + "loss": 2.0757, + "step": 11281000 + }, + { + "epoch": 32.66, + "learning_rate": 3.367823875741015e-05, + "loss": 2.0862, + "step": 11281500 + }, + { + "epoch": 32.66, + "learning_rate": 3.367751510976288e-05, + "loss": 2.113, + "step": 11282000 + }, + { + "epoch": 32.66, + "learning_rate": 3.36767914621156e-05, + "loss": 2.1142, + "step": 11282500 + }, + { + "epoch": 32.66, + "learning_rate": 3.367606781446832e-05, + "loss": 2.105, + "step": 11283000 + }, + { + "epoch": 32.66, + "learning_rate": 3.367534416682105e-05, + "loss": 2.1001, + "step": 11283500 + }, + { + "epoch": 32.66, + "learning_rate": 3.3674620519173774e-05, + "loss": 2.084, + "step": 11284000 + }, + { + "epoch": 32.66, + "learning_rate": 3.3673896871526496e-05, + "loss": 2.0848, + "step": 11284500 + }, + { + "epoch": 32.67, + "learning_rate": 3.367317467117451e-05, + "loss": 2.0942, + "step": 11285000 + }, + { + "epoch": 32.67, + "learning_rate": 3.3672451023527234e-05, + "loss": 2.0953, + "step": 11285500 + }, + { + "epoch": 32.67, + "learning_rate": 3.3671727375879956e-05, + "loss": 2.0793, + "step": 11286000 + }, + { + "epoch": 32.67, + "learning_rate": 3.367100372823268e-05, + "loss": 2.0688, + "step": 11286500 + }, + { + "epoch": 32.67, + "learning_rate": 3.36702800805854e-05, + "loss": 2.0824, + "step": 11287000 + }, + { + "epoch": 32.67, + "learning_rate": 3.366955643293813e-05, + "loss": 2.0989, + "step": 11287500 + }, + { + "epoch": 32.67, + "learning_rate": 3.366883278529085e-05, + "loss": 2.085, + "step": 11288000 + }, + { + "epoch": 32.68, + "learning_rate": 3.3668109137643574e-05, + "loss": 2.1048, + "step": 11288500 + }, + { + "epoch": 32.68, + "learning_rate": 3.3667385489996296e-05, + "loss": 2.0837, + "step": 11289000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366666328964431e-05, + "loss": 2.1116, + "step": 11289500 + }, + { + "epoch": 32.68, + "learning_rate": 3.3665939641997034e-05, + "loss": 2.0819, + "step": 11290000 + }, + { + "epoch": 32.68, + "learning_rate": 3.3665215994349756e-05, + "loss": 2.0911, + "step": 11290500 + }, + { + "epoch": 32.68, + "learning_rate": 3.3664492346702485e-05, + "loss": 2.0735, + "step": 11291000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366376869905521e-05, + "loss": 2.1221, + "step": 11291500 + }, + { + "epoch": 32.69, + "learning_rate": 3.366304649870323e-05, + "loss": 2.0917, + "step": 11292000 + }, + { + "epoch": 32.69, + "learning_rate": 3.3662324298351245e-05, + "loss": 2.0987, + "step": 11292500 + }, + { + "epoch": 32.69, + "learning_rate": 3.366160065070397e-05, + "loss": 2.0973, + "step": 11293000 + }, + { + "epoch": 32.69, + "learning_rate": 3.366087700305669e-05, + "loss": 2.1181, + "step": 11293500 + }, + { + "epoch": 32.69, + "learning_rate": 3.366015335540941e-05, + "loss": 2.1046, + "step": 11294000 + }, + { + "epoch": 32.69, + "learning_rate": 3.365943115505743e-05, + "loss": 2.1022, + "step": 11294500 + }, + { + "epoch": 32.69, + "learning_rate": 3.3658707507410157e-05, + "loss": 2.0805, + "step": 11295000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365798385976288e-05, + "loss": 2.1143, + "step": 11295500 + }, + { + "epoch": 32.7, + "learning_rate": 3.36572602121156e-05, + "loss": 2.1168, + "step": 11296000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365653656446832e-05, + "loss": 2.0981, + "step": 11296500 + }, + { + "epoch": 32.7, + "learning_rate": 3.365581436411634e-05, + "loss": 2.1093, + "step": 11297000 + }, + { + "epoch": 32.7, + "learning_rate": 3.3655092163764354e-05, + "loss": 2.0904, + "step": 11297500 + }, + { + "epoch": 32.7, + "learning_rate": 3.365436851611708e-05, + "loss": 2.1018, + "step": 11298000 + }, + { + "epoch": 32.7, + "learning_rate": 3.3653644868469806e-05, + "loss": 2.1027, + "step": 11298500 + }, + { + "epoch": 32.71, + "learning_rate": 3.365292122082253e-05, + "loss": 2.0853, + "step": 11299000 + }, + { + "epoch": 32.71, + "learning_rate": 3.365219757317526e-05, + "loss": 2.1083, + "step": 11299500 + }, + { + "epoch": 32.71, + "learning_rate": 3.365147392552798e-05, + "loss": 2.0685, + "step": 11300000 + }, + { + "epoch": 32.71, + "learning_rate": 3.36507502778807e-05, + "loss": 2.1188, + "step": 11300500 + }, + { + "epoch": 32.71, + "learning_rate": 3.3650026630233424e-05, + "loss": 2.084, + "step": 11301000 + }, + { + "epoch": 32.71, + "learning_rate": 3.3649302982586146e-05, + "loss": 2.0781, + "step": 11301500 + }, + { + "epoch": 32.71, + "learning_rate": 3.364857933493887e-05, + "loss": 2.1031, + "step": 11302000 + }, + { + "epoch": 32.72, + "learning_rate": 3.364785568729159e-05, + "loss": 2.1133, + "step": 11302500 + }, + { + "epoch": 32.72, + "learning_rate": 3.364713203964431e-05, + "loss": 2.0922, + "step": 11303000 + }, + { + "epoch": 32.72, + "learning_rate": 3.3646408391997035e-05, + "loss": 2.0739, + "step": 11303500 + }, + { + "epoch": 32.72, + "learning_rate": 3.364568474434976e-05, + "loss": 2.1121, + "step": 11304000 + }, + { + "epoch": 32.72, + "learning_rate": 3.364496109670248e-05, + "loss": 2.0926, + "step": 11304500 + }, + { + "epoch": 32.72, + "learning_rate": 3.36442374490552e-05, + "loss": 2.1013, + "step": 11305000 + }, + { + "epoch": 32.72, + "learning_rate": 3.364351380140793e-05, + "loss": 2.1293, + "step": 11305500 + }, + { + "epoch": 32.73, + "learning_rate": 3.3642793048351246e-05, + "loss": 2.12, + "step": 11306000 + }, + { + "epoch": 32.73, + "learning_rate": 3.364207084799926e-05, + "loss": 2.081, + "step": 11306500 + }, + { + "epoch": 32.73, + "learning_rate": 3.3641347200351984e-05, + "loss": 2.0862, + "step": 11307000 + }, + { + "epoch": 32.73, + "learning_rate": 3.3640623552704706e-05, + "loss": 2.1169, + "step": 11307500 + }, + { + "epoch": 32.73, + "learning_rate": 3.363990135235273e-05, + "loss": 2.0968, + "step": 11308000 + }, + { + "epoch": 32.73, + "learning_rate": 3.363917770470545e-05, + "loss": 2.1026, + "step": 11308500 + }, + { + "epoch": 32.73, + "learning_rate": 3.363845405705817e-05, + "loss": 2.0812, + "step": 11309000 + }, + { + "epoch": 32.74, + "learning_rate": 3.3637730409410895e-05, + "loss": 2.1379, + "step": 11309500 + }, + { + "epoch": 32.74, + "learning_rate": 3.363700676176362e-05, + "loss": 2.0791, + "step": 11310000 + }, + { + "epoch": 32.74, + "learning_rate": 3.363628311411634e-05, + "loss": 2.0931, + "step": 11310500 + }, + { + "epoch": 32.74, + "learning_rate": 3.363555946646906e-05, + "loss": 2.098, + "step": 11311000 + }, + { + "epoch": 32.74, + "learning_rate": 3.363483726611708e-05, + "loss": 2.084, + "step": 11311500 + }, + { + "epoch": 32.74, + "learning_rate": 3.3634113618469806e-05, + "loss": 2.0872, + "step": 11312000 + }, + { + "epoch": 32.75, + "learning_rate": 3.363338997082253e-05, + "loss": 2.0726, + "step": 11312500 + }, + { + "epoch": 32.75, + "learning_rate": 3.363266632317525e-05, + "loss": 2.1144, + "step": 11313000 + }, + { + "epoch": 32.75, + "learning_rate": 3.363194267552798e-05, + "loss": 2.0936, + "step": 11313500 + }, + { + "epoch": 32.75, + "learning_rate": 3.3631220475175995e-05, + "loss": 2.089, + "step": 11314000 + }, + { + "epoch": 32.75, + "learning_rate": 3.363049682752872e-05, + "loss": 2.0971, + "step": 11314500 + }, + { + "epoch": 32.75, + "learning_rate": 3.362977317988144e-05, + "loss": 2.0992, + "step": 11315000 + }, + { + "epoch": 32.75, + "learning_rate": 3.3629052426824756e-05, + "loss": 2.1014, + "step": 11315500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362832877917748e-05, + "loss": 2.0929, + "step": 11316000 + }, + { + "epoch": 32.76, + "learning_rate": 3.36276051315302e-05, + "loss": 2.1029, + "step": 11316500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362688148388292e-05, + "loss": 2.0849, + "step": 11317000 + }, + { + "epoch": 32.76, + "learning_rate": 3.3626157836235645e-05, + "loss": 2.102, + "step": 11317500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362543418858837e-05, + "loss": 2.0979, + "step": 11318000 + }, + { + "epoch": 32.76, + "learning_rate": 3.362471054094109e-05, + "loss": 2.089, + "step": 11318500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362398689329381e-05, + "loss": 2.0959, + "step": 11319000 + }, + { + "epoch": 32.77, + "learning_rate": 3.3623263245646533e-05, + "loss": 2.0978, + "step": 11319500 + }, + { + "epoch": 32.77, + "learning_rate": 3.3622539597999256e-05, + "loss": 2.0868, + "step": 11320000 + }, + { + "epoch": 32.77, + "learning_rate": 3.3621815950351985e-05, + "loss": 2.1081, + "step": 11320500 + }, + { + "epoch": 32.77, + "learning_rate": 3.362109230270471e-05, + "loss": 2.0985, + "step": 11321000 + }, + { + "epoch": 32.77, + "learning_rate": 3.3620368655057436e-05, + "loss": 2.095, + "step": 11321500 + }, + { + "epoch": 32.77, + "learning_rate": 3.361964500741016e-05, + "loss": 2.1272, + "step": 11322000 + }, + { + "epoch": 32.77, + "learning_rate": 3.361892135976288e-05, + "loss": 2.0827, + "step": 11322500 + }, + { + "epoch": 32.78, + "learning_rate": 3.36181977121156e-05, + "loss": 2.079, + "step": 11323000 + }, + { + "epoch": 32.78, + "learning_rate": 3.361747551176362e-05, + "loss": 2.1054, + "step": 11323500 + }, + { + "epoch": 32.78, + "learning_rate": 3.361675186411634e-05, + "loss": 2.0962, + "step": 11324000 + }, + { + "epoch": 32.78, + "learning_rate": 3.361602821646906e-05, + "loss": 2.1041, + "step": 11324500 + }, + { + "epoch": 32.78, + "learning_rate": 3.3615304568821785e-05, + "loss": 2.0852, + "step": 11325000 + }, + { + "epoch": 32.78, + "learning_rate": 3.361458092117451e-05, + "loss": 2.106, + "step": 11325500 + }, + { + "epoch": 32.78, + "learning_rate": 3.3613857273527236e-05, + "loss": 2.0935, + "step": 11326000 + }, + { + "epoch": 32.79, + "learning_rate": 3.361313362587996e-05, + "loss": 2.076, + "step": 11326500 + }, + { + "epoch": 32.79, + "learning_rate": 3.361240997823268e-05, + "loss": 2.0947, + "step": 11327000 + }, + { + "epoch": 32.79, + "learning_rate": 3.36116863305854e-05, + "loss": 2.0925, + "step": 11327500 + }, + { + "epoch": 32.79, + "learning_rate": 3.3610962682938125e-05, + "loss": 2.0888, + "step": 11328000 + }, + { + "epoch": 32.79, + "learning_rate": 3.3610239035290854e-05, + "loss": 2.0735, + "step": 11328500 + }, + { + "epoch": 32.79, + "learning_rate": 3.360951683493887e-05, + "loss": 2.0968, + "step": 11329000 + }, + { + "epoch": 32.79, + "learning_rate": 3.360879318729159e-05, + "loss": 2.0888, + "step": 11329500 + }, + { + "epoch": 32.8, + "learning_rate": 3.3608069539644314e-05, + "loss": 2.0904, + "step": 11330000 + }, + { + "epoch": 32.8, + "learning_rate": 3.3607345891997036e-05, + "loss": 2.1117, + "step": 11330500 + }, + { + "epoch": 32.8, + "learning_rate": 3.360662369164506e-05, + "loss": 2.0867, + "step": 11331000 + }, + { + "epoch": 32.8, + "learning_rate": 3.360590004399778e-05, + "loss": 2.1024, + "step": 11331500 + }, + { + "epoch": 32.8, + "learning_rate": 3.36051763963505e-05, + "loss": 2.0573, + "step": 11332000 + }, + { + "epoch": 32.8, + "learning_rate": 3.3604452748703225e-05, + "loss": 2.0718, + "step": 11332500 + }, + { + "epoch": 32.8, + "learning_rate": 3.360373054835124e-05, + "loss": 2.0827, + "step": 11333000 + }, + { + "epoch": 32.81, + "learning_rate": 3.360300690070396e-05, + "loss": 2.062, + "step": 11333500 + }, + { + "epoch": 32.81, + "learning_rate": 3.3602283253056685e-05, + "loss": 2.1088, + "step": 11334000 + }, + { + "epoch": 32.81, + "learning_rate": 3.360155960540941e-05, + "loss": 2.0983, + "step": 11334500 + }, + { + "epoch": 32.81, + "learning_rate": 3.360083595776214e-05, + "loss": 2.1146, + "step": 11335000 + }, + { + "epoch": 32.81, + "learning_rate": 3.360011231011486e-05, + "loss": 2.0939, + "step": 11335500 + }, + { + "epoch": 32.81, + "learning_rate": 3.359938866246759e-05, + "loss": 2.0852, + "step": 11336000 + }, + { + "epoch": 32.81, + "learning_rate": 3.359866501482031e-05, + "loss": 2.1146, + "step": 11336500 + }, + { + "epoch": 32.82, + "learning_rate": 3.359794136717303e-05, + "loss": 2.0949, + "step": 11337000 + }, + { + "epoch": 32.82, + "learning_rate": 3.3597217719525755e-05, + "loss": 2.0887, + "step": 11337500 + }, + { + "epoch": 32.82, + "learning_rate": 3.359649407187848e-05, + "loss": 2.0961, + "step": 11338000 + }, + { + "epoch": 32.82, + "learning_rate": 3.359577187152649e-05, + "loss": 2.1032, + "step": 11338500 + }, + { + "epoch": 32.82, + "learning_rate": 3.3595048223879215e-05, + "loss": 2.1102, + "step": 11339000 + }, + { + "epoch": 32.82, + "learning_rate": 3.359432457623194e-05, + "loss": 2.1022, + "step": 11339500 + }, + { + "epoch": 32.82, + "learning_rate": 3.359360092858466e-05, + "loss": 2.1093, + "step": 11340000 + }, + { + "epoch": 32.83, + "learning_rate": 3.359287728093739e-05, + "loss": 2.0867, + "step": 11340500 + }, + { + "epoch": 32.83, + "learning_rate": 3.359215363329011e-05, + "loss": 2.0863, + "step": 11341000 + }, + { + "epoch": 32.83, + "learning_rate": 3.3591431432938126e-05, + "loss": 2.1054, + "step": 11341500 + }, + { + "epoch": 32.83, + "learning_rate": 3.359070778529085e-05, + "loss": 2.0876, + "step": 11342000 + }, + { + "epoch": 32.83, + "learning_rate": 3.358998413764357e-05, + "loss": 2.0917, + "step": 11342500 + }, + { + "epoch": 32.83, + "learning_rate": 3.35892604899963e-05, + "loss": 2.0925, + "step": 11343000 + }, + { + "epoch": 32.83, + "learning_rate": 3.358853684234902e-05, + "loss": 2.109, + "step": 11343500 + }, + { + "epoch": 32.84, + "learning_rate": 3.3587813194701744e-05, + "loss": 2.1015, + "step": 11344000 + }, + { + "epoch": 32.84, + "learning_rate": 3.3587089547054466e-05, + "loss": 2.1005, + "step": 11344500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358636589940719e-05, + "loss": 2.0668, + "step": 11345000 + }, + { + "epoch": 32.84, + "learning_rate": 3.358564369905521e-05, + "loss": 2.1128, + "step": 11345500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358492005140793e-05, + "loss": 2.1104, + "step": 11346000 + }, + { + "epoch": 32.84, + "learning_rate": 3.3584196403760655e-05, + "loss": 2.0704, + "step": 11346500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358347275611338e-05, + "loss": 2.1089, + "step": 11347000 + }, + { + "epoch": 32.85, + "learning_rate": 3.35827491084661e-05, + "loss": 2.1091, + "step": 11347500 + }, + { + "epoch": 32.85, + "learning_rate": 3.358202546081882e-05, + "loss": 2.0904, + "step": 11348000 + }, + { + "epoch": 32.85, + "learning_rate": 3.3581301813171544e-05, + "loss": 2.1002, + "step": 11348500 + }, + { + "epoch": 32.85, + "learning_rate": 3.3580578165524266e-05, + "loss": 2.0919, + "step": 11349000 + }, + { + "epoch": 32.85, + "learning_rate": 3.357985451787699e-05, + "loss": 2.1037, + "step": 11349500 + }, + { + "epoch": 32.85, + "learning_rate": 3.357913087022971e-05, + "loss": 2.0985, + "step": 11350000 + }, + { + "epoch": 32.86, + "learning_rate": 3.357840866987774e-05, + "loss": 2.097, + "step": 11350500 + }, + { + "epoch": 32.86, + "learning_rate": 3.3577686469525755e-05, + "loss": 2.1152, + "step": 11351000 + }, + { + "epoch": 32.86, + "learning_rate": 3.357696282187848e-05, + "loss": 2.1039, + "step": 11351500 + }, + { + "epoch": 32.86, + "learning_rate": 3.35762391742312e-05, + "loss": 2.0926, + "step": 11352000 + }, + { + "epoch": 32.86, + "learning_rate": 3.357551552658392e-05, + "loss": 2.0684, + "step": 11352500 + }, + { + "epoch": 32.86, + "learning_rate": 3.3574791878936644e-05, + "loss": 2.0872, + "step": 11353000 + }, + { + "epoch": 32.86, + "learning_rate": 3.3574068231289367e-05, + "loss": 2.0898, + "step": 11353500 + }, + { + "epoch": 32.87, + "learning_rate": 3.357334458364209e-05, + "loss": 2.0997, + "step": 11354000 + }, + { + "epoch": 32.87, + "learning_rate": 3.357262093599481e-05, + "loss": 2.1043, + "step": 11354500 + }, + { + "epoch": 32.87, + "learning_rate": 3.357189728834754e-05, + "loss": 2.1159, + "step": 11355000 + }, + { + "epoch": 32.87, + "learning_rate": 3.357117364070026e-05, + "loss": 2.0984, + "step": 11355500 + }, + { + "epoch": 32.87, + "learning_rate": 3.3570449993052984e-05, + "loss": 2.1179, + "step": 11356000 + }, + { + "epoch": 32.87, + "learning_rate": 3.356972634540571e-05, + "loss": 2.1214, + "step": 11356500 + }, + { + "epoch": 32.87, + "learning_rate": 3.356900414505372e-05, + "loss": 2.0721, + "step": 11357000 + }, + { + "epoch": 32.88, + "learning_rate": 3.3568280497406445e-05, + "loss": 2.1199, + "step": 11357500 + }, + { + "epoch": 32.88, + "learning_rate": 3.3567556849759174e-05, + "loss": 2.0843, + "step": 11358000 + }, + { + "epoch": 32.88, + "learning_rate": 3.3566833202111896e-05, + "loss": 2.0991, + "step": 11358500 + }, + { + "epoch": 32.88, + "learning_rate": 3.356610955446462e-05, + "loss": 2.1026, + "step": 11359000 + }, + { + "epoch": 32.88, + "learning_rate": 3.356538590681734e-05, + "loss": 2.0788, + "step": 11359500 + }, + { + "epoch": 32.88, + "learning_rate": 3.356466370646536e-05, + "loss": 2.1061, + "step": 11360000 + }, + { + "epoch": 32.88, + "learning_rate": 3.3563940058818085e-05, + "loss": 2.0739, + "step": 11360500 + }, + { + "epoch": 32.89, + "learning_rate": 3.356321641117081e-05, + "loss": 2.0938, + "step": 11361000 + }, + { + "epoch": 32.89, + "learning_rate": 3.356249276352353e-05, + "loss": 2.1041, + "step": 11361500 + }, + { + "epoch": 32.89, + "learning_rate": 3.356177201046684e-05, + "loss": 2.096, + "step": 11362000 + }, + { + "epoch": 32.89, + "learning_rate": 3.356104836281957e-05, + "loss": 2.0736, + "step": 11362500 + }, + { + "epoch": 32.89, + "learning_rate": 3.356032471517229e-05, + "loss": 2.0807, + "step": 11363000 + }, + { + "epoch": 32.89, + "learning_rate": 3.355960106752501e-05, + "loss": 2.087, + "step": 11363500 + }, + { + "epoch": 32.89, + "learning_rate": 3.355887886717303e-05, + "loss": 2.1138, + "step": 11364000 + }, + { + "epoch": 32.9, + "learning_rate": 3.355815521952575e-05, + "loss": 2.1079, + "step": 11364500 + }, + { + "epoch": 32.9, + "learning_rate": 3.355743157187847e-05, + "loss": 2.083, + "step": 11365000 + }, + { + "epoch": 32.9, + "learning_rate": 3.35567079242312e-05, + "loss": 2.1071, + "step": 11365500 + }, + { + "epoch": 32.9, + "learning_rate": 3.3555985723879216e-05, + "loss": 2.1076, + "step": 11366000 + }, + { + "epoch": 32.9, + "learning_rate": 3.355526207623194e-05, + "loss": 2.084, + "step": 11366500 + }, + { + "epoch": 32.9, + "learning_rate": 3.355453842858467e-05, + "loss": 2.0993, + "step": 11367000 + }, + { + "epoch": 32.9, + "learning_rate": 3.355381478093739e-05, + "loss": 2.1086, + "step": 11367500 + }, + { + "epoch": 32.91, + "learning_rate": 3.355309113329011e-05, + "loss": 2.0906, + "step": 11368000 + }, + { + "epoch": 32.91, + "learning_rate": 3.3552367485642834e-05, + "loss": 2.1024, + "step": 11368500 + }, + { + "epoch": 32.91, + "learning_rate": 3.3551643837995556e-05, + "loss": 2.0882, + "step": 11369000 + }, + { + "epoch": 32.91, + "learning_rate": 3.355092019034828e-05, + "loss": 2.0854, + "step": 11369500 + }, + { + "epoch": 32.91, + "learning_rate": 3.3550197989996294e-05, + "loss": 2.0926, + "step": 11370000 + }, + { + "epoch": 32.91, + "learning_rate": 3.3549474342349016e-05, + "loss": 2.1039, + "step": 11370500 + }, + { + "epoch": 32.91, + "learning_rate": 3.354875069470174e-05, + "loss": 2.1137, + "step": 11371000 + }, + { + "epoch": 32.92, + "learning_rate": 3.354802849434976e-05, + "loss": 2.0968, + "step": 11371500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354730484670248e-05, + "loss": 2.0875, + "step": 11372000 + }, + { + "epoch": 32.92, + "learning_rate": 3.3546581199055205e-05, + "loss": 2.0789, + "step": 11372500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354585899870322e-05, + "loss": 2.1142, + "step": 11373000 + }, + { + "epoch": 32.92, + "learning_rate": 3.354513535105595e-05, + "loss": 2.0667, + "step": 11373500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354441170340867e-05, + "loss": 2.1118, + "step": 11374000 + }, + { + "epoch": 32.92, + "learning_rate": 3.3543688055761394e-05, + "loss": 2.0949, + "step": 11374500 + }, + { + "epoch": 32.93, + "learning_rate": 3.354296440811412e-05, + "loss": 2.1049, + "step": 11375000 + }, + { + "epoch": 32.93, + "learning_rate": 3.354224076046684e-05, + "loss": 2.1126, + "step": 11375500 + }, + { + "epoch": 32.93, + "learning_rate": 3.354151711281957e-05, + "loss": 2.1137, + "step": 11376000 + }, + { + "epoch": 32.93, + "learning_rate": 3.354079346517229e-05, + "loss": 2.0933, + "step": 11376500 + }, + { + "epoch": 32.93, + "learning_rate": 3.354006981752501e-05, + "loss": 2.0885, + "step": 11377000 + }, + { + "epoch": 32.93, + "learning_rate": 3.3539346169877735e-05, + "loss": 2.0898, + "step": 11377500 + }, + { + "epoch": 32.93, + "learning_rate": 3.353862252223046e-05, + "loss": 2.1123, + "step": 11378000 + }, + { + "epoch": 32.94, + "learning_rate": 3.353789887458318e-05, + "loss": 2.0843, + "step": 11378500 + }, + { + "epoch": 32.94, + "learning_rate": 3.35371752269359e-05, + "loss": 2.0734, + "step": 11379000 + }, + { + "epoch": 32.94, + "learning_rate": 3.3536451579288624e-05, + "loss": 2.1219, + "step": 11379500 + }, + { + "epoch": 32.94, + "learning_rate": 3.3535727931641346e-05, + "loss": 2.1021, + "step": 11380000 + }, + { + "epoch": 32.94, + "learning_rate": 3.3535004283994075e-05, + "loss": 2.1185, + "step": 11380500 + }, + { + "epoch": 32.94, + "learning_rate": 3.353428208364209e-05, + "loss": 2.0792, + "step": 11381000 + }, + { + "epoch": 32.94, + "learning_rate": 3.353355988329011e-05, + "loss": 2.0908, + "step": 11381500 + }, + { + "epoch": 32.95, + "learning_rate": 3.3532836235642835e-05, + "loss": 2.0868, + "step": 11382000 + }, + { + "epoch": 32.95, + "learning_rate": 3.353211403529085e-05, + "loss": 2.0985, + "step": 11382500 + }, + { + "epoch": 32.95, + "learning_rate": 3.353139038764357e-05, + "loss": 2.0991, + "step": 11383000 + }, + { + "epoch": 32.95, + "learning_rate": 3.3530666739996295e-05, + "loss": 2.1102, + "step": 11383500 + }, + { + "epoch": 32.95, + "learning_rate": 3.352994309234902e-05, + "loss": 2.0819, + "step": 11384000 + }, + { + "epoch": 32.95, + "learning_rate": 3.352921944470174e-05, + "loss": 2.1002, + "step": 11384500 + }, + { + "epoch": 32.95, + "learning_rate": 3.352849579705447e-05, + "loss": 2.1269, + "step": 11385000 + }, + { + "epoch": 32.96, + "learning_rate": 3.3527773596702484e-05, + "loss": 2.0805, + "step": 11385500 + }, + { + "epoch": 32.96, + "learning_rate": 3.3527049949055206e-05, + "loss": 2.0919, + "step": 11386000 + }, + { + "epoch": 32.96, + "learning_rate": 3.352632630140793e-05, + "loss": 2.0928, + "step": 11386500 + }, + { + "epoch": 32.96, + "learning_rate": 3.352560265376065e-05, + "loss": 2.0823, + "step": 11387000 + }, + { + "epoch": 32.96, + "learning_rate": 3.352487900611337e-05, + "loss": 2.0988, + "step": 11387500 + }, + { + "epoch": 32.96, + "learning_rate": 3.35241553584661e-05, + "loss": 2.0692, + "step": 11388000 + }, + { + "epoch": 32.97, + "learning_rate": 3.3523431710818824e-05, + "loss": 2.0975, + "step": 11388500 + }, + { + "epoch": 32.97, + "learning_rate": 3.3522709510466847e-05, + "loss": 2.0655, + "step": 11389000 + }, + { + "epoch": 32.97, + "learning_rate": 3.352198731011486e-05, + "loss": 2.0752, + "step": 11389500 + }, + { + "epoch": 32.97, + "learning_rate": 3.3521263662467584e-05, + "loss": 2.1022, + "step": 11390000 + }, + { + "epoch": 32.97, + "learning_rate": 3.3520540014820307e-05, + "loss": 2.0926, + "step": 11390500 + }, + { + "epoch": 32.97, + "learning_rate": 3.351981636717303e-05, + "loss": 2.0916, + "step": 11391000 + }, + { + "epoch": 32.97, + "learning_rate": 3.3519094166821044e-05, + "loss": 2.0954, + "step": 11391500 + }, + { + "epoch": 32.98, + "learning_rate": 3.3518370519173767e-05, + "loss": 2.0929, + "step": 11392000 + }, + { + "epoch": 32.98, + "learning_rate": 3.3517646871526496e-05, + "loss": 2.1118, + "step": 11392500 + }, + { + "epoch": 32.98, + "learning_rate": 3.351692322387922e-05, + "loss": 2.0894, + "step": 11393000 + }, + { + "epoch": 32.98, + "learning_rate": 3.351619957623194e-05, + "loss": 2.0921, + "step": 11393500 + }, + { + "epoch": 32.98, + "learning_rate": 3.351547592858466e-05, + "loss": 2.0946, + "step": 11394000 + }, + { + "epoch": 32.98, + "learning_rate": 3.351475372823268e-05, + "loss": 2.1136, + "step": 11394500 + }, + { + "epoch": 32.98, + "learning_rate": 3.351403152788069e-05, + "loss": 2.1199, + "step": 11395000 + }, + { + "epoch": 32.99, + "learning_rate": 3.3513307880233416e-05, + "loss": 2.0861, + "step": 11395500 + }, + { + "epoch": 32.99, + "learning_rate": 3.3512584232586145e-05, + "loss": 2.1246, + "step": 11396000 + }, + { + "epoch": 32.99, + "learning_rate": 3.351186203223417e-05, + "loss": 2.0875, + "step": 11396500 + }, + { + "epoch": 32.99, + "learning_rate": 3.351113838458689e-05, + "loss": 2.1086, + "step": 11397000 + }, + { + "epoch": 32.99, + "learning_rate": 3.351041473693961e-05, + "loss": 2.0919, + "step": 11397500 + }, + { + "epoch": 32.99, + "learning_rate": 3.3509691089292334e-05, + "loss": 2.0804, + "step": 11398000 + }, + { + "epoch": 32.99, + "learning_rate": 3.3508967441645056e-05, + "loss": 2.1205, + "step": 11398500 + }, + { + "epoch": 33.0, + "learning_rate": 3.350824379399778e-05, + "loss": 2.0986, + "step": 11399000 + }, + { + "epoch": 33.0, + "learning_rate": 3.35075201463505e-05, + "loss": 2.093, + "step": 11399500 + }, + { + "epoch": 33.0, + "learning_rate": 3.350679649870322e-05, + "loss": 2.0906, + "step": 11400000 + }, + { + "epoch": 33.0, + "learning_rate": 3.3506072851055945e-05, + "loss": 2.1081, + "step": 11400500 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.6677652966372922, + "eval_accuracy_mlm": 0.6326864891368785, + "eval_accuracy_nsp": 0.8559715481248252, + "eval_loss": 2.179738759994507, + "eval_runtime": 331.543, + "eval_samples_per_second": 1316.227, + "eval_steps_per_second": 54.844, + "step": 11400576 + }, + { + "epoch": 33.0, + "learning_rate": 3.350534920340867e-05, + "loss": 2.0683, + "step": 11401000 + }, + { + "epoch": 33.0, + "learning_rate": 3.3504625555761396e-05, + "loss": 2.0833, + "step": 11401500 + }, + { + "epoch": 33.0, + "learning_rate": 3.350390190811412e-05, + "loss": 2.0711, + "step": 11402000 + }, + { + "epoch": 33.01, + "learning_rate": 3.350317826046684e-05, + "loss": 2.1027, + "step": 11402500 + }, + { + "epoch": 33.01, + "learning_rate": 3.350245461281957e-05, + "loss": 2.0772, + "step": 11403000 + }, + { + "epoch": 33.01, + "learning_rate": 3.350173096517229e-05, + "loss": 2.0842, + "step": 11403500 + }, + { + "epoch": 33.01, + "learning_rate": 3.350100876482031e-05, + "loss": 2.0676, + "step": 11404000 + }, + { + "epoch": 33.01, + "learning_rate": 3.350028511717303e-05, + "loss": 2.0813, + "step": 11404500 + }, + { + "epoch": 33.01, + "learning_rate": 3.349956146952575e-05, + "loss": 2.0817, + "step": 11405000 + }, + { + "epoch": 33.01, + "learning_rate": 3.3498837821878474e-05, + "loss": 2.0761, + "step": 11405500 + }, + { + "epoch": 33.02, + "learning_rate": 3.3498114174231196e-05, + "loss": 2.0555, + "step": 11406000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349739052658392e-05, + "loss": 2.0629, + "step": 11406500 + }, + { + "epoch": 33.02, + "learning_rate": 3.349666687893665e-05, + "loss": 2.113, + "step": 11407000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349594323128937e-05, + "loss": 2.0806, + "step": 11407500 + }, + { + "epoch": 33.02, + "learning_rate": 3.3495221030937385e-05, + "loss": 2.0568, + "step": 11408000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349449738329011e-05, + "loss": 2.0908, + "step": 11408500 + }, + { + "epoch": 33.02, + "learning_rate": 3.349377373564283e-05, + "loss": 2.0872, + "step": 11409000 + }, + { + "epoch": 33.03, + "learning_rate": 3.349305008799555e-05, + "loss": 2.0867, + "step": 11409500 + }, + { + "epoch": 33.03, + "learning_rate": 3.349232788764357e-05, + "loss": 2.0412, + "step": 11410000 + }, + { + "epoch": 33.03, + "learning_rate": 3.3491604239996297e-05, + "loss": 2.0786, + "step": 11410500 + }, + { + "epoch": 33.03, + "learning_rate": 3.349088059234902e-05, + "loss": 2.0591, + "step": 11411000 + }, + { + "epoch": 33.03, + "learning_rate": 3.349015694470175e-05, + "loss": 2.1039, + "step": 11411500 + }, + { + "epoch": 33.03, + "learning_rate": 3.348943329705447e-05, + "loss": 2.0768, + "step": 11412000 + }, + { + "epoch": 33.03, + "learning_rate": 3.348870964940719e-05, + "loss": 2.092, + "step": 11412500 + }, + { + "epoch": 33.04, + "learning_rate": 3.3487986001759915e-05, + "loss": 2.0586, + "step": 11413000 + }, + { + "epoch": 33.04, + "learning_rate": 3.348726235411264e-05, + "loss": 2.0882, + "step": 11413500 + }, + { + "epoch": 33.04, + "learning_rate": 3.348653870646536e-05, + "loss": 2.0505, + "step": 11414000 + }, + { + "epoch": 33.04, + "learning_rate": 3.348581505881808e-05, + "loss": 2.054, + "step": 11414500 + }, + { + "epoch": 33.04, + "learning_rate": 3.3485091411170803e-05, + "loss": 2.1065, + "step": 11415000 + }, + { + "epoch": 33.04, + "learning_rate": 3.348436921081882e-05, + "loss": 2.0608, + "step": 11415500 + }, + { + "epoch": 33.04, + "learning_rate": 3.348364556317155e-05, + "loss": 2.0652, + "step": 11416000 + }, + { + "epoch": 33.05, + "learning_rate": 3.348292191552427e-05, + "loss": 2.0784, + "step": 11416500 + }, + { + "epoch": 33.05, + "learning_rate": 3.348219826787699e-05, + "loss": 2.0764, + "step": 11417000 + }, + { + "epoch": 33.05, + "learning_rate": 3.348147606752501e-05, + "loss": 2.1013, + "step": 11417500 + }, + { + "epoch": 33.05, + "learning_rate": 3.348075241987774e-05, + "loss": 2.0917, + "step": 11418000 + }, + { + "epoch": 33.05, + "learning_rate": 3.348003021952575e-05, + "loss": 2.0621, + "step": 11418500 + }, + { + "epoch": 33.05, + "learning_rate": 3.3479306571878475e-05, + "loss": 2.1033, + "step": 11419000 + }, + { + "epoch": 33.05, + "learning_rate": 3.34785829242312e-05, + "loss": 2.0716, + "step": 11419500 + }, + { + "epoch": 33.06, + "learning_rate": 3.3477859276583926e-05, + "loss": 2.0767, + "step": 11420000 + }, + { + "epoch": 33.06, + "learning_rate": 3.347713562893665e-05, + "loss": 2.0628, + "step": 11420500 + }, + { + "epoch": 33.06, + "learning_rate": 3.3476413428584664e-05, + "loss": 2.1032, + "step": 11421000 + }, + { + "epoch": 33.06, + "learning_rate": 3.3475689780937386e-05, + "loss": 2.0615, + "step": 11421500 + }, + { + "epoch": 33.06, + "learning_rate": 3.347496613329011e-05, + "loss": 2.0781, + "step": 11422000 + }, + { + "epoch": 33.06, + "learning_rate": 3.347424248564283e-05, + "loss": 2.0958, + "step": 11422500 + }, + { + "epoch": 33.06, + "learning_rate": 3.347351883799555e-05, + "loss": 2.061, + "step": 11423000 + }, + { + "epoch": 33.07, + "learning_rate": 3.3472795190348275e-05, + "loss": 2.0641, + "step": 11423500 + }, + { + "epoch": 33.07, + "learning_rate": 3.34720729899963e-05, + "loss": 2.0751, + "step": 11424000 + }, + { + "epoch": 33.07, + "learning_rate": 3.347135078964431e-05, + "loss": 2.0642, + "step": 11424500 + }, + { + "epoch": 33.07, + "learning_rate": 3.3470627141997035e-05, + "loss": 2.0978, + "step": 11425000 + }, + { + "epoch": 33.07, + "learning_rate": 3.346990349434976e-05, + "loss": 2.078, + "step": 11425500 + }, + { + "epoch": 33.07, + "learning_rate": 3.3469179846702486e-05, + "loss": 2.0577, + "step": 11426000 + }, + { + "epoch": 33.08, + "learning_rate": 3.346845619905521e-05, + "loss": 2.0678, + "step": 11426500 + }, + { + "epoch": 33.08, + "learning_rate": 3.346773255140793e-05, + "loss": 2.0769, + "step": 11427000 + }, + { + "epoch": 33.08, + "learning_rate": 3.346700890376065e-05, + "loss": 2.0774, + "step": 11427500 + }, + { + "epoch": 33.08, + "learning_rate": 3.3466285256113375e-05, + "loss": 2.0919, + "step": 11428000 + }, + { + "epoch": 33.08, + "learning_rate": 3.34655616084661e-05, + "loss": 2.0641, + "step": 11428500 + }, + { + "epoch": 33.08, + "learning_rate": 3.346483940811412e-05, + "loss": 2.0819, + "step": 11429000 + }, + { + "epoch": 33.08, + "learning_rate": 3.346411576046684e-05, + "loss": 2.0918, + "step": 11429500 + }, + { + "epoch": 33.09, + "learning_rate": 3.3463392112819564e-05, + "loss": 2.0683, + "step": 11430000 + }, + { + "epoch": 33.09, + "learning_rate": 3.3462668465172287e-05, + "loss": 2.0706, + "step": 11430500 + }, + { + "epoch": 33.09, + "learning_rate": 3.346194481752501e-05, + "loss": 2.0698, + "step": 11431000 + }, + { + "epoch": 33.09, + "learning_rate": 3.346122116987773e-05, + "loss": 2.0849, + "step": 11431500 + }, + { + "epoch": 33.09, + "learning_rate": 3.346049752223045e-05, + "loss": 2.0735, + "step": 11432000 + }, + { + "epoch": 33.09, + "learning_rate": 3.3459773874583176e-05, + "loss": 2.1006, + "step": 11432500 + }, + { + "epoch": 33.09, + "learning_rate": 3.3459050226935905e-05, + "loss": 2.0963, + "step": 11433000 + }, + { + "epoch": 33.1, + "learning_rate": 3.345832802658393e-05, + "loss": 2.072, + "step": 11433500 + }, + { + "epoch": 33.1, + "learning_rate": 3.345760437893665e-05, + "loss": 2.0803, + "step": 11434000 + }, + { + "epoch": 33.1, + "learning_rate": 3.345688073128937e-05, + "loss": 2.0648, + "step": 11434500 + }, + { + "epoch": 33.1, + "learning_rate": 3.345615853093739e-05, + "loss": 2.0695, + "step": 11435000 + }, + { + "epoch": 33.1, + "learning_rate": 3.34554363305854e-05, + "loss": 2.0677, + "step": 11435500 + }, + { + "epoch": 33.1, + "learning_rate": 3.3454712682938125e-05, + "loss": 2.0911, + "step": 11436000 + }, + { + "epoch": 33.1, + "learning_rate": 3.345398903529085e-05, + "loss": 2.0878, + "step": 11436500 + }, + { + "epoch": 33.11, + "learning_rate": 3.3453265387643576e-05, + "loss": 2.0665, + "step": 11437000 + }, + { + "epoch": 33.11, + "learning_rate": 3.34525417399963e-05, + "loss": 2.0942, + "step": 11437500 + }, + { + "epoch": 33.11, + "learning_rate": 3.345181809234902e-05, + "loss": 2.0719, + "step": 11438000 + }, + { + "epoch": 33.11, + "learning_rate": 3.345109444470174e-05, + "loss": 2.088, + "step": 11438500 + }, + { + "epoch": 33.11, + "learning_rate": 3.3450370797054465e-05, + "loss": 2.0724, + "step": 11439000 + }, + { + "epoch": 33.11, + "learning_rate": 3.344964714940719e-05, + "loss": 2.0826, + "step": 11439500 + }, + { + "epoch": 33.11, + "learning_rate": 3.344892350175991e-05, + "loss": 2.0807, + "step": 11440000 + }, + { + "epoch": 33.12, + "learning_rate": 3.344819985411264e-05, + "loss": 2.0644, + "step": 11440500 + }, + { + "epoch": 33.12, + "learning_rate": 3.344747620646536e-05, + "loss": 2.0844, + "step": 11441000 + }, + { + "epoch": 33.12, + "learning_rate": 3.344675255881808e-05, + "loss": 2.0972, + "step": 11441500 + }, + { + "epoch": 33.12, + "learning_rate": 3.3446028911170805e-05, + "loss": 2.0783, + "step": 11442000 + }, + { + "epoch": 33.12, + "learning_rate": 3.344530526352353e-05, + "loss": 2.0699, + "step": 11442500 + }, + { + "epoch": 33.12, + "learning_rate": 3.344458161587625e-05, + "loss": 2.0467, + "step": 11443000 + }, + { + "epoch": 33.12, + "learning_rate": 3.344385941552427e-05, + "loss": 2.055, + "step": 11443500 + }, + { + "epoch": 33.13, + "learning_rate": 3.344313721517229e-05, + "loss": 2.0756, + "step": 11444000 + }, + { + "epoch": 33.13, + "learning_rate": 3.344241356752501e-05, + "loss": 2.1077, + "step": 11444500 + }, + { + "epoch": 33.13, + "learning_rate": 3.344168991987773e-05, + "loss": 2.0813, + "step": 11445000 + }, + { + "epoch": 33.13, + "learning_rate": 3.3440966272230454e-05, + "loss": 2.0583, + "step": 11445500 + }, + { + "epoch": 33.13, + "learning_rate": 3.3440242624583176e-05, + "loss": 2.0638, + "step": 11446000 + }, + { + "epoch": 33.13, + "learning_rate": 3.34395189769359e-05, + "loss": 2.0927, + "step": 11446500 + }, + { + "epoch": 33.13, + "learning_rate": 3.343879532928863e-05, + "loss": 2.06, + "step": 11447000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343807168164135e-05, + "loss": 2.07, + "step": 11447500 + }, + { + "epoch": 33.14, + "learning_rate": 3.343734948128937e-05, + "loss": 2.0928, + "step": 11448000 + }, + { + "epoch": 33.14, + "learning_rate": 3.3436625833642094e-05, + "loss": 2.0777, + "step": 11448500 + }, + { + "epoch": 33.14, + "learning_rate": 3.3435902185994817e-05, + "loss": 2.0747, + "step": 11449000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343517998564283e-05, + "loss": 2.096, + "step": 11449500 + }, + { + "epoch": 33.14, + "learning_rate": 3.3434456337995554e-05, + "loss": 2.0788, + "step": 11450000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343373269034828e-05, + "loss": 2.0896, + "step": 11450500 + }, + { + "epoch": 33.15, + "learning_rate": 3.3433009042701006e-05, + "loss": 2.0989, + "step": 11451000 + }, + { + "epoch": 33.15, + "learning_rate": 3.343228684234902e-05, + "loss": 2.0513, + "step": 11451500 + }, + { + "epoch": 33.15, + "learning_rate": 3.343156464199704e-05, + "loss": 2.0647, + "step": 11452000 + }, + { + "epoch": 33.15, + "learning_rate": 3.343084099434976e-05, + "loss": 2.0896, + "step": 11452500 + }, + { + "epoch": 33.15, + "learning_rate": 3.343011734670248e-05, + "loss": 2.0622, + "step": 11453000 + }, + { + "epoch": 33.15, + "learning_rate": 3.3429393699055203e-05, + "loss": 2.0936, + "step": 11453500 + }, + { + "epoch": 33.15, + "learning_rate": 3.3428670051407926e-05, + "loss": 2.123, + "step": 11454000 + }, + { + "epoch": 33.16, + "learning_rate": 3.3427946403760655e-05, + "loss": 2.093, + "step": 11454500 + }, + { + "epoch": 33.16, + "learning_rate": 3.342722275611338e-05, + "loss": 2.0935, + "step": 11455000 + }, + { + "epoch": 33.16, + "learning_rate": 3.3426499108466106e-05, + "loss": 2.0646, + "step": 11455500 + }, + { + "epoch": 33.16, + "learning_rate": 3.342577690811412e-05, + "loss": 2.0842, + "step": 11456000 + }, + { + "epoch": 33.16, + "learning_rate": 3.3425053260466844e-05, + "loss": 2.0923, + "step": 11456500 + }, + { + "epoch": 33.16, + "learning_rate": 3.3424329612819566e-05, + "loss": 2.0735, + "step": 11457000 + }, + { + "epoch": 33.16, + "learning_rate": 3.342360596517229e-05, + "loss": 2.0636, + "step": 11457500 + }, + { + "epoch": 33.17, + "learning_rate": 3.342288231752501e-05, + "loss": 2.0603, + "step": 11458000 + }, + { + "epoch": 33.17, + "learning_rate": 3.342215866987773e-05, + "loss": 2.0746, + "step": 11458500 + }, + { + "epoch": 33.17, + "learning_rate": 3.3421435022230455e-05, + "loss": 2.0655, + "step": 11459000 + }, + { + "epoch": 33.17, + "learning_rate": 3.342071137458318e-05, + "loss": 2.0839, + "step": 11459500 + }, + { + "epoch": 33.17, + "learning_rate": 3.34199891742312e-05, + "loss": 2.0877, + "step": 11460000 + }, + { + "epoch": 33.17, + "learning_rate": 3.341926552658392e-05, + "loss": 2.0788, + "step": 11460500 + }, + { + "epoch": 33.17, + "learning_rate": 3.3418541878936644e-05, + "loss": 2.086, + "step": 11461000 + }, + { + "epoch": 33.18, + "learning_rate": 3.3417818231289366e-05, + "loss": 2.1039, + "step": 11461500 + }, + { + "epoch": 33.18, + "learning_rate": 3.341709603093738e-05, + "loss": 2.0772, + "step": 11462000 + }, + { + "epoch": 33.18, + "learning_rate": 3.3416372383290104e-05, + "loss": 2.081, + "step": 11462500 + }, + { + "epoch": 33.18, + "learning_rate": 3.341564873564283e-05, + "loss": 2.0633, + "step": 11463000 + }, + { + "epoch": 33.18, + "learning_rate": 3.3414925087995555e-05, + "loss": 2.0848, + "step": 11463500 + }, + { + "epoch": 33.18, + "learning_rate": 3.341420144034828e-05, + "loss": 2.0765, + "step": 11464000 + }, + { + "epoch": 33.19, + "learning_rate": 3.3413477792701006e-05, + "loss": 2.079, + "step": 11464500 + }, + { + "epoch": 33.19, + "learning_rate": 3.341275414505373e-05, + "loss": 2.0342, + "step": 11465000 + }, + { + "epoch": 33.19, + "learning_rate": 3.341203049740645e-05, + "loss": 2.0952, + "step": 11465500 + }, + { + "epoch": 33.19, + "learning_rate": 3.341130684975917e-05, + "loss": 2.0717, + "step": 11466000 + }, + { + "epoch": 33.19, + "learning_rate": 3.3410583202111895e-05, + "loss": 2.0851, + "step": 11466500 + }, + { + "epoch": 33.19, + "learning_rate": 3.340985955446462e-05, + "loss": 2.0834, + "step": 11467000 + }, + { + "epoch": 33.19, + "learning_rate": 3.340913590681734e-05, + "loss": 2.0978, + "step": 11467500 + }, + { + "epoch": 33.2, + "learning_rate": 3.340841225917006e-05, + "loss": 2.0766, + "step": 11468000 + }, + { + "epoch": 33.2, + "learning_rate": 3.3407688611522784e-05, + "loss": 2.077, + "step": 11468500 + }, + { + "epoch": 33.2, + "learning_rate": 3.3406964963875507e-05, + "loss": 2.0923, + "step": 11469000 + }, + { + "epoch": 33.2, + "learning_rate": 3.340624131622823e-05, + "loss": 2.0699, + "step": 11469500 + }, + { + "epoch": 33.2, + "learning_rate": 3.340551911587626e-05, + "loss": 2.0861, + "step": 11470000 + }, + { + "epoch": 33.2, + "learning_rate": 3.340479546822898e-05, + "loss": 2.0851, + "step": 11470500 + }, + { + "epoch": 33.2, + "learning_rate": 3.34040718205817e-05, + "loss": 2.0693, + "step": 11471000 + }, + { + "epoch": 33.21, + "learning_rate": 3.340334962022972e-05, + "loss": 2.0764, + "step": 11471500 + }, + { + "epoch": 33.21, + "learning_rate": 3.340262597258244e-05, + "loss": 2.0567, + "step": 11472000 + }, + { + "epoch": 33.21, + "learning_rate": 3.340190232493516e-05, + "loss": 2.0875, + "step": 11472500 + }, + { + "epoch": 33.21, + "learning_rate": 3.3401178677287885e-05, + "loss": 2.0741, + "step": 11473000 + }, + { + "epoch": 33.21, + "learning_rate": 3.340045502964061e-05, + "loss": 2.0924, + "step": 11473500 + }, + { + "epoch": 33.21, + "learning_rate": 3.339973282928863e-05, + "loss": 2.084, + "step": 11474000 + }, + { + "epoch": 33.21, + "learning_rate": 3.339900918164135e-05, + "loss": 2.0798, + "step": 11474500 + }, + { + "epoch": 33.22, + "learning_rate": 3.3398285533994074e-05, + "loss": 2.0763, + "step": 11475000 + }, + { + "epoch": 33.22, + "learning_rate": 3.3397561886346796e-05, + "loss": 2.1009, + "step": 11475500 + }, + { + "epoch": 33.22, + "learning_rate": 3.339683823869952e-05, + "loss": 2.0899, + "step": 11476000 + }, + { + "epoch": 33.22, + "learning_rate": 3.3396116038347534e-05, + "loss": 2.0763, + "step": 11476500 + }, + { + "epoch": 33.22, + "learning_rate": 3.3395392390700256e-05, + "loss": 2.0953, + "step": 11477000 + }, + { + "epoch": 33.22, + "learning_rate": 3.339466874305298e-05, + "loss": 2.0838, + "step": 11477500 + }, + { + "epoch": 33.22, + "learning_rate": 3.339394509540571e-05, + "loss": 2.0802, + "step": 11478000 + }, + { + "epoch": 33.23, + "learning_rate": 3.339322144775843e-05, + "loss": 2.0884, + "step": 11478500 + }, + { + "epoch": 33.23, + "learning_rate": 3.339249780011116e-05, + "loss": 2.081, + "step": 11479000 + }, + { + "epoch": 33.23, + "learning_rate": 3.3391775599759174e-05, + "loss": 2.077, + "step": 11479500 + }, + { + "epoch": 33.23, + "learning_rate": 3.3391051952111896e-05, + "loss": 2.0723, + "step": 11480000 + }, + { + "epoch": 33.23, + "learning_rate": 3.339032830446462e-05, + "loss": 2.0806, + "step": 11480500 + }, + { + "epoch": 33.23, + "learning_rate": 3.338960465681734e-05, + "loss": 2.0627, + "step": 11481000 + }, + { + "epoch": 33.23, + "learning_rate": 3.338888100917006e-05, + "loss": 2.1063, + "step": 11481500 + }, + { + "epoch": 33.24, + "learning_rate": 3.3388157361522785e-05, + "loss": 2.0832, + "step": 11482000 + }, + { + "epoch": 33.24, + "learning_rate": 3.338743371387551e-05, + "loss": 2.0894, + "step": 11482500 + }, + { + "epoch": 33.24, + "learning_rate": 3.338671006622823e-05, + "loss": 2.0948, + "step": 11483000 + }, + { + "epoch": 33.24, + "learning_rate": 3.338598786587625e-05, + "loss": 2.0817, + "step": 11483500 + }, + { + "epoch": 33.24, + "learning_rate": 3.3385264218228974e-05, + "loss": 2.1093, + "step": 11484000 + }, + { + "epoch": 33.24, + "learning_rate": 3.3384540570581696e-05, + "loss": 2.1173, + "step": 11484500 + }, + { + "epoch": 33.24, + "learning_rate": 3.338381837022971e-05, + "loss": 2.0922, + "step": 11485000 + }, + { + "epoch": 33.25, + "learning_rate": 3.338309472258244e-05, + "loss": 2.0476, + "step": 11485500 + }, + { + "epoch": 33.25, + "learning_rate": 3.338237107493516e-05, + "loss": 2.0984, + "step": 11486000 + }, + { + "epoch": 33.25, + "learning_rate": 3.3381647427287885e-05, + "loss": 2.1033, + "step": 11486500 + }, + { + "epoch": 33.25, + "learning_rate": 3.338092522693591e-05, + "loss": 2.0707, + "step": 11487000 + }, + { + "epoch": 33.25, + "learning_rate": 3.338020157928863e-05, + "loss": 2.0766, + "step": 11487500 + }, + { + "epoch": 33.25, + "learning_rate": 3.337947793164135e-05, + "loss": 2.0936, + "step": 11488000 + }, + { + "epoch": 33.25, + "learning_rate": 3.3378754283994074e-05, + "loss": 2.0636, + "step": 11488500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337803208364209e-05, + "loss": 2.0943, + "step": 11489000 + }, + { + "epoch": 33.26, + "learning_rate": 3.337730843599481e-05, + "loss": 2.0804, + "step": 11489500 + }, + { + "epoch": 33.26, + "learning_rate": 3.3376586235642835e-05, + "loss": 2.0611, + "step": 11490000 + }, + { + "epoch": 33.26, + "learning_rate": 3.337586258799556e-05, + "loss": 2.0696, + "step": 11490500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337514038764357e-05, + "loss": 2.0771, + "step": 11491000 + }, + { + "epoch": 33.26, + "learning_rate": 3.3374416739996295e-05, + "loss": 2.0766, + "step": 11491500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337369309234902e-05, + "loss": 2.0777, + "step": 11492000 + }, + { + "epoch": 33.27, + "learning_rate": 3.337296944470174e-05, + "loss": 2.095, + "step": 11492500 + }, + { + "epoch": 33.27, + "learning_rate": 3.337224579705447e-05, + "loss": 2.0916, + "step": 11493000 + }, + { + "epoch": 33.27, + "learning_rate": 3.337152214940719e-05, + "loss": 2.0849, + "step": 11493500 + }, + { + "epoch": 33.27, + "learning_rate": 3.337079850175991e-05, + "loss": 2.0765, + "step": 11494000 + }, + { + "epoch": 33.27, + "learning_rate": 3.3370074854112635e-05, + "loss": 2.076, + "step": 11494500 + }, + { + "epoch": 33.27, + "learning_rate": 3.336935120646536e-05, + "loss": 2.0734, + "step": 11495000 + }, + { + "epoch": 33.27, + "learning_rate": 3.3368627558818086e-05, + "loss": 2.0969, + "step": 11495500 + }, + { + "epoch": 33.28, + "learning_rate": 3.336790391117081e-05, + "loss": 2.0708, + "step": 11496000 + }, + { + "epoch": 33.28, + "learning_rate": 3.336718026352353e-05, + "loss": 2.0842, + "step": 11496500 + }, + { + "epoch": 33.28, + "learning_rate": 3.336645661587625e-05, + "loss": 2.0752, + "step": 11497000 + }, + { + "epoch": 33.28, + "learning_rate": 3.336573441552427e-05, + "loss": 2.0887, + "step": 11497500 + }, + { + "epoch": 33.28, + "learning_rate": 3.336501076787699e-05, + "loss": 2.0941, + "step": 11498000 + }, + { + "epoch": 33.28, + "learning_rate": 3.336428712022971e-05, + "loss": 2.0907, + "step": 11498500 + }, + { + "epoch": 33.28, + "learning_rate": 3.3363563472582435e-05, + "loss": 2.0685, + "step": 11499000 + }, + { + "epoch": 33.29, + "learning_rate": 3.336283982493516e-05, + "loss": 2.0836, + "step": 11499500 + }, + { + "epoch": 33.29, + "learning_rate": 3.3362116177287886e-05, + "loss": 2.1096, + "step": 11500000 + }, + { + "epoch": 33.29, + "learning_rate": 3.336139252964061e-05, + "loss": 2.0907, + "step": 11500500 + }, + { + "epoch": 33.29, + "learning_rate": 3.336066888199334e-05, + "loss": 2.1103, + "step": 11501000 + }, + { + "epoch": 33.29, + "learning_rate": 3.335994523434606e-05, + "loss": 2.0673, + "step": 11501500 + }, + { + "epoch": 33.29, + "learning_rate": 3.335922158669878e-05, + "loss": 2.0736, + "step": 11502000 + }, + { + "epoch": 33.3, + "learning_rate": 3.3358497939051504e-05, + "loss": 2.0711, + "step": 11502500 + }, + { + "epoch": 33.3, + "learning_rate": 3.335777573869952e-05, + "loss": 2.1109, + "step": 11503000 + }, + { + "epoch": 33.3, + "learning_rate": 3.335705209105224e-05, + "loss": 2.0901, + "step": 11503500 + }, + { + "epoch": 33.3, + "learning_rate": 3.3356328443404964e-05, + "loss": 2.0703, + "step": 11504000 + }, + { + "epoch": 33.3, + "learning_rate": 3.3355604795757686e-05, + "loss": 2.0939, + "step": 11504500 + }, + { + "epoch": 33.3, + "learning_rate": 3.335488114811041e-05, + "loss": 2.0705, + "step": 11505000 + }, + { + "epoch": 33.3, + "learning_rate": 3.335415750046314e-05, + "loss": 2.1021, + "step": 11505500 + }, + { + "epoch": 33.31, + "learning_rate": 3.335343530011115e-05, + "loss": 2.0933, + "step": 11506000 + }, + { + "epoch": 33.31, + "learning_rate": 3.335271309975917e-05, + "loss": 2.0714, + "step": 11506500 + }, + { + "epoch": 33.31, + "learning_rate": 3.335198945211189e-05, + "loss": 2.0749, + "step": 11507000 + }, + { + "epoch": 33.31, + "learning_rate": 3.335126580446461e-05, + "loss": 2.095, + "step": 11507500 + }, + { + "epoch": 33.31, + "learning_rate": 3.335054215681734e-05, + "loss": 2.0777, + "step": 11508000 + }, + { + "epoch": 33.31, + "learning_rate": 3.3349818509170064e-05, + "loss": 2.0662, + "step": 11508500 + }, + { + "epoch": 33.31, + "learning_rate": 3.334909486152279e-05, + "loss": 2.0801, + "step": 11509000 + }, + { + "epoch": 33.32, + "learning_rate": 3.334837121387551e-05, + "loss": 2.0928, + "step": 11509500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334764756622824e-05, + "loss": 2.0737, + "step": 11510000 + }, + { + "epoch": 33.32, + "learning_rate": 3.334692391858096e-05, + "loss": 2.0943, + "step": 11510500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334620027093368e-05, + "loss": 2.0996, + "step": 11511000 + }, + { + "epoch": 33.32, + "learning_rate": 3.3345476623286405e-05, + "loss": 2.0876, + "step": 11511500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334475297563913e-05, + "loss": 2.0753, + "step": 11512000 + }, + { + "epoch": 33.32, + "learning_rate": 3.334402932799185e-05, + "loss": 2.0833, + "step": 11512500 + }, + { + "epoch": 33.33, + "learning_rate": 3.334330568034457e-05, + "loss": 2.0791, + "step": 11513000 + }, + { + "epoch": 33.33, + "learning_rate": 3.3342582032697294e-05, + "loss": 2.0813, + "step": 11513500 + }, + { + "epoch": 33.33, + "learning_rate": 3.334185983234531e-05, + "loss": 2.0702, + "step": 11514000 + }, + { + "epoch": 33.33, + "learning_rate": 3.334113618469804e-05, + "loss": 2.0834, + "step": 11514500 + }, + { + "epoch": 33.33, + "learning_rate": 3.334041253705076e-05, + "loss": 2.063, + "step": 11515000 + }, + { + "epoch": 33.33, + "learning_rate": 3.333968888940349e-05, + "loss": 2.0778, + "step": 11515500 + }, + { + "epoch": 33.33, + "learning_rate": 3.333896524175621e-05, + "loss": 2.0858, + "step": 11516000 + }, + { + "epoch": 33.34, + "learning_rate": 3.3338241594108934e-05, + "loss": 2.0836, + "step": 11516500 + }, + { + "epoch": 33.34, + "learning_rate": 3.3337517946461656e-05, + "loss": 2.094, + "step": 11517000 + }, + { + "epoch": 33.34, + "learning_rate": 3.333679574610967e-05, + "loss": 2.1021, + "step": 11517500 + }, + { + "epoch": 33.34, + "learning_rate": 3.333607354575769e-05, + "loss": 2.0933, + "step": 11518000 + }, + { + "epoch": 33.34, + "learning_rate": 3.3335349898110416e-05, + "loss": 2.1025, + "step": 11518500 + }, + { + "epoch": 33.34, + "learning_rate": 3.333462769775843e-05, + "loss": 2.098, + "step": 11519000 + }, + { + "epoch": 33.34, + "learning_rate": 3.3333904050111154e-05, + "loss": 2.0757, + "step": 11519500 + }, + { + "epoch": 33.35, + "learning_rate": 3.3333180402463876e-05, + "loss": 2.0699, + "step": 11520000 + }, + { + "epoch": 33.35, + "learning_rate": 3.33324567548166e-05, + "loss": 2.1122, + "step": 11520500 + }, + { + "epoch": 33.35, + "learning_rate": 3.333173310716932e-05, + "loss": 2.0884, + "step": 11521000 + }, + { + "epoch": 33.35, + "learning_rate": 3.333100945952204e-05, + "loss": 2.0961, + "step": 11521500 + }, + { + "epoch": 33.35, + "learning_rate": 3.3330285811874765e-05, + "loss": 2.075, + "step": 11522000 + }, + { + "epoch": 33.35, + "learning_rate": 3.3329562164227494e-05, + "loss": 2.0751, + "step": 11522500 + }, + { + "epoch": 33.35, + "learning_rate": 3.3328838516580216e-05, + "loss": 2.0973, + "step": 11523000 + }, + { + "epoch": 33.36, + "learning_rate": 3.332811631622824e-05, + "loss": 2.0818, + "step": 11523500 + }, + { + "epoch": 33.36, + "learning_rate": 3.3327394115876254e-05, + "loss": 2.074, + "step": 11524000 + }, + { + "epoch": 33.36, + "learning_rate": 3.332667191552427e-05, + "loss": 2.0958, + "step": 11524500 + }, + { + "epoch": 33.36, + "learning_rate": 3.332594826787699e-05, + "loss": 2.0992, + "step": 11525000 + }, + { + "epoch": 33.36, + "learning_rate": 3.3325224620229714e-05, + "loss": 2.0839, + "step": 11525500 + }, + { + "epoch": 33.36, + "learning_rate": 3.3324500972582437e-05, + "loss": 2.0944, + "step": 11526000 + }, + { + "epoch": 33.36, + "learning_rate": 3.3323777324935166e-05, + "loss": 2.0777, + "step": 11526500 + }, + { + "epoch": 33.37, + "learning_rate": 3.332305367728789e-05, + "loss": 2.1044, + "step": 11527000 + }, + { + "epoch": 33.37, + "learning_rate": 3.332233002964061e-05, + "loss": 2.0879, + "step": 11527500 + }, + { + "epoch": 33.37, + "learning_rate": 3.332160638199333e-05, + "loss": 2.0903, + "step": 11528000 + }, + { + "epoch": 33.37, + "learning_rate": 3.3320882734346054e-05, + "loss": 2.095, + "step": 11528500 + }, + { + "epoch": 33.37, + "learning_rate": 3.332016053399407e-05, + "loss": 2.0844, + "step": 11529000 + }, + { + "epoch": 33.37, + "learning_rate": 3.331943688634679e-05, + "loss": 2.0696, + "step": 11529500 + }, + { + "epoch": 33.37, + "learning_rate": 3.331871323869952e-05, + "loss": 2.0752, + "step": 11530000 + }, + { + "epoch": 33.38, + "learning_rate": 3.3317989591052244e-05, + "loss": 2.0717, + "step": 11530500 + }, + { + "epoch": 33.38, + "learning_rate": 3.3317265943404966e-05, + "loss": 2.0931, + "step": 11531000 + }, + { + "epoch": 33.38, + "learning_rate": 3.331654229575769e-05, + "loss": 2.079, + "step": 11531500 + }, + { + "epoch": 33.38, + "learning_rate": 3.331581864811042e-05, + "loss": 2.083, + "step": 11532000 + }, + { + "epoch": 33.38, + "learning_rate": 3.331509500046314e-05, + "loss": 2.0682, + "step": 11532500 + }, + { + "epoch": 33.38, + "learning_rate": 3.331437135281586e-05, + "loss": 2.0844, + "step": 11533000 + }, + { + "epoch": 33.38, + "learning_rate": 3.3313647705168584e-05, + "loss": 2.0643, + "step": 11533500 + }, + { + "epoch": 33.39, + "learning_rate": 3.33129255048166e-05, + "loss": 2.0775, + "step": 11534000 + }, + { + "epoch": 33.39, + "learning_rate": 3.331220185716932e-05, + "loss": 2.0757, + "step": 11534500 + }, + { + "epoch": 33.39, + "learning_rate": 3.3311478209522044e-05, + "loss": 2.1054, + "step": 11535000 + }, + { + "epoch": 33.39, + "learning_rate": 3.3310754561874766e-05, + "loss": 2.0905, + "step": 11535500 + }, + { + "epoch": 33.39, + "learning_rate": 3.331003091422749e-05, + "loss": 2.0975, + "step": 11536000 + }, + { + "epoch": 33.39, + "learning_rate": 3.330930726658022e-05, + "loss": 2.0661, + "step": 11536500 + }, + { + "epoch": 33.39, + "learning_rate": 3.330858361893294e-05, + "loss": 2.0998, + "step": 11537000 + }, + { + "epoch": 33.4, + "learning_rate": 3.330785997128567e-05, + "loss": 2.0972, + "step": 11537500 + }, + { + "epoch": 33.4, + "learning_rate": 3.330713921822898e-05, + "loss": 2.1132, + "step": 11538000 + }, + { + "epoch": 33.4, + "learning_rate": 3.330641846517229e-05, + "loss": 2.098, + "step": 11538500 + }, + { + "epoch": 33.4, + "learning_rate": 3.3305694817525015e-05, + "loss": 2.0933, + "step": 11539000 + }, + { + "epoch": 33.4, + "learning_rate": 3.330497116987774e-05, + "loss": 2.0741, + "step": 11539500 + }, + { + "epoch": 33.4, + "learning_rate": 3.330424752223046e-05, + "loss": 2.0794, + "step": 11540000 + }, + { + "epoch": 33.41, + "learning_rate": 3.330352387458318e-05, + "loss": 2.0816, + "step": 11540500 + }, + { + "epoch": 33.41, + "learning_rate": 3.3302800226935904e-05, + "loss": 2.068, + "step": 11541000 + }, + { + "epoch": 33.41, + "learning_rate": 3.3302076579288626e-05, + "loss": 2.0877, + "step": 11541500 + }, + { + "epoch": 33.41, + "learning_rate": 3.330135293164135e-05, + "loss": 2.0897, + "step": 11542000 + }, + { + "epoch": 33.41, + "learning_rate": 3.330062928399407e-05, + "loss": 2.0861, + "step": 11542500 + }, + { + "epoch": 33.41, + "learning_rate": 3.329990563634679e-05, + "loss": 2.0585, + "step": 11543000 + }, + { + "epoch": 33.41, + "learning_rate": 3.3299181988699515e-05, + "loss": 2.0959, + "step": 11543500 + }, + { + "epoch": 33.42, + "learning_rate": 3.3298458341052244e-05, + "loss": 2.1084, + "step": 11544000 + }, + { + "epoch": 33.42, + "learning_rate": 3.329773614070026e-05, + "loss": 2.0686, + "step": 11544500 + }, + { + "epoch": 33.42, + "learning_rate": 3.329701249305298e-05, + "loss": 2.0929, + "step": 11545000 + }, + { + "epoch": 33.42, + "learning_rate": 3.329628884540571e-05, + "loss": 2.0968, + "step": 11545500 + }, + { + "epoch": 33.42, + "learning_rate": 3.329556519775843e-05, + "loss": 2.0719, + "step": 11546000 + }, + { + "epoch": 33.42, + "learning_rate": 3.3294841550111156e-05, + "loss": 2.068, + "step": 11546500 + }, + { + "epoch": 33.42, + "learning_rate": 3.329411790246388e-05, + "loss": 2.1199, + "step": 11547000 + }, + { + "epoch": 33.43, + "learning_rate": 3.32933942548166e-05, + "loss": 2.0854, + "step": 11547500 + }, + { + "epoch": 33.43, + "learning_rate": 3.329267060716932e-05, + "loss": 2.078, + "step": 11548000 + }, + { + "epoch": 33.43, + "learning_rate": 3.3291946959522044e-05, + "loss": 2.0751, + "step": 11548500 + }, + { + "epoch": 33.43, + "learning_rate": 3.329122331187477e-05, + "loss": 2.0871, + "step": 11549000 + }, + { + "epoch": 33.43, + "learning_rate": 3.3290499664227496e-05, + "loss": 2.0945, + "step": 11549500 + }, + { + "epoch": 33.43, + "learning_rate": 3.328977601658022e-05, + "loss": 2.1027, + "step": 11550000 + }, + { + "epoch": 33.43, + "learning_rate": 3.328905236893294e-05, + "loss": 2.0926, + "step": 11550500 + }, + { + "epoch": 33.44, + "learning_rate": 3.328832872128566e-05, + "loss": 2.1086, + "step": 11551000 + }, + { + "epoch": 33.44, + "learning_rate": 3.328760652093368e-05, + "loss": 2.0981, + "step": 11551500 + }, + { + "epoch": 33.44, + "learning_rate": 3.32868828732864e-05, + "loss": 2.0928, + "step": 11552000 + }, + { + "epoch": 33.44, + "learning_rate": 3.328616067293442e-05, + "loss": 2.0993, + "step": 11552500 + }, + { + "epoch": 33.44, + "learning_rate": 3.3285437025287145e-05, + "loss": 2.0687, + "step": 11553000 + }, + { + "epoch": 33.44, + "learning_rate": 3.328471337763987e-05, + "loss": 2.0729, + "step": 11553500 + }, + { + "epoch": 33.44, + "learning_rate": 3.328399117728789e-05, + "loss": 2.0749, + "step": 11554000 + }, + { + "epoch": 33.45, + "learning_rate": 3.328326752964061e-05, + "loss": 2.1239, + "step": 11554500 + }, + { + "epoch": 33.45, + "learning_rate": 3.3282543881993334e-05, + "loss": 2.0652, + "step": 11555000 + }, + { + "epoch": 33.45, + "learning_rate": 3.3281820234346056e-05, + "loss": 2.0885, + "step": 11555500 + }, + { + "epoch": 33.45, + "learning_rate": 3.328109658669878e-05, + "loss": 2.0694, + "step": 11556000 + }, + { + "epoch": 33.45, + "learning_rate": 3.32803729390515e-05, + "loss": 2.1112, + "step": 11556500 + }, + { + "epoch": 33.45, + "learning_rate": 3.327964929140422e-05, + "loss": 2.0858, + "step": 11557000 + }, + { + "epoch": 33.45, + "learning_rate": 3.3278925643756945e-05, + "loss": 2.0997, + "step": 11557500 + }, + { + "epoch": 33.46, + "learning_rate": 3.327820199610967e-05, + "loss": 2.0658, + "step": 11558000 + }, + { + "epoch": 33.46, + "learning_rate": 3.3277478348462396e-05, + "loss": 2.0909, + "step": 11558500 + }, + { + "epoch": 33.46, + "learning_rate": 3.327675614811041e-05, + "loss": 2.0986, + "step": 11559000 + }, + { + "epoch": 33.46, + "learning_rate": 3.3276032500463134e-05, + "loss": 2.0684, + "step": 11559500 + }, + { + "epoch": 33.46, + "learning_rate": 3.3275310300111156e-05, + "loss": 2.089, + "step": 11560000 + }, + { + "epoch": 33.46, + "learning_rate": 3.327458665246388e-05, + "loss": 2.0624, + "step": 11560500 + }, + { + "epoch": 33.46, + "learning_rate": 3.32738630048166e-05, + "loss": 2.0653, + "step": 11561000 + }, + { + "epoch": 33.47, + "learning_rate": 3.327313935716932e-05, + "loss": 2.0674, + "step": 11561500 + }, + { + "epoch": 33.47, + "learning_rate": 3.3272415709522045e-05, + "loss": 2.0904, + "step": 11562000 + }, + { + "epoch": 33.47, + "learning_rate": 3.327169206187477e-05, + "loss": 2.0747, + "step": 11562500 + }, + { + "epoch": 33.47, + "learning_rate": 3.3270968414227497e-05, + "loss": 2.1005, + "step": 11563000 + }, + { + "epoch": 33.47, + "learning_rate": 3.327024476658022e-05, + "loss": 2.1081, + "step": 11563500 + }, + { + "epoch": 33.47, + "learning_rate": 3.326952111893294e-05, + "loss": 2.0867, + "step": 11564000 + }, + { + "epoch": 33.47, + "learning_rate": 3.3268798918580957e-05, + "loss": 2.1002, + "step": 11564500 + }, + { + "epoch": 33.48, + "learning_rate": 3.326807527093368e-05, + "loss": 2.0904, + "step": 11565000 + }, + { + "epoch": 33.48, + "learning_rate": 3.32673516232864e-05, + "loss": 2.0602, + "step": 11565500 + }, + { + "epoch": 33.48, + "learning_rate": 3.326662797563912e-05, + "loss": 2.0829, + "step": 11566000 + }, + { + "epoch": 33.48, + "learning_rate": 3.3265904327991845e-05, + "loss": 2.1092, + "step": 11566500 + }, + { + "epoch": 33.48, + "learning_rate": 3.326518068034457e-05, + "loss": 2.0531, + "step": 11567000 + }, + { + "epoch": 33.48, + "learning_rate": 3.32644584799926e-05, + "loss": 2.0869, + "step": 11567500 + }, + { + "epoch": 33.48, + "learning_rate": 3.326373483234532e-05, + "loss": 2.0923, + "step": 11568000 + }, + { + "epoch": 33.49, + "learning_rate": 3.3263012631993335e-05, + "loss": 2.0881, + "step": 11568500 + }, + { + "epoch": 33.49, + "learning_rate": 3.326228898434606e-05, + "loss": 2.0997, + "step": 11569000 + }, + { + "epoch": 33.49, + "learning_rate": 3.326156533669878e-05, + "loss": 2.0881, + "step": 11569500 + }, + { + "epoch": 33.49, + "learning_rate": 3.32608416890515e-05, + "loss": 2.1037, + "step": 11570000 + }, + { + "epoch": 33.49, + "learning_rate": 3.3260118041404224e-05, + "loss": 2.0884, + "step": 11570500 + }, + { + "epoch": 33.49, + "learning_rate": 3.3259394393756946e-05, + "loss": 2.0825, + "step": 11571000 + }, + { + "epoch": 33.49, + "learning_rate": 3.325867219340497e-05, + "loss": 2.0976, + "step": 11571500 + }, + { + "epoch": 33.5, + "learning_rate": 3.325794854575769e-05, + "loss": 2.0872, + "step": 11572000 + }, + { + "epoch": 33.5, + "learning_rate": 3.325722489811041e-05, + "loss": 2.0889, + "step": 11572500 + }, + { + "epoch": 33.5, + "learning_rate": 3.3256501250463135e-05, + "loss": 2.092, + "step": 11573000 + }, + { + "epoch": 33.5, + "learning_rate": 3.325577760281586e-05, + "loss": 2.0732, + "step": 11573500 + }, + { + "epoch": 33.5, + "learning_rate": 3.325505540246387e-05, + "loss": 2.1021, + "step": 11574000 + }, + { + "epoch": 33.5, + "learning_rate": 3.3254331754816595e-05, + "loss": 2.0922, + "step": 11574500 + }, + { + "epoch": 33.5, + "learning_rate": 3.3253608107169324e-05, + "loss": 2.1004, + "step": 11575000 + }, + { + "epoch": 33.51, + "learning_rate": 3.3252884459522046e-05, + "loss": 2.1022, + "step": 11575500 + }, + { + "epoch": 33.51, + "learning_rate": 3.325216225917007e-05, + "loss": 2.0685, + "step": 11576000 + }, + { + "epoch": 33.51, + "learning_rate": 3.325143861152279e-05, + "loss": 2.0806, + "step": 11576500 + }, + { + "epoch": 33.51, + "learning_rate": 3.325071496387551e-05, + "loss": 2.0952, + "step": 11577000 + }, + { + "epoch": 33.51, + "learning_rate": 3.3249991316228235e-05, + "loss": 2.0781, + "step": 11577500 + }, + { + "epoch": 33.51, + "learning_rate": 3.324926766858096e-05, + "loss": 2.0673, + "step": 11578000 + }, + { + "epoch": 33.52, + "learning_rate": 3.324854402093368e-05, + "loss": 2.1041, + "step": 11578500 + }, + { + "epoch": 33.52, + "learning_rate": 3.32478203732864e-05, + "loss": 2.0995, + "step": 11579000 + }, + { + "epoch": 33.52, + "learning_rate": 3.3247096725639124e-05, + "loss": 2.098, + "step": 11579500 + }, + { + "epoch": 33.52, + "learning_rate": 3.3246373077991846e-05, + "loss": 2.0748, + "step": 11580000 + }, + { + "epoch": 33.52, + "learning_rate": 3.3245649430344575e-05, + "loss": 2.0739, + "step": 11580500 + }, + { + "epoch": 33.52, + "learning_rate": 3.324492722999259e-05, + "loss": 2.0889, + "step": 11581000 + }, + { + "epoch": 33.52, + "learning_rate": 3.324420358234531e-05, + "loss": 2.1168, + "step": 11581500 + }, + { + "epoch": 33.53, + "learning_rate": 3.3243479934698035e-05, + "loss": 2.0992, + "step": 11582000 + }, + { + "epoch": 33.53, + "learning_rate": 3.3242756287050764e-05, + "loss": 2.1035, + "step": 11582500 + }, + { + "epoch": 33.53, + "learning_rate": 3.3242032639403487e-05, + "loss": 2.0847, + "step": 11583000 + }, + { + "epoch": 33.53, + "learning_rate": 3.32413104390515e-05, + "loss": 2.0796, + "step": 11583500 + }, + { + "epoch": 33.53, + "learning_rate": 3.3240588238699524e-05, + "loss": 2.0792, + "step": 11584000 + }, + { + "epoch": 33.53, + "learning_rate": 3.323986459105225e-05, + "loss": 2.1101, + "step": 11584500 + }, + { + "epoch": 33.53, + "learning_rate": 3.323914094340497e-05, + "loss": 2.094, + "step": 11585000 + }, + { + "epoch": 33.54, + "learning_rate": 3.3238418743052984e-05, + "loss": 2.0644, + "step": 11585500 + }, + { + "epoch": 33.54, + "learning_rate": 3.323769509540571e-05, + "loss": 2.0967, + "step": 11586000 + }, + { + "epoch": 33.54, + "learning_rate": 3.323697144775843e-05, + "loss": 2.1022, + "step": 11586500 + }, + { + "epoch": 33.54, + "learning_rate": 3.323624780011115e-05, + "loss": 2.0837, + "step": 11587000 + }, + { + "epoch": 33.54, + "learning_rate": 3.3235524152463873e-05, + "loss": 2.1079, + "step": 11587500 + }, + { + "epoch": 33.54, + "learning_rate": 3.3234800504816596e-05, + "loss": 2.0808, + "step": 11588000 + }, + { + "epoch": 33.54, + "learning_rate": 3.3234076857169325e-05, + "loss": 2.0879, + "step": 11588500 + }, + { + "epoch": 33.55, + "learning_rate": 3.323335320952205e-05, + "loss": 2.084, + "step": 11589000 + }, + { + "epoch": 33.55, + "learning_rate": 3.323262956187477e-05, + "loss": 2.0853, + "step": 11589500 + }, + { + "epoch": 33.55, + "learning_rate": 3.323190736152279e-05, + "loss": 2.0783, + "step": 11590000 + }, + { + "epoch": 33.55, + "learning_rate": 3.3231183713875514e-05, + "loss": 2.0802, + "step": 11590500 + }, + { + "epoch": 33.55, + "learning_rate": 3.3230460066228236e-05, + "loss": 2.09, + "step": 11591000 + }, + { + "epoch": 33.55, + "learning_rate": 3.322973641858096e-05, + "loss": 2.0863, + "step": 11591500 + }, + { + "epoch": 33.55, + "learning_rate": 3.322901277093368e-05, + "loss": 2.0967, + "step": 11592000 + }, + { + "epoch": 33.56, + "learning_rate": 3.32282905705817e-05, + "loss": 2.0886, + "step": 11592500 + }, + { + "epoch": 33.56, + "learning_rate": 3.3227566922934425e-05, + "loss": 2.085, + "step": 11593000 + }, + { + "epoch": 33.56, + "learning_rate": 3.322684327528715e-05, + "loss": 2.063, + "step": 11593500 + }, + { + "epoch": 33.56, + "learning_rate": 3.322611962763987e-05, + "loss": 2.0998, + "step": 11594000 + }, + { + "epoch": 33.56, + "learning_rate": 3.322539597999259e-05, + "loss": 2.0888, + "step": 11594500 + }, + { + "epoch": 33.56, + "learning_rate": 3.322467377964061e-05, + "loss": 2.0835, + "step": 11595000 + }, + { + "epoch": 33.56, + "learning_rate": 3.322395013199333e-05, + "loss": 2.0818, + "step": 11595500 + }, + { + "epoch": 33.57, + "learning_rate": 3.322322648434605e-05, + "loss": 2.1062, + "step": 11596000 + }, + { + "epoch": 33.57, + "learning_rate": 3.3222504283994074e-05, + "loss": 2.0759, + "step": 11596500 + }, + { + "epoch": 33.57, + "learning_rate": 3.3221780636346796e-05, + "loss": 2.0884, + "step": 11597000 + }, + { + "epoch": 33.57, + "learning_rate": 3.3221056988699525e-05, + "loss": 2.1077, + "step": 11597500 + }, + { + "epoch": 33.57, + "learning_rate": 3.322033334105225e-05, + "loss": 2.1144, + "step": 11598000 + }, + { + "epoch": 33.57, + "learning_rate": 3.321960969340497e-05, + "loss": 2.0901, + "step": 11598500 + }, + { + "epoch": 33.57, + "learning_rate": 3.321888604575769e-05, + "loss": 2.0746, + "step": 11599000 + }, + { + "epoch": 33.58, + "learning_rate": 3.3218162398110414e-05, + "loss": 2.0872, + "step": 11599500 + }, + { + "epoch": 33.58, + "learning_rate": 3.3217438750463136e-05, + "loss": 2.1001, + "step": 11600000 + }, + { + "epoch": 33.58, + "learning_rate": 3.321671510281586e-05, + "loss": 2.1071, + "step": 11600500 + }, + { + "epoch": 33.58, + "learning_rate": 3.321599145516858e-05, + "loss": 2.089, + "step": 11601000 + }, + { + "epoch": 33.58, + "learning_rate": 3.32152678075213e-05, + "loss": 2.1002, + "step": 11601500 + }, + { + "epoch": 33.58, + "learning_rate": 3.3214544159874025e-05, + "loss": 2.1014, + "step": 11602000 + }, + { + "epoch": 33.58, + "learning_rate": 3.321382195952205e-05, + "loss": 2.0694, + "step": 11602500 + }, + { + "epoch": 33.59, + "learning_rate": 3.321309831187477e-05, + "loss": 2.0773, + "step": 11603000 + }, + { + "epoch": 33.59, + "learning_rate": 3.321237466422749e-05, + "loss": 2.0994, + "step": 11603500 + }, + { + "epoch": 33.59, + "learning_rate": 3.321165246387551e-05, + "loss": 2.0688, + "step": 11604000 + }, + { + "epoch": 33.59, + "learning_rate": 3.321092881622823e-05, + "loss": 2.0688, + "step": 11604500 + }, + { + "epoch": 33.59, + "learning_rate": 3.321020516858096e-05, + "loss": 2.086, + "step": 11605000 + }, + { + "epoch": 33.59, + "learning_rate": 3.320948152093368e-05, + "loss": 2.1103, + "step": 11605500 + }, + { + "epoch": 33.59, + "learning_rate": 3.3208757873286403e-05, + "loss": 2.093, + "step": 11606000 + }, + { + "epoch": 33.6, + "learning_rate": 3.3208034225639126e-05, + "loss": 2.0946, + "step": 11606500 + }, + { + "epoch": 33.6, + "learning_rate": 3.3207310577991855e-05, + "loss": 2.0923, + "step": 11607000 + }, + { + "epoch": 33.6, + "learning_rate": 3.320658693034458e-05, + "loss": 2.0866, + "step": 11607500 + }, + { + "epoch": 33.6, + "learning_rate": 3.32058632826973e-05, + "loss": 2.1166, + "step": 11608000 + }, + { + "epoch": 33.6, + "learning_rate": 3.3205141082345315e-05, + "loss": 2.1265, + "step": 11608500 + }, + { + "epoch": 33.6, + "learning_rate": 3.320441743469804e-05, + "loss": 2.0866, + "step": 11609000 + }, + { + "epoch": 33.6, + "learning_rate": 3.320369378705076e-05, + "loss": 2.078, + "step": 11609500 + }, + { + "epoch": 33.61, + "learning_rate": 3.320297013940348e-05, + "loss": 2.0856, + "step": 11610000 + }, + { + "epoch": 33.61, + "learning_rate": 3.3202246491756204e-05, + "loss": 2.0681, + "step": 11610500 + }, + { + "epoch": 33.61, + "learning_rate": 3.3201522844108926e-05, + "loss": 2.0941, + "step": 11611000 + }, + { + "epoch": 33.61, + "learning_rate": 3.3200799196461655e-05, + "loss": 2.0878, + "step": 11611500 + }, + { + "epoch": 33.61, + "learning_rate": 3.320007554881438e-05, + "loss": 2.0854, + "step": 11612000 + }, + { + "epoch": 33.61, + "learning_rate": 3.31993533484624e-05, + "loss": 2.0848, + "step": 11612500 + }, + { + "epoch": 33.61, + "learning_rate": 3.319862970081512e-05, + "loss": 2.0728, + "step": 11613000 + }, + { + "epoch": 33.62, + "learning_rate": 3.3197906053167844e-05, + "loss": 2.071, + "step": 11613500 + }, + { + "epoch": 33.62, + "learning_rate": 3.3197182405520566e-05, + "loss": 2.0997, + "step": 11614000 + }, + { + "epoch": 33.62, + "learning_rate": 3.319646020516858e-05, + "loss": 2.0955, + "step": 11614500 + }, + { + "epoch": 33.62, + "learning_rate": 3.3195738004816604e-05, + "loss": 2.0833, + "step": 11615000 + }, + { + "epoch": 33.62, + "learning_rate": 3.3195014357169326e-05, + "loss": 2.0796, + "step": 11615500 + }, + { + "epoch": 33.62, + "learning_rate": 3.319429070952205e-05, + "loss": 2.0919, + "step": 11616000 + }, + { + "epoch": 33.63, + "learning_rate": 3.319356706187477e-05, + "loss": 2.0931, + "step": 11616500 + }, + { + "epoch": 33.63, + "learning_rate": 3.319284341422749e-05, + "loss": 2.0923, + "step": 11617000 + }, + { + "epoch": 33.63, + "learning_rate": 3.3192119766580215e-05, + "loss": 2.0906, + "step": 11617500 + }, + { + "epoch": 33.63, + "learning_rate": 3.319139611893294e-05, + "loss": 2.0889, + "step": 11618000 + }, + { + "epoch": 33.63, + "learning_rate": 3.319067247128566e-05, + "loss": 2.0902, + "step": 11618500 + }, + { + "epoch": 33.63, + "learning_rate": 3.318994882363838e-05, + "loss": 2.095, + "step": 11619000 + }, + { + "epoch": 33.63, + "learning_rate": 3.3189226623286404e-05, + "loss": 2.1011, + "step": 11619500 + }, + { + "epoch": 33.64, + "learning_rate": 3.3188502975639126e-05, + "loss": 2.0994, + "step": 11620000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318778222258244e-05, + "loss": 2.0852, + "step": 11620500 + }, + { + "epoch": 33.64, + "learning_rate": 3.3187058574935164e-05, + "loss": 2.0961, + "step": 11621000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318633637458318e-05, + "loss": 2.0978, + "step": 11621500 + }, + { + "epoch": 33.64, + "learning_rate": 3.31856127269359e-05, + "loss": 2.0926, + "step": 11622000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318488907928863e-05, + "loss": 2.1234, + "step": 11622500 + }, + { + "epoch": 33.64, + "learning_rate": 3.318416543164135e-05, + "loss": 2.0914, + "step": 11623000 + }, + { + "epoch": 33.65, + "learning_rate": 3.3183441783994076e-05, + "loss": 2.0934, + "step": 11623500 + }, + { + "epoch": 33.65, + "learning_rate": 3.31827181363468e-05, + "loss": 2.0761, + "step": 11624000 + }, + { + "epoch": 33.65, + "learning_rate": 3.318199593599481e-05, + "loss": 2.1006, + "step": 11624500 + }, + { + "epoch": 33.65, + "learning_rate": 3.3181272288347536e-05, + "loss": 2.0661, + "step": 11625000 + }, + { + "epoch": 33.65, + "learning_rate": 3.318055008799555e-05, + "loss": 2.0856, + "step": 11625500 + }, + { + "epoch": 33.65, + "learning_rate": 3.317982644034828e-05, + "loss": 2.0716, + "step": 11626000 + }, + { + "epoch": 33.65, + "learning_rate": 3.3179102792701e-05, + "loss": 2.0958, + "step": 11626500 + }, + { + "epoch": 33.66, + "learning_rate": 3.3178379145053725e-05, + "loss": 2.1009, + "step": 11627000 + }, + { + "epoch": 33.66, + "learning_rate": 3.317765549740645e-05, + "loss": 2.093, + "step": 11627500 + }, + { + "epoch": 33.66, + "learning_rate": 3.3176931849759176e-05, + "loss": 2.0871, + "step": 11628000 + }, + { + "epoch": 33.66, + "learning_rate": 3.317620964940719e-05, + "loss": 2.0731, + "step": 11628500 + }, + { + "epoch": 33.66, + "learning_rate": 3.3175486001759914e-05, + "loss": 2.1009, + "step": 11629000 + }, + { + "epoch": 33.66, + "learning_rate": 3.3174762354112636e-05, + "loss": 2.0928, + "step": 11629500 + }, + { + "epoch": 33.66, + "learning_rate": 3.317403870646536e-05, + "loss": 2.099, + "step": 11630000 + }, + { + "epoch": 33.67, + "learning_rate": 3.317331505881808e-05, + "loss": 2.0959, + "step": 11630500 + }, + { + "epoch": 33.67, + "learning_rate": 3.31725914111708e-05, + "loss": 2.079, + "step": 11631000 + }, + { + "epoch": 33.67, + "learning_rate": 3.317186776352353e-05, + "loss": 2.0774, + "step": 11631500 + }, + { + "epoch": 33.67, + "learning_rate": 3.3171144115876254e-05, + "loss": 2.1021, + "step": 11632000 + }, + { + "epoch": 33.67, + "learning_rate": 3.317042191552427e-05, + "loss": 2.0902, + "step": 11632500 + }, + { + "epoch": 33.67, + "learning_rate": 3.316969826787699e-05, + "loss": 2.0684, + "step": 11633000 + }, + { + "epoch": 33.67, + "learning_rate": 3.3168974620229714e-05, + "loss": 2.1003, + "step": 11633500 + }, + { + "epoch": 33.68, + "learning_rate": 3.3168250972582436e-05, + "loss": 2.092, + "step": 11634000 + }, + { + "epoch": 33.68, + "learning_rate": 3.316752732493516e-05, + "loss": 2.0773, + "step": 11634500 + }, + { + "epoch": 33.68, + "learning_rate": 3.316680367728789e-05, + "loss": 2.0833, + "step": 11635000 + }, + { + "epoch": 33.68, + "learning_rate": 3.316608002964061e-05, + "loss": 2.1025, + "step": 11635500 + }, + { + "epoch": 33.68, + "learning_rate": 3.316535638199333e-05, + "loss": 2.0918, + "step": 11636000 + }, + { + "epoch": 33.68, + "learning_rate": 3.3164632734346054e-05, + "loss": 2.085, + "step": 11636500 + }, + { + "epoch": 33.68, + "learning_rate": 3.316390908669878e-05, + "loss": 2.0753, + "step": 11637000 + }, + { + "epoch": 33.69, + "learning_rate": 3.3163185439051505e-05, + "loss": 2.0601, + "step": 11637500 + }, + { + "epoch": 33.69, + "learning_rate": 3.316246179140423e-05, + "loss": 2.0716, + "step": 11638000 + }, + { + "epoch": 33.69, + "learning_rate": 3.316173814375695e-05, + "loss": 2.0851, + "step": 11638500 + }, + { + "epoch": 33.69, + "learning_rate": 3.316101449610967e-05, + "loss": 2.0798, + "step": 11639000 + }, + { + "epoch": 33.69, + "learning_rate": 3.316029229575769e-05, + "loss": 2.0774, + "step": 11639500 + }, + { + "epoch": 33.69, + "learning_rate": 3.315956864811041e-05, + "loss": 2.0796, + "step": 11640000 + }, + { + "epoch": 33.69, + "learning_rate": 3.315884500046313e-05, + "loss": 2.0668, + "step": 11640500 + }, + { + "epoch": 33.7, + "learning_rate": 3.3158121352815854e-05, + "loss": 2.0986, + "step": 11641000 + }, + { + "epoch": 33.7, + "learning_rate": 3.3157399152463877e-05, + "loss": 2.1004, + "step": 11641500 + }, + { + "epoch": 33.7, + "learning_rate": 3.31566755048166e-05, + "loss": 2.0798, + "step": 11642000 + }, + { + "epoch": 33.7, + "learning_rate": 3.315595185716933e-05, + "loss": 2.0764, + "step": 11642500 + }, + { + "epoch": 33.7, + "learning_rate": 3.315522820952205e-05, + "loss": 2.1072, + "step": 11643000 + }, + { + "epoch": 33.7, + "learning_rate": 3.315450456187477e-05, + "loss": 2.0855, + "step": 11643500 + }, + { + "epoch": 33.7, + "learning_rate": 3.3153780914227495e-05, + "loss": 2.0839, + "step": 11644000 + }, + { + "epoch": 33.71, + "learning_rate": 3.315305726658022e-05, + "loss": 2.0819, + "step": 11644500 + }, + { + "epoch": 33.71, + "learning_rate": 3.315233361893294e-05, + "loss": 2.07, + "step": 11645000 + }, + { + "epoch": 33.71, + "learning_rate": 3.315160997128566e-05, + "loss": 2.0754, + "step": 11645500 + }, + { + "epoch": 33.71, + "learning_rate": 3.3150886323638383e-05, + "loss": 2.0952, + "step": 11646000 + }, + { + "epoch": 33.71, + "learning_rate": 3.3150162675991106e-05, + "loss": 2.1035, + "step": 11646500 + }, + { + "epoch": 33.71, + "learning_rate": 3.314944047563913e-05, + "loss": 2.0864, + "step": 11647000 + }, + { + "epoch": 33.71, + "learning_rate": 3.3148718275287144e-05, + "loss": 2.0822, + "step": 11647500 + }, + { + "epoch": 33.72, + "learning_rate": 3.3147994627639866e-05, + "loss": 2.0955, + "step": 11648000 + }, + { + "epoch": 33.72, + "learning_rate": 3.314727242728788e-05, + "loss": 2.0962, + "step": 11648500 + }, + { + "epoch": 33.72, + "learning_rate": 3.314654877964061e-05, + "loss": 2.0907, + "step": 11649000 + }, + { + "epoch": 33.72, + "learning_rate": 3.314582513199333e-05, + "loss": 2.0844, + "step": 11649500 + }, + { + "epoch": 33.72, + "learning_rate": 3.314510148434606e-05, + "loss": 2.0706, + "step": 11650000 + }, + { + "epoch": 33.72, + "learning_rate": 3.3144377836698784e-05, + "loss": 2.1027, + "step": 11650500 + }, + { + "epoch": 33.72, + "learning_rate": 3.3143654189051506e-05, + "loss": 2.0854, + "step": 11651000 + }, + { + "epoch": 33.73, + "learning_rate": 3.314293054140423e-05, + "loss": 2.1021, + "step": 11651500 + }, + { + "epoch": 33.73, + "learning_rate": 3.314220689375695e-05, + "loss": 2.1129, + "step": 11652000 + }, + { + "epoch": 33.73, + "learning_rate": 3.3141484693404966e-05, + "loss": 2.1121, + "step": 11652500 + }, + { + "epoch": 33.73, + "learning_rate": 3.314076104575769e-05, + "loss": 2.0973, + "step": 11653000 + }, + { + "epoch": 33.73, + "learning_rate": 3.314003739811041e-05, + "loss": 2.0778, + "step": 11653500 + }, + { + "epoch": 33.73, + "learning_rate": 3.313931375046313e-05, + "loss": 2.1001, + "step": 11654000 + }, + { + "epoch": 33.74, + "learning_rate": 3.313859010281586e-05, + "loss": 2.1015, + "step": 11654500 + }, + { + "epoch": 33.74, + "learning_rate": 3.3137866455168584e-05, + "loss": 2.0898, + "step": 11655000 + }, + { + "epoch": 33.74, + "learning_rate": 3.3137142807521306e-05, + "loss": 2.0829, + "step": 11655500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313641915987403e-05, + "loss": 2.0735, + "step": 11656000 + }, + { + "epoch": 33.74, + "learning_rate": 3.313569551222675e-05, + "loss": 2.0993, + "step": 11656500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313497186457948e-05, + "loss": 2.0967, + "step": 11657000 + }, + { + "epoch": 33.74, + "learning_rate": 3.31342482169322e-05, + "loss": 2.0873, + "step": 11657500 + }, + { + "epoch": 33.75, + "learning_rate": 3.3133524569284924e-05, + "loss": 2.0707, + "step": 11658000 + }, + { + "epoch": 33.75, + "learning_rate": 3.3132800921637646e-05, + "loss": 2.0912, + "step": 11658500 + }, + { + "epoch": 33.75, + "learning_rate": 3.313207872128566e-05, + "loss": 2.0852, + "step": 11659000 + }, + { + "epoch": 33.75, + "learning_rate": 3.3131355073638384e-05, + "loss": 2.1049, + "step": 11659500 + }, + { + "epoch": 33.75, + "learning_rate": 3.313063142599111e-05, + "loss": 2.0779, + "step": 11660000 + }, + { + "epoch": 33.75, + "learning_rate": 3.3129907778343836e-05, + "loss": 2.1034, + "step": 11660500 + }, + { + "epoch": 33.75, + "learning_rate": 3.312918413069656e-05, + "loss": 2.1041, + "step": 11661000 + }, + { + "epoch": 33.76, + "learning_rate": 3.312846193034457e-05, + "loss": 2.0867, + "step": 11661500 + }, + { + "epoch": 33.76, + "learning_rate": 3.3127738282697296e-05, + "loss": 2.0956, + "step": 11662000 + }, + { + "epoch": 33.76, + "learning_rate": 3.312701608234531e-05, + "loss": 2.1141, + "step": 11662500 + }, + { + "epoch": 33.76, + "learning_rate": 3.312629243469803e-05, + "loss": 2.0966, + "step": 11663000 + }, + { + "epoch": 33.76, + "learning_rate": 3.312556878705076e-05, + "loss": 2.0547, + "step": 11663500 + }, + { + "epoch": 33.76, + "learning_rate": 3.312484803399407e-05, + "loss": 2.0933, + "step": 11664000 + }, + { + "epoch": 33.76, + "learning_rate": 3.3124124386346793e-05, + "loss": 2.0734, + "step": 11664500 + }, + { + "epoch": 33.77, + "learning_rate": 3.312340073869952e-05, + "loss": 2.0764, + "step": 11665000 + }, + { + "epoch": 33.77, + "learning_rate": 3.3122677091052245e-05, + "loss": 2.0909, + "step": 11665500 + }, + { + "epoch": 33.77, + "learning_rate": 3.312195344340497e-05, + "loss": 2.0643, + "step": 11666000 + }, + { + "epoch": 33.77, + "learning_rate": 3.312122979575769e-05, + "loss": 2.0907, + "step": 11666500 + }, + { + "epoch": 33.77, + "learning_rate": 3.312050614811041e-05, + "loss": 2.0981, + "step": 11667000 + }, + { + "epoch": 33.77, + "learning_rate": 3.3119782500463134e-05, + "loss": 2.0756, + "step": 11667500 + }, + { + "epoch": 33.77, + "learning_rate": 3.3119060300111156e-05, + "loss": 2.0727, + "step": 11668000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311833665246388e-05, + "loss": 2.0736, + "step": 11668500 + }, + { + "epoch": 33.78, + "learning_rate": 3.31176130048166e-05, + "loss": 2.0759, + "step": 11669000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311688935716932e-05, + "loss": 2.0822, + "step": 11669500 + }, + { + "epoch": 33.78, + "learning_rate": 3.3116165709522045e-05, + "loss": 2.0938, + "step": 11670000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311544206187477e-05, + "loss": 2.0871, + "step": 11670500 + }, + { + "epoch": 33.78, + "learning_rate": 3.311471841422749e-05, + "loss": 2.114, + "step": 11671000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311399476658021e-05, + "loss": 2.0847, + "step": 11671500 + }, + { + "epoch": 33.79, + "learning_rate": 3.3113271118932934e-05, + "loss": 2.0483, + "step": 11672000 + }, + { + "epoch": 33.79, + "learning_rate": 3.311254747128566e-05, + "loss": 2.0913, + "step": 11672500 + }, + { + "epoch": 33.79, + "learning_rate": 3.3111823823638385e-05, + "loss": 2.0688, + "step": 11673000 + }, + { + "epoch": 33.79, + "learning_rate": 3.311110162328641e-05, + "loss": 2.0822, + "step": 11673500 + }, + { + "epoch": 33.79, + "learning_rate": 3.311037797563913e-05, + "loss": 2.0927, + "step": 11674000 + }, + { + "epoch": 33.79, + "learning_rate": 3.310965432799185e-05, + "loss": 2.0776, + "step": 11674500 + }, + { + "epoch": 33.79, + "learning_rate": 3.3108930680344574e-05, + "loss": 2.1098, + "step": 11675000 + }, + { + "epoch": 33.8, + "learning_rate": 3.310820847999259e-05, + "loss": 2.0925, + "step": 11675500 + }, + { + "epoch": 33.8, + "learning_rate": 3.310748483234531e-05, + "loss": 2.0847, + "step": 11676000 + }, + { + "epoch": 33.8, + "learning_rate": 3.3106761184698034e-05, + "loss": 2.0652, + "step": 11676500 + }, + { + "epoch": 33.8, + "learning_rate": 3.310603753705076e-05, + "loss": 2.1004, + "step": 11677000 + }, + { + "epoch": 33.8, + "learning_rate": 3.3105313889403485e-05, + "loss": 2.0999, + "step": 11677500 + }, + { + "epoch": 33.8, + "learning_rate": 3.310459024175621e-05, + "loss": 2.1025, + "step": 11678000 + }, + { + "epoch": 33.8, + "learning_rate": 3.310386659410893e-05, + "loss": 2.0901, + "step": 11678500 + }, + { + "epoch": 33.81, + "learning_rate": 3.310314294646165e-05, + "loss": 2.0812, + "step": 11679000 + }, + { + "epoch": 33.81, + "learning_rate": 3.310241929881438e-05, + "loss": 2.0678, + "step": 11679500 + }, + { + "epoch": 33.81, + "learning_rate": 3.31016956511671e-05, + "loss": 2.0775, + "step": 11680000 + }, + { + "epoch": 33.81, + "learning_rate": 3.3100972003519826e-05, + "loss": 2.0689, + "step": 11680500 + }, + { + "epoch": 33.81, + "learning_rate": 3.310024835587255e-05, + "loss": 2.0798, + "step": 11681000 + }, + { + "epoch": 33.81, + "learning_rate": 3.309952470822527e-05, + "loss": 2.0749, + "step": 11681500 + }, + { + "epoch": 33.81, + "learning_rate": 3.309880106057799e-05, + "loss": 2.0686, + "step": 11682000 + }, + { + "epoch": 33.82, + "learning_rate": 3.3098078860226015e-05, + "loss": 2.0719, + "step": 11682500 + }, + { + "epoch": 33.82, + "learning_rate": 3.309735521257874e-05, + "loss": 2.0871, + "step": 11683000 + }, + { + "epoch": 33.82, + "learning_rate": 3.309663156493146e-05, + "loss": 2.0917, + "step": 11683500 + }, + { + "epoch": 33.82, + "learning_rate": 3.309590791728418e-05, + "loss": 2.0887, + "step": 11684000 + }, + { + "epoch": 33.82, + "learning_rate": 3.3095184269636904e-05, + "loss": 2.0879, + "step": 11684500 + }, + { + "epoch": 33.82, + "learning_rate": 3.3094460621989626e-05, + "loss": 2.0852, + "step": 11685000 + }, + { + "epoch": 33.82, + "learning_rate": 3.309373842163764e-05, + "loss": 2.0985, + "step": 11685500 + }, + { + "epoch": 33.83, + "learning_rate": 3.3093014773990364e-05, + "loss": 2.0871, + "step": 11686000 + }, + { + "epoch": 33.83, + "learning_rate": 3.3092292573638386e-05, + "loss": 2.1035, + "step": 11686500 + }, + { + "epoch": 33.83, + "learning_rate": 3.3091568925991115e-05, + "loss": 2.0803, + "step": 11687000 + }, + { + "epoch": 33.83, + "learning_rate": 3.309084527834384e-05, + "loss": 2.0848, + "step": 11687500 + }, + { + "epoch": 33.83, + "learning_rate": 3.309012307799185e-05, + "loss": 2.0928, + "step": 11688000 + }, + { + "epoch": 33.83, + "learning_rate": 3.3089399430344575e-05, + "loss": 2.1, + "step": 11688500 + }, + { + "epoch": 33.83, + "learning_rate": 3.30886757826973e-05, + "loss": 2.0821, + "step": 11689000 + }, + { + "epoch": 33.84, + "learning_rate": 3.308795213505002e-05, + "loss": 2.0917, + "step": 11689500 + }, + { + "epoch": 33.84, + "learning_rate": 3.308722848740274e-05, + "loss": 2.0952, + "step": 11690000 + }, + { + "epoch": 33.84, + "learning_rate": 3.3086504839755464e-05, + "loss": 2.0686, + "step": 11690500 + }, + { + "epoch": 33.84, + "learning_rate": 3.308578119210819e-05, + "loss": 2.0799, + "step": 11691000 + }, + { + "epoch": 33.84, + "learning_rate": 3.3085057544460915e-05, + "loss": 2.0828, + "step": 11691500 + }, + { + "epoch": 33.84, + "learning_rate": 3.308433389681364e-05, + "loss": 2.0852, + "step": 11692000 + }, + { + "epoch": 33.85, + "learning_rate": 3.308361024916636e-05, + "loss": 2.1174, + "step": 11692500 + }, + { + "epoch": 33.85, + "learning_rate": 3.308288660151908e-05, + "loss": 2.0695, + "step": 11693000 + }, + { + "epoch": 33.85, + "learning_rate": 3.3082162953871804e-05, + "loss": 2.0851, + "step": 11693500 + }, + { + "epoch": 33.85, + "learning_rate": 3.3081439306224526e-05, + "loss": 2.0796, + "step": 11694000 + }, + { + "epoch": 33.85, + "learning_rate": 3.308071710587255e-05, + "loss": 2.1108, + "step": 11694500 + }, + { + "epoch": 33.85, + "learning_rate": 3.307999345822527e-05, + "loss": 2.111, + "step": 11695000 + }, + { + "epoch": 33.85, + "learning_rate": 3.307926981057799e-05, + "loss": 2.0707, + "step": 11695500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3078547610226015e-05, + "loss": 2.1109, + "step": 11696000 + }, + { + "epoch": 33.86, + "learning_rate": 3.307782396257874e-05, + "loss": 2.0798, + "step": 11696500 + }, + { + "epoch": 33.86, + "learning_rate": 3.307710031493146e-05, + "loss": 2.0527, + "step": 11697000 + }, + { + "epoch": 33.86, + "learning_rate": 3.307637666728418e-05, + "loss": 2.0901, + "step": 11697500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3075653019636904e-05, + "loss": 2.096, + "step": 11698000 + }, + { + "epoch": 33.86, + "learning_rate": 3.3074929371989627e-05, + "loss": 2.0759, + "step": 11698500 + }, + { + "epoch": 33.86, + "learning_rate": 3.307420572434235e-05, + "loss": 2.1082, + "step": 11699000 + }, + { + "epoch": 33.87, + "learning_rate": 3.307348207669507e-05, + "loss": 2.074, + "step": 11699500 + }, + { + "epoch": 33.87, + "learning_rate": 3.307275987634309e-05, + "loss": 2.0813, + "step": 11700000 + }, + { + "epoch": 33.87, + "learning_rate": 3.3072036228695816e-05, + "loss": 2.0708, + "step": 11700500 + }, + { + "epoch": 33.87, + "learning_rate": 3.307131258104854e-05, + "loss": 2.0742, + "step": 11701000 + }, + { + "epoch": 33.87, + "learning_rate": 3.307058893340126e-05, + "loss": 2.056, + "step": 11701500 + }, + { + "epoch": 33.87, + "learning_rate": 3.306986528575399e-05, + "loss": 2.099, + "step": 11702000 + }, + { + "epoch": 33.87, + "learning_rate": 3.306914163810671e-05, + "loss": 2.067, + "step": 11702500 + }, + { + "epoch": 33.88, + "learning_rate": 3.306841943775473e-05, + "loss": 2.0963, + "step": 11703000 + }, + { + "epoch": 33.88, + "learning_rate": 3.306769579010745e-05, + "loss": 2.0731, + "step": 11703500 + }, + { + "epoch": 33.88, + "learning_rate": 3.306697214246017e-05, + "loss": 2.0767, + "step": 11704000 + }, + { + "epoch": 33.88, + "learning_rate": 3.3066249942108194e-05, + "loss": 2.0569, + "step": 11704500 + }, + { + "epoch": 33.88, + "learning_rate": 3.3065526294460916e-05, + "loss": 2.09, + "step": 11705000 + }, + { + "epoch": 33.88, + "learning_rate": 3.306480264681364e-05, + "loss": 2.1249, + "step": 11705500 + }, + { + "epoch": 33.88, + "learning_rate": 3.306407899916636e-05, + "loss": 2.0881, + "step": 11706000 + }, + { + "epoch": 33.89, + "learning_rate": 3.3063356798814376e-05, + "loss": 2.1119, + "step": 11706500 + }, + { + "epoch": 33.89, + "learning_rate": 3.30626331511671e-05, + "loss": 2.1004, + "step": 11707000 + }, + { + "epoch": 33.89, + "learning_rate": 3.306190950351982e-05, + "loss": 2.0924, + "step": 11707500 + }, + { + "epoch": 33.89, + "learning_rate": 3.306118585587254e-05, + "loss": 2.1048, + "step": 11708000 + }, + { + "epoch": 33.89, + "learning_rate": 3.3060462208225265e-05, + "loss": 2.0896, + "step": 11708500 + }, + { + "epoch": 33.89, + "learning_rate": 3.3059738560577994e-05, + "loss": 2.0899, + "step": 11709000 + }, + { + "epoch": 33.89, + "learning_rate": 3.3059014912930716e-05, + "loss": 2.0724, + "step": 11709500 + }, + { + "epoch": 33.9, + "learning_rate": 3.3058291265283445e-05, + "loss": 2.0603, + "step": 11710000 + }, + { + "epoch": 33.9, + "learning_rate": 3.305756906493146e-05, + "loss": 2.0818, + "step": 11710500 + }, + { + "epoch": 33.9, + "learning_rate": 3.305684541728418e-05, + "loss": 2.0761, + "step": 11711000 + }, + { + "epoch": 33.9, + "learning_rate": 3.3056121769636905e-05, + "loss": 2.0831, + "step": 11711500 + }, + { + "epoch": 33.9, + "learning_rate": 3.305539812198963e-05, + "loss": 2.077, + "step": 11712000 + }, + { + "epoch": 33.9, + "learning_rate": 3.305467447434235e-05, + "loss": 2.0693, + "step": 11712500 + }, + { + "epoch": 33.9, + "learning_rate": 3.3053952273990365e-05, + "loss": 2.0949, + "step": 11713000 + }, + { + "epoch": 33.91, + "learning_rate": 3.3053228626343094e-05, + "loss": 2.0816, + "step": 11713500 + }, + { + "epoch": 33.91, + "learning_rate": 3.305250642599111e-05, + "loss": 2.0977, + "step": 11714000 + }, + { + "epoch": 33.91, + "learning_rate": 3.305178277834383e-05, + "loss": 2.0706, + "step": 11714500 + }, + { + "epoch": 33.91, + "learning_rate": 3.305106057799185e-05, + "loss": 2.0847, + "step": 11715000 + }, + { + "epoch": 33.91, + "learning_rate": 3.305033693034457e-05, + "loss": 2.0959, + "step": 11715500 + }, + { + "epoch": 33.91, + "learning_rate": 3.304961328269729e-05, + "loss": 2.0836, + "step": 11716000 + }, + { + "epoch": 33.91, + "learning_rate": 3.3048891082345314e-05, + "loss": 2.0753, + "step": 11716500 + }, + { + "epoch": 33.92, + "learning_rate": 3.3048167434698037e-05, + "loss": 2.0853, + "step": 11717000 + }, + { + "epoch": 33.92, + "learning_rate": 3.3047443787050766e-05, + "loss": 2.0998, + "step": 11717500 + }, + { + "epoch": 33.92, + "learning_rate": 3.304672013940349e-05, + "loss": 2.0762, + "step": 11718000 + }, + { + "epoch": 33.92, + "learning_rate": 3.304599649175621e-05, + "loss": 2.1111, + "step": 11718500 + }, + { + "epoch": 33.92, + "learning_rate": 3.304527284410893e-05, + "loss": 2.1008, + "step": 11719000 + }, + { + "epoch": 33.92, + "learning_rate": 3.3044549196461654e-05, + "loss": 2.1127, + "step": 11719500 + }, + { + "epoch": 33.92, + "learning_rate": 3.304382554881438e-05, + "loss": 2.1209, + "step": 11720000 + }, + { + "epoch": 33.93, + "learning_rate": 3.30431019011671e-05, + "loss": 2.0972, + "step": 11720500 + }, + { + "epoch": 33.93, + "learning_rate": 3.304237825351982e-05, + "loss": 2.0906, + "step": 11721000 + }, + { + "epoch": 33.93, + "learning_rate": 3.304165460587254e-05, + "loss": 2.0788, + "step": 11721500 + }, + { + "epoch": 33.93, + "learning_rate": 3.304093095822527e-05, + "loss": 2.0565, + "step": 11722000 + }, + { + "epoch": 33.93, + "learning_rate": 3.3040207310577995e-05, + "loss": 2.0848, + "step": 11722500 + }, + { + "epoch": 33.93, + "learning_rate": 3.303948366293072e-05, + "loss": 2.0761, + "step": 11723000 + }, + { + "epoch": 33.93, + "learning_rate": 3.303876001528344e-05, + "loss": 2.073, + "step": 11723500 + }, + { + "epoch": 33.94, + "learning_rate": 3.303803636763616e-05, + "loss": 2.0886, + "step": 11724000 + }, + { + "epoch": 33.94, + "learning_rate": 3.3037314167284184e-05, + "loss": 2.1171, + "step": 11724500 + }, + { + "epoch": 33.94, + "learning_rate": 3.3036590519636906e-05, + "loss": 2.0833, + "step": 11725000 + }, + { + "epoch": 33.94, + "learning_rate": 3.303586687198963e-05, + "loss": 2.0907, + "step": 11725500 + }, + { + "epoch": 33.94, + "learning_rate": 3.303514322434235e-05, + "loss": 2.088, + "step": 11726000 + }, + { + "epoch": 33.94, + "learning_rate": 3.303441957669507e-05, + "loss": 2.0699, + "step": 11726500 + }, + { + "epoch": 33.94, + "learning_rate": 3.3033695929047795e-05, + "loss": 2.0844, + "step": 11727000 + }, + { + "epoch": 33.95, + "learning_rate": 3.3032972281400524e-05, + "loss": 2.0925, + "step": 11727500 + }, + { + "epoch": 33.95, + "learning_rate": 3.3032248633753246e-05, + "loss": 2.0913, + "step": 11728000 + }, + { + "epoch": 33.95, + "learning_rate": 3.303152498610597e-05, + "loss": 2.0799, + "step": 11728500 + }, + { + "epoch": 33.95, + "learning_rate": 3.303080133845869e-05, + "loss": 2.0879, + "step": 11729000 + }, + { + "epoch": 33.95, + "learning_rate": 3.303007769081141e-05, + "loss": 2.1103, + "step": 11729500 + }, + { + "epoch": 33.95, + "learning_rate": 3.3029354043164135e-05, + "loss": 2.0887, + "step": 11730000 + }, + { + "epoch": 33.95, + "learning_rate": 3.302863184281215e-05, + "loss": 2.0939, + "step": 11730500 + }, + { + "epoch": 33.96, + "learning_rate": 3.302790819516487e-05, + "loss": 2.0801, + "step": 11731000 + }, + { + "epoch": 33.96, + "learning_rate": 3.30271845475176e-05, + "loss": 2.0742, + "step": 11731500 + }, + { + "epoch": 33.96, + "learning_rate": 3.3026460899870324e-05, + "loss": 2.0848, + "step": 11732000 + }, + { + "epoch": 33.96, + "learning_rate": 3.3025738699518346e-05, + "loss": 2.1011, + "step": 11732500 + }, + { + "epoch": 33.96, + "learning_rate": 3.302501505187107e-05, + "loss": 2.075, + "step": 11733000 + }, + { + "epoch": 33.96, + "learning_rate": 3.3024292851519084e-05, + "loss": 2.0803, + "step": 11733500 + }, + { + "epoch": 33.97, + "learning_rate": 3.3023569203871806e-05, + "loss": 2.101, + "step": 11734000 + }, + { + "epoch": 33.97, + "learning_rate": 3.302284700351982e-05, + "loss": 2.0901, + "step": 11734500 + }, + { + "epoch": 33.97, + "learning_rate": 3.3022123355872544e-05, + "loss": 2.0943, + "step": 11735000 + }, + { + "epoch": 33.97, + "learning_rate": 3.302139970822527e-05, + "loss": 2.0945, + "step": 11735500 + }, + { + "epoch": 33.97, + "learning_rate": 3.3020676060577995e-05, + "loss": 2.0736, + "step": 11736000 + }, + { + "epoch": 33.97, + "learning_rate": 3.301995386022601e-05, + "loss": 2.0864, + "step": 11736500 + }, + { + "epoch": 33.97, + "learning_rate": 3.301923021257873e-05, + "loss": 2.0809, + "step": 11737000 + }, + { + "epoch": 33.98, + "learning_rate": 3.3018506564931455e-05, + "loss": 2.0885, + "step": 11737500 + }, + { + "epoch": 33.98, + "learning_rate": 3.301778291728418e-05, + "loss": 2.1041, + "step": 11738000 + }, + { + "epoch": 33.98, + "learning_rate": 3.301706071693219e-05, + "loss": 2.0821, + "step": 11738500 + }, + { + "epoch": 33.98, + "learning_rate": 3.301633706928492e-05, + "loss": 2.0999, + "step": 11739000 + }, + { + "epoch": 33.98, + "learning_rate": 3.3015613421637644e-05, + "loss": 2.1192, + "step": 11739500 + }, + { + "epoch": 33.98, + "learning_rate": 3.3014889773990373e-05, + "loss": 2.067, + "step": 11740000 + }, + { + "epoch": 33.98, + "learning_rate": 3.3014166126343096e-05, + "loss": 2.0791, + "step": 11740500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301344247869582e-05, + "loss": 2.1023, + "step": 11741000 + }, + { + "epoch": 33.99, + "learning_rate": 3.301271883104854e-05, + "loss": 2.0997, + "step": 11741500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301199518340126e-05, + "loss": 2.0968, + "step": 11742000 + }, + { + "epoch": 33.99, + "learning_rate": 3.3011271535753985e-05, + "loss": 2.1097, + "step": 11742500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301054788810671e-05, + "loss": 2.0738, + "step": 11743000 + }, + { + "epoch": 33.99, + "learning_rate": 3.300982424045943e-05, + "loss": 2.075, + "step": 11743500 + }, + { + "epoch": 33.99, + "learning_rate": 3.300910059281215e-05, + "loss": 2.0988, + "step": 11744000 + }, + { + "epoch": 34.0, + "learning_rate": 3.3008376945164874e-05, + "loss": 2.104, + "step": 11744500 + }, + { + "epoch": 34.0, + "learning_rate": 3.3007654744812896e-05, + "loss": 2.0784, + "step": 11745000 + }, + { + "epoch": 34.0, + "learning_rate": 3.300693109716562e-05, + "loss": 2.0744, + "step": 11745500 + }, + { + "epoch": 34.0, + "learning_rate": 3.300620744951834e-05, + "loss": 2.1208, + "step": 11746000 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.6683752018054113, + "eval_accuracy_mlm": 0.6330868903676227, + "eval_accuracy_nsp": 0.8576833353957276, + "eval_loss": 2.1848316192626953, + "eval_runtime": 331.8363, + "eval_samples_per_second": 1315.064, + "eval_steps_per_second": 54.795, + "step": 11746048 + }, + { + "epoch": 34.0, + "learning_rate": 3.300548380187107e-05, + "loss": 2.0541, + "step": 11746500 + }, + { + "epoch": 34.0, + "learning_rate": 3.3004761601519085e-05, + "loss": 2.0843, + "step": 11747000 + }, + { + "epoch": 34.0, + "learning_rate": 3.300403795387181e-05, + "loss": 2.0417, + "step": 11747500 + }, + { + "epoch": 34.01, + "learning_rate": 3.300331430622453e-05, + "loss": 2.0662, + "step": 11748000 + }, + { + "epoch": 34.01, + "learning_rate": 3.300259065857725e-05, + "loss": 2.0647, + "step": 11748500 + }, + { + "epoch": 34.01, + "learning_rate": 3.3001867010929974e-05, + "loss": 2.052, + "step": 11749000 + }, + { + "epoch": 34.01, + "learning_rate": 3.3001143363282696e-05, + "loss": 2.0667, + "step": 11749500 + }, + { + "epoch": 34.01, + "learning_rate": 3.300042116293072e-05, + "loss": 2.0503, + "step": 11750000 + }, + { + "epoch": 34.01, + "learning_rate": 3.299969751528344e-05, + "loss": 2.0598, + "step": 11750500 + }, + { + "epoch": 34.01, + "learning_rate": 3.2998975314931456e-05, + "loss": 2.0656, + "step": 11751000 + }, + { + "epoch": 34.02, + "learning_rate": 3.299825166728418e-05, + "loss": 2.0474, + "step": 11751500 + }, + { + "epoch": 34.02, + "learning_rate": 3.29975280196369e-05, + "loss": 2.0609, + "step": 11752000 + }, + { + "epoch": 34.02, + "learning_rate": 3.299680581928492e-05, + "loss": 2.0618, + "step": 11752500 + }, + { + "epoch": 34.02, + "learning_rate": 3.2996082171637645e-05, + "loss": 2.0827, + "step": 11753000 + }, + { + "epoch": 34.02, + "learning_rate": 3.299535852399037e-05, + "loss": 2.0515, + "step": 11753500 + }, + { + "epoch": 34.02, + "learning_rate": 3.299463487634309e-05, + "loss": 2.0554, + "step": 11754000 + }, + { + "epoch": 34.02, + "learning_rate": 3.299391122869582e-05, + "loss": 2.0759, + "step": 11754500 + }, + { + "epoch": 34.03, + "learning_rate": 3.299318758104854e-05, + "loss": 2.0613, + "step": 11755000 + }, + { + "epoch": 34.03, + "learning_rate": 3.299246393340126e-05, + "loss": 2.0762, + "step": 11755500 + }, + { + "epoch": 34.03, + "learning_rate": 3.2991740285753985e-05, + "loss": 2.0491, + "step": 11756000 + }, + { + "epoch": 34.03, + "learning_rate": 3.299101663810671e-05, + "loss": 2.0709, + "step": 11756500 + }, + { + "epoch": 34.03, + "learning_rate": 3.299029443775472e-05, + "loss": 2.0752, + "step": 11757000 + }, + { + "epoch": 34.03, + "learning_rate": 3.298957079010745e-05, + "loss": 2.063, + "step": 11757500 + }, + { + "epoch": 34.03, + "learning_rate": 3.2988847142460174e-05, + "loss": 2.0705, + "step": 11758000 + }, + { + "epoch": 34.04, + "learning_rate": 3.29881234948129e-05, + "loss": 2.0852, + "step": 11758500 + }, + { + "epoch": 34.04, + "learning_rate": 3.298740129446091e-05, + "loss": 2.0856, + "step": 11759000 + }, + { + "epoch": 34.04, + "learning_rate": 3.2986677646813635e-05, + "loss": 2.073, + "step": 11759500 + }, + { + "epoch": 34.04, + "learning_rate": 3.298595689375695e-05, + "loss": 2.0644, + "step": 11760000 + }, + { + "epoch": 34.04, + "learning_rate": 3.298523324610967e-05, + "loss": 2.061, + "step": 11760500 + }, + { + "epoch": 34.04, + "learning_rate": 3.2984509598462395e-05, + "loss": 2.0538, + "step": 11761000 + }, + { + "epoch": 34.04, + "learning_rate": 3.298378595081512e-05, + "loss": 2.0657, + "step": 11761500 + }, + { + "epoch": 34.05, + "learning_rate": 3.2983062303167846e-05, + "loss": 2.0939, + "step": 11762000 + }, + { + "epoch": 34.05, + "learning_rate": 3.298233865552057e-05, + "loss": 2.0421, + "step": 11762500 + }, + { + "epoch": 34.05, + "learning_rate": 3.298161500787329e-05, + "loss": 2.0746, + "step": 11763000 + }, + { + "epoch": 34.05, + "learning_rate": 3.298089136022601e-05, + "loss": 2.0625, + "step": 11763500 + }, + { + "epoch": 34.05, + "learning_rate": 3.298016915987403e-05, + "loss": 2.0915, + "step": 11764000 + }, + { + "epoch": 34.05, + "learning_rate": 3.297944551222675e-05, + "loss": 2.0715, + "step": 11764500 + }, + { + "epoch": 34.05, + "learning_rate": 3.297872186457948e-05, + "loss": 2.0651, + "step": 11765000 + }, + { + "epoch": 34.06, + "learning_rate": 3.29779982169322e-05, + "loss": 2.1096, + "step": 11765500 + }, + { + "epoch": 34.06, + "learning_rate": 3.2977274569284924e-05, + "loss": 2.0695, + "step": 11766000 + }, + { + "epoch": 34.06, + "learning_rate": 3.2976550921637646e-05, + "loss": 2.0827, + "step": 11766500 + }, + { + "epoch": 34.06, + "learning_rate": 3.297582727399037e-05, + "loss": 2.0595, + "step": 11767000 + }, + { + "epoch": 34.06, + "learning_rate": 3.297510362634309e-05, + "loss": 2.0558, + "step": 11767500 + }, + { + "epoch": 34.06, + "learning_rate": 3.297437997869581e-05, + "loss": 2.0537, + "step": 11768000 + }, + { + "epoch": 34.06, + "learning_rate": 3.297365777834383e-05, + "loss": 2.0665, + "step": 11768500 + }, + { + "epoch": 34.07, + "learning_rate": 3.297293413069655e-05, + "loss": 2.0891, + "step": 11769000 + }, + { + "epoch": 34.07, + "learning_rate": 3.297221048304928e-05, + "loss": 2.0659, + "step": 11769500 + }, + { + "epoch": 34.07, + "learning_rate": 3.2971486835402e-05, + "loss": 2.0969, + "step": 11770000 + }, + { + "epoch": 34.07, + "learning_rate": 3.2970763187754724e-05, + "loss": 2.0883, + "step": 11770500 + }, + { + "epoch": 34.07, + "learning_rate": 3.297004243469804e-05, + "loss": 2.0772, + "step": 11771000 + }, + { + "epoch": 34.07, + "learning_rate": 3.296931878705076e-05, + "loss": 2.0877, + "step": 11771500 + }, + { + "epoch": 34.08, + "learning_rate": 3.2968595139403484e-05, + "loss": 2.0845, + "step": 11772000 + }, + { + "epoch": 34.08, + "learning_rate": 3.2967871491756206e-05, + "loss": 2.0875, + "step": 11772500 + }, + { + "epoch": 34.08, + "learning_rate": 3.296714784410893e-05, + "loss": 2.0819, + "step": 11773000 + }, + { + "epoch": 34.08, + "learning_rate": 3.296642419646165e-05, + "loss": 2.0609, + "step": 11773500 + }, + { + "epoch": 34.08, + "learning_rate": 3.296570054881438e-05, + "loss": 2.0783, + "step": 11774000 + }, + { + "epoch": 34.08, + "learning_rate": 3.29649769011671e-05, + "loss": 2.084, + "step": 11774500 + }, + { + "epoch": 34.08, + "learning_rate": 3.2964253253519824e-05, + "loss": 2.0782, + "step": 11775000 + }, + { + "epoch": 34.09, + "learning_rate": 3.2963529605872547e-05, + "loss": 2.0791, + "step": 11775500 + }, + { + "epoch": 34.09, + "learning_rate": 3.296280740552056e-05, + "loss": 2.058, + "step": 11776000 + }, + { + "epoch": 34.09, + "learning_rate": 3.2962083757873284e-05, + "loss": 2.0687, + "step": 11776500 + }, + { + "epoch": 34.09, + "learning_rate": 3.296136011022601e-05, + "loss": 2.0633, + "step": 11777000 + }, + { + "epoch": 34.09, + "learning_rate": 3.2960636462578736e-05, + "loss": 2.0861, + "step": 11777500 + }, + { + "epoch": 34.09, + "learning_rate": 3.295991281493146e-05, + "loss": 2.0464, + "step": 11778000 + }, + { + "epoch": 34.09, + "learning_rate": 3.295918916728418e-05, + "loss": 2.0552, + "step": 11778500 + }, + { + "epoch": 34.1, + "learning_rate": 3.29584655196369e-05, + "loss": 2.0559, + "step": 11779000 + }, + { + "epoch": 34.1, + "learning_rate": 3.295774187198963e-05, + "loss": 2.0849, + "step": 11779500 + }, + { + "epoch": 34.1, + "learning_rate": 3.2957018224342354e-05, + "loss": 2.0898, + "step": 11780000 + }, + { + "epoch": 34.1, + "learning_rate": 3.2956294576695076e-05, + "loss": 2.0519, + "step": 11780500 + }, + { + "epoch": 34.1, + "learning_rate": 3.29555709290478e-05, + "loss": 2.071, + "step": 11781000 + }, + { + "epoch": 34.1, + "learning_rate": 3.295484728140052e-05, + "loss": 2.0886, + "step": 11781500 + }, + { + "epoch": 34.1, + "learning_rate": 3.295412363375324e-05, + "loss": 2.0841, + "step": 11782000 + }, + { + "epoch": 34.11, + "learning_rate": 3.2953399986105965e-05, + "loss": 2.0711, + "step": 11782500 + }, + { + "epoch": 34.11, + "learning_rate": 3.295267633845869e-05, + "loss": 2.084, + "step": 11783000 + }, + { + "epoch": 34.11, + "learning_rate": 3.295195269081141e-05, + "loss": 2.0786, + "step": 11783500 + }, + { + "epoch": 34.11, + "learning_rate": 3.295122904316414e-05, + "loss": 2.0444, + "step": 11784000 + }, + { + "epoch": 34.11, + "learning_rate": 3.2950506842812154e-05, + "loss": 2.0636, + "step": 11784500 + }, + { + "epoch": 34.11, + "learning_rate": 3.2949784642460176e-05, + "loss": 2.068, + "step": 11785000 + }, + { + "epoch": 34.11, + "learning_rate": 3.294906244210819e-05, + "loss": 2.0591, + "step": 11785500 + }, + { + "epoch": 34.12, + "learning_rate": 3.2948338794460914e-05, + "loss": 2.0663, + "step": 11786000 + }, + { + "epoch": 34.12, + "learning_rate": 3.2947615146813636e-05, + "loss": 2.059, + "step": 11786500 + }, + { + "epoch": 34.12, + "learning_rate": 3.294689294646165e-05, + "loss": 2.0956, + "step": 11787000 + }, + { + "epoch": 34.12, + "learning_rate": 3.294616929881438e-05, + "loss": 2.078, + "step": 11787500 + }, + { + "epoch": 34.12, + "learning_rate": 3.29454456511671e-05, + "loss": 2.0863, + "step": 11788000 + }, + { + "epoch": 34.12, + "learning_rate": 3.2944722003519825e-05, + "loss": 2.0534, + "step": 11788500 + }, + { + "epoch": 34.12, + "learning_rate": 3.294399835587255e-05, + "loss": 2.0767, + "step": 11789000 + }, + { + "epoch": 34.13, + "learning_rate": 3.294327470822527e-05, + "loss": 2.0804, + "step": 11789500 + }, + { + "epoch": 34.13, + "learning_rate": 3.294255106057799e-05, + "loss": 2.0808, + "step": 11790000 + }, + { + "epoch": 34.13, + "learning_rate": 3.2941827412930714e-05, + "loss": 2.0672, + "step": 11790500 + }, + { + "epoch": 34.13, + "learning_rate": 3.2941103765283436e-05, + "loss": 2.0873, + "step": 11791000 + }, + { + "epoch": 34.13, + "learning_rate": 3.294038011763616e-05, + "loss": 2.0539, + "step": 11791500 + }, + { + "epoch": 34.13, + "learning_rate": 3.293965646998889e-05, + "loss": 2.0846, + "step": 11792000 + }, + { + "epoch": 34.13, + "learning_rate": 3.293893282234161e-05, + "loss": 2.0529, + "step": 11792500 + }, + { + "epoch": 34.14, + "learning_rate": 3.293820917469433e-05, + "loss": 2.071, + "step": 11793000 + }, + { + "epoch": 34.14, + "learning_rate": 3.2937486974342354e-05, + "loss": 2.0645, + "step": 11793500 + }, + { + "epoch": 34.14, + "learning_rate": 3.2936763326695077e-05, + "loss": 2.0782, + "step": 11794000 + }, + { + "epoch": 34.14, + "learning_rate": 3.29360396790478e-05, + "loss": 2.0544, + "step": 11794500 + }, + { + "epoch": 34.14, + "learning_rate": 3.293531603140052e-05, + "loss": 2.0782, + "step": 11795000 + }, + { + "epoch": 34.14, + "learning_rate": 3.293459238375324e-05, + "loss": 2.0772, + "step": 11795500 + }, + { + "epoch": 34.14, + "learning_rate": 3.2933868736105966e-05, + "loss": 2.0607, + "step": 11796000 + }, + { + "epoch": 34.15, + "learning_rate": 3.293314508845869e-05, + "loss": 2.082, + "step": 11796500 + }, + { + "epoch": 34.15, + "learning_rate": 3.29324228881067e-05, + "loss": 2.0909, + "step": 11797000 + }, + { + "epoch": 34.15, + "learning_rate": 3.2931700687754726e-05, + "loss": 2.0745, + "step": 11797500 + }, + { + "epoch": 34.15, + "learning_rate": 3.293097704010745e-05, + "loss": 2.0617, + "step": 11798000 + }, + { + "epoch": 34.15, + "learning_rate": 3.293025339246017e-05, + "loss": 2.0706, + "step": 11798500 + }, + { + "epoch": 34.15, + "learning_rate": 3.292952974481289e-05, + "loss": 2.0894, + "step": 11799000 + }, + { + "epoch": 34.15, + "learning_rate": 3.292880609716562e-05, + "loss": 2.0677, + "step": 11799500 + }, + { + "epoch": 34.16, + "learning_rate": 3.292808389681364e-05, + "loss": 2.0815, + "step": 11800000 + }, + { + "epoch": 34.16, + "learning_rate": 3.292736024916636e-05, + "loss": 2.09, + "step": 11800500 + }, + { + "epoch": 34.16, + "learning_rate": 3.292663660151908e-05, + "loss": 2.0706, + "step": 11801000 + }, + { + "epoch": 34.16, + "learning_rate": 3.2925912953871804e-05, + "loss": 2.0848, + "step": 11801500 + }, + { + "epoch": 34.16, + "learning_rate": 3.292518930622453e-05, + "loss": 2.0687, + "step": 11802000 + }, + { + "epoch": 34.16, + "learning_rate": 3.2924465658577255e-05, + "loss": 2.0518, + "step": 11802500 + }, + { + "epoch": 34.16, + "learning_rate": 3.292374345822527e-05, + "loss": 2.0762, + "step": 11803000 + }, + { + "epoch": 34.17, + "learning_rate": 3.292301981057799e-05, + "loss": 2.0949, + "step": 11803500 + }, + { + "epoch": 34.17, + "learning_rate": 3.2922296162930715e-05, + "loss": 2.0569, + "step": 11804000 + }, + { + "epoch": 34.17, + "learning_rate": 3.292157251528344e-05, + "loss": 2.0685, + "step": 11804500 + }, + { + "epoch": 34.17, + "learning_rate": 3.292085031493146e-05, + "loss": 2.0883, + "step": 11805000 + }, + { + "epoch": 34.17, + "learning_rate": 3.292012666728418e-05, + "loss": 2.0779, + "step": 11805500 + }, + { + "epoch": 34.17, + "learning_rate": 3.2919403019636904e-05, + "loss": 2.096, + "step": 11806000 + }, + { + "epoch": 34.17, + "learning_rate": 3.2918679371989626e-05, + "loss": 2.0712, + "step": 11806500 + }, + { + "epoch": 34.18, + "learning_rate": 3.2917955724342355e-05, + "loss": 2.0623, + "step": 11807000 + }, + { + "epoch": 34.18, + "learning_rate": 3.291723207669508e-05, + "loss": 2.0802, + "step": 11807500 + }, + { + "epoch": 34.18, + "learning_rate": 3.29165084290478e-05, + "loss": 2.0674, + "step": 11808000 + }, + { + "epoch": 34.18, + "learning_rate": 3.291578478140052e-05, + "loss": 2.0715, + "step": 11808500 + }, + { + "epoch": 34.18, + "learning_rate": 3.291506258104854e-05, + "loss": 2.0689, + "step": 11809000 + }, + { + "epoch": 34.18, + "learning_rate": 3.291433893340126e-05, + "loss": 2.0656, + "step": 11809500 + }, + { + "epoch": 34.19, + "learning_rate": 3.291361528575398e-05, + "loss": 2.0631, + "step": 11810000 + }, + { + "epoch": 34.19, + "learning_rate": 3.291289163810671e-05, + "loss": 2.0952, + "step": 11810500 + }, + { + "epoch": 34.19, + "learning_rate": 3.291216799045943e-05, + "loss": 2.0691, + "step": 11811000 + }, + { + "epoch": 34.19, + "learning_rate": 3.2911444342812155e-05, + "loss": 2.093, + "step": 11811500 + }, + { + "epoch": 34.19, + "learning_rate": 3.291072069516488e-05, + "loss": 2.0667, + "step": 11812000 + }, + { + "epoch": 34.19, + "learning_rate": 3.29099970475176e-05, + "loss": 2.0783, + "step": 11812500 + }, + { + "epoch": 34.19, + "learning_rate": 3.290927339987032e-05, + "loss": 2.0768, + "step": 11813000 + }, + { + "epoch": 34.2, + "learning_rate": 3.2908549752223044e-05, + "loss": 2.0774, + "step": 11813500 + }, + { + "epoch": 34.2, + "learning_rate": 3.290782610457577e-05, + "loss": 2.0762, + "step": 11814000 + }, + { + "epoch": 34.2, + "learning_rate": 3.2907102456928496e-05, + "loss": 2.0663, + "step": 11814500 + }, + { + "epoch": 34.2, + "learning_rate": 3.290637880928122e-05, + "loss": 2.0947, + "step": 11815000 + }, + { + "epoch": 34.2, + "learning_rate": 3.290565516163394e-05, + "loss": 2.092, + "step": 11815500 + }, + { + "epoch": 34.2, + "learning_rate": 3.290493151398666e-05, + "loss": 2.0701, + "step": 11816000 + }, + { + "epoch": 34.2, + "learning_rate": 3.2904209313634685e-05, + "loss": 2.0864, + "step": 11816500 + }, + { + "epoch": 34.21, + "learning_rate": 3.29034871132827e-05, + "loss": 2.0738, + "step": 11817000 + }, + { + "epoch": 34.21, + "learning_rate": 3.290276346563542e-05, + "loss": 2.0635, + "step": 11817500 + }, + { + "epoch": 34.21, + "learning_rate": 3.2902039817988145e-05, + "loss": 2.0787, + "step": 11818000 + }, + { + "epoch": 34.21, + "learning_rate": 3.290131617034087e-05, + "loss": 2.0751, + "step": 11818500 + }, + { + "epoch": 34.21, + "learning_rate": 3.290059252269359e-05, + "loss": 2.0808, + "step": 11819000 + }, + { + "epoch": 34.21, + "learning_rate": 3.289986887504631e-05, + "loss": 2.0526, + "step": 11819500 + }, + { + "epoch": 34.21, + "learning_rate": 3.2899145227399033e-05, + "loss": 2.0808, + "step": 11820000 + }, + { + "epoch": 34.22, + "learning_rate": 3.289842157975176e-05, + "loss": 2.0637, + "step": 11820500 + }, + { + "epoch": 34.22, + "learning_rate": 3.2897697932104485e-05, + "loss": 2.0806, + "step": 11821000 + }, + { + "epoch": 34.22, + "learning_rate": 3.2896974284457214e-05, + "loss": 2.0874, + "step": 11821500 + }, + { + "epoch": 34.22, + "learning_rate": 3.2896250636809936e-05, + "loss": 2.079, + "step": 11822000 + }, + { + "epoch": 34.22, + "learning_rate": 3.289552843645795e-05, + "loss": 2.0667, + "step": 11822500 + }, + { + "epoch": 34.22, + "learning_rate": 3.2894804788810674e-05, + "loss": 2.0665, + "step": 11823000 + }, + { + "epoch": 34.22, + "learning_rate": 3.2894081141163396e-05, + "loss": 2.0862, + "step": 11823500 + }, + { + "epoch": 34.23, + "learning_rate": 3.289335749351612e-05, + "loss": 2.0858, + "step": 11824000 + }, + { + "epoch": 34.23, + "learning_rate": 3.289263384586884e-05, + "loss": 2.0813, + "step": 11824500 + }, + { + "epoch": 34.23, + "learning_rate": 3.289191019822156e-05, + "loss": 2.0796, + "step": 11825000 + }, + { + "epoch": 34.23, + "learning_rate": 3.2891186550574285e-05, + "loss": 2.0635, + "step": 11825500 + }, + { + "epoch": 34.23, + "learning_rate": 3.2890462902927014e-05, + "loss": 2.101, + "step": 11826000 + }, + { + "epoch": 34.23, + "learning_rate": 3.2889739255279736e-05, + "loss": 2.0824, + "step": 11826500 + }, + { + "epoch": 34.23, + "learning_rate": 3.288901705492775e-05, + "loss": 2.0819, + "step": 11827000 + }, + { + "epoch": 34.24, + "learning_rate": 3.2888293407280474e-05, + "loss": 2.0891, + "step": 11827500 + }, + { + "epoch": 34.24, + "learning_rate": 3.2887569759633196e-05, + "loss": 2.0645, + "step": 11828000 + }, + { + "epoch": 34.24, + "learning_rate": 3.2886846111985925e-05, + "loss": 2.0446, + "step": 11828500 + }, + { + "epoch": 34.24, + "learning_rate": 3.288612391163394e-05, + "loss": 2.0595, + "step": 11829000 + }, + { + "epoch": 34.24, + "learning_rate": 3.288540026398666e-05, + "loss": 2.0683, + "step": 11829500 + }, + { + "epoch": 34.24, + "learning_rate": 3.2884676616339385e-05, + "loss": 2.0903, + "step": 11830000 + }, + { + "epoch": 34.24, + "learning_rate": 3.2883952968692114e-05, + "loss": 2.0702, + "step": 11830500 + }, + { + "epoch": 34.25, + "learning_rate": 3.2883229321044836e-05, + "loss": 2.0579, + "step": 11831000 + }, + { + "epoch": 34.25, + "learning_rate": 3.288250567339756e-05, + "loss": 2.0784, + "step": 11831500 + }, + { + "epoch": 34.25, + "learning_rate": 3.288178202575028e-05, + "loss": 2.0906, + "step": 11832000 + }, + { + "epoch": 34.25, + "learning_rate": 3.2881058378103e-05, + "loss": 2.0933, + "step": 11832500 + }, + { + "epoch": 34.25, + "learning_rate": 3.2880334730455725e-05, + "loss": 2.0592, + "step": 11833000 + }, + { + "epoch": 34.25, + "learning_rate": 3.287961108280845e-05, + "loss": 2.0663, + "step": 11833500 + }, + { + "epoch": 34.25, + "learning_rate": 3.287888743516117e-05, + "loss": 2.0624, + "step": 11834000 + }, + { + "epoch": 34.26, + "learning_rate": 3.2878165234809185e-05, + "loss": 2.0725, + "step": 11834500 + }, + { + "epoch": 34.26, + "learning_rate": 3.2877441587161914e-05, + "loss": 2.071, + "step": 11835000 + }, + { + "epoch": 34.26, + "learning_rate": 3.287671793951464e-05, + "loss": 2.0681, + "step": 11835500 + }, + { + "epoch": 34.26, + "learning_rate": 3.2875994291867366e-05, + "loss": 2.0825, + "step": 11836000 + }, + { + "epoch": 34.26, + "learning_rate": 3.287527209151538e-05, + "loss": 2.0762, + "step": 11836500 + }, + { + "epoch": 34.26, + "learning_rate": 3.28745498911634e-05, + "loss": 2.0685, + "step": 11837000 + }, + { + "epoch": 34.26, + "learning_rate": 3.287382624351612e-05, + "loss": 2.0911, + "step": 11837500 + }, + { + "epoch": 34.27, + "learning_rate": 3.287310259586884e-05, + "loss": 2.0727, + "step": 11838000 + }, + { + "epoch": 34.27, + "learning_rate": 3.2872378948221564e-05, + "loss": 2.0694, + "step": 11838500 + }, + { + "epoch": 34.27, + "learning_rate": 3.2871656747869586e-05, + "loss": 2.0561, + "step": 11839000 + }, + { + "epoch": 34.27, + "learning_rate": 3.287093310022231e-05, + "loss": 2.0549, + "step": 11839500 + }, + { + "epoch": 34.27, + "learning_rate": 3.287020945257503e-05, + "loss": 2.0601, + "step": 11840000 + }, + { + "epoch": 34.27, + "learning_rate": 3.286948580492775e-05, + "loss": 2.0514, + "step": 11840500 + }, + { + "epoch": 34.27, + "learning_rate": 3.286876360457577e-05, + "loss": 2.0579, + "step": 11841000 + }, + { + "epoch": 34.28, + "learning_rate": 3.286803995692849e-05, + "loss": 2.0559, + "step": 11841500 + }, + { + "epoch": 34.28, + "learning_rate": 3.286731630928121e-05, + "loss": 2.0964, + "step": 11842000 + }, + { + "epoch": 34.28, + "learning_rate": 3.2866592661633935e-05, + "loss": 2.0831, + "step": 11842500 + }, + { + "epoch": 34.28, + "learning_rate": 3.2865869013986664e-05, + "loss": 2.0662, + "step": 11843000 + }, + { + "epoch": 34.28, + "learning_rate": 3.2865145366339386e-05, + "loss": 2.056, + "step": 11843500 + }, + { + "epoch": 34.28, + "learning_rate": 3.2864421718692115e-05, + "loss": 2.0835, + "step": 11844000 + }, + { + "epoch": 34.28, + "learning_rate": 3.286369807104484e-05, + "loss": 2.0764, + "step": 11844500 + }, + { + "epoch": 34.29, + "learning_rate": 3.286297442339756e-05, + "loss": 2.0726, + "step": 11845000 + }, + { + "epoch": 34.29, + "learning_rate": 3.286225077575028e-05, + "loss": 2.0781, + "step": 11845500 + }, + { + "epoch": 34.29, + "learning_rate": 3.2861527128103004e-05, + "loss": 2.0948, + "step": 11846000 + }, + { + "epoch": 34.29, + "learning_rate": 3.2860803480455726e-05, + "loss": 2.0848, + "step": 11846500 + }, + { + "epoch": 34.29, + "learning_rate": 3.286007983280845e-05, + "loss": 2.0764, + "step": 11847000 + }, + { + "epoch": 34.29, + "learning_rate": 3.285935618516117e-05, + "loss": 2.0622, + "step": 11847500 + }, + { + "epoch": 34.3, + "learning_rate": 3.2858633984809186e-05, + "loss": 2.0842, + "step": 11848000 + }, + { + "epoch": 34.3, + "learning_rate": 3.2857910337161915e-05, + "loss": 2.0715, + "step": 11848500 + }, + { + "epoch": 34.3, + "learning_rate": 3.285718668951464e-05, + "loss": 2.078, + "step": 11849000 + }, + { + "epoch": 34.3, + "learning_rate": 3.285646304186736e-05, + "loss": 2.0526, + "step": 11849500 + }, + { + "epoch": 34.3, + "learning_rate": 3.285573939422008e-05, + "loss": 2.0827, + "step": 11850000 + }, + { + "epoch": 34.3, + "learning_rate": 3.2855015746572804e-05, + "loss": 2.069, + "step": 11850500 + }, + { + "epoch": 34.3, + "learning_rate": 3.285429209892553e-05, + "loss": 2.0583, + "step": 11851000 + }, + { + "epoch": 34.31, + "learning_rate": 3.2853568451278255e-05, + "loss": 2.072, + "step": 11851500 + }, + { + "epoch": 34.31, + "learning_rate": 3.285284625092627e-05, + "loss": 2.0703, + "step": 11852000 + }, + { + "epoch": 34.31, + "learning_rate": 3.285212405057429e-05, + "loss": 2.0796, + "step": 11852500 + }, + { + "epoch": 34.31, + "learning_rate": 3.2851400402927016e-05, + "loss": 2.0784, + "step": 11853000 + }, + { + "epoch": 34.31, + "learning_rate": 3.285067675527974e-05, + "loss": 2.0633, + "step": 11853500 + }, + { + "epoch": 34.31, + "learning_rate": 3.284995310763246e-05, + "loss": 2.0706, + "step": 11854000 + }, + { + "epoch": 34.31, + "learning_rate": 3.284922945998518e-05, + "loss": 2.0692, + "step": 11854500 + }, + { + "epoch": 34.32, + "learning_rate": 3.2848505812337904e-05, + "loss": 2.0722, + "step": 11855000 + }, + { + "epoch": 34.32, + "learning_rate": 3.284778216469063e-05, + "loss": 2.0788, + "step": 11855500 + }, + { + "epoch": 34.32, + "learning_rate": 3.284705996433864e-05, + "loss": 2.0685, + "step": 11856000 + }, + { + "epoch": 34.32, + "learning_rate": 3.2846336316691364e-05, + "loss": 2.0481, + "step": 11856500 + }, + { + "epoch": 34.32, + "learning_rate": 3.2845612669044094e-05, + "loss": 2.0759, + "step": 11857000 + }, + { + "epoch": 34.32, + "learning_rate": 3.284489046869211e-05, + "loss": 2.1118, + "step": 11857500 + }, + { + "epoch": 34.32, + "learning_rate": 3.284416682104483e-05, + "loss": 2.0922, + "step": 11858000 + }, + { + "epoch": 34.33, + "learning_rate": 3.284344317339756e-05, + "loss": 2.1051, + "step": 11858500 + }, + { + "epoch": 34.33, + "learning_rate": 3.284271952575028e-05, + "loss": 2.0855, + "step": 11859000 + }, + { + "epoch": 34.33, + "learning_rate": 3.2841995878103005e-05, + "loss": 2.0725, + "step": 11859500 + }, + { + "epoch": 34.33, + "learning_rate": 3.284127367775102e-05, + "loss": 2.0398, + "step": 11860000 + }, + { + "epoch": 34.33, + "learning_rate": 3.284055003010374e-05, + "loss": 2.0646, + "step": 11860500 + }, + { + "epoch": 34.33, + "learning_rate": 3.2839826382456465e-05, + "loss": 2.0676, + "step": 11861000 + }, + { + "epoch": 34.33, + "learning_rate": 3.2839102734809194e-05, + "loss": 2.0883, + "step": 11861500 + }, + { + "epoch": 34.34, + "learning_rate": 3.2838379087161916e-05, + "loss": 2.0984, + "step": 11862000 + }, + { + "epoch": 34.34, + "learning_rate": 3.283765543951464e-05, + "loss": 2.088, + "step": 11862500 + }, + { + "epoch": 34.34, + "learning_rate": 3.283693179186736e-05, + "loss": 2.0904, + "step": 11863000 + }, + { + "epoch": 34.34, + "learning_rate": 3.283620814422008e-05, + "loss": 2.0876, + "step": 11863500 + }, + { + "epoch": 34.34, + "learning_rate": 3.2835484496572805e-05, + "loss": 2.0788, + "step": 11864000 + }, + { + "epoch": 34.34, + "learning_rate": 3.283476229622082e-05, + "loss": 2.0978, + "step": 11864500 + }, + { + "epoch": 34.34, + "learning_rate": 3.283403864857354e-05, + "loss": 2.0833, + "step": 11865000 + }, + { + "epoch": 34.35, + "learning_rate": 3.2833315000926265e-05, + "loss": 2.0943, + "step": 11865500 + }, + { + "epoch": 34.35, + "learning_rate": 3.2832591353278994e-05, + "loss": 2.0586, + "step": 11866000 + }, + { + "epoch": 34.35, + "learning_rate": 3.283187060022231e-05, + "loss": 2.0882, + "step": 11866500 + }, + { + "epoch": 34.35, + "learning_rate": 3.283114695257503e-05, + "loss": 2.1042, + "step": 11867000 + }, + { + "epoch": 34.35, + "learning_rate": 3.2830423304927754e-05, + "loss": 2.0841, + "step": 11867500 + }, + { + "epoch": 34.35, + "learning_rate": 3.2829699657280476e-05, + "loss": 2.056, + "step": 11868000 + }, + { + "epoch": 34.35, + "learning_rate": 3.28289760096332e-05, + "loss": 2.0748, + "step": 11868500 + }, + { + "epoch": 34.36, + "learning_rate": 3.282825236198592e-05, + "loss": 2.065, + "step": 11869000 + }, + { + "epoch": 34.36, + "learning_rate": 3.282753016163394e-05, + "loss": 2.0835, + "step": 11869500 + }, + { + "epoch": 34.36, + "learning_rate": 3.2826806513986665e-05, + "loss": 2.0824, + "step": 11870000 + }, + { + "epoch": 34.36, + "learning_rate": 3.282608286633939e-05, + "loss": 2.0795, + "step": 11870500 + }, + { + "epoch": 34.36, + "learning_rate": 3.282535921869211e-05, + "loss": 2.0756, + "step": 11871000 + }, + { + "epoch": 34.36, + "learning_rate": 3.282463557104483e-05, + "loss": 2.0781, + "step": 11871500 + }, + { + "epoch": 34.36, + "learning_rate": 3.2823911923397554e-05, + "loss": 2.0535, + "step": 11872000 + }, + { + "epoch": 34.37, + "learning_rate": 3.2823188275750277e-05, + "loss": 2.0538, + "step": 11872500 + }, + { + "epoch": 34.37, + "learning_rate": 3.2822464628103e-05, + "loss": 2.0771, + "step": 11873000 + }, + { + "epoch": 34.37, + "learning_rate": 3.282174098045573e-05, + "loss": 2.098, + "step": 11873500 + }, + { + "epoch": 34.37, + "learning_rate": 3.282101733280845e-05, + "loss": 2.0624, + "step": 11874000 + }, + { + "epoch": 34.37, + "learning_rate": 3.282029368516117e-05, + "loss": 2.079, + "step": 11874500 + }, + { + "epoch": 34.37, + "learning_rate": 3.2819570037513895e-05, + "loss": 2.0846, + "step": 11875000 + }, + { + "epoch": 34.37, + "learning_rate": 3.281884638986662e-05, + "loss": 2.049, + "step": 11875500 + }, + { + "epoch": 34.38, + "learning_rate": 3.2818122742219346e-05, + "loss": 2.073, + "step": 11876000 + }, + { + "epoch": 34.38, + "learning_rate": 3.281740054186736e-05, + "loss": 2.0808, + "step": 11876500 + }, + { + "epoch": 34.38, + "learning_rate": 3.2816676894220084e-05, + "loss": 2.0761, + "step": 11877000 + }, + { + "epoch": 34.38, + "learning_rate": 3.2815953246572806e-05, + "loss": 2.0803, + "step": 11877500 + }, + { + "epoch": 34.38, + "learning_rate": 3.281523104622082e-05, + "loss": 2.0762, + "step": 11878000 + }, + { + "epoch": 34.38, + "learning_rate": 3.2814507398573544e-05, + "loss": 2.0462, + "step": 11878500 + }, + { + "epoch": 34.38, + "learning_rate": 3.2813783750926266e-05, + "loss": 2.0695, + "step": 11879000 + }, + { + "epoch": 34.39, + "learning_rate": 3.2813060103278995e-05, + "loss": 2.0565, + "step": 11879500 + }, + { + "epoch": 34.39, + "learning_rate": 3.281233645563172e-05, + "loss": 2.0948, + "step": 11880000 + }, + { + "epoch": 34.39, + "learning_rate": 3.281161425527973e-05, + "loss": 2.0787, + "step": 11880500 + }, + { + "epoch": 34.39, + "learning_rate": 3.281089060763246e-05, + "loss": 2.0626, + "step": 11881000 + }, + { + "epoch": 34.39, + "learning_rate": 3.2810166959985184e-05, + "loss": 2.0615, + "step": 11881500 + }, + { + "epoch": 34.39, + "learning_rate": 3.2809443312337906e-05, + "loss": 2.0861, + "step": 11882000 + }, + { + "epoch": 34.39, + "learning_rate": 3.280871966469063e-05, + "loss": 2.0907, + "step": 11882500 + }, + { + "epoch": 34.4, + "learning_rate": 3.280799601704335e-05, + "loss": 2.0611, + "step": 11883000 + }, + { + "epoch": 34.4, + "learning_rate": 3.280727236939607e-05, + "loss": 2.0667, + "step": 11883500 + }, + { + "epoch": 34.4, + "learning_rate": 3.2806548721748795e-05, + "loss": 2.0649, + "step": 11884000 + }, + { + "epoch": 34.4, + "learning_rate": 3.280582507410152e-05, + "loss": 2.0867, + "step": 11884500 + }, + { + "epoch": 34.4, + "learning_rate": 3.2805101426454246e-05, + "loss": 2.0984, + "step": 11885000 + }, + { + "epoch": 34.4, + "learning_rate": 3.280437922610226e-05, + "loss": 2.0933, + "step": 11885500 + }, + { + "epoch": 34.41, + "learning_rate": 3.2803655578454984e-05, + "loss": 2.0601, + "step": 11886000 + }, + { + "epoch": 34.41, + "learning_rate": 3.2802933378103e-05, + "loss": 2.0586, + "step": 11886500 + }, + { + "epoch": 34.41, + "learning_rate": 3.280220973045572e-05, + "loss": 2.0949, + "step": 11887000 + }, + { + "epoch": 34.41, + "learning_rate": 3.2801486082808444e-05, + "loss": 2.0407, + "step": 11887500 + }, + { + "epoch": 34.41, + "learning_rate": 3.280076243516117e-05, + "loss": 2.0776, + "step": 11888000 + }, + { + "epoch": 34.41, + "learning_rate": 3.2800040234809195e-05, + "loss": 2.0552, + "step": 11888500 + }, + { + "epoch": 34.41, + "learning_rate": 3.279931658716192e-05, + "loss": 2.0498, + "step": 11889000 + }, + { + "epoch": 34.42, + "learning_rate": 3.279859438680993e-05, + "loss": 2.086, + "step": 11889500 + }, + { + "epoch": 34.42, + "learning_rate": 3.2797870739162655e-05, + "loss": 2.0788, + "step": 11890000 + }, + { + "epoch": 34.42, + "learning_rate": 3.279714709151538e-05, + "loss": 2.0848, + "step": 11890500 + }, + { + "epoch": 34.42, + "learning_rate": 3.27964234438681e-05, + "loss": 2.0765, + "step": 11891000 + }, + { + "epoch": 34.42, + "learning_rate": 3.279569979622082e-05, + "loss": 2.1022, + "step": 11891500 + }, + { + "epoch": 34.42, + "learning_rate": 3.2794976148573544e-05, + "loss": 2.0715, + "step": 11892000 + }, + { + "epoch": 34.42, + "learning_rate": 3.279425250092627e-05, + "loss": 2.074, + "step": 11892500 + }, + { + "epoch": 34.43, + "learning_rate": 3.2793528853278996e-05, + "loss": 2.074, + "step": 11893000 + }, + { + "epoch": 34.43, + "learning_rate": 3.279280665292701e-05, + "loss": 2.0818, + "step": 11893500 + }, + { + "epoch": 34.43, + "learning_rate": 3.279208300527973e-05, + "loss": 2.1109, + "step": 11894000 + }, + { + "epoch": 34.43, + "learning_rate": 3.2791359357632456e-05, + "loss": 2.0853, + "step": 11894500 + }, + { + "epoch": 34.43, + "learning_rate": 3.279063570998518e-05, + "loss": 2.0809, + "step": 11895000 + }, + { + "epoch": 34.43, + "learning_rate": 3.27899120623379e-05, + "loss": 2.0612, + "step": 11895500 + }, + { + "epoch": 34.43, + "learning_rate": 3.2789191309281216e-05, + "loss": 2.0567, + "step": 11896000 + }, + { + "epoch": 34.44, + "learning_rate": 3.2788467661633945e-05, + "loss": 2.0724, + "step": 11896500 + }, + { + "epoch": 34.44, + "learning_rate": 3.278774401398667e-05, + "loss": 2.0594, + "step": 11897000 + }, + { + "epoch": 34.44, + "learning_rate": 3.278702036633939e-05, + "loss": 2.0726, + "step": 11897500 + }, + { + "epoch": 34.44, + "learning_rate": 3.278629671869211e-05, + "loss": 2.0808, + "step": 11898000 + }, + { + "epoch": 34.44, + "learning_rate": 3.2785573071044834e-05, + "loss": 2.0963, + "step": 11898500 + }, + { + "epoch": 34.44, + "learning_rate": 3.2784849423397556e-05, + "loss": 2.0972, + "step": 11899000 + }, + { + "epoch": 34.44, + "learning_rate": 3.278412577575028e-05, + "loss": 2.07, + "step": 11899500 + }, + { + "epoch": 34.45, + "learning_rate": 3.2783402128103e-05, + "loss": 2.0636, + "step": 11900000 + }, + { + "epoch": 34.45, + "learning_rate": 3.278267848045572e-05, + "loss": 2.0752, + "step": 11900500 + }, + { + "epoch": 34.45, + "learning_rate": 3.2781954832808445e-05, + "loss": 2.0842, + "step": 11901000 + }, + { + "epoch": 34.45, + "learning_rate": 3.2781231185161174e-05, + "loss": 2.0751, + "step": 11901500 + }, + { + "epoch": 34.45, + "learning_rate": 3.2780507537513896e-05, + "loss": 2.0788, + "step": 11902000 + }, + { + "epoch": 34.45, + "learning_rate": 3.277978388986662e-05, + "loss": 2.1004, + "step": 11902500 + }, + { + "epoch": 34.45, + "learning_rate": 3.277906024221934e-05, + "loss": 2.0559, + "step": 11903000 + }, + { + "epoch": 34.46, + "learning_rate": 3.277833659457207e-05, + "loss": 2.0879, + "step": 11903500 + }, + { + "epoch": 34.46, + "learning_rate": 3.2777614394220085e-05, + "loss": 2.0761, + "step": 11904000 + }, + { + "epoch": 34.46, + "learning_rate": 3.277689074657281e-05, + "loss": 2.0718, + "step": 11904500 + }, + { + "epoch": 34.46, + "learning_rate": 3.277616709892553e-05, + "loss": 2.0913, + "step": 11905000 + }, + { + "epoch": 34.46, + "learning_rate": 3.277544345127825e-05, + "loss": 2.077, + "step": 11905500 + }, + { + "epoch": 34.46, + "learning_rate": 3.2774719803630974e-05, + "loss": 2.0781, + "step": 11906000 + }, + { + "epoch": 34.46, + "learning_rate": 3.2773996155983696e-05, + "loss": 2.0837, + "step": 11906500 + }, + { + "epoch": 34.47, + "learning_rate": 3.2773272508336425e-05, + "loss": 2.078, + "step": 11907000 + }, + { + "epoch": 34.47, + "learning_rate": 3.277254886068915e-05, + "loss": 2.0896, + "step": 11907500 + }, + { + "epoch": 34.47, + "learning_rate": 3.277182521304187e-05, + "loss": 2.0808, + "step": 11908000 + }, + { + "epoch": 34.47, + "learning_rate": 3.2771103012689885e-05, + "loss": 2.0781, + "step": 11908500 + }, + { + "epoch": 34.47, + "learning_rate": 3.27703808123379e-05, + "loss": 2.0693, + "step": 11909000 + }, + { + "epoch": 34.47, + "learning_rate": 3.276965716469062e-05, + "loss": 2.0693, + "step": 11909500 + }, + { + "epoch": 34.47, + "learning_rate": 3.2768933517043345e-05, + "loss": 2.11, + "step": 11910000 + }, + { + "epoch": 34.48, + "learning_rate": 3.2768209869396074e-05, + "loss": 2.0712, + "step": 11910500 + }, + { + "epoch": 34.48, + "learning_rate": 3.2767486221748797e-05, + "loss": 2.0934, + "step": 11911000 + }, + { + "epoch": 34.48, + "learning_rate": 3.276676402139682e-05, + "loss": 2.0813, + "step": 11911500 + }, + { + "epoch": 34.48, + "learning_rate": 3.276604037374954e-05, + "loss": 2.0885, + "step": 11912000 + }, + { + "epoch": 34.48, + "learning_rate": 3.2765316726102263e-05, + "loss": 2.0887, + "step": 11912500 + }, + { + "epoch": 34.48, + "learning_rate": 3.2764593078454986e-05, + "loss": 2.0613, + "step": 11913000 + }, + { + "epoch": 34.48, + "learning_rate": 3.276386943080771e-05, + "loss": 2.0774, + "step": 11913500 + }, + { + "epoch": 34.49, + "learning_rate": 3.2763147230455723e-05, + "loss": 2.092, + "step": 11914000 + }, + { + "epoch": 34.49, + "learning_rate": 3.276242358280845e-05, + "loss": 2.061, + "step": 11914500 + }, + { + "epoch": 34.49, + "learning_rate": 3.2761699935161175e-05, + "loss": 2.0815, + "step": 11915000 + }, + { + "epoch": 34.49, + "learning_rate": 3.27609762875139e-05, + "loss": 2.0528, + "step": 11915500 + }, + { + "epoch": 34.49, + "learning_rate": 3.276025408716191e-05, + "loss": 2.1093, + "step": 11916000 + }, + { + "epoch": 34.49, + "learning_rate": 3.2759530439514635e-05, + "loss": 2.0673, + "step": 11916500 + }, + { + "epoch": 34.49, + "learning_rate": 3.275880679186736e-05, + "loss": 2.0648, + "step": 11917000 + }, + { + "epoch": 34.5, + "learning_rate": 3.275808314422008e-05, + "loss": 2.0772, + "step": 11917500 + }, + { + "epoch": 34.5, + "learning_rate": 3.27573594965728e-05, + "loss": 2.0927, + "step": 11918000 + }, + { + "epoch": 34.5, + "learning_rate": 3.275663584892553e-05, + "loss": 2.0706, + "step": 11918500 + }, + { + "epoch": 34.5, + "learning_rate": 3.275591220127825e-05, + "loss": 2.0917, + "step": 11919000 + }, + { + "epoch": 34.5, + "learning_rate": 3.2755188553630975e-05, + "loss": 2.077, + "step": 11919500 + }, + { + "epoch": 34.5, + "learning_rate": 3.2754464905983704e-05, + "loss": 2.0827, + "step": 11920000 + }, + { + "epoch": 34.5, + "learning_rate": 3.275374270563172e-05, + "loss": 2.068, + "step": 11920500 + }, + { + "epoch": 34.51, + "learning_rate": 3.2753020505279735e-05, + "loss": 2.077, + "step": 11921000 + }, + { + "epoch": 34.51, + "learning_rate": 3.275229685763246e-05, + "loss": 2.1051, + "step": 11921500 + }, + { + "epoch": 34.51, + "learning_rate": 3.275157320998518e-05, + "loss": 2.0502, + "step": 11922000 + }, + { + "epoch": 34.51, + "learning_rate": 3.27508510096332e-05, + "loss": 2.0981, + "step": 11922500 + }, + { + "epoch": 34.51, + "learning_rate": 3.2750127361985924e-05, + "loss": 2.0931, + "step": 11923000 + }, + { + "epoch": 34.51, + "learning_rate": 3.2749403714338646e-05, + "loss": 2.1017, + "step": 11923500 + }, + { + "epoch": 34.52, + "learning_rate": 3.274868006669137e-05, + "loss": 2.0654, + "step": 11924000 + }, + { + "epoch": 34.52, + "learning_rate": 3.274795641904409e-05, + "loss": 2.0828, + "step": 11924500 + }, + { + "epoch": 34.52, + "learning_rate": 3.274723277139681e-05, + "loss": 2.0747, + "step": 11925000 + }, + { + "epoch": 34.52, + "learning_rate": 3.2746509123749535e-05, + "loss": 2.1031, + "step": 11925500 + }, + { + "epoch": 34.52, + "learning_rate": 3.2745785476102264e-05, + "loss": 2.0889, + "step": 11926000 + }, + { + "epoch": 34.52, + "learning_rate": 3.274506327575028e-05, + "loss": 2.0766, + "step": 11926500 + }, + { + "epoch": 34.52, + "learning_rate": 3.2744339628103e-05, + "loss": 2.0603, + "step": 11927000 + }, + { + "epoch": 34.53, + "learning_rate": 3.2743615980455724e-05, + "loss": 2.0838, + "step": 11927500 + }, + { + "epoch": 34.53, + "learning_rate": 3.274289233280845e-05, + "loss": 2.0849, + "step": 11928000 + }, + { + "epoch": 34.53, + "learning_rate": 3.2742168685161175e-05, + "loss": 2.0595, + "step": 11928500 + }, + { + "epoch": 34.53, + "learning_rate": 3.27414450375139e-05, + "loss": 2.0846, + "step": 11929000 + }, + { + "epoch": 34.53, + "learning_rate": 3.274072138986662e-05, + "loss": 2.0898, + "step": 11929500 + }, + { + "epoch": 34.53, + "learning_rate": 3.273999774221934e-05, + "loss": 2.0833, + "step": 11930000 + }, + { + "epoch": 34.53, + "learning_rate": 3.2739274094572064e-05, + "loss": 2.079, + "step": 11930500 + }, + { + "epoch": 34.54, + "learning_rate": 3.273855044692479e-05, + "loss": 2.0859, + "step": 11931000 + }, + { + "epoch": 34.54, + "learning_rate": 3.27378282465728e-05, + "loss": 2.0545, + "step": 11931500 + }, + { + "epoch": 34.54, + "learning_rate": 3.2737104598925524e-05, + "loss": 2.087, + "step": 11932000 + }, + { + "epoch": 34.54, + "learning_rate": 3.2736380951278253e-05, + "loss": 2.087, + "step": 11932500 + }, + { + "epoch": 34.54, + "learning_rate": 3.2735657303630976e-05, + "loss": 2.0946, + "step": 11933000 + }, + { + "epoch": 34.54, + "learning_rate": 3.2734933655983705e-05, + "loss": 2.0676, + "step": 11933500 + }, + { + "epoch": 34.54, + "learning_rate": 3.273421145563172e-05, + "loss": 2.1048, + "step": 11934000 + }, + { + "epoch": 34.55, + "learning_rate": 3.273348780798444e-05, + "loss": 2.0739, + "step": 11934500 + }, + { + "epoch": 34.55, + "learning_rate": 3.273276705492775e-05, + "loss": 2.0723, + "step": 11935000 + }, + { + "epoch": 34.55, + "learning_rate": 3.273204340728048e-05, + "loss": 2.0772, + "step": 11935500 + }, + { + "epoch": 34.55, + "learning_rate": 3.2731321206928496e-05, + "loss": 2.0949, + "step": 11936000 + }, + { + "epoch": 34.55, + "learning_rate": 3.273059755928122e-05, + "loss": 2.0744, + "step": 11936500 + }, + { + "epoch": 34.55, + "learning_rate": 3.272987391163394e-05, + "loss": 2.0634, + "step": 11937000 + }, + { + "epoch": 34.55, + "learning_rate": 3.272915026398666e-05, + "loss": 2.0971, + "step": 11937500 + }, + { + "epoch": 34.56, + "learning_rate": 3.2728426616339385e-05, + "loss": 2.0943, + "step": 11938000 + }, + { + "epoch": 34.56, + "learning_rate": 3.272770296869211e-05, + "loss": 2.0614, + "step": 11938500 + }, + { + "epoch": 34.56, + "learning_rate": 3.272697932104483e-05, + "loss": 2.087, + "step": 11939000 + }, + { + "epoch": 34.56, + "learning_rate": 3.272625567339755e-05, + "loss": 2.1045, + "step": 11939500 + }, + { + "epoch": 34.56, + "learning_rate": 3.272553202575028e-05, + "loss": 2.0836, + "step": 11940000 + }, + { + "epoch": 34.56, + "learning_rate": 3.2724808378103e-05, + "loss": 2.0468, + "step": 11940500 + }, + { + "epoch": 34.56, + "learning_rate": 3.272408473045573e-05, + "loss": 2.0786, + "step": 11941000 + }, + { + "epoch": 34.57, + "learning_rate": 3.2723361082808454e-05, + "loss": 2.076, + "step": 11941500 + }, + { + "epoch": 34.57, + "learning_rate": 3.2722637435161176e-05, + "loss": 2.0663, + "step": 11942000 + }, + { + "epoch": 34.57, + "learning_rate": 3.27219137875139e-05, + "loss": 2.0839, + "step": 11942500 + }, + { + "epoch": 34.57, + "learning_rate": 3.2721191587161914e-05, + "loss": 2.0755, + "step": 11943000 + }, + { + "epoch": 34.57, + "learning_rate": 3.2720467939514636e-05, + "loss": 2.0853, + "step": 11943500 + }, + { + "epoch": 34.57, + "learning_rate": 3.271974429186736e-05, + "loss": 2.0611, + "step": 11944000 + }, + { + "epoch": 34.57, + "learning_rate": 3.271902209151538e-05, + "loss": 2.0878, + "step": 11944500 + }, + { + "epoch": 34.58, + "learning_rate": 3.27182984438681e-05, + "loss": 2.0829, + "step": 11945000 + }, + { + "epoch": 34.58, + "learning_rate": 3.2717574796220825e-05, + "loss": 2.0937, + "step": 11945500 + }, + { + "epoch": 34.58, + "learning_rate": 3.271685114857355e-05, + "loss": 2.0874, + "step": 11946000 + }, + { + "epoch": 34.58, + "learning_rate": 3.271612750092627e-05, + "loss": 2.0916, + "step": 11946500 + }, + { + "epoch": 34.58, + "learning_rate": 3.271540385327899e-05, + "loss": 2.0858, + "step": 11947000 + }, + { + "epoch": 34.58, + "learning_rate": 3.2714680205631714e-05, + "loss": 2.0838, + "step": 11947500 + }, + { + "epoch": 34.58, + "learning_rate": 3.2713956557984436e-05, + "loss": 2.0866, + "step": 11948000 + }, + { + "epoch": 34.59, + "learning_rate": 3.2713232910337165e-05, + "loss": 2.0602, + "step": 11948500 + }, + { + "epoch": 34.59, + "learning_rate": 3.271250926268989e-05, + "loss": 2.0881, + "step": 11949000 + }, + { + "epoch": 34.59, + "learning_rate": 3.271178561504261e-05, + "loss": 2.0691, + "step": 11949500 + }, + { + "epoch": 34.59, + "learning_rate": 3.271106196739533e-05, + "loss": 2.0895, + "step": 11950000 + }, + { + "epoch": 34.59, + "learning_rate": 3.2710338319748054e-05, + "loss": 2.0833, + "step": 11950500 + }, + { + "epoch": 34.59, + "learning_rate": 3.270961611939608e-05, + "loss": 2.0772, + "step": 11951000 + }, + { + "epoch": 34.59, + "learning_rate": 3.27088924717488e-05, + "loss": 2.1088, + "step": 11951500 + }, + { + "epoch": 34.6, + "learning_rate": 3.270816882410152e-05, + "loss": 2.0685, + "step": 11952000 + }, + { + "epoch": 34.6, + "learning_rate": 3.2707445176454243e-05, + "loss": 2.0982, + "step": 11952500 + }, + { + "epoch": 34.6, + "learning_rate": 3.2706721528806966e-05, + "loss": 2.0671, + "step": 11953000 + }, + { + "epoch": 34.6, + "learning_rate": 3.270599788115969e-05, + "loss": 2.0903, + "step": 11953500 + }, + { + "epoch": 34.6, + "learning_rate": 3.270527423351241e-05, + "loss": 2.0947, + "step": 11954000 + }, + { + "epoch": 34.6, + "learning_rate": 3.270455058586513e-05, + "loss": 2.0933, + "step": 11954500 + }, + { + "epoch": 34.6, + "learning_rate": 3.2703826938217855e-05, + "loss": 2.1156, + "step": 11955000 + }, + { + "epoch": 34.61, + "learning_rate": 3.2703104737865884e-05, + "loss": 2.1087, + "step": 11955500 + }, + { + "epoch": 34.61, + "learning_rate": 3.2702381090218606e-05, + "loss": 2.0669, + "step": 11956000 + }, + { + "epoch": 34.61, + "learning_rate": 3.270165888986662e-05, + "loss": 2.0991, + "step": 11956500 + }, + { + "epoch": 34.61, + "learning_rate": 3.2700935242219344e-05, + "loss": 2.0677, + "step": 11957000 + }, + { + "epoch": 34.61, + "learning_rate": 3.2700211594572066e-05, + "loss": 2.0821, + "step": 11957500 + }, + { + "epoch": 34.61, + "learning_rate": 3.269948794692479e-05, + "loss": 2.0859, + "step": 11958000 + }, + { + "epoch": 34.61, + "learning_rate": 3.269876429927751e-05, + "loss": 2.0726, + "step": 11958500 + }, + { + "epoch": 34.62, + "learning_rate": 3.269804065163023e-05, + "loss": 2.0995, + "step": 11959000 + }, + { + "epoch": 34.62, + "learning_rate": 3.2697317003982955e-05, + "loss": 2.0438, + "step": 11959500 + }, + { + "epoch": 34.62, + "learning_rate": 3.2696593356335684e-05, + "loss": 2.0668, + "step": 11960000 + }, + { + "epoch": 34.62, + "learning_rate": 3.2695869708688406e-05, + "loss": 2.082, + "step": 11960500 + }, + { + "epoch": 34.62, + "learning_rate": 3.269514750833642e-05, + "loss": 2.0803, + "step": 11961000 + }, + { + "epoch": 34.62, + "learning_rate": 3.269442530798444e-05, + "loss": 2.0648, + "step": 11961500 + }, + { + "epoch": 34.63, + "learning_rate": 3.269370166033716e-05, + "loss": 2.0954, + "step": 11962000 + }, + { + "epoch": 34.63, + "learning_rate": 3.269297801268988e-05, + "loss": 2.0851, + "step": 11962500 + }, + { + "epoch": 34.63, + "learning_rate": 3.2692254365042604e-05, + "loss": 2.0841, + "step": 11963000 + }, + { + "epoch": 34.63, + "learning_rate": 3.269153216469063e-05, + "loss": 2.09, + "step": 11963500 + }, + { + "epoch": 34.63, + "learning_rate": 3.269080996433865e-05, + "loss": 2.0971, + "step": 11964000 + }, + { + "epoch": 34.63, + "learning_rate": 3.269008631669137e-05, + "loss": 2.0969, + "step": 11964500 + }, + { + "epoch": 34.63, + "learning_rate": 3.268936266904409e-05, + "loss": 2.0825, + "step": 11965000 + }, + { + "epoch": 34.64, + "learning_rate": 3.2688639021396815e-05, + "loss": 2.0841, + "step": 11965500 + }, + { + "epoch": 34.64, + "learning_rate": 3.268791537374954e-05, + "loss": 2.0821, + "step": 11966000 + }, + { + "epoch": 34.64, + "learning_rate": 3.268719317339756e-05, + "loss": 2.0811, + "step": 11966500 + }, + { + "epoch": 34.64, + "learning_rate": 3.268646952575028e-05, + "loss": 2.0811, + "step": 11967000 + }, + { + "epoch": 34.64, + "learning_rate": 3.26857473253983e-05, + "loss": 2.0651, + "step": 11967500 + }, + { + "epoch": 34.64, + "learning_rate": 3.268502367775102e-05, + "loss": 2.0607, + "step": 11968000 + }, + { + "epoch": 34.64, + "learning_rate": 3.268430003010374e-05, + "loss": 2.0755, + "step": 11968500 + }, + { + "epoch": 34.65, + "learning_rate": 3.2683576382456464e-05, + "loss": 2.0628, + "step": 11969000 + }, + { + "epoch": 34.65, + "learning_rate": 3.268285273480919e-05, + "loss": 2.1, + "step": 11969500 + }, + { + "epoch": 34.65, + "learning_rate": 3.268212908716191e-05, + "loss": 2.0861, + "step": 11970000 + }, + { + "epoch": 34.65, + "learning_rate": 3.268140543951463e-05, + "loss": 2.0819, + "step": 11970500 + }, + { + "epoch": 34.65, + "learning_rate": 3.268068179186736e-05, + "loss": 2.0896, + "step": 11971000 + }, + { + "epoch": 34.65, + "learning_rate": 3.267995814422008e-05, + "loss": 2.1143, + "step": 11971500 + }, + { + "epoch": 34.65, + "learning_rate": 3.2679235943868105e-05, + "loss": 2.0821, + "step": 11972000 + }, + { + "epoch": 34.66, + "learning_rate": 3.267851229622083e-05, + "loss": 2.0787, + "step": 11972500 + }, + { + "epoch": 34.66, + "learning_rate": 3.267778864857355e-05, + "loss": 2.1069, + "step": 11973000 + }, + { + "epoch": 34.66, + "learning_rate": 3.267706500092627e-05, + "loss": 2.0839, + "step": 11973500 + }, + { + "epoch": 34.66, + "learning_rate": 3.2676341353278994e-05, + "loss": 2.0626, + "step": 11974000 + }, + { + "epoch": 34.66, + "learning_rate": 3.2675617705631716e-05, + "loss": 2.0934, + "step": 11974500 + }, + { + "epoch": 34.66, + "learning_rate": 3.267489405798444e-05, + "loss": 2.093, + "step": 11975000 + }, + { + "epoch": 34.66, + "learning_rate": 3.267417041033716e-05, + "loss": 2.0745, + "step": 11975500 + }, + { + "epoch": 34.67, + "learning_rate": 3.267344676268988e-05, + "loss": 2.0733, + "step": 11976000 + }, + { + "epoch": 34.67, + "learning_rate": 3.267272311504261e-05, + "loss": 2.0769, + "step": 11976500 + }, + { + "epoch": 34.67, + "learning_rate": 3.2671999467395334e-05, + "loss": 2.0726, + "step": 11977000 + }, + { + "epoch": 34.67, + "learning_rate": 3.267127726704335e-05, + "loss": 2.073, + "step": 11977500 + }, + { + "epoch": 34.67, + "learning_rate": 3.267055361939607e-05, + "loss": 2.1022, + "step": 11978000 + }, + { + "epoch": 34.67, + "learning_rate": 3.26698299717488e-05, + "loss": 2.102, + "step": 11978500 + }, + { + "epoch": 34.67, + "learning_rate": 3.266910632410152e-05, + "loss": 2.0613, + "step": 11979000 + }, + { + "epoch": 34.68, + "learning_rate": 3.2668382676454245e-05, + "loss": 2.0785, + "step": 11979500 + }, + { + "epoch": 34.68, + "learning_rate": 3.266765902880697e-05, + "loss": 2.0822, + "step": 11980000 + }, + { + "epoch": 34.68, + "learning_rate": 3.266693682845498e-05, + "loss": 2.0708, + "step": 11980500 + }, + { + "epoch": 34.68, + "learning_rate": 3.266621318080771e-05, + "loss": 2.0768, + "step": 11981000 + }, + { + "epoch": 34.68, + "learning_rate": 3.2665489533160434e-05, + "loss": 2.0807, + "step": 11981500 + }, + { + "epoch": 34.68, + "learning_rate": 3.2664765885513156e-05, + "loss": 2.0663, + "step": 11982000 + }, + { + "epoch": 34.68, + "learning_rate": 3.266404223786588e-05, + "loss": 2.0749, + "step": 11982500 + }, + { + "epoch": 34.69, + "learning_rate": 3.2663320037513894e-05, + "loss": 2.0892, + "step": 11983000 + }, + { + "epoch": 34.69, + "learning_rate": 3.2662596389866616e-05, + "loss": 2.0901, + "step": 11983500 + }, + { + "epoch": 34.69, + "learning_rate": 3.266187418951463e-05, + "loss": 2.0713, + "step": 11984000 + }, + { + "epoch": 34.69, + "learning_rate": 3.266115054186736e-05, + "loss": 2.0984, + "step": 11984500 + }, + { + "epoch": 34.69, + "learning_rate": 3.2660428341515376e-05, + "loss": 2.0754, + "step": 11985000 + }, + { + "epoch": 34.69, + "learning_rate": 3.26597046938681e-05, + "loss": 2.0672, + "step": 11985500 + }, + { + "epoch": 34.69, + "learning_rate": 3.265898104622083e-05, + "loss": 2.073, + "step": 11986000 + }, + { + "epoch": 34.7, + "learning_rate": 3.265825739857355e-05, + "loss": 2.1204, + "step": 11986500 + }, + { + "epoch": 34.7, + "learning_rate": 3.265753375092627e-05, + "loss": 2.0937, + "step": 11987000 + }, + { + "epoch": 34.7, + "learning_rate": 3.2656810103278994e-05, + "loss": 2.0778, + "step": 11987500 + }, + { + "epoch": 34.7, + "learning_rate": 3.265608645563172e-05, + "loss": 2.0797, + "step": 11988000 + }, + { + "epoch": 34.7, + "learning_rate": 3.265536280798444e-05, + "loss": 2.0784, + "step": 11988500 + }, + { + "epoch": 34.7, + "learning_rate": 3.265463916033716e-05, + "loss": 2.0834, + "step": 11989000 + }, + { + "epoch": 34.7, + "learning_rate": 3.265391551268988e-05, + "loss": 2.094, + "step": 11989500 + }, + { + "epoch": 34.71, + "learning_rate": 3.265319186504261e-05, + "loss": 2.0934, + "step": 11990000 + }, + { + "epoch": 34.71, + "learning_rate": 3.2652468217395335e-05, + "loss": 2.0778, + "step": 11990500 + }, + { + "epoch": 34.71, + "learning_rate": 3.265174456974806e-05, + "loss": 2.1027, + "step": 11991000 + }, + { + "epoch": 34.71, + "learning_rate": 3.265102092210078e-05, + "loss": 2.0873, + "step": 11991500 + }, + { + "epoch": 34.71, + "learning_rate": 3.26502972744535e-05, + "loss": 2.0908, + "step": 11992000 + }, + { + "epoch": 34.71, + "learning_rate": 3.2649573626806224e-05, + "loss": 2.0577, + "step": 11992500 + }, + { + "epoch": 34.71, + "learning_rate": 3.264885142645424e-05, + "loss": 2.08, + "step": 11993000 + }, + { + "epoch": 34.72, + "learning_rate": 3.264812777880697e-05, + "loss": 2.0687, + "step": 11993500 + }, + { + "epoch": 34.72, + "learning_rate": 3.264740413115969e-05, + "loss": 2.0808, + "step": 11994000 + }, + { + "epoch": 34.72, + "learning_rate": 3.264668048351241e-05, + "loss": 2.0877, + "step": 11994500 + }, + { + "epoch": 34.72, + "learning_rate": 3.264595973045573e-05, + "loss": 2.0892, + "step": 11995000 + }, + { + "epoch": 34.72, + "learning_rate": 3.264523608280845e-05, + "loss": 2.0852, + "step": 11995500 + }, + { + "epoch": 34.72, + "learning_rate": 3.264451243516117e-05, + "loss": 2.0792, + "step": 11996000 + }, + { + "epoch": 34.72, + "learning_rate": 3.2643788787513895e-05, + "loss": 2.1024, + "step": 11996500 + }, + { + "epoch": 34.73, + "learning_rate": 3.264306513986662e-05, + "loss": 2.074, + "step": 11997000 + }, + { + "epoch": 34.73, + "learning_rate": 3.264234149221934e-05, + "loss": 2.0853, + "step": 11997500 + }, + { + "epoch": 34.73, + "learning_rate": 3.264161929186736e-05, + "loss": 2.0843, + "step": 11998000 + }, + { + "epoch": 34.73, + "learning_rate": 3.2640895644220084e-05, + "loss": 2.0797, + "step": 11998500 + }, + { + "epoch": 34.73, + "learning_rate": 3.2640171996572806e-05, + "loss": 2.0811, + "step": 11999000 + }, + { + "epoch": 34.73, + "learning_rate": 3.263944834892553e-05, + "loss": 2.0893, + "step": 11999500 + }, + { + "epoch": 34.74, + "learning_rate": 3.263872470127825e-05, + "loss": 2.0723, + "step": 12000000 + }, + { + "epoch": 34.74, + "learning_rate": 3.263800105363097e-05, + "loss": 2.0815, + "step": 12000500 + }, + { + "epoch": 34.74, + "learning_rate": 3.26372774059837e-05, + "loss": 2.0744, + "step": 12001000 + }, + { + "epoch": 34.74, + "learning_rate": 3.2636553758336424e-05, + "loss": 2.1033, + "step": 12001500 + }, + { + "epoch": 34.74, + "learning_rate": 3.2635830110689146e-05, + "loss": 2.0763, + "step": 12002000 + }, + { + "epoch": 34.74, + "learning_rate": 3.263510646304187e-05, + "loss": 2.087, + "step": 12002500 + }, + { + "epoch": 34.74, + "learning_rate": 3.263438281539459e-05, + "loss": 2.1124, + "step": 12003000 + }, + { + "epoch": 34.75, + "learning_rate": 3.2633662062337906e-05, + "loss": 2.0633, + "step": 12003500 + }, + { + "epoch": 34.75, + "learning_rate": 3.263293841469063e-05, + "loss": 2.1015, + "step": 12004000 + }, + { + "epoch": 34.75, + "learning_rate": 3.263221476704335e-05, + "loss": 2.0766, + "step": 12004500 + }, + { + "epoch": 34.75, + "learning_rate": 3.263149111939607e-05, + "loss": 2.0924, + "step": 12005000 + }, + { + "epoch": 34.75, + "learning_rate": 3.2630767471748795e-05, + "loss": 2.1032, + "step": 12005500 + }, + { + "epoch": 34.75, + "learning_rate": 3.263004527139681e-05, + "loss": 2.0861, + "step": 12006000 + }, + { + "epoch": 34.75, + "learning_rate": 3.262932162374954e-05, + "loss": 2.1026, + "step": 12006500 + }, + { + "epoch": 34.76, + "learning_rate": 3.262859797610226e-05, + "loss": 2.1052, + "step": 12007000 + }, + { + "epoch": 34.76, + "learning_rate": 3.2627874328454984e-05, + "loss": 2.0769, + "step": 12007500 + }, + { + "epoch": 34.76, + "learning_rate": 3.2627152128103e-05, + "loss": 2.0665, + "step": 12008000 + }, + { + "epoch": 34.76, + "learning_rate": 3.262642848045573e-05, + "loss": 2.0785, + "step": 12008500 + }, + { + "epoch": 34.76, + "learning_rate": 3.262570483280845e-05, + "loss": 2.0918, + "step": 12009000 + }, + { + "epoch": 34.76, + "learning_rate": 3.2624981185161173e-05, + "loss": 2.0614, + "step": 12009500 + }, + { + "epoch": 34.76, + "learning_rate": 3.2624257537513896e-05, + "loss": 2.0881, + "step": 12010000 + }, + { + "epoch": 34.77, + "learning_rate": 3.262353388986662e-05, + "loss": 2.0819, + "step": 12010500 + }, + { + "epoch": 34.77, + "learning_rate": 3.262281024221934e-05, + "loss": 2.0733, + "step": 12011000 + }, + { + "epoch": 34.77, + "learning_rate": 3.262208659457206e-05, + "loss": 2.0676, + "step": 12011500 + }, + { + "epoch": 34.77, + "learning_rate": 3.262136294692479e-05, + "loss": 2.0768, + "step": 12012000 + }, + { + "epoch": 34.77, + "learning_rate": 3.2620639299277514e-05, + "loss": 2.0678, + "step": 12012500 + }, + { + "epoch": 34.77, + "learning_rate": 3.2619915651630236e-05, + "loss": 2.082, + "step": 12013000 + }, + { + "epoch": 34.77, + "learning_rate": 3.261919345127825e-05, + "loss": 2.0705, + "step": 12013500 + }, + { + "epoch": 34.78, + "learning_rate": 3.2618469803630974e-05, + "loss": 2.0806, + "step": 12014000 + }, + { + "epoch": 34.78, + "learning_rate": 3.2617746155983696e-05, + "loss": 2.0724, + "step": 12014500 + }, + { + "epoch": 34.78, + "learning_rate": 3.261702250833642e-05, + "loss": 2.0845, + "step": 12015000 + }, + { + "epoch": 34.78, + "learning_rate": 3.2616301755279734e-05, + "loss": 2.0712, + "step": 12015500 + }, + { + "epoch": 34.78, + "learning_rate": 3.261557810763246e-05, + "loss": 2.0783, + "step": 12016000 + }, + { + "epoch": 34.78, + "learning_rate": 3.2614854459985185e-05, + "loss": 2.0938, + "step": 12016500 + }, + { + "epoch": 34.78, + "learning_rate": 3.261413081233791e-05, + "loss": 2.0853, + "step": 12017000 + }, + { + "epoch": 34.79, + "learning_rate": 3.261340716469063e-05, + "loss": 2.0955, + "step": 12017500 + }, + { + "epoch": 34.79, + "learning_rate": 3.261268351704335e-05, + "loss": 2.0813, + "step": 12018000 + }, + { + "epoch": 34.79, + "learning_rate": 3.261196131669137e-05, + "loss": 2.0991, + "step": 12018500 + }, + { + "epoch": 34.79, + "learning_rate": 3.261123766904409e-05, + "loss": 2.0716, + "step": 12019000 + }, + { + "epoch": 34.79, + "learning_rate": 3.261051402139682e-05, + "loss": 2.0892, + "step": 12019500 + }, + { + "epoch": 34.79, + "learning_rate": 3.260979037374954e-05, + "loss": 2.0958, + "step": 12020000 + }, + { + "epoch": 34.79, + "learning_rate": 3.260906672610226e-05, + "loss": 2.0905, + "step": 12020500 + }, + { + "epoch": 34.8, + "learning_rate": 3.2608343078454985e-05, + "loss": 2.0892, + "step": 12021000 + }, + { + "epoch": 34.8, + "learning_rate": 3.260761943080771e-05, + "loss": 2.096, + "step": 12021500 + }, + { + "epoch": 34.8, + "learning_rate": 3.260689578316043e-05, + "loss": 2.0891, + "step": 12022000 + }, + { + "epoch": 34.8, + "learning_rate": 3.2606173582808445e-05, + "loss": 2.0639, + "step": 12022500 + }, + { + "epoch": 34.8, + "learning_rate": 3.260544993516117e-05, + "loss": 2.084, + "step": 12023000 + }, + { + "epoch": 34.8, + "learning_rate": 3.2604726287513896e-05, + "loss": 2.0998, + "step": 12023500 + }, + { + "epoch": 34.8, + "learning_rate": 3.260400408716192e-05, + "loss": 2.0922, + "step": 12024000 + }, + { + "epoch": 34.81, + "learning_rate": 3.260328043951464e-05, + "loss": 2.0703, + "step": 12024500 + }, + { + "epoch": 34.81, + "learning_rate": 3.260255679186736e-05, + "loss": 2.0885, + "step": 12025000 + }, + { + "epoch": 34.81, + "learning_rate": 3.2601833144220086e-05, + "loss": 2.0647, + "step": 12025500 + }, + { + "epoch": 34.81, + "learning_rate": 3.260110949657281e-05, + "loss": 2.0588, + "step": 12026000 + }, + { + "epoch": 34.81, + "learning_rate": 3.260038584892553e-05, + "loss": 2.0893, + "step": 12026500 + }, + { + "epoch": 34.81, + "learning_rate": 3.259966220127825e-05, + "loss": 2.1022, + "step": 12027000 + }, + { + "epoch": 34.81, + "learning_rate": 3.2598938553630974e-05, + "loss": 2.0795, + "step": 12027500 + }, + { + "epoch": 34.82, + "learning_rate": 3.25982149059837e-05, + "loss": 2.0918, + "step": 12028000 + }, + { + "epoch": 34.82, + "learning_rate": 3.259749125833642e-05, + "loss": 2.1076, + "step": 12028500 + }, + { + "epoch": 34.82, + "learning_rate": 3.259676761068914e-05, + "loss": 2.0761, + "step": 12029000 + }, + { + "epoch": 34.82, + "learning_rate": 3.259604396304187e-05, + "loss": 2.101, + "step": 12029500 + }, + { + "epoch": 34.82, + "learning_rate": 3.259532031539459e-05, + "loss": 2.0778, + "step": 12030000 + }, + { + "epoch": 34.82, + "learning_rate": 3.259459666774732e-05, + "loss": 2.0602, + "step": 12030500 + }, + { + "epoch": 34.82, + "learning_rate": 3.2593873020100044e-05, + "loss": 2.0911, + "step": 12031000 + }, + { + "epoch": 34.83, + "learning_rate": 3.2593149372452766e-05, + "loss": 2.0719, + "step": 12031500 + }, + { + "epoch": 34.83, + "learning_rate": 3.259242572480549e-05, + "loss": 2.073, + "step": 12032000 + }, + { + "epoch": 34.83, + "learning_rate": 3.25917049717488e-05, + "loss": 2.1019, + "step": 12032500 + }, + { + "epoch": 34.83, + "learning_rate": 3.259098132410152e-05, + "loss": 2.0765, + "step": 12033000 + }, + { + "epoch": 34.83, + "learning_rate": 3.259025767645424e-05, + "loss": 2.0886, + "step": 12033500 + }, + { + "epoch": 34.83, + "learning_rate": 3.258953402880697e-05, + "loss": 2.1079, + "step": 12034000 + }, + { + "epoch": 34.83, + "learning_rate": 3.258881038115969e-05, + "loss": 2.0948, + "step": 12034500 + }, + { + "epoch": 34.84, + "learning_rate": 3.2588086733512415e-05, + "loss": 2.0956, + "step": 12035000 + }, + { + "epoch": 34.84, + "learning_rate": 3.258736308586514e-05, + "loss": 2.081, + "step": 12035500 + }, + { + "epoch": 34.84, + "learning_rate": 3.258663943821786e-05, + "loss": 2.0815, + "step": 12036000 + }, + { + "epoch": 34.84, + "learning_rate": 3.258591579057058e-05, + "loss": 2.0742, + "step": 12036500 + }, + { + "epoch": 34.84, + "learning_rate": 3.258519503751389e-05, + "loss": 2.0836, + "step": 12037000 + }, + { + "epoch": 34.84, + "learning_rate": 3.258447138986662e-05, + "loss": 2.062, + "step": 12037500 + }, + { + "epoch": 34.85, + "learning_rate": 3.258374774221934e-05, + "loss": 2.0807, + "step": 12038000 + }, + { + "epoch": 34.85, + "learning_rate": 3.258302409457207e-05, + "loss": 2.0817, + "step": 12038500 + }, + { + "epoch": 34.85, + "learning_rate": 3.258230044692479e-05, + "loss": 2.0669, + "step": 12039000 + }, + { + "epoch": 34.85, + "learning_rate": 3.2581576799277515e-05, + "loss": 2.074, + "step": 12039500 + }, + { + "epoch": 34.85, + "learning_rate": 3.258085315163024e-05, + "loss": 2.0799, + "step": 12040000 + }, + { + "epoch": 34.85, + "learning_rate": 3.258012950398296e-05, + "loss": 2.0888, + "step": 12040500 + }, + { + "epoch": 34.85, + "learning_rate": 3.257940585633568e-05, + "loss": 2.0654, + "step": 12041000 + }, + { + "epoch": 34.86, + "learning_rate": 3.2578682208688404e-05, + "loss": 2.0959, + "step": 12041500 + }, + { + "epoch": 34.86, + "learning_rate": 3.2577958561041126e-05, + "loss": 2.0891, + "step": 12042000 + }, + { + "epoch": 34.86, + "learning_rate": 3.257723491339385e-05, + "loss": 2.0797, + "step": 12042500 + }, + { + "epoch": 34.86, + "learning_rate": 3.257651271304187e-05, + "loss": 2.0764, + "step": 12043000 + }, + { + "epoch": 34.86, + "learning_rate": 3.257578906539459e-05, + "loss": 2.0892, + "step": 12043500 + }, + { + "epoch": 34.86, + "learning_rate": 3.2575065417747315e-05, + "loss": 2.0833, + "step": 12044000 + }, + { + "epoch": 34.86, + "learning_rate": 3.2574344664690624e-05, + "loss": 2.0869, + "step": 12044500 + }, + { + "epoch": 34.87, + "learning_rate": 3.2573621017043347e-05, + "loss": 2.0733, + "step": 12045000 + }, + { + "epoch": 34.87, + "learning_rate": 3.257289736939607e-05, + "loss": 2.089, + "step": 12045500 + }, + { + "epoch": 34.87, + "learning_rate": 3.25721737217488e-05, + "loss": 2.0676, + "step": 12046000 + }, + { + "epoch": 34.87, + "learning_rate": 3.257145007410152e-05, + "loss": 2.0774, + "step": 12046500 + }, + { + "epoch": 34.87, + "learning_rate": 3.257072642645424e-05, + "loss": 2.0773, + "step": 12047000 + }, + { + "epoch": 34.87, + "learning_rate": 3.257000277880697e-05, + "loss": 2.083, + "step": 12047500 + }, + { + "epoch": 34.87, + "learning_rate": 3.2569279131159693e-05, + "loss": 2.071, + "step": 12048000 + }, + { + "epoch": 34.88, + "learning_rate": 3.256855693080771e-05, + "loss": 2.0971, + "step": 12048500 + }, + { + "epoch": 34.88, + "learning_rate": 3.256783328316043e-05, + "loss": 2.0888, + "step": 12049000 + }, + { + "epoch": 34.88, + "learning_rate": 3.2567109635513154e-05, + "loss": 2.0955, + "step": 12049500 + }, + { + "epoch": 34.88, + "learning_rate": 3.2566385987865876e-05, + "loss": 2.0779, + "step": 12050000 + }, + { + "epoch": 34.88, + "learning_rate": 3.25656637875139e-05, + "loss": 2.0785, + "step": 12050500 + }, + { + "epoch": 34.88, + "learning_rate": 3.256494013986662e-05, + "loss": 2.074, + "step": 12051000 + }, + { + "epoch": 34.88, + "learning_rate": 3.256421649221934e-05, + "loss": 2.0935, + "step": 12051500 + }, + { + "epoch": 34.89, + "learning_rate": 3.2563492844572065e-05, + "loss": 2.0813, + "step": 12052000 + }, + { + "epoch": 34.89, + "learning_rate": 3.256276919692479e-05, + "loss": 2.0955, + "step": 12052500 + }, + { + "epoch": 34.89, + "learning_rate": 3.2562045549277516e-05, + "loss": 2.073, + "step": 12053000 + }, + { + "epoch": 34.89, + "learning_rate": 3.256132190163024e-05, + "loss": 2.0923, + "step": 12053500 + }, + { + "epoch": 34.89, + "learning_rate": 3.2560599701278254e-05, + "loss": 2.0972, + "step": 12054000 + }, + { + "epoch": 34.89, + "learning_rate": 3.2559876053630976e-05, + "loss": 2.0733, + "step": 12054500 + }, + { + "epoch": 34.89, + "learning_rate": 3.25591524059837e-05, + "loss": 2.0845, + "step": 12055000 + }, + { + "epoch": 34.9, + "learning_rate": 3.255842875833642e-05, + "loss": 2.0692, + "step": 12055500 + }, + { + "epoch": 34.9, + "learning_rate": 3.255770511068915e-05, + "loss": 2.0942, + "step": 12056000 + }, + { + "epoch": 34.9, + "learning_rate": 3.255698146304187e-05, + "loss": 2.0874, + "step": 12056500 + }, + { + "epoch": 34.9, + "learning_rate": 3.2556257815394594e-05, + "loss": 2.0938, + "step": 12057000 + }, + { + "epoch": 34.9, + "learning_rate": 3.255553561504261e-05, + "loss": 2.067, + "step": 12057500 + }, + { + "epoch": 34.9, + "learning_rate": 3.255481196739533e-05, + "loss": 2.0708, + "step": 12058000 + }, + { + "epoch": 34.9, + "learning_rate": 3.2554088319748054e-05, + "loss": 2.0572, + "step": 12058500 + }, + { + "epoch": 34.91, + "learning_rate": 3.2553364672100776e-05, + "loss": 2.0899, + "step": 12059000 + }, + { + "epoch": 34.91, + "learning_rate": 3.25526410244535e-05, + "loss": 2.0797, + "step": 12059500 + }, + { + "epoch": 34.91, + "learning_rate": 3.255191737680622e-05, + "loss": 2.0753, + "step": 12060000 + }, + { + "epoch": 34.91, + "learning_rate": 3.255119372915895e-05, + "loss": 2.0831, + "step": 12060500 + }, + { + "epoch": 34.91, + "learning_rate": 3.255047008151167e-05, + "loss": 2.0678, + "step": 12061000 + }, + { + "epoch": 34.91, + "learning_rate": 3.2549747881159694e-05, + "loss": 2.0799, + "step": 12061500 + }, + { + "epoch": 34.91, + "learning_rate": 3.2549024233512417e-05, + "loss": 2.0813, + "step": 12062000 + }, + { + "epoch": 34.92, + "learning_rate": 3.254830058586514e-05, + "loss": 2.1055, + "step": 12062500 + }, + { + "epoch": 34.92, + "learning_rate": 3.254757693821786e-05, + "loss": 2.0941, + "step": 12063000 + }, + { + "epoch": 34.92, + "learning_rate": 3.254685329057058e-05, + "loss": 2.0584, + "step": 12063500 + }, + { + "epoch": 34.92, + "learning_rate": 3.25461310902186e-05, + "loss": 2.0878, + "step": 12064000 + }, + { + "epoch": 34.92, + "learning_rate": 3.254540888986662e-05, + "loss": 2.1005, + "step": 12064500 + }, + { + "epoch": 34.92, + "learning_rate": 3.254468524221934e-05, + "loss": 2.0908, + "step": 12065000 + }, + { + "epoch": 34.92, + "learning_rate": 3.2543961594572066e-05, + "loss": 2.0991, + "step": 12065500 + }, + { + "epoch": 34.93, + "learning_rate": 3.254323794692479e-05, + "loss": 2.0596, + "step": 12066000 + }, + { + "epoch": 34.93, + "learning_rate": 3.254251429927751e-05, + "loss": 2.0855, + "step": 12066500 + }, + { + "epoch": 34.93, + "learning_rate": 3.254179065163023e-05, + "loss": 2.078, + "step": 12067000 + }, + { + "epoch": 34.93, + "learning_rate": 3.2541067003982955e-05, + "loss": 2.088, + "step": 12067500 + }, + { + "epoch": 34.93, + "learning_rate": 3.2540343356335684e-05, + "loss": 2.1078, + "step": 12068000 + }, + { + "epoch": 34.93, + "learning_rate": 3.25396211559837e-05, + "loss": 2.0772, + "step": 12068500 + }, + { + "epoch": 34.93, + "learning_rate": 3.253889750833642e-05, + "loss": 2.0777, + "step": 12069000 + }, + { + "epoch": 34.94, + "learning_rate": 3.253817386068915e-05, + "loss": 2.0688, + "step": 12069500 + }, + { + "epoch": 34.94, + "learning_rate": 3.253745021304187e-05, + "loss": 2.0972, + "step": 12070000 + }, + { + "epoch": 34.94, + "learning_rate": 3.2536726565394595e-05, + "loss": 2.1017, + "step": 12070500 + }, + { + "epoch": 34.94, + "learning_rate": 3.253600291774732e-05, + "loss": 2.0907, + "step": 12071000 + }, + { + "epoch": 34.94, + "learning_rate": 3.253528071739533e-05, + "loss": 2.1142, + "step": 12071500 + }, + { + "epoch": 34.94, + "learning_rate": 3.2534557069748055e-05, + "loss": 2.0709, + "step": 12072000 + }, + { + "epoch": 34.94, + "learning_rate": 3.253383342210078e-05, + "loss": 2.0945, + "step": 12072500 + }, + { + "epoch": 34.95, + "learning_rate": 3.25331097744535e-05, + "loss": 2.0985, + "step": 12073000 + }, + { + "epoch": 34.95, + "learning_rate": 3.253238612680622e-05, + "loss": 2.0889, + "step": 12073500 + }, + { + "epoch": 34.95, + "learning_rate": 3.253166247915895e-05, + "loss": 2.0878, + "step": 12074000 + }, + { + "epoch": 34.95, + "learning_rate": 3.253093883151167e-05, + "loss": 2.0581, + "step": 12074500 + }, + { + "epoch": 34.95, + "learning_rate": 3.2530215183864395e-05, + "loss": 2.0765, + "step": 12075000 + }, + { + "epoch": 34.95, + "learning_rate": 3.2529491536217124e-05, + "loss": 2.1122, + "step": 12075500 + }, + { + "epoch": 34.96, + "learning_rate": 3.252876933586514e-05, + "loss": 2.0669, + "step": 12076000 + }, + { + "epoch": 34.96, + "learning_rate": 3.252804568821786e-05, + "loss": 2.0982, + "step": 12076500 + }, + { + "epoch": 34.96, + "learning_rate": 3.2527322040570584e-05, + "loss": 2.0815, + "step": 12077000 + }, + { + "epoch": 34.96, + "learning_rate": 3.2526598392923306e-05, + "loss": 2.1089, + "step": 12077500 + }, + { + "epoch": 34.96, + "learning_rate": 3.252587619257132e-05, + "loss": 2.0947, + "step": 12078000 + }, + { + "epoch": 34.96, + "learning_rate": 3.252515254492405e-05, + "loss": 2.071, + "step": 12078500 + }, + { + "epoch": 34.96, + "learning_rate": 3.252442889727677e-05, + "loss": 2.0594, + "step": 12079000 + }, + { + "epoch": 34.97, + "learning_rate": 3.2523705249629495e-05, + "loss": 2.081, + "step": 12079500 + }, + { + "epoch": 34.97, + "learning_rate": 3.252298160198222e-05, + "loss": 2.0784, + "step": 12080000 + }, + { + "epoch": 34.97, + "learning_rate": 3.252225795433494e-05, + "loss": 2.0592, + "step": 12080500 + }, + { + "epoch": 34.97, + "learning_rate": 3.252153430668766e-05, + "loss": 2.0773, + "step": 12081000 + }, + { + "epoch": 34.97, + "learning_rate": 3.2520810659040384e-05, + "loss": 2.091, + "step": 12081500 + }, + { + "epoch": 34.97, + "learning_rate": 3.2520087011393106e-05, + "loss": 2.0784, + "step": 12082000 + }, + { + "epoch": 34.97, + "learning_rate": 3.251936336374583e-05, + "loss": 2.0763, + "step": 12082500 + }, + { + "epoch": 34.98, + "learning_rate": 3.251863971609856e-05, + "loss": 2.0959, + "step": 12083000 + }, + { + "epoch": 34.98, + "learning_rate": 3.251791751574657e-05, + "loss": 2.0677, + "step": 12083500 + }, + { + "epoch": 34.98, + "learning_rate": 3.2517195315394596e-05, + "loss": 2.1041, + "step": 12084000 + }, + { + "epoch": 34.98, + "learning_rate": 3.251647166774732e-05, + "loss": 2.0712, + "step": 12084500 + }, + { + "epoch": 34.98, + "learning_rate": 3.251574802010004e-05, + "loss": 2.0801, + "step": 12085000 + }, + { + "epoch": 34.98, + "learning_rate": 3.251502437245276e-05, + "loss": 2.0483, + "step": 12085500 + }, + { + "epoch": 34.98, + "learning_rate": 3.2514300724805485e-05, + "loss": 2.0741, + "step": 12086000 + }, + { + "epoch": 34.99, + "learning_rate": 3.251357707715821e-05, + "loss": 2.0703, + "step": 12086500 + }, + { + "epoch": 34.99, + "learning_rate": 3.251285342951093e-05, + "loss": 2.095, + "step": 12087000 + }, + { + "epoch": 34.99, + "learning_rate": 3.251212978186365e-05, + "loss": 2.0847, + "step": 12087500 + }, + { + "epoch": 34.99, + "learning_rate": 3.2511406134216373e-05, + "loss": 2.0831, + "step": 12088000 + }, + { + "epoch": 34.99, + "learning_rate": 3.25106824865691e-05, + "loss": 2.1114, + "step": 12088500 + }, + { + "epoch": 34.99, + "learning_rate": 3.2509958838921825e-05, + "loss": 2.0647, + "step": 12089000 + }, + { + "epoch": 34.99, + "learning_rate": 3.250923663856984e-05, + "loss": 2.0886, + "step": 12089500 + }, + { + "epoch": 35.0, + "learning_rate": 3.250851299092256e-05, + "loss": 2.0871, + "step": 12090000 + }, + { + "epoch": 35.0, + "learning_rate": 3.250778934327529e-05, + "loss": 2.087, + "step": 12090500 + }, + { + "epoch": 35.0, + "learning_rate": 3.2507065695628014e-05, + "loss": 2.0969, + "step": 12091000 + }, + { + "epoch": 35.0, + "learning_rate": 3.250634349527603e-05, + "loss": 2.0697, + "step": 12091500 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.6687623409562915, + "eval_accuracy_mlm": 0.6335090121750351, + "eval_accuracy_nsp": 0.8578345776445624, + "eval_loss": 2.168201446533203, + "eval_runtime": 331.5723, + "eval_samples_per_second": 1316.111, + "eval_steps_per_second": 54.839, + "step": 12091520 + }, + { + "epoch": 35.0, + "learning_rate": 3.250561984762875e-05, + "loss": 2.0743, + "step": 12092000 + }, + { + "epoch": 35.0, + "learning_rate": 3.250489619998148e-05, + "loss": 2.0658, + "step": 12092500 + }, + { + "epoch": 35.0, + "learning_rate": 3.25041725523342e-05, + "loss": 2.0524, + "step": 12093000 + }, + { + "epoch": 35.01, + "learning_rate": 3.2503448904686925e-05, + "loss": 2.052, + "step": 12093500 + }, + { + "epoch": 35.01, + "learning_rate": 3.250272525703965e-05, + "loss": 2.0333, + "step": 12094000 + }, + { + "epoch": 35.01, + "learning_rate": 3.250200160939237e-05, + "loss": 2.0897, + "step": 12094500 + }, + { + "epoch": 35.01, + "learning_rate": 3.250127796174509e-05, + "loss": 2.0747, + "step": 12095000 + }, + { + "epoch": 35.01, + "learning_rate": 3.2500554314097814e-05, + "loss": 2.066, + "step": 12095500 + }, + { + "epoch": 35.01, + "learning_rate": 3.249983211374583e-05, + "loss": 2.0713, + "step": 12096000 + }, + { + "epoch": 35.01, + "learning_rate": 3.249910991339385e-05, + "loss": 2.0707, + "step": 12096500 + }, + { + "epoch": 35.02, + "learning_rate": 3.2498386265746574e-05, + "loss": 2.0554, + "step": 12097000 + }, + { + "epoch": 35.02, + "learning_rate": 3.249766406539459e-05, + "loss": 2.0761, + "step": 12097500 + }, + { + "epoch": 35.02, + "learning_rate": 3.249694041774732e-05, + "loss": 2.0655, + "step": 12098000 + }, + { + "epoch": 35.02, + "learning_rate": 3.249621966469063e-05, + "loss": 2.0674, + "step": 12098500 + }, + { + "epoch": 35.02, + "learning_rate": 3.2495496017043356e-05, + "loss": 2.0642, + "step": 12099000 + }, + { + "epoch": 35.02, + "learning_rate": 3.249477236939608e-05, + "loss": 2.0858, + "step": 12099500 + }, + { + "epoch": 35.02, + "learning_rate": 3.24940487217488e-05, + "loss": 2.0681, + "step": 12100000 + }, + { + "epoch": 35.03, + "learning_rate": 3.2493326521396817e-05, + "loss": 2.0769, + "step": 12100500 + }, + { + "epoch": 35.03, + "learning_rate": 3.249260287374954e-05, + "loss": 2.0707, + "step": 12101000 + }, + { + "epoch": 35.03, + "learning_rate": 3.249187922610226e-05, + "loss": 2.0592, + "step": 12101500 + }, + { + "epoch": 35.03, + "learning_rate": 3.249115557845498e-05, + "loss": 2.0667, + "step": 12102000 + }, + { + "epoch": 35.03, + "learning_rate": 3.2490431930807705e-05, + "loss": 2.0766, + "step": 12102500 + }, + { + "epoch": 35.03, + "learning_rate": 3.248970828316043e-05, + "loss": 2.0546, + "step": 12103000 + }, + { + "epoch": 35.03, + "learning_rate": 3.248898463551316e-05, + "loss": 2.0661, + "step": 12103500 + }, + { + "epoch": 35.04, + "learning_rate": 3.248826098786588e-05, + "loss": 2.0761, + "step": 12104000 + }, + { + "epoch": 35.04, + "learning_rate": 3.24875373402186e-05, + "loss": 2.0575, + "step": 12104500 + }, + { + "epoch": 35.04, + "learning_rate": 3.2486813692571323e-05, + "loss": 2.0716, + "step": 12105000 + }, + { + "epoch": 35.04, + "learning_rate": 3.248609149221934e-05, + "loss": 2.0664, + "step": 12105500 + }, + { + "epoch": 35.04, + "learning_rate": 3.248536784457207e-05, + "loss": 2.0647, + "step": 12106000 + }, + { + "epoch": 35.04, + "learning_rate": 3.248464419692479e-05, + "loss": 2.0736, + "step": 12106500 + }, + { + "epoch": 35.04, + "learning_rate": 3.248392054927751e-05, + "loss": 2.0873, + "step": 12107000 + }, + { + "epoch": 35.05, + "learning_rate": 3.2483196901630235e-05, + "loss": 2.056, + "step": 12107500 + }, + { + "epoch": 35.05, + "learning_rate": 3.248247325398296e-05, + "loss": 2.0707, + "step": 12108000 + }, + { + "epoch": 35.05, + "learning_rate": 3.248174960633568e-05, + "loss": 2.0668, + "step": 12108500 + }, + { + "epoch": 35.05, + "learning_rate": 3.24810274059837e-05, + "loss": 2.0785, + "step": 12109000 + }, + { + "epoch": 35.05, + "learning_rate": 3.2480303758336424e-05, + "loss": 2.047, + "step": 12109500 + }, + { + "epoch": 35.05, + "learning_rate": 3.2479580110689146e-05, + "loss": 2.0746, + "step": 12110000 + }, + { + "epoch": 35.05, + "learning_rate": 3.247885646304187e-05, + "loss": 2.0683, + "step": 12110500 + }, + { + "epoch": 35.06, + "learning_rate": 3.247813281539459e-05, + "loss": 2.048, + "step": 12111000 + }, + { + "epoch": 35.06, + "learning_rate": 3.247740916774731e-05, + "loss": 2.07, + "step": 12111500 + }, + { + "epoch": 35.06, + "learning_rate": 3.2476685520100035e-05, + "loss": 2.0647, + "step": 12112000 + }, + { + "epoch": 35.06, + "learning_rate": 3.247596187245276e-05, + "loss": 2.0839, + "step": 12112500 + }, + { + "epoch": 35.06, + "learning_rate": 3.2475238224805486e-05, + "loss": 2.0798, + "step": 12113000 + }, + { + "epoch": 35.06, + "learning_rate": 3.247451457715821e-05, + "loss": 2.0349, + "step": 12113500 + }, + { + "epoch": 35.07, + "learning_rate": 3.247379092951093e-05, + "loss": 2.0658, + "step": 12114000 + }, + { + "epoch": 35.07, + "learning_rate": 3.247306728186365e-05, + "loss": 2.074, + "step": 12114500 + }, + { + "epoch": 35.07, + "learning_rate": 3.247234363421638e-05, + "loss": 2.0921, + "step": 12115000 + }, + { + "epoch": 35.07, + "learning_rate": 3.2471619986569104e-05, + "loss": 2.0795, + "step": 12115500 + }, + { + "epoch": 35.07, + "learning_rate": 3.2470896338921826e-05, + "loss": 2.0553, + "step": 12116000 + }, + { + "epoch": 35.07, + "learning_rate": 3.247017269127455e-05, + "loss": 2.101, + "step": 12116500 + }, + { + "epoch": 35.07, + "learning_rate": 3.246944904362727e-05, + "loss": 2.0659, + "step": 12117000 + }, + { + "epoch": 35.08, + "learning_rate": 3.2468726843275286e-05, + "loss": 2.0565, + "step": 12117500 + }, + { + "epoch": 35.08, + "learning_rate": 3.246800319562801e-05, + "loss": 2.0969, + "step": 12118000 + }, + { + "epoch": 35.08, + "learning_rate": 3.246728099527603e-05, + "loss": 2.0751, + "step": 12118500 + }, + { + "epoch": 35.08, + "learning_rate": 3.246655734762875e-05, + "loss": 2.0692, + "step": 12119000 + }, + { + "epoch": 35.08, + "learning_rate": 3.2465833699981475e-05, + "loss": 2.0561, + "step": 12119500 + }, + { + "epoch": 35.08, + "learning_rate": 3.24651100523342e-05, + "loss": 2.0483, + "step": 12120000 + }, + { + "epoch": 35.08, + "learning_rate": 3.2464386404686927e-05, + "loss": 2.0668, + "step": 12120500 + }, + { + "epoch": 35.09, + "learning_rate": 3.2463665651630235e-05, + "loss": 2.0676, + "step": 12121000 + }, + { + "epoch": 35.09, + "learning_rate": 3.246294200398296e-05, + "loss": 2.0721, + "step": 12121500 + }, + { + "epoch": 35.09, + "learning_rate": 3.246221835633568e-05, + "loss": 2.0589, + "step": 12122000 + }, + { + "epoch": 35.09, + "learning_rate": 3.246149470868841e-05, + "loss": 2.0807, + "step": 12122500 + }, + { + "epoch": 35.09, + "learning_rate": 3.246077106104113e-05, + "loss": 2.0684, + "step": 12123000 + }, + { + "epoch": 35.09, + "learning_rate": 3.2460047413393853e-05, + "loss": 2.0988, + "step": 12123500 + }, + { + "epoch": 35.09, + "learning_rate": 3.2459323765746576e-05, + "loss": 2.0418, + "step": 12124000 + }, + { + "epoch": 35.1, + "learning_rate": 3.24586001180993e-05, + "loss": 2.0782, + "step": 12124500 + }, + { + "epoch": 35.1, + "learning_rate": 3.245787647045202e-05, + "loss": 2.0636, + "step": 12125000 + }, + { + "epoch": 35.1, + "learning_rate": 3.2457154270100036e-05, + "loss": 2.0665, + "step": 12125500 + }, + { + "epoch": 35.1, + "learning_rate": 3.245643062245276e-05, + "loss": 2.0548, + "step": 12126000 + }, + { + "epoch": 35.1, + "learning_rate": 3.245570697480548e-05, + "loss": 2.0844, + "step": 12126500 + }, + { + "epoch": 35.1, + "learning_rate": 3.245498332715821e-05, + "loss": 2.0619, + "step": 12127000 + }, + { + "epoch": 35.1, + "learning_rate": 3.2454261126806225e-05, + "loss": 2.0653, + "step": 12127500 + }, + { + "epoch": 35.11, + "learning_rate": 3.2453537479158954e-05, + "loss": 2.0521, + "step": 12128000 + }, + { + "epoch": 35.11, + "learning_rate": 3.245281672610226e-05, + "loss": 2.0764, + "step": 12128500 + }, + { + "epoch": 35.11, + "learning_rate": 3.2452093078454985e-05, + "loss": 2.0577, + "step": 12129000 + }, + { + "epoch": 35.11, + "learning_rate": 3.245136943080771e-05, + "loss": 2.0606, + "step": 12129500 + }, + { + "epoch": 35.11, + "learning_rate": 3.245064867775102e-05, + "loss": 2.0783, + "step": 12130000 + }, + { + "epoch": 35.11, + "learning_rate": 3.2449925030103745e-05, + "loss": 2.0718, + "step": 12130500 + }, + { + "epoch": 35.11, + "learning_rate": 3.244920138245647e-05, + "loss": 2.0621, + "step": 12131000 + }, + { + "epoch": 35.12, + "learning_rate": 3.244847773480919e-05, + "loss": 2.0813, + "step": 12131500 + }, + { + "epoch": 35.12, + "learning_rate": 3.244775408716191e-05, + "loss": 2.0704, + "step": 12132000 + }, + { + "epoch": 35.12, + "learning_rate": 3.2447030439514634e-05, + "loss": 2.076, + "step": 12132500 + }, + { + "epoch": 35.12, + "learning_rate": 3.2446306791867356e-05, + "loss": 2.0878, + "step": 12133000 + }, + { + "epoch": 35.12, + "learning_rate": 3.2445583144220085e-05, + "loss": 2.0542, + "step": 12133500 + }, + { + "epoch": 35.12, + "learning_rate": 3.244485949657281e-05, + "loss": 2.089, + "step": 12134000 + }, + { + "epoch": 35.12, + "learning_rate": 3.244413729622082e-05, + "loss": 2.059, + "step": 12134500 + }, + { + "epoch": 35.13, + "learning_rate": 3.2443413648573545e-05, + "loss": 2.0647, + "step": 12135000 + }, + { + "epoch": 35.13, + "learning_rate": 3.244269000092627e-05, + "loss": 2.0436, + "step": 12135500 + }, + { + "epoch": 35.13, + "learning_rate": 3.2441966353278996e-05, + "loss": 2.0623, + "step": 12136000 + }, + { + "epoch": 35.13, + "learning_rate": 3.244124270563172e-05, + "loss": 2.0721, + "step": 12136500 + }, + { + "epoch": 35.13, + "learning_rate": 3.2440520505279734e-05, + "loss": 2.0778, + "step": 12137000 + }, + { + "epoch": 35.13, + "learning_rate": 3.2439796857632456e-05, + "loss": 2.0532, + "step": 12137500 + }, + { + "epoch": 35.13, + "learning_rate": 3.2439073209985185e-05, + "loss": 2.0734, + "step": 12138000 + }, + { + "epoch": 35.14, + "learning_rate": 3.243834956233791e-05, + "loss": 2.0808, + "step": 12138500 + }, + { + "epoch": 35.14, + "learning_rate": 3.243762591469063e-05, + "loss": 2.0473, + "step": 12139000 + }, + { + "epoch": 35.14, + "learning_rate": 3.243690226704335e-05, + "loss": 2.0713, + "step": 12139500 + }, + { + "epoch": 35.14, + "learning_rate": 3.2436178619396074e-05, + "loss": 2.0456, + "step": 12140000 + }, + { + "epoch": 35.14, + "learning_rate": 3.2435454971748797e-05, + "loss": 2.0842, + "step": 12140500 + }, + { + "epoch": 35.14, + "learning_rate": 3.243473132410152e-05, + "loss": 2.0818, + "step": 12141000 + }, + { + "epoch": 35.14, + "learning_rate": 3.243400767645424e-05, + "loss": 2.0641, + "step": 12141500 + }, + { + "epoch": 35.15, + "learning_rate": 3.243328402880696e-05, + "loss": 2.0575, + "step": 12142000 + }, + { + "epoch": 35.15, + "learning_rate": 3.2432560381159686e-05, + "loss": 2.0435, + "step": 12142500 + }, + { + "epoch": 35.15, + "learning_rate": 3.2431836733512415e-05, + "loss": 2.0739, + "step": 12143000 + }, + { + "epoch": 35.15, + "learning_rate": 3.243111308586514e-05, + "loss": 2.0605, + "step": 12143500 + }, + { + "epoch": 35.15, + "learning_rate": 3.243038943821786e-05, + "loss": 2.0506, + "step": 12144000 + }, + { + "epoch": 35.15, + "learning_rate": 3.242966579057059e-05, + "loss": 2.0518, + "step": 12144500 + }, + { + "epoch": 35.15, + "learning_rate": 3.2428943590218604e-05, + "loss": 2.0576, + "step": 12145000 + }, + { + "epoch": 35.16, + "learning_rate": 3.2428219942571326e-05, + "loss": 2.0692, + "step": 12145500 + }, + { + "epoch": 35.16, + "learning_rate": 3.242749629492405e-05, + "loss": 2.0478, + "step": 12146000 + }, + { + "epoch": 35.16, + "learning_rate": 3.242677264727677e-05, + "loss": 2.0601, + "step": 12146500 + }, + { + "epoch": 35.16, + "learning_rate": 3.2426050446924786e-05, + "loss": 2.0757, + "step": 12147000 + }, + { + "epoch": 35.16, + "learning_rate": 3.242532679927751e-05, + "loss": 2.0788, + "step": 12147500 + }, + { + "epoch": 35.16, + "learning_rate": 3.242460315163024e-05, + "loss": 2.0752, + "step": 12148000 + }, + { + "epoch": 35.16, + "learning_rate": 3.242387950398296e-05, + "loss": 2.0685, + "step": 12148500 + }, + { + "epoch": 35.17, + "learning_rate": 3.242315585633568e-05, + "loss": 2.0691, + "step": 12149000 + }, + { + "epoch": 35.17, + "learning_rate": 3.2422432208688404e-05, + "loss": 2.0572, + "step": 12149500 + }, + { + "epoch": 35.17, + "learning_rate": 3.2421708561041126e-05, + "loss": 2.0729, + "step": 12150000 + }, + { + "epoch": 35.17, + "learning_rate": 3.2420984913393855e-05, + "loss": 2.0654, + "step": 12150500 + }, + { + "epoch": 35.17, + "learning_rate": 3.242026126574658e-05, + "loss": 2.0406, + "step": 12151000 + }, + { + "epoch": 35.17, + "learning_rate": 3.241953906539459e-05, + "loss": 2.0694, + "step": 12151500 + }, + { + "epoch": 35.18, + "learning_rate": 3.2418815417747315e-05, + "loss": 2.0702, + "step": 12152000 + }, + { + "epoch": 35.18, + "learning_rate": 3.241809177010004e-05, + "loss": 2.0556, + "step": 12152500 + }, + { + "epoch": 35.18, + "learning_rate": 3.241736812245276e-05, + "loss": 2.0581, + "step": 12153000 + }, + { + "epoch": 35.18, + "learning_rate": 3.241664447480549e-05, + "loss": 2.0782, + "step": 12153500 + }, + { + "epoch": 35.18, + "learning_rate": 3.241592082715821e-05, + "loss": 2.0882, + "step": 12154000 + }, + { + "epoch": 35.18, + "learning_rate": 3.241519717951093e-05, + "loss": 2.0635, + "step": 12154500 + }, + { + "epoch": 35.18, + "learning_rate": 3.2414473531863655e-05, + "loss": 2.0851, + "step": 12155000 + }, + { + "epoch": 35.19, + "learning_rate": 3.241374988421638e-05, + "loss": 2.0636, + "step": 12155500 + }, + { + "epoch": 35.19, + "learning_rate": 3.241302768386439e-05, + "loss": 2.0619, + "step": 12156000 + }, + { + "epoch": 35.19, + "learning_rate": 3.2412304036217115e-05, + "loss": 2.0762, + "step": 12156500 + }, + { + "epoch": 35.19, + "learning_rate": 3.241158038856984e-05, + "loss": 2.0818, + "step": 12157000 + }, + { + "epoch": 35.19, + "learning_rate": 3.241085674092256e-05, + "loss": 2.0783, + "step": 12157500 + }, + { + "epoch": 35.19, + "learning_rate": 3.241013309327529e-05, + "loss": 2.0624, + "step": 12158000 + }, + { + "epoch": 35.19, + "learning_rate": 3.240941089292331e-05, + "loss": 2.0692, + "step": 12158500 + }, + { + "epoch": 35.2, + "learning_rate": 3.240868724527603e-05, + "loss": 2.0675, + "step": 12159000 + }, + { + "epoch": 35.2, + "learning_rate": 3.240796504492405e-05, + "loss": 2.0897, + "step": 12159500 + }, + { + "epoch": 35.2, + "learning_rate": 3.240724139727677e-05, + "loss": 2.0824, + "step": 12160000 + }, + { + "epoch": 35.2, + "learning_rate": 3.240651774962949e-05, + "loss": 2.0612, + "step": 12160500 + }, + { + "epoch": 35.2, + "learning_rate": 3.2405794101982216e-05, + "loss": 2.0581, + "step": 12161000 + }, + { + "epoch": 35.2, + "learning_rate": 3.240507045433494e-05, + "loss": 2.0456, + "step": 12161500 + }, + { + "epoch": 35.2, + "learning_rate": 3.240434680668766e-05, + "loss": 2.0833, + "step": 12162000 + }, + { + "epoch": 35.21, + "learning_rate": 3.240362315904039e-05, + "loss": 2.0646, + "step": 12162500 + }, + { + "epoch": 35.21, + "learning_rate": 3.2402900958688405e-05, + "loss": 2.0602, + "step": 12163000 + }, + { + "epoch": 35.21, + "learning_rate": 3.240217731104113e-05, + "loss": 2.068, + "step": 12163500 + }, + { + "epoch": 35.21, + "learning_rate": 3.240145366339385e-05, + "loss": 2.0979, + "step": 12164000 + }, + { + "epoch": 35.21, + "learning_rate": 3.240073001574657e-05, + "loss": 2.0633, + "step": 12164500 + }, + { + "epoch": 35.21, + "learning_rate": 3.240000781539459e-05, + "loss": 2.076, + "step": 12165000 + }, + { + "epoch": 35.21, + "learning_rate": 3.2399284167747316e-05, + "loss": 2.0641, + "step": 12165500 + }, + { + "epoch": 35.22, + "learning_rate": 3.239856052010004e-05, + "loss": 2.075, + "step": 12166000 + }, + { + "epoch": 35.22, + "learning_rate": 3.239783687245277e-05, + "loss": 2.0591, + "step": 12166500 + }, + { + "epoch": 35.22, + "learning_rate": 3.239711322480549e-05, + "loss": 2.0698, + "step": 12167000 + }, + { + "epoch": 35.22, + "learning_rate": 3.239638957715821e-05, + "loss": 2.073, + "step": 12167500 + }, + { + "epoch": 35.22, + "learning_rate": 3.2395665929510934e-05, + "loss": 2.0493, + "step": 12168000 + }, + { + "epoch": 35.22, + "learning_rate": 3.239494372915895e-05, + "loss": 2.0493, + "step": 12168500 + }, + { + "epoch": 35.22, + "learning_rate": 3.239422008151167e-05, + "loss": 2.0375, + "step": 12169000 + }, + { + "epoch": 35.23, + "learning_rate": 3.2393496433864394e-05, + "loss": 2.0633, + "step": 12169500 + }, + { + "epoch": 35.23, + "learning_rate": 3.2392774233512416e-05, + "loss": 2.0761, + "step": 12170000 + }, + { + "epoch": 35.23, + "learning_rate": 3.239205203316043e-05, + "loss": 2.0731, + "step": 12170500 + }, + { + "epoch": 35.23, + "learning_rate": 3.2391328385513154e-05, + "loss": 2.0812, + "step": 12171000 + }, + { + "epoch": 35.23, + "learning_rate": 3.2390604737865876e-05, + "loss": 2.0638, + "step": 12171500 + }, + { + "epoch": 35.23, + "learning_rate": 3.23898810902186e-05, + "loss": 2.09, + "step": 12172000 + }, + { + "epoch": 35.23, + "learning_rate": 3.238915744257132e-05, + "loss": 2.0494, + "step": 12172500 + }, + { + "epoch": 35.24, + "learning_rate": 3.238843379492405e-05, + "loss": 2.0893, + "step": 12173000 + }, + { + "epoch": 35.24, + "learning_rate": 3.238771014727677e-05, + "loss": 2.0811, + "step": 12173500 + }, + { + "epoch": 35.24, + "learning_rate": 3.2386986499629494e-05, + "loss": 2.0592, + "step": 12174000 + }, + { + "epoch": 35.24, + "learning_rate": 3.2386262851982216e-05, + "loss": 2.0701, + "step": 12174500 + }, + { + "epoch": 35.24, + "learning_rate": 3.238553920433494e-05, + "loss": 2.0845, + "step": 12175000 + }, + { + "epoch": 35.24, + "learning_rate": 3.238481555668767e-05, + "loss": 2.0674, + "step": 12175500 + }, + { + "epoch": 35.24, + "learning_rate": 3.238409190904039e-05, + "loss": 2.0488, + "step": 12176000 + }, + { + "epoch": 35.25, + "learning_rate": 3.238336826139311e-05, + "loss": 2.0668, + "step": 12176500 + }, + { + "epoch": 35.25, + "learning_rate": 3.238264606104113e-05, + "loss": 2.078, + "step": 12177000 + }, + { + "epoch": 35.25, + "learning_rate": 3.238192241339385e-05, + "loss": 2.0522, + "step": 12177500 + }, + { + "epoch": 35.25, + "learning_rate": 3.2381200213041865e-05, + "loss": 2.0394, + "step": 12178000 + }, + { + "epoch": 35.25, + "learning_rate": 3.238047656539459e-05, + "loss": 2.0748, + "step": 12178500 + }, + { + "epoch": 35.25, + "learning_rate": 3.2379752917747317e-05, + "loss": 2.0557, + "step": 12179000 + }, + { + "epoch": 35.25, + "learning_rate": 3.237902927010004e-05, + "loss": 2.081, + "step": 12179500 + }, + { + "epoch": 35.26, + "learning_rate": 3.237830562245276e-05, + "loss": 2.0678, + "step": 12180000 + }, + { + "epoch": 35.26, + "learning_rate": 3.237758197480549e-05, + "loss": 2.0674, + "step": 12180500 + }, + { + "epoch": 35.26, + "learning_rate": 3.237685832715821e-05, + "loss": 2.0892, + "step": 12181000 + }, + { + "epoch": 35.26, + "learning_rate": 3.2376134679510935e-05, + "loss": 2.0745, + "step": 12181500 + }, + { + "epoch": 35.26, + "learning_rate": 3.237541103186366e-05, + "loss": 2.0829, + "step": 12182000 + }, + { + "epoch": 35.26, + "learning_rate": 3.237468738421638e-05, + "loss": 2.0612, + "step": 12182500 + }, + { + "epoch": 35.26, + "learning_rate": 3.23739637365691e-05, + "loss": 2.0788, + "step": 12183000 + }, + { + "epoch": 35.27, + "learning_rate": 3.2373240088921823e-05, + "loss": 2.0533, + "step": 12183500 + }, + { + "epoch": 35.27, + "learning_rate": 3.2372516441274546e-05, + "loss": 2.0654, + "step": 12184000 + }, + { + "epoch": 35.27, + "learning_rate": 3.237179279362727e-05, + "loss": 2.0455, + "step": 12184500 + }, + { + "epoch": 35.27, + "learning_rate": 3.237106914597999e-05, + "loss": 2.0516, + "step": 12185000 + }, + { + "epoch": 35.27, + "learning_rate": 3.237034549833272e-05, + "loss": 2.0756, + "step": 12185500 + }, + { + "epoch": 35.27, + "learning_rate": 3.2369623297980735e-05, + "loss": 2.0543, + "step": 12186000 + }, + { + "epoch": 35.27, + "learning_rate": 3.236889965033346e-05, + "loss": 2.0646, + "step": 12186500 + }, + { + "epoch": 35.28, + "learning_rate": 3.236817600268618e-05, + "loss": 2.074, + "step": 12187000 + }, + { + "epoch": 35.28, + "learning_rate": 3.2367453802334195e-05, + "loss": 2.0673, + "step": 12187500 + }, + { + "epoch": 35.28, + "learning_rate": 3.2366730154686924e-05, + "loss": 2.0652, + "step": 12188000 + }, + { + "epoch": 35.28, + "learning_rate": 3.2366006507039646e-05, + "loss": 2.0729, + "step": 12188500 + }, + { + "epoch": 35.28, + "learning_rate": 3.236528430668767e-05, + "loss": 2.0495, + "step": 12189000 + }, + { + "epoch": 35.28, + "learning_rate": 3.236456065904039e-05, + "loss": 2.0616, + "step": 12189500 + }, + { + "epoch": 35.29, + "learning_rate": 3.236383701139311e-05, + "loss": 2.0432, + "step": 12190000 + }, + { + "epoch": 35.29, + "learning_rate": 3.2363113363745835e-05, + "loss": 2.0543, + "step": 12190500 + }, + { + "epoch": 35.29, + "learning_rate": 3.236238971609856e-05, + "loss": 2.0598, + "step": 12191000 + }, + { + "epoch": 35.29, + "learning_rate": 3.236166606845128e-05, + "loss": 2.0285, + "step": 12191500 + }, + { + "epoch": 35.29, + "learning_rate": 3.2360942420804e-05, + "loss": 2.0781, + "step": 12192000 + }, + { + "epoch": 35.29, + "learning_rate": 3.236022022045202e-05, + "loss": 2.0553, + "step": 12192500 + }, + { + "epoch": 35.29, + "learning_rate": 3.235949657280474e-05, + "loss": 2.1, + "step": 12193000 + }, + { + "epoch": 35.3, + "learning_rate": 3.235877292515747e-05, + "loss": 2.0696, + "step": 12193500 + }, + { + "epoch": 35.3, + "learning_rate": 3.235804927751019e-05, + "loss": 2.0732, + "step": 12194000 + }, + { + "epoch": 35.3, + "learning_rate": 3.235732562986291e-05, + "loss": 2.0585, + "step": 12194500 + }, + { + "epoch": 35.3, + "learning_rate": 3.235660198221564e-05, + "loss": 2.078, + "step": 12195000 + }, + { + "epoch": 35.3, + "learning_rate": 3.2355878334568364e-05, + "loss": 2.0572, + "step": 12195500 + }, + { + "epoch": 35.3, + "learning_rate": 3.2355154686921086e-05, + "loss": 2.0912, + "step": 12196000 + }, + { + "epoch": 35.3, + "learning_rate": 3.235443103927381e-05, + "loss": 2.0968, + "step": 12196500 + }, + { + "epoch": 35.31, + "learning_rate": 3.2353708838921824e-05, + "loss": 2.0851, + "step": 12197000 + }, + { + "epoch": 35.31, + "learning_rate": 3.2352986638569847e-05, + "loss": 2.0791, + "step": 12197500 + }, + { + "epoch": 35.31, + "learning_rate": 3.235226299092257e-05, + "loss": 2.0744, + "step": 12198000 + }, + { + "epoch": 35.31, + "learning_rate": 3.235153934327529e-05, + "loss": 2.0541, + "step": 12198500 + }, + { + "epoch": 35.31, + "learning_rate": 3.235081569562801e-05, + "loss": 2.0565, + "step": 12199000 + }, + { + "epoch": 35.31, + "learning_rate": 3.235009349527603e-05, + "loss": 2.0762, + "step": 12199500 + }, + { + "epoch": 35.31, + "learning_rate": 3.234936984762875e-05, + "loss": 2.0674, + "step": 12200000 + }, + { + "epoch": 35.32, + "learning_rate": 3.234864619998147e-05, + "loss": 2.0827, + "step": 12200500 + }, + { + "epoch": 35.32, + "learning_rate": 3.2347922552334196e-05, + "loss": 2.0736, + "step": 12201000 + }, + { + "epoch": 35.32, + "learning_rate": 3.234720035198222e-05, + "loss": 2.0705, + "step": 12201500 + }, + { + "epoch": 35.32, + "learning_rate": 3.234647670433494e-05, + "loss": 2.0769, + "step": 12202000 + }, + { + "epoch": 35.32, + "learning_rate": 3.234575305668766e-05, + "loss": 2.0783, + "step": 12202500 + }, + { + "epoch": 35.32, + "learning_rate": 3.234502940904039e-05, + "loss": 2.0699, + "step": 12203000 + }, + { + "epoch": 35.32, + "learning_rate": 3.2344305761393114e-05, + "loss": 2.0631, + "step": 12203500 + }, + { + "epoch": 35.33, + "learning_rate": 3.234358500833642e-05, + "loss": 2.0913, + "step": 12204000 + }, + { + "epoch": 35.33, + "learning_rate": 3.2342861360689145e-05, + "loss": 2.0568, + "step": 12204500 + }, + { + "epoch": 35.33, + "learning_rate": 3.234213771304187e-05, + "loss": 2.0799, + "step": 12205000 + }, + { + "epoch": 35.33, + "learning_rate": 3.2341414065394596e-05, + "loss": 2.0778, + "step": 12205500 + }, + { + "epoch": 35.33, + "learning_rate": 3.234069041774732e-05, + "loss": 2.0746, + "step": 12206000 + }, + { + "epoch": 35.33, + "learning_rate": 3.233996677010004e-05, + "loss": 2.088, + "step": 12206500 + }, + { + "epoch": 35.33, + "learning_rate": 3.2339244569748056e-05, + "loss": 2.0757, + "step": 12207000 + }, + { + "epoch": 35.34, + "learning_rate": 3.233852092210078e-05, + "loss": 2.0521, + "step": 12207500 + }, + { + "epoch": 35.34, + "learning_rate": 3.23377972744535e-05, + "loss": 2.0711, + "step": 12208000 + }, + { + "epoch": 35.34, + "learning_rate": 3.233707362680622e-05, + "loss": 2.0797, + "step": 12208500 + }, + { + "epoch": 35.34, + "learning_rate": 3.2336349979158945e-05, + "loss": 2.0569, + "step": 12209000 + }, + { + "epoch": 35.34, + "learning_rate": 3.233562633151167e-05, + "loss": 2.0608, + "step": 12209500 + }, + { + "epoch": 35.34, + "learning_rate": 3.2334902683864396e-05, + "loss": 2.0726, + "step": 12210000 + }, + { + "epoch": 35.34, + "learning_rate": 3.233417903621712e-05, + "loss": 2.037, + "step": 12210500 + }, + { + "epoch": 35.35, + "learning_rate": 3.233345538856985e-05, + "loss": 2.0704, + "step": 12211000 + }, + { + "epoch": 35.35, + "learning_rate": 3.233273174092257e-05, + "loss": 2.0634, + "step": 12211500 + }, + { + "epoch": 35.35, + "learning_rate": 3.233200809327529e-05, + "loss": 2.0502, + "step": 12212000 + }, + { + "epoch": 35.35, + "learning_rate": 3.2331284445628014e-05, + "loss": 2.1014, + "step": 12212500 + }, + { + "epoch": 35.35, + "learning_rate": 3.2330560797980736e-05, + "loss": 2.0574, + "step": 12213000 + }, + { + "epoch": 35.35, + "learning_rate": 3.232983715033346e-05, + "loss": 2.0642, + "step": 12213500 + }, + { + "epoch": 35.35, + "learning_rate": 3.232911350268618e-05, + "loss": 2.0668, + "step": 12214000 + }, + { + "epoch": 35.36, + "learning_rate": 3.23283898550389e-05, + "loss": 2.063, + "step": 12214500 + }, + { + "epoch": 35.36, + "learning_rate": 3.232766910198222e-05, + "loss": 2.0976, + "step": 12215000 + }, + { + "epoch": 35.36, + "learning_rate": 3.232694545433494e-05, + "loss": 2.0702, + "step": 12215500 + }, + { + "epoch": 35.36, + "learning_rate": 3.2326223253982956e-05, + "loss": 2.0849, + "step": 12216000 + }, + { + "epoch": 35.36, + "learning_rate": 3.232549960633568e-05, + "loss": 2.0877, + "step": 12216500 + }, + { + "epoch": 35.36, + "learning_rate": 3.23247759586884e-05, + "loss": 2.0614, + "step": 12217000 + }, + { + "epoch": 35.36, + "learning_rate": 3.232405231104112e-05, + "loss": 2.0972, + "step": 12217500 + }, + { + "epoch": 35.37, + "learning_rate": 3.232332866339385e-05, + "loss": 2.1018, + "step": 12218000 + }, + { + "epoch": 35.37, + "learning_rate": 3.2322605015746574e-05, + "loss": 2.033, + "step": 12218500 + }, + { + "epoch": 35.37, + "learning_rate": 3.23218828153946e-05, + "loss": 2.0856, + "step": 12219000 + }, + { + "epoch": 35.37, + "learning_rate": 3.232115916774732e-05, + "loss": 2.0785, + "step": 12219500 + }, + { + "epoch": 35.37, + "learning_rate": 3.232043552010004e-05, + "loss": 2.0803, + "step": 12220000 + }, + { + "epoch": 35.37, + "learning_rate": 3.2319711872452763e-05, + "loss": 2.0695, + "step": 12220500 + }, + { + "epoch": 35.37, + "learning_rate": 3.2318988224805486e-05, + "loss": 2.0613, + "step": 12221000 + }, + { + "epoch": 35.38, + "learning_rate": 3.231826457715821e-05, + "loss": 2.0619, + "step": 12221500 + }, + { + "epoch": 35.38, + "learning_rate": 3.231754092951093e-05, + "loss": 2.0543, + "step": 12222000 + }, + { + "epoch": 35.38, + "learning_rate": 3.2316818729158946e-05, + "loss": 2.0491, + "step": 12222500 + }, + { + "epoch": 35.38, + "learning_rate": 3.2316095081511675e-05, + "loss": 2.072, + "step": 12223000 + }, + { + "epoch": 35.38, + "learning_rate": 3.23153714338644e-05, + "loss": 2.0871, + "step": 12223500 + }, + { + "epoch": 35.38, + "learning_rate": 3.231464778621712e-05, + "loss": 2.0799, + "step": 12224000 + }, + { + "epoch": 35.38, + "learning_rate": 3.231392413856984e-05, + "loss": 2.0892, + "step": 12224500 + }, + { + "epoch": 35.39, + "learning_rate": 3.2313200490922564e-05, + "loss": 2.0758, + "step": 12225000 + }, + { + "epoch": 35.39, + "learning_rate": 3.2312478290570586e-05, + "loss": 2.0857, + "step": 12225500 + }, + { + "epoch": 35.39, + "learning_rate": 3.231175464292331e-05, + "loss": 2.0655, + "step": 12226000 + }, + { + "epoch": 35.39, + "learning_rate": 3.231103099527603e-05, + "loss": 2.0632, + "step": 12226500 + }, + { + "epoch": 35.39, + "learning_rate": 3.231030734762875e-05, + "loss": 2.0652, + "step": 12227000 + }, + { + "epoch": 35.39, + "learning_rate": 3.2309583699981475e-05, + "loss": 2.0707, + "step": 12227500 + }, + { + "epoch": 35.4, + "learning_rate": 3.23088600523342e-05, + "loss": 2.0817, + "step": 12228000 + }, + { + "epoch": 35.4, + "learning_rate": 3.2308136404686926e-05, + "loss": 2.0837, + "step": 12228500 + }, + { + "epoch": 35.4, + "learning_rate": 3.230741275703965e-05, + "loss": 2.0564, + "step": 12229000 + }, + { + "epoch": 35.4, + "learning_rate": 3.230668910939237e-05, + "loss": 2.0624, + "step": 12229500 + }, + { + "epoch": 35.4, + "learning_rate": 3.230596546174509e-05, + "loss": 2.0808, + "step": 12230000 + }, + { + "epoch": 35.4, + "learning_rate": 3.2305241814097815e-05, + "loss": 2.0792, + "step": 12230500 + }, + { + "epoch": 35.4, + "learning_rate": 3.230451961374583e-05, + "loss": 2.0936, + "step": 12231000 + }, + { + "epoch": 35.41, + "learning_rate": 3.230379596609855e-05, + "loss": 2.0718, + "step": 12231500 + }, + { + "epoch": 35.41, + "learning_rate": 3.2303072318451275e-05, + "loss": 2.0643, + "step": 12232000 + }, + { + "epoch": 35.41, + "learning_rate": 3.2302348670804004e-05, + "loss": 2.1005, + "step": 12232500 + }, + { + "epoch": 35.41, + "learning_rate": 3.2301625023156726e-05, + "loss": 2.062, + "step": 12233000 + }, + { + "epoch": 35.41, + "learning_rate": 3.230090282280475e-05, + "loss": 2.0647, + "step": 12233500 + }, + { + "epoch": 35.41, + "learning_rate": 3.230017917515747e-05, + "loss": 2.0661, + "step": 12234000 + }, + { + "epoch": 35.41, + "learning_rate": 3.229945552751019e-05, + "loss": 2.0802, + "step": 12234500 + }, + { + "epoch": 35.42, + "learning_rate": 3.2298731879862915e-05, + "loss": 2.0832, + "step": 12235000 + }, + { + "epoch": 35.42, + "learning_rate": 3.229800967951093e-05, + "loss": 2.0676, + "step": 12235500 + }, + { + "epoch": 35.42, + "learning_rate": 3.229728603186365e-05, + "loss": 2.0479, + "step": 12236000 + }, + { + "epoch": 35.42, + "learning_rate": 3.2296562384216375e-05, + "loss": 2.0753, + "step": 12236500 + }, + { + "epoch": 35.42, + "learning_rate": 3.22958387365691e-05, + "loss": 2.0754, + "step": 12237000 + }, + { + "epoch": 35.42, + "learning_rate": 3.229511508892183e-05, + "loss": 2.0603, + "step": 12237500 + }, + { + "epoch": 35.42, + "learning_rate": 3.229439144127455e-05, + "loss": 2.0779, + "step": 12238000 + }, + { + "epoch": 35.43, + "learning_rate": 3.229366779362727e-05, + "loss": 2.0482, + "step": 12238500 + }, + { + "epoch": 35.43, + "learning_rate": 3.229294414597999e-05, + "loss": 2.0655, + "step": 12239000 + }, + { + "epoch": 35.43, + "learning_rate": 3.229222194562801e-05, + "loss": 2.0695, + "step": 12239500 + }, + { + "epoch": 35.43, + "learning_rate": 3.229149829798074e-05, + "loss": 2.1075, + "step": 12240000 + }, + { + "epoch": 35.43, + "learning_rate": 3.229077465033346e-05, + "loss": 2.0964, + "step": 12240500 + }, + { + "epoch": 35.43, + "learning_rate": 3.229005100268618e-05, + "loss": 2.0552, + "step": 12241000 + }, + { + "epoch": 35.43, + "learning_rate": 3.2289327355038905e-05, + "loss": 2.0803, + "step": 12241500 + }, + { + "epoch": 35.44, + "learning_rate": 3.228860370739163e-05, + "loss": 2.0712, + "step": 12242000 + }, + { + "epoch": 35.44, + "learning_rate": 3.228788005974435e-05, + "loss": 2.0827, + "step": 12242500 + }, + { + "epoch": 35.44, + "learning_rate": 3.228715641209708e-05, + "loss": 2.0821, + "step": 12243000 + }, + { + "epoch": 35.44, + "learning_rate": 3.22864327644498e-05, + "loss": 2.0582, + "step": 12243500 + }, + { + "epoch": 35.44, + "learning_rate": 3.228570911680252e-05, + "loss": 2.0728, + "step": 12244000 + }, + { + "epoch": 35.44, + "learning_rate": 3.228498691645054e-05, + "loss": 2.0619, + "step": 12244500 + }, + { + "epoch": 35.44, + "learning_rate": 3.228426326880326e-05, + "loss": 2.0783, + "step": 12245000 + }, + { + "epoch": 35.45, + "learning_rate": 3.228353962115598e-05, + "loss": 2.056, + "step": 12245500 + }, + { + "epoch": 35.45, + "learning_rate": 3.2282815973508705e-05, + "loss": 2.099, + "step": 12246000 + }, + { + "epoch": 35.45, + "learning_rate": 3.228209232586143e-05, + "loss": 2.0765, + "step": 12246500 + }, + { + "epoch": 35.45, + "learning_rate": 3.228136867821415e-05, + "loss": 2.0844, + "step": 12247000 + }, + { + "epoch": 35.45, + "learning_rate": 3.228064503056688e-05, + "loss": 2.0828, + "step": 12247500 + }, + { + "epoch": 35.45, + "learning_rate": 3.22799213829196e-05, + "loss": 2.0511, + "step": 12248000 + }, + { + "epoch": 35.45, + "learning_rate": 3.227919918256762e-05, + "loss": 2.0754, + "step": 12248500 + }, + { + "epoch": 35.46, + "learning_rate": 3.2278475534920345e-05, + "loss": 2.0722, + "step": 12249000 + }, + { + "epoch": 35.46, + "learning_rate": 3.227775188727307e-05, + "loss": 2.0702, + "step": 12249500 + }, + { + "epoch": 35.46, + "learning_rate": 3.227702823962579e-05, + "loss": 2.0926, + "step": 12250000 + }, + { + "epoch": 35.46, + "learning_rate": 3.2276306039273805e-05, + "loss": 2.0378, + "step": 12250500 + }, + { + "epoch": 35.46, + "learning_rate": 3.227558239162653e-05, + "loss": 2.0549, + "step": 12251000 + }, + { + "epoch": 35.46, + "learning_rate": 3.227486019127455e-05, + "loss": 2.048, + "step": 12251500 + }, + { + "epoch": 35.46, + "learning_rate": 3.227413654362727e-05, + "loss": 2.093, + "step": 12252000 + }, + { + "epoch": 35.47, + "learning_rate": 3.227341434327529e-05, + "loss": 2.0532, + "step": 12252500 + }, + { + "epoch": 35.47, + "learning_rate": 3.227269069562801e-05, + "loss": 2.064, + "step": 12253000 + }, + { + "epoch": 35.47, + "learning_rate": 3.227196704798073e-05, + "loss": 2.0835, + "step": 12253500 + }, + { + "epoch": 35.47, + "learning_rate": 3.2271243400333454e-05, + "loss": 2.0598, + "step": 12254000 + }, + { + "epoch": 35.47, + "learning_rate": 3.2270519752686176e-05, + "loss": 2.0532, + "step": 12254500 + }, + { + "epoch": 35.47, + "learning_rate": 3.2269796105038905e-05, + "loss": 2.0699, + "step": 12255000 + }, + { + "epoch": 35.47, + "learning_rate": 3.226907245739163e-05, + "loss": 2.0665, + "step": 12255500 + }, + { + "epoch": 35.48, + "learning_rate": 3.226834880974435e-05, + "loss": 2.0671, + "step": 12256000 + }, + { + "epoch": 35.48, + "learning_rate": 3.226762660939237e-05, + "loss": 2.0706, + "step": 12256500 + }, + { + "epoch": 35.48, + "learning_rate": 3.2266902961745094e-05, + "loss": 2.0848, + "step": 12257000 + }, + { + "epoch": 35.48, + "learning_rate": 3.226617931409782e-05, + "loss": 2.0727, + "step": 12257500 + }, + { + "epoch": 35.48, + "learning_rate": 3.226545566645054e-05, + "loss": 2.0737, + "step": 12258000 + }, + { + "epoch": 35.48, + "learning_rate": 3.226473201880326e-05, + "loss": 2.0648, + "step": 12258500 + }, + { + "epoch": 35.48, + "learning_rate": 3.226400837115598e-05, + "loss": 2.082, + "step": 12259000 + }, + { + "epoch": 35.49, + "learning_rate": 3.2263284723508706e-05, + "loss": 2.0811, + "step": 12259500 + }, + { + "epoch": 35.49, + "learning_rate": 3.226256252315673e-05, + "loss": 2.0753, + "step": 12260000 + }, + { + "epoch": 35.49, + "learning_rate": 3.226183887550945e-05, + "loss": 2.0901, + "step": 12260500 + }, + { + "epoch": 35.49, + "learning_rate": 3.226111522786217e-05, + "loss": 2.0706, + "step": 12261000 + }, + { + "epoch": 35.49, + "learning_rate": 3.2260391580214895e-05, + "loss": 2.0491, + "step": 12261500 + }, + { + "epoch": 35.49, + "learning_rate": 3.225966793256762e-05, + "loss": 2.0731, + "step": 12262000 + }, + { + "epoch": 35.49, + "learning_rate": 3.2258944284920346e-05, + "loss": 2.0785, + "step": 12262500 + }, + { + "epoch": 35.5, + "learning_rate": 3.225822063727307e-05, + "loss": 2.0675, + "step": 12263000 + }, + { + "epoch": 35.5, + "learning_rate": 3.2257498436921084e-05, + "loss": 2.0879, + "step": 12263500 + }, + { + "epoch": 35.5, + "learning_rate": 3.2256774789273806e-05, + "loss": 2.0721, + "step": 12264000 + }, + { + "epoch": 35.5, + "learning_rate": 3.225605114162653e-05, + "loss": 2.0678, + "step": 12264500 + }, + { + "epoch": 35.5, + "learning_rate": 3.225532749397926e-05, + "loss": 2.0605, + "step": 12265000 + }, + { + "epoch": 35.5, + "learning_rate": 3.225460384633198e-05, + "loss": 2.0596, + "step": 12265500 + }, + { + "epoch": 35.51, + "learning_rate": 3.22538801986847e-05, + "loss": 2.076, + "step": 12266000 + }, + { + "epoch": 35.51, + "learning_rate": 3.2253156551037424e-05, + "loss": 2.0753, + "step": 12266500 + }, + { + "epoch": 35.51, + "learning_rate": 3.2252432903390146e-05, + "loss": 2.048, + "step": 12267000 + }, + { + "epoch": 35.51, + "learning_rate": 3.225170925574287e-05, + "loss": 2.0902, + "step": 12267500 + }, + { + "epoch": 35.51, + "learning_rate": 3.225098560809559e-05, + "loss": 2.0803, + "step": 12268000 + }, + { + "epoch": 35.51, + "learning_rate": 3.225026196044831e-05, + "loss": 2.0709, + "step": 12268500 + }, + { + "epoch": 35.51, + "learning_rate": 3.224953976009633e-05, + "loss": 2.0868, + "step": 12269000 + }, + { + "epoch": 35.52, + "learning_rate": 3.224881611244906e-05, + "loss": 2.0854, + "step": 12269500 + }, + { + "epoch": 35.52, + "learning_rate": 3.224809246480178e-05, + "loss": 2.0749, + "step": 12270000 + }, + { + "epoch": 35.52, + "learning_rate": 3.22473702644498e-05, + "loss": 2.0713, + "step": 12270500 + }, + { + "epoch": 35.52, + "learning_rate": 3.2246646616802524e-05, + "loss": 2.0581, + "step": 12271000 + }, + { + "epoch": 35.52, + "learning_rate": 3.2245922969155246e-05, + "loss": 2.0842, + "step": 12271500 + }, + { + "epoch": 35.52, + "learning_rate": 3.224519932150797e-05, + "loss": 2.0859, + "step": 12272000 + }, + { + "epoch": 35.52, + "learning_rate": 3.224447567386069e-05, + "loss": 2.0401, + "step": 12272500 + }, + { + "epoch": 35.53, + "learning_rate": 3.224375202621341e-05, + "loss": 2.0788, + "step": 12273000 + }, + { + "epoch": 35.53, + "learning_rate": 3.2243028378566135e-05, + "loss": 2.0872, + "step": 12273500 + }, + { + "epoch": 35.53, + "learning_rate": 3.224230473091886e-05, + "loss": 2.0843, + "step": 12274000 + }, + { + "epoch": 35.53, + "learning_rate": 3.224158108327158e-05, + "loss": 2.075, + "step": 12274500 + }, + { + "epoch": 35.53, + "learning_rate": 3.22408588829196e-05, + "loss": 2.0868, + "step": 12275000 + }, + { + "epoch": 35.53, + "learning_rate": 3.2240135235272324e-05, + "loss": 2.0877, + "step": 12275500 + }, + { + "epoch": 35.53, + "learning_rate": 3.2239411587625047e-05, + "loss": 2.0911, + "step": 12276000 + }, + { + "epoch": 35.54, + "learning_rate": 3.223868938727306e-05, + "loss": 2.085, + "step": 12276500 + }, + { + "epoch": 35.54, + "learning_rate": 3.2237965739625784e-05, + "loss": 2.1145, + "step": 12277000 + }, + { + "epoch": 35.54, + "learning_rate": 3.2237242091978513e-05, + "loss": 2.0601, + "step": 12277500 + }, + { + "epoch": 35.54, + "learning_rate": 3.2236518444331236e-05, + "loss": 2.0697, + "step": 12278000 + }, + { + "epoch": 35.54, + "learning_rate": 3.223579624397926e-05, + "loss": 2.0831, + "step": 12278500 + }, + { + "epoch": 35.54, + "learning_rate": 3.223507259633198e-05, + "loss": 2.0835, + "step": 12279000 + }, + { + "epoch": 35.54, + "learning_rate": 3.22343489486847e-05, + "loss": 2.0755, + "step": 12279500 + }, + { + "epoch": 35.55, + "learning_rate": 3.2233625301037425e-05, + "loss": 2.0889, + "step": 12280000 + }, + { + "epoch": 35.55, + "learning_rate": 3.223290165339015e-05, + "loss": 2.072, + "step": 12280500 + }, + { + "epoch": 35.55, + "learning_rate": 3.223217800574287e-05, + "loss": 2.0646, + "step": 12281000 + }, + { + "epoch": 35.55, + "learning_rate": 3.223145435809559e-05, + "loss": 2.0966, + "step": 12281500 + }, + { + "epoch": 35.55, + "learning_rate": 3.2230730710448314e-05, + "loss": 2.0992, + "step": 12282000 + }, + { + "epoch": 35.55, + "learning_rate": 3.2230007062801036e-05, + "loss": 2.0593, + "step": 12282500 + }, + { + "epoch": 35.55, + "learning_rate": 3.222928341515376e-05, + "loss": 2.069, + "step": 12283000 + }, + { + "epoch": 35.56, + "learning_rate": 3.222856121480178e-05, + "loss": 2.0557, + "step": 12283500 + }, + { + "epoch": 35.56, + "learning_rate": 3.22278375671545e-05, + "loss": 2.0895, + "step": 12284000 + }, + { + "epoch": 35.56, + "learning_rate": 3.222711391950723e-05, + "loss": 2.0716, + "step": 12284500 + }, + { + "epoch": 35.56, + "learning_rate": 3.2226390271859954e-05, + "loss": 2.0772, + "step": 12285000 + }, + { + "epoch": 35.56, + "learning_rate": 3.2225666624212676e-05, + "loss": 2.0702, + "step": 12285500 + }, + { + "epoch": 35.56, + "learning_rate": 3.22249429765654e-05, + "loss": 2.0564, + "step": 12286000 + }, + { + "epoch": 35.56, + "learning_rate": 3.222421932891812e-05, + "loss": 2.0736, + "step": 12286500 + }, + { + "epoch": 35.57, + "learning_rate": 3.2223497128566136e-05, + "loss": 2.0926, + "step": 12287000 + }, + { + "epoch": 35.57, + "learning_rate": 3.222277348091886e-05, + "loss": 2.0628, + "step": 12287500 + }, + { + "epoch": 35.57, + "learning_rate": 3.222204983327158e-05, + "loss": 2.0888, + "step": 12288000 + }, + { + "epoch": 35.57, + "learning_rate": 3.222132618562431e-05, + "loss": 2.0615, + "step": 12288500 + }, + { + "epoch": 35.57, + "learning_rate": 3.222060253797703e-05, + "loss": 2.0765, + "step": 12289000 + }, + { + "epoch": 35.57, + "learning_rate": 3.2219878890329754e-05, + "loss": 2.0754, + "step": 12289500 + }, + { + "epoch": 35.57, + "learning_rate": 3.2219155242682476e-05, + "loss": 2.0803, + "step": 12290000 + }, + { + "epoch": 35.58, + "learning_rate": 3.22184315950352e-05, + "loss": 2.0889, + "step": 12290500 + }, + { + "epoch": 35.58, + "learning_rate": 3.221770794738792e-05, + "loss": 2.069, + "step": 12291000 + }, + { + "epoch": 35.58, + "learning_rate": 3.221698429974064e-05, + "loss": 2.0823, + "step": 12291500 + }, + { + "epoch": 35.58, + "learning_rate": 3.221626065209337e-05, + "loss": 2.0598, + "step": 12292000 + }, + { + "epoch": 35.58, + "learning_rate": 3.221553845174139e-05, + "loss": 2.0813, + "step": 12292500 + }, + { + "epoch": 35.58, + "learning_rate": 3.221481480409411e-05, + "loss": 2.0689, + "step": 12293000 + }, + { + "epoch": 35.58, + "learning_rate": 3.221409115644683e-05, + "loss": 2.0732, + "step": 12293500 + }, + { + "epoch": 35.59, + "learning_rate": 3.221336750879956e-05, + "loss": 2.0603, + "step": 12294000 + }, + { + "epoch": 35.59, + "learning_rate": 3.221264386115228e-05, + "loss": 2.0729, + "step": 12294500 + }, + { + "epoch": 35.59, + "learning_rate": 3.22119216608003e-05, + "loss": 2.072, + "step": 12295000 + }, + { + "epoch": 35.59, + "learning_rate": 3.221119801315302e-05, + "loss": 2.0608, + "step": 12295500 + }, + { + "epoch": 35.59, + "learning_rate": 3.221047436550574e-05, + "loss": 2.0845, + "step": 12296000 + }, + { + "epoch": 35.59, + "learning_rate": 3.2209750717858466e-05, + "loss": 2.061, + "step": 12296500 + }, + { + "epoch": 35.59, + "learning_rate": 3.220902707021119e-05, + "loss": 2.0877, + "step": 12297000 + }, + { + "epoch": 35.6, + "learning_rate": 3.220830342256391e-05, + "loss": 2.0725, + "step": 12297500 + }, + { + "epoch": 35.6, + "learning_rate": 3.220757977491663e-05, + "loss": 2.0828, + "step": 12298000 + }, + { + "epoch": 35.6, + "learning_rate": 3.220685612726936e-05, + "loss": 2.058, + "step": 12298500 + }, + { + "epoch": 35.6, + "learning_rate": 3.2206132479622083e-05, + "loss": 2.0852, + "step": 12299000 + }, + { + "epoch": 35.6, + "learning_rate": 3.220540883197481e-05, + "loss": 2.036, + "step": 12299500 + }, + { + "epoch": 35.6, + "learning_rate": 3.2204685184327535e-05, + "loss": 2.0623, + "step": 12300000 + }, + { + "epoch": 35.6, + "learning_rate": 3.220396298397555e-05, + "loss": 2.0668, + "step": 12300500 + }, + { + "epoch": 35.61, + "learning_rate": 3.220323933632827e-05, + "loss": 2.0614, + "step": 12301000 + }, + { + "epoch": 35.61, + "learning_rate": 3.2202515688680995e-05, + "loss": 2.0814, + "step": 12301500 + }, + { + "epoch": 35.61, + "learning_rate": 3.220179204103372e-05, + "loss": 2.0684, + "step": 12302000 + }, + { + "epoch": 35.61, + "learning_rate": 3.220106839338644e-05, + "loss": 2.078, + "step": 12302500 + }, + { + "epoch": 35.61, + "learning_rate": 3.220034474573916e-05, + "loss": 2.0873, + "step": 12303000 + }, + { + "epoch": 35.61, + "learning_rate": 3.2199621098091884e-05, + "loss": 2.0888, + "step": 12303500 + }, + { + "epoch": 35.62, + "learning_rate": 3.219889745044461e-05, + "loss": 2.0935, + "step": 12304000 + }, + { + "epoch": 35.62, + "learning_rate": 3.219817525009263e-05, + "loss": 2.0724, + "step": 12304500 + }, + { + "epoch": 35.62, + "learning_rate": 3.2197453049740644e-05, + "loss": 2.0775, + "step": 12305000 + }, + { + "epoch": 35.62, + "learning_rate": 3.219673084938866e-05, + "loss": 2.0745, + "step": 12305500 + }, + { + "epoch": 35.62, + "learning_rate": 3.219600720174138e-05, + "loss": 2.0632, + "step": 12306000 + }, + { + "epoch": 35.62, + "learning_rate": 3.219528355409411e-05, + "loss": 2.0734, + "step": 12306500 + }, + { + "epoch": 35.62, + "learning_rate": 3.219455990644683e-05, + "loss": 2.0734, + "step": 12307000 + }, + { + "epoch": 35.63, + "learning_rate": 3.219383625879956e-05, + "loss": 2.0821, + "step": 12307500 + }, + { + "epoch": 35.63, + "learning_rate": 3.2193112611152284e-05, + "loss": 2.0717, + "step": 12308000 + }, + { + "epoch": 35.63, + "learning_rate": 3.2192388963505006e-05, + "loss": 2.0798, + "step": 12308500 + }, + { + "epoch": 35.63, + "learning_rate": 3.219166531585773e-05, + "loss": 2.0824, + "step": 12309000 + }, + { + "epoch": 35.63, + "learning_rate": 3.219094166821045e-05, + "loss": 2.0678, + "step": 12309500 + }, + { + "epoch": 35.63, + "learning_rate": 3.219021802056317e-05, + "loss": 2.0835, + "step": 12310000 + }, + { + "epoch": 35.63, + "learning_rate": 3.2189494372915895e-05, + "loss": 2.0984, + "step": 12310500 + }, + { + "epoch": 35.64, + "learning_rate": 3.218877072526862e-05, + "loss": 2.0645, + "step": 12311000 + }, + { + "epoch": 35.64, + "learning_rate": 3.218804852491663e-05, + "loss": 2.0843, + "step": 12311500 + }, + { + "epoch": 35.64, + "learning_rate": 3.2187326324564655e-05, + "loss": 2.0683, + "step": 12312000 + }, + { + "epoch": 35.64, + "learning_rate": 3.218660267691738e-05, + "loss": 2.0697, + "step": 12312500 + }, + { + "epoch": 35.64, + "learning_rate": 3.21858790292701e-05, + "loss": 2.0849, + "step": 12313000 + }, + { + "epoch": 35.64, + "learning_rate": 3.218515538162282e-05, + "loss": 2.0767, + "step": 12313500 + }, + { + "epoch": 35.64, + "learning_rate": 3.218443173397555e-05, + "loss": 2.0676, + "step": 12314000 + }, + { + "epoch": 35.65, + "learning_rate": 3.218370808632827e-05, + "loss": 2.0795, + "step": 12314500 + }, + { + "epoch": 35.65, + "learning_rate": 3.218298588597629e-05, + "loss": 2.0858, + "step": 12315000 + }, + { + "epoch": 35.65, + "learning_rate": 3.218226223832901e-05, + "loss": 2.0429, + "step": 12315500 + }, + { + "epoch": 35.65, + "learning_rate": 3.218153859068174e-05, + "loss": 2.0856, + "step": 12316000 + }, + { + "epoch": 35.65, + "learning_rate": 3.218081494303446e-05, + "loss": 2.0565, + "step": 12316500 + }, + { + "epoch": 35.65, + "learning_rate": 3.218009274268248e-05, + "loss": 2.0792, + "step": 12317000 + }, + { + "epoch": 35.65, + "learning_rate": 3.2179370542330493e-05, + "loss": 2.0675, + "step": 12317500 + }, + { + "epoch": 35.66, + "learning_rate": 3.2178646894683216e-05, + "loss": 2.0846, + "step": 12318000 + }, + { + "epoch": 35.66, + "learning_rate": 3.217792324703594e-05, + "loss": 2.0871, + "step": 12318500 + }, + { + "epoch": 35.66, + "learning_rate": 3.217719959938866e-05, + "loss": 2.0927, + "step": 12319000 + }, + { + "epoch": 35.66, + "learning_rate": 3.217647595174139e-05, + "loss": 2.0976, + "step": 12319500 + }, + { + "epoch": 35.66, + "learning_rate": 3.217575230409411e-05, + "loss": 2.069, + "step": 12320000 + }, + { + "epoch": 35.66, + "learning_rate": 3.2175028656446834e-05, + "loss": 2.069, + "step": 12320500 + }, + { + "epoch": 35.66, + "learning_rate": 3.2174305008799556e-05, + "loss": 2.0692, + "step": 12321000 + }, + { + "epoch": 35.67, + "learning_rate": 3.217358136115228e-05, + "loss": 2.0808, + "step": 12321500 + }, + { + "epoch": 35.67, + "learning_rate": 3.217285771350501e-05, + "loss": 2.0601, + "step": 12322000 + }, + { + "epoch": 35.67, + "learning_rate": 3.217213406585773e-05, + "loss": 2.0905, + "step": 12322500 + }, + { + "epoch": 35.67, + "learning_rate": 3.217141041821045e-05, + "loss": 2.0784, + "step": 12323000 + }, + { + "epoch": 35.67, + "learning_rate": 3.2170686770563174e-05, + "loss": 2.0639, + "step": 12323500 + }, + { + "epoch": 35.67, + "learning_rate": 3.216996457021119e-05, + "loss": 2.0925, + "step": 12324000 + }, + { + "epoch": 35.67, + "learning_rate": 3.216924236985921e-05, + "loss": 2.0599, + "step": 12324500 + }, + { + "epoch": 35.68, + "learning_rate": 3.2168518722211934e-05, + "loss": 2.0615, + "step": 12325000 + }, + { + "epoch": 35.68, + "learning_rate": 3.2167795074564656e-05, + "loss": 2.0804, + "step": 12325500 + }, + { + "epoch": 35.68, + "learning_rate": 3.216707142691738e-05, + "loss": 2.0625, + "step": 12326000 + }, + { + "epoch": 35.68, + "learning_rate": 3.21663477792701e-05, + "loss": 2.08, + "step": 12326500 + }, + { + "epoch": 35.68, + "learning_rate": 3.216562413162282e-05, + "loss": 2.0676, + "step": 12327000 + }, + { + "epoch": 35.68, + "learning_rate": 3.2164900483975545e-05, + "loss": 2.0551, + "step": 12327500 + }, + { + "epoch": 35.68, + "learning_rate": 3.216417683632827e-05, + "loss": 2.0876, + "step": 12328000 + }, + { + "epoch": 35.69, + "learning_rate": 3.216345318868099e-05, + "loss": 2.0757, + "step": 12328500 + }, + { + "epoch": 35.69, + "learning_rate": 3.216272954103372e-05, + "loss": 2.0543, + "step": 12329000 + }, + { + "epoch": 35.69, + "learning_rate": 3.216200734068174e-05, + "loss": 2.0827, + "step": 12329500 + }, + { + "epoch": 35.69, + "learning_rate": 3.216128369303446e-05, + "loss": 2.0829, + "step": 12330000 + }, + { + "epoch": 35.69, + "learning_rate": 3.216056149268248e-05, + "loss": 2.0717, + "step": 12330500 + }, + { + "epoch": 35.69, + "learning_rate": 3.21598378450352e-05, + "loss": 2.0608, + "step": 12331000 + }, + { + "epoch": 35.69, + "learning_rate": 3.215911419738792e-05, + "loss": 2.0463, + "step": 12331500 + }, + { + "epoch": 35.7, + "learning_rate": 3.2158390549740645e-05, + "loss": 2.0652, + "step": 12332000 + }, + { + "epoch": 35.7, + "learning_rate": 3.215766690209337e-05, + "loss": 2.0926, + "step": 12332500 + }, + { + "epoch": 35.7, + "learning_rate": 3.215694325444609e-05, + "loss": 2.0827, + "step": 12333000 + }, + { + "epoch": 35.7, + "learning_rate": 3.215621960679881e-05, + "loss": 2.0611, + "step": 12333500 + }, + { + "epoch": 35.7, + "learning_rate": 3.215549595915154e-05, + "loss": 2.0753, + "step": 12334000 + }, + { + "epoch": 35.7, + "learning_rate": 3.215477375879956e-05, + "loss": 2.0613, + "step": 12334500 + }, + { + "epoch": 35.7, + "learning_rate": 3.215405011115228e-05, + "loss": 2.0682, + "step": 12335000 + }, + { + "epoch": 35.71, + "learning_rate": 3.2153326463505e-05, + "loss": 2.0723, + "step": 12335500 + }, + { + "epoch": 35.71, + "learning_rate": 3.215260281585772e-05, + "loss": 2.0738, + "step": 12336000 + }, + { + "epoch": 35.71, + "learning_rate": 3.215188061550574e-05, + "loss": 2.0801, + "step": 12336500 + }, + { + "epoch": 35.71, + "learning_rate": 3.215115696785847e-05, + "loss": 2.0988, + "step": 12337000 + }, + { + "epoch": 35.71, + "learning_rate": 3.215043332021119e-05, + "loss": 2.0863, + "step": 12337500 + }, + { + "epoch": 35.71, + "learning_rate": 3.214970967256391e-05, + "loss": 2.0766, + "step": 12338000 + }, + { + "epoch": 35.71, + "learning_rate": 3.2148987472211935e-05, + "loss": 2.057, + "step": 12338500 + }, + { + "epoch": 35.72, + "learning_rate": 3.214826382456466e-05, + "loss": 2.0669, + "step": 12339000 + }, + { + "epoch": 35.72, + "learning_rate": 3.214754017691738e-05, + "loss": 2.0566, + "step": 12339500 + }, + { + "epoch": 35.72, + "learning_rate": 3.2146817976565395e-05, + "loss": 2.07, + "step": 12340000 + }, + { + "epoch": 35.72, + "learning_rate": 3.214609432891812e-05, + "loss": 2.0699, + "step": 12340500 + }, + { + "epoch": 35.72, + "learning_rate": 3.214537068127084e-05, + "loss": 2.0725, + "step": 12341000 + }, + { + "epoch": 35.72, + "learning_rate": 3.214464703362357e-05, + "loss": 2.0654, + "step": 12341500 + }, + { + "epoch": 35.73, + "learning_rate": 3.214392338597629e-05, + "loss": 2.095, + "step": 12342000 + }, + { + "epoch": 35.73, + "learning_rate": 3.214319973832901e-05, + "loss": 2.0897, + "step": 12342500 + }, + { + "epoch": 35.73, + "learning_rate": 3.2142476090681735e-05, + "loss": 2.0639, + "step": 12343000 + }, + { + "epoch": 35.73, + "learning_rate": 3.214175244303446e-05, + "loss": 2.0747, + "step": 12343500 + }, + { + "epoch": 35.73, + "learning_rate": 3.214103024268247e-05, + "loss": 2.0985, + "step": 12344000 + }, + { + "epoch": 35.73, + "learning_rate": 3.21403065950352e-05, + "loss": 2.0821, + "step": 12344500 + }, + { + "epoch": 35.73, + "learning_rate": 3.2139582947387924e-05, + "loss": 2.0772, + "step": 12345000 + }, + { + "epoch": 35.74, + "learning_rate": 3.2138859299740646e-05, + "loss": 2.0793, + "step": 12345500 + }, + { + "epoch": 35.74, + "learning_rate": 3.213813565209337e-05, + "loss": 2.0757, + "step": 12346000 + }, + { + "epoch": 35.74, + "learning_rate": 3.213741345174139e-05, + "loss": 2.0579, + "step": 12346500 + }, + { + "epoch": 35.74, + "learning_rate": 3.2136691251389406e-05, + "loss": 2.0839, + "step": 12347000 + }, + { + "epoch": 35.74, + "learning_rate": 3.213596760374213e-05, + "loss": 2.0958, + "step": 12347500 + }, + { + "epoch": 35.74, + "learning_rate": 3.213524395609485e-05, + "loss": 2.0923, + "step": 12348000 + }, + { + "epoch": 35.74, + "learning_rate": 3.213452030844757e-05, + "loss": 2.072, + "step": 12348500 + }, + { + "epoch": 35.75, + "learning_rate": 3.2133796660800295e-05, + "loss": 2.0561, + "step": 12349000 + }, + { + "epoch": 35.75, + "learning_rate": 3.213307301315302e-05, + "loss": 2.1017, + "step": 12349500 + }, + { + "epoch": 35.75, + "learning_rate": 3.213234936550574e-05, + "loss": 2.0697, + "step": 12350000 + }, + { + "epoch": 35.75, + "learning_rate": 3.213162571785847e-05, + "loss": 2.0788, + "step": 12350500 + }, + { + "epoch": 35.75, + "learning_rate": 3.213090207021119e-05, + "loss": 2.0646, + "step": 12351000 + }, + { + "epoch": 35.75, + "learning_rate": 3.213017842256392e-05, + "loss": 2.1037, + "step": 12351500 + }, + { + "epoch": 35.75, + "learning_rate": 3.212945477491664e-05, + "loss": 2.0851, + "step": 12352000 + }, + { + "epoch": 35.76, + "learning_rate": 3.2128731127269364e-05, + "loss": 2.0882, + "step": 12352500 + }, + { + "epoch": 35.76, + "learning_rate": 3.212800747962209e-05, + "loss": 2.1017, + "step": 12353000 + }, + { + "epoch": 35.76, + "learning_rate": 3.212728383197481e-05, + "loss": 2.0967, + "step": 12353500 + }, + { + "epoch": 35.76, + "learning_rate": 3.2126561631622824e-05, + "loss": 2.0773, + "step": 12354000 + }, + { + "epoch": 35.76, + "learning_rate": 3.212583798397555e-05, + "loss": 2.0672, + "step": 12354500 + }, + { + "epoch": 35.76, + "learning_rate": 3.212511433632827e-05, + "loss": 2.1051, + "step": 12355000 + }, + { + "epoch": 35.76, + "learning_rate": 3.212439068868099e-05, + "loss": 2.0756, + "step": 12355500 + }, + { + "epoch": 35.77, + "learning_rate": 3.212366704103372e-05, + "loss": 2.0727, + "step": 12356000 + }, + { + "epoch": 35.77, + "learning_rate": 3.212294339338644e-05, + "loss": 2.0646, + "step": 12356500 + }, + { + "epoch": 35.77, + "learning_rate": 3.212222119303446e-05, + "loss": 2.0542, + "step": 12357000 + }, + { + "epoch": 35.77, + "learning_rate": 3.212149754538718e-05, + "loss": 2.0827, + "step": 12357500 + }, + { + "epoch": 35.77, + "learning_rate": 3.2120775345035196e-05, + "loss": 2.0811, + "step": 12358000 + }, + { + "epoch": 35.77, + "learning_rate": 3.212005169738792e-05, + "loss": 2.0549, + "step": 12358500 + }, + { + "epoch": 35.77, + "learning_rate": 3.211932804974064e-05, + "loss": 2.0801, + "step": 12359000 + }, + { + "epoch": 35.78, + "learning_rate": 3.211860440209337e-05, + "loss": 2.0765, + "step": 12359500 + }, + { + "epoch": 35.78, + "learning_rate": 3.211788220174139e-05, + "loss": 2.0914, + "step": 12360000 + }, + { + "epoch": 35.78, + "learning_rate": 3.2117158554094114e-05, + "loss": 2.0775, + "step": 12360500 + }, + { + "epoch": 35.78, + "learning_rate": 3.2116434906446836e-05, + "loss": 2.0919, + "step": 12361000 + }, + { + "epoch": 35.78, + "learning_rate": 3.211571125879956e-05, + "loss": 2.0749, + "step": 12361500 + }, + { + "epoch": 35.78, + "learning_rate": 3.2114989058447574e-05, + "loss": 2.069, + "step": 12362000 + }, + { + "epoch": 35.78, + "learning_rate": 3.2114265410800296e-05, + "loss": 2.0814, + "step": 12362500 + }, + { + "epoch": 35.79, + "learning_rate": 3.211354176315302e-05, + "loss": 2.0694, + "step": 12363000 + }, + { + "epoch": 35.79, + "learning_rate": 3.211281811550575e-05, + "loss": 2.08, + "step": 12363500 + }, + { + "epoch": 35.79, + "learning_rate": 3.211209446785847e-05, + "loss": 2.0777, + "step": 12364000 + }, + { + "epoch": 35.79, + "learning_rate": 3.211137082021119e-05, + "loss": 2.0801, + "step": 12364500 + }, + { + "epoch": 35.79, + "learning_rate": 3.2110647172563914e-05, + "loss": 2.0677, + "step": 12365000 + }, + { + "epoch": 35.79, + "learning_rate": 3.2109923524916636e-05, + "loss": 2.0672, + "step": 12365500 + }, + { + "epoch": 35.79, + "learning_rate": 3.210919987726936e-05, + "loss": 2.0767, + "step": 12366000 + }, + { + "epoch": 35.8, + "learning_rate": 3.210847622962209e-05, + "loss": 2.0748, + "step": 12366500 + }, + { + "epoch": 35.8, + "learning_rate": 3.210775258197481e-05, + "loss": 2.0755, + "step": 12367000 + }, + { + "epoch": 35.8, + "learning_rate": 3.210702893432753e-05, + "loss": 2.0837, + "step": 12367500 + }, + { + "epoch": 35.8, + "learning_rate": 3.2106305286680254e-05, + "loss": 2.0788, + "step": 12368000 + }, + { + "epoch": 35.8, + "learning_rate": 3.2105581639032976e-05, + "loss": 2.0958, + "step": 12368500 + }, + { + "epoch": 35.8, + "learning_rate": 3.21048579913857e-05, + "loss": 2.0801, + "step": 12369000 + }, + { + "epoch": 35.8, + "learning_rate": 3.210413434373842e-05, + "loss": 2.074, + "step": 12369500 + }, + { + "epoch": 35.81, + "learning_rate": 3.210341069609114e-05, + "loss": 2.068, + "step": 12370000 + }, + { + "epoch": 35.81, + "learning_rate": 3.2102688495739165e-05, + "loss": 2.0894, + "step": 12370500 + }, + { + "epoch": 35.81, + "learning_rate": 3.210196484809189e-05, + "loss": 2.0845, + "step": 12371000 + }, + { + "epoch": 35.81, + "learning_rate": 3.210124120044461e-05, + "loss": 2.0939, + "step": 12371500 + }, + { + "epoch": 35.81, + "learning_rate": 3.210051755279733e-05, + "loss": 2.0645, + "step": 12372000 + }, + { + "epoch": 35.81, + "learning_rate": 3.2099793905150054e-05, + "loss": 2.0725, + "step": 12372500 + }, + { + "epoch": 35.81, + "learning_rate": 3.2099070257502777e-05, + "loss": 2.0645, + "step": 12373000 + }, + { + "epoch": 35.82, + "learning_rate": 3.2098346609855506e-05, + "loss": 2.0696, + "step": 12373500 + }, + { + "epoch": 35.82, + "learning_rate": 3.209762440950352e-05, + "loss": 2.0794, + "step": 12374000 + }, + { + "epoch": 35.82, + "learning_rate": 3.2096902209151543e-05, + "loss": 2.0687, + "step": 12374500 + }, + { + "epoch": 35.82, + "learning_rate": 3.2096178561504266e-05, + "loss": 2.0684, + "step": 12375000 + }, + { + "epoch": 35.82, + "learning_rate": 3.209545491385699e-05, + "loss": 2.1013, + "step": 12375500 + }, + { + "epoch": 35.82, + "learning_rate": 3.209473126620971e-05, + "loss": 2.0805, + "step": 12376000 + }, + { + "epoch": 35.82, + "learning_rate": 3.209400761856243e-05, + "loss": 2.0652, + "step": 12376500 + }, + { + "epoch": 35.83, + "learning_rate": 3.2093283970915155e-05, + "loss": 2.078, + "step": 12377000 + }, + { + "epoch": 35.83, + "learning_rate": 3.209256032326788e-05, + "loss": 2.0767, + "step": 12377500 + }, + { + "epoch": 35.83, + "learning_rate": 3.20918366756206e-05, + "loss": 2.0718, + "step": 12378000 + }, + { + "epoch": 35.83, + "learning_rate": 3.209111302797332e-05, + "loss": 2.0854, + "step": 12378500 + }, + { + "epoch": 35.83, + "learning_rate": 3.2090390827621344e-05, + "loss": 2.0548, + "step": 12379000 + }, + { + "epoch": 35.83, + "learning_rate": 3.2089667179974066e-05, + "loss": 2.0751, + "step": 12379500 + }, + { + "epoch": 35.84, + "learning_rate": 3.208894353232679e-05, + "loss": 2.0785, + "step": 12380000 + }, + { + "epoch": 35.84, + "learning_rate": 3.208821988467951e-05, + "loss": 2.0649, + "step": 12380500 + }, + { + "epoch": 35.84, + "learning_rate": 3.2087497684327526e-05, + "loss": 2.06, + "step": 12381000 + }, + { + "epoch": 35.84, + "learning_rate": 3.2086774036680255e-05, + "loss": 2.0807, + "step": 12381500 + }, + { + "epoch": 35.84, + "learning_rate": 3.208605038903298e-05, + "loss": 2.0539, + "step": 12382000 + }, + { + "epoch": 35.84, + "learning_rate": 3.20853267413857e-05, + "loss": 2.0846, + "step": 12382500 + }, + { + "epoch": 35.84, + "learning_rate": 3.2084605988329015e-05, + "loss": 2.0738, + "step": 12383000 + }, + { + "epoch": 35.85, + "learning_rate": 3.208388234068174e-05, + "loss": 2.0586, + "step": 12383500 + }, + { + "epoch": 35.85, + "learning_rate": 3.208315869303446e-05, + "loss": 2.0694, + "step": 12384000 + }, + { + "epoch": 35.85, + "learning_rate": 3.208243504538718e-05, + "loss": 2.0706, + "step": 12384500 + }, + { + "epoch": 35.85, + "learning_rate": 3.2081711397739904e-05, + "loss": 2.0649, + "step": 12385000 + }, + { + "epoch": 35.85, + "learning_rate": 3.2080987750092626e-05, + "loss": 2.0862, + "step": 12385500 + }, + { + "epoch": 35.85, + "learning_rate": 3.208026410244535e-05, + "loss": 2.07, + "step": 12386000 + }, + { + "epoch": 35.85, + "learning_rate": 3.207954045479807e-05, + "loss": 2.0759, + "step": 12386500 + }, + { + "epoch": 35.86, + "learning_rate": 3.20788168071508e-05, + "loss": 2.0763, + "step": 12387000 + }, + { + "epoch": 35.86, + "learning_rate": 3.207809605409411e-05, + "loss": 2.0803, + "step": 12387500 + }, + { + "epoch": 35.86, + "learning_rate": 3.2077373853742124e-05, + "loss": 2.0679, + "step": 12388000 + }, + { + "epoch": 35.86, + "learning_rate": 3.2076650206094846e-05, + "loss": 2.0741, + "step": 12388500 + }, + { + "epoch": 35.86, + "learning_rate": 3.2075926558447575e-05, + "loss": 2.1108, + "step": 12389000 + }, + { + "epoch": 35.86, + "learning_rate": 3.20752029108003e-05, + "loss": 2.0748, + "step": 12389500 + }, + { + "epoch": 35.86, + "learning_rate": 3.207447926315303e-05, + "loss": 2.0829, + "step": 12390000 + }, + { + "epoch": 35.87, + "learning_rate": 3.207375706280104e-05, + "loss": 2.0733, + "step": 12390500 + }, + { + "epoch": 35.87, + "learning_rate": 3.2073033415153764e-05, + "loss": 2.0505, + "step": 12391000 + }, + { + "epoch": 35.87, + "learning_rate": 3.207230976750649e-05, + "loss": 2.0956, + "step": 12391500 + }, + { + "epoch": 35.87, + "learning_rate": 3.207158611985921e-05, + "loss": 2.0625, + "step": 12392000 + }, + { + "epoch": 35.87, + "learning_rate": 3.207086247221193e-05, + "loss": 2.1094, + "step": 12392500 + }, + { + "epoch": 35.87, + "learning_rate": 3.207013882456465e-05, + "loss": 2.0948, + "step": 12393000 + }, + { + "epoch": 35.87, + "learning_rate": 3.2069415176917376e-05, + "loss": 2.0757, + "step": 12393500 + }, + { + "epoch": 35.88, + "learning_rate": 3.20686915292701e-05, + "loss": 2.0879, + "step": 12394000 + }, + { + "epoch": 35.88, + "learning_rate": 3.206796932891812e-05, + "loss": 2.083, + "step": 12394500 + }, + { + "epoch": 35.88, + "learning_rate": 3.206724568127084e-05, + "loss": 2.0923, + "step": 12395000 + }, + { + "epoch": 35.88, + "learning_rate": 3.2066522033623565e-05, + "loss": 2.0571, + "step": 12395500 + }, + { + "epoch": 35.88, + "learning_rate": 3.206579838597629e-05, + "loss": 2.0994, + "step": 12396000 + }, + { + "epoch": 35.88, + "learning_rate": 3.206507473832901e-05, + "loss": 2.0584, + "step": 12396500 + }, + { + "epoch": 35.88, + "learning_rate": 3.206435109068174e-05, + "loss": 2.0532, + "step": 12397000 + }, + { + "epoch": 35.89, + "learning_rate": 3.206362744303446e-05, + "loss": 2.0969, + "step": 12397500 + }, + { + "epoch": 35.89, + "learning_rate": 3.2062905242682476e-05, + "loss": 2.0907, + "step": 12398000 + }, + { + "epoch": 35.89, + "learning_rate": 3.20621815950352e-05, + "loss": 2.0594, + "step": 12398500 + }, + { + "epoch": 35.89, + "learning_rate": 3.206145794738793e-05, + "loss": 2.0663, + "step": 12399000 + }, + { + "epoch": 35.89, + "learning_rate": 3.206073429974065e-05, + "loss": 2.076, + "step": 12399500 + }, + { + "epoch": 35.89, + "learning_rate": 3.206001065209337e-05, + "loss": 2.0678, + "step": 12400000 + }, + { + "epoch": 35.89, + "learning_rate": 3.2059287004446094e-05, + "loss": 2.0838, + "step": 12400500 + }, + { + "epoch": 35.9, + "learning_rate": 3.2058563356798816e-05, + "loss": 2.065, + "step": 12401000 + }, + { + "epoch": 35.9, + "learning_rate": 3.205784115644683e-05, + "loss": 2.0945, + "step": 12401500 + }, + { + "epoch": 35.9, + "learning_rate": 3.2057117508799554e-05, + "loss": 2.0806, + "step": 12402000 + }, + { + "epoch": 35.9, + "learning_rate": 3.2056393861152276e-05, + "loss": 2.0867, + "step": 12402500 + }, + { + "epoch": 35.9, + "learning_rate": 3.2055670213505e-05, + "loss": 2.1111, + "step": 12403000 + }, + { + "epoch": 35.9, + "learning_rate": 3.205494656585773e-05, + "loss": 2.0866, + "step": 12403500 + }, + { + "epoch": 35.9, + "learning_rate": 3.205422291821045e-05, + "loss": 2.0932, + "step": 12404000 + }, + { + "epoch": 35.91, + "learning_rate": 3.205349927056318e-05, + "loss": 2.0781, + "step": 12404500 + }, + { + "epoch": 35.91, + "learning_rate": 3.20527756229159e-05, + "loss": 2.094, + "step": 12405000 + }, + { + "epoch": 35.91, + "learning_rate": 3.205205197526862e-05, + "loss": 2.0925, + "step": 12405500 + }, + { + "epoch": 35.91, + "learning_rate": 3.2051328327621345e-05, + "loss": 2.0854, + "step": 12406000 + }, + { + "epoch": 35.91, + "learning_rate": 3.205060467997407e-05, + "loss": 2.0633, + "step": 12406500 + }, + { + "epoch": 35.91, + "learning_rate": 3.204988247962208e-05, + "loss": 2.0532, + "step": 12407000 + }, + { + "epoch": 35.91, + "learning_rate": 3.2049158831974805e-05, + "loss": 2.0783, + "step": 12407500 + }, + { + "epoch": 35.92, + "learning_rate": 3.204843518432753e-05, + "loss": 2.0743, + "step": 12408000 + }, + { + "epoch": 35.92, + "learning_rate": 3.204771298397555e-05, + "loss": 2.0825, + "step": 12408500 + }, + { + "epoch": 35.92, + "learning_rate": 3.204698933632827e-05, + "loss": 2.0698, + "step": 12409000 + }, + { + "epoch": 35.92, + "learning_rate": 3.2046265688680994e-05, + "loss": 2.0744, + "step": 12409500 + }, + { + "epoch": 35.92, + "learning_rate": 3.2045542041033717e-05, + "loss": 2.081, + "step": 12410000 + }, + { + "epoch": 35.92, + "learning_rate": 3.204481839338644e-05, + "loss": 2.0975, + "step": 12410500 + }, + { + "epoch": 35.92, + "learning_rate": 3.204409474573916e-05, + "loss": 2.0849, + "step": 12411000 + }, + { + "epoch": 35.93, + "learning_rate": 3.204337254538718e-05, + "loss": 2.085, + "step": 12411500 + }, + { + "epoch": 35.93, + "learning_rate": 3.2042648897739906e-05, + "loss": 2.0817, + "step": 12412000 + }, + { + "epoch": 35.93, + "learning_rate": 3.204192525009263e-05, + "loss": 2.0916, + "step": 12412500 + }, + { + "epoch": 35.93, + "learning_rate": 3.204120160244535e-05, + "loss": 2.0827, + "step": 12413000 + }, + { + "epoch": 35.93, + "learning_rate": 3.204047795479808e-05, + "loss": 2.0756, + "step": 12413500 + }, + { + "epoch": 35.93, + "learning_rate": 3.20397543071508e-05, + "loss": 2.0696, + "step": 12414000 + }, + { + "epoch": 35.93, + "learning_rate": 3.2039030659503524e-05, + "loss": 2.0673, + "step": 12414500 + }, + { + "epoch": 35.94, + "learning_rate": 3.203830845915154e-05, + "loss": 2.0982, + "step": 12415000 + }, + { + "epoch": 35.94, + "learning_rate": 3.203758481150426e-05, + "loss": 2.0743, + "step": 12415500 + }, + { + "epoch": 35.94, + "learning_rate": 3.2036861163856984e-05, + "loss": 2.0681, + "step": 12416000 + }, + { + "epoch": 35.94, + "learning_rate": 3.2036137516209706e-05, + "loss": 2.084, + "step": 12416500 + }, + { + "epoch": 35.94, + "learning_rate": 3.203541386856243e-05, + "loss": 2.0789, + "step": 12417000 + }, + { + "epoch": 35.94, + "learning_rate": 3.203469166821045e-05, + "loss": 2.0807, + "step": 12417500 + }, + { + "epoch": 35.95, + "learning_rate": 3.203396802056317e-05, + "loss": 2.0525, + "step": 12418000 + }, + { + "epoch": 35.95, + "learning_rate": 3.2033244372915895e-05, + "loss": 2.0452, + "step": 12418500 + }, + { + "epoch": 35.95, + "learning_rate": 3.2032520725268624e-05, + "loss": 2.0895, + "step": 12419000 + }, + { + "epoch": 35.95, + "learning_rate": 3.2031797077621346e-05, + "loss": 2.0621, + "step": 12419500 + }, + { + "epoch": 35.95, + "learning_rate": 3.203107342997407e-05, + "loss": 2.0865, + "step": 12420000 + }, + { + "epoch": 35.95, + "learning_rate": 3.203034978232679e-05, + "loss": 2.049, + "step": 12420500 + }, + { + "epoch": 35.95, + "learning_rate": 3.202962613467951e-05, + "loss": 2.0984, + "step": 12421000 + }, + { + "epoch": 35.96, + "learning_rate": 3.202890393432753e-05, + "loss": 2.0844, + "step": 12421500 + }, + { + "epoch": 35.96, + "learning_rate": 3.202818028668025e-05, + "loss": 2.0891, + "step": 12422000 + }, + { + "epoch": 35.96, + "learning_rate": 3.202745663903298e-05, + "loss": 2.0873, + "step": 12422500 + }, + { + "epoch": 35.96, + "learning_rate": 3.2026734438680995e-05, + "loss": 2.0764, + "step": 12423000 + }, + { + "epoch": 35.96, + "learning_rate": 3.202601079103372e-05, + "loss": 2.0731, + "step": 12423500 + }, + { + "epoch": 35.96, + "learning_rate": 3.202528714338644e-05, + "loss": 2.0592, + "step": 12424000 + }, + { + "epoch": 35.96, + "learning_rate": 3.202456349573916e-05, + "loss": 2.0657, + "step": 12424500 + }, + { + "epoch": 35.97, + "learning_rate": 3.2023839848091884e-05, + "loss": 2.0917, + "step": 12425000 + }, + { + "epoch": 35.97, + "learning_rate": 3.2023117647739906e-05, + "loss": 2.08, + "step": 12425500 + }, + { + "epoch": 35.97, + "learning_rate": 3.202239400009263e-05, + "loss": 2.0762, + "step": 12426000 + }, + { + "epoch": 35.97, + "learning_rate": 3.202167035244536e-05, + "loss": 2.0708, + "step": 12426500 + }, + { + "epoch": 35.97, + "learning_rate": 3.202094670479808e-05, + "loss": 2.0826, + "step": 12427000 + }, + { + "epoch": 35.97, + "learning_rate": 3.20202230571508e-05, + "loss": 2.0832, + "step": 12427500 + }, + { + "epoch": 35.97, + "learning_rate": 3.2019499409503524e-05, + "loss": 2.0948, + "step": 12428000 + }, + { + "epoch": 35.98, + "learning_rate": 3.201877720915154e-05, + "loss": 2.0785, + "step": 12428500 + }, + { + "epoch": 35.98, + "learning_rate": 3.201805356150426e-05, + "loss": 2.0773, + "step": 12429000 + }, + { + "epoch": 35.98, + "learning_rate": 3.2017329913856984e-05, + "loss": 2.0607, + "step": 12429500 + }, + { + "epoch": 35.98, + "learning_rate": 3.2016606266209707e-05, + "loss": 2.1062, + "step": 12430000 + }, + { + "epoch": 35.98, + "learning_rate": 3.201588261856243e-05, + "loss": 2.0751, + "step": 12430500 + }, + { + "epoch": 35.98, + "learning_rate": 3.201515897091516e-05, + "loss": 2.0714, + "step": 12431000 + }, + { + "epoch": 35.98, + "learning_rate": 3.201443532326788e-05, + "loss": 2.0867, + "step": 12431500 + }, + { + "epoch": 35.99, + "learning_rate": 3.20137116756206e-05, + "loss": 2.0759, + "step": 12432000 + }, + { + "epoch": 35.99, + "learning_rate": 3.2012988027973325e-05, + "loss": 2.0983, + "step": 12432500 + }, + { + "epoch": 35.99, + "learning_rate": 3.201226438032605e-05, + "loss": 2.0554, + "step": 12433000 + }, + { + "epoch": 35.99, + "learning_rate": 3.2011540732678776e-05, + "loss": 2.0906, + "step": 12433500 + }, + { + "epoch": 35.99, + "learning_rate": 3.201081853232679e-05, + "loss": 2.0784, + "step": 12434000 + }, + { + "epoch": 35.99, + "learning_rate": 3.2010094884679514e-05, + "loss": 2.068, + "step": 12434500 + }, + { + "epoch": 35.99, + "learning_rate": 3.2009371237032236e-05, + "loss": 2.06, + "step": 12435000 + }, + { + "epoch": 36.0, + "learning_rate": 3.200864903668026e-05, + "loss": 2.0647, + "step": 12435500 + }, + { + "epoch": 36.0, + "learning_rate": 3.200792538903298e-05, + "loss": 2.0761, + "step": 12436000 + }, + { + "epoch": 36.0, + "learning_rate": 3.2007203188680996e-05, + "loss": 2.0755, + "step": 12436500 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.6696317492810138, + "eval_accuracy_mlm": 0.6346618176717469, + "eval_accuracy_nsp": 0.8570164945713199, + "eval_loss": 2.170750141143799, + "eval_runtime": 331.4091, + "eval_samples_per_second": 1316.759, + "eval_steps_per_second": 54.866, + "step": 12436992 + }, + { + "epoch": 36.0, + "learning_rate": 3.200647954103372e-05, + "loss": 2.0835, + "step": 12437000 + }, + { + "epoch": 36.0, + "learning_rate": 3.200575589338644e-05, + "loss": 2.0695, + "step": 12437500 + }, + { + "epoch": 36.0, + "learning_rate": 3.200503224573916e-05, + "loss": 2.0772, + "step": 12438000 + }, + { + "epoch": 36.0, + "learning_rate": 3.2004308598091885e-05, + "loss": 2.0507, + "step": 12438500 + }, + { + "epoch": 36.01, + "learning_rate": 3.200358495044461e-05, + "loss": 2.0681, + "step": 12439000 + }, + { + "epoch": 36.01, + "learning_rate": 3.200286130279733e-05, + "loss": 2.0525, + "step": 12439500 + }, + { + "epoch": 36.01, + "learning_rate": 3.200213765515006e-05, + "loss": 2.0395, + "step": 12440000 + }, + { + "epoch": 36.01, + "learning_rate": 3.200141400750278e-05, + "loss": 2.0736, + "step": 12440500 + }, + { + "epoch": 36.01, + "learning_rate": 3.200069035985551e-05, + "loss": 2.0698, + "step": 12441000 + }, + { + "epoch": 36.01, + "learning_rate": 3.199996671220823e-05, + "loss": 2.0789, + "step": 12441500 + }, + { + "epoch": 36.01, + "learning_rate": 3.199924595915154e-05, + "loss": 2.0589, + "step": 12442000 + }, + { + "epoch": 36.02, + "learning_rate": 3.199852231150426e-05, + "loss": 2.0533, + "step": 12442500 + }, + { + "epoch": 36.02, + "learning_rate": 3.1997798663856985e-05, + "loss": 2.0502, + "step": 12443000 + }, + { + "epoch": 36.02, + "learning_rate": 3.199707501620971e-05, + "loss": 2.0702, + "step": 12443500 + }, + { + "epoch": 36.02, + "learning_rate": 3.199635136856243e-05, + "loss": 2.0126, + "step": 12444000 + }, + { + "epoch": 36.02, + "learning_rate": 3.199562772091516e-05, + "loss": 2.0865, + "step": 12444500 + }, + { + "epoch": 36.02, + "learning_rate": 3.199490407326788e-05, + "loss": 2.0577, + "step": 12445000 + }, + { + "epoch": 36.02, + "learning_rate": 3.19941804256206e-05, + "loss": 2.0676, + "step": 12445500 + }, + { + "epoch": 36.03, + "learning_rate": 3.1993456777973325e-05, + "loss": 2.0435, + "step": 12446000 + }, + { + "epoch": 36.03, + "learning_rate": 3.199273313032605e-05, + "loss": 2.0495, + "step": 12446500 + }, + { + "epoch": 36.03, + "learning_rate": 3.199200948267877e-05, + "loss": 2.0509, + "step": 12447000 + }, + { + "epoch": 36.03, + "learning_rate": 3.199128583503149e-05, + "loss": 2.0273, + "step": 12447500 + }, + { + "epoch": 36.03, + "learning_rate": 3.1990562187384214e-05, + "loss": 2.0517, + "step": 12448000 + }, + { + "epoch": 36.03, + "learning_rate": 3.198983853973694e-05, + "loss": 2.0624, + "step": 12448500 + }, + { + "epoch": 36.03, + "learning_rate": 3.1989114892089666e-05, + "loss": 2.0471, + "step": 12449000 + }, + { + "epoch": 36.04, + "learning_rate": 3.198839269173768e-05, + "loss": 2.0762, + "step": 12449500 + }, + { + "epoch": 36.04, + "learning_rate": 3.198766904409041e-05, + "loss": 2.0488, + "step": 12450000 + }, + { + "epoch": 36.04, + "learning_rate": 3.1986946843738426e-05, + "loss": 2.0747, + "step": 12450500 + }, + { + "epoch": 36.04, + "learning_rate": 3.198622464338644e-05, + "loss": 2.059, + "step": 12451000 + }, + { + "epoch": 36.04, + "learning_rate": 3.1985500995739163e-05, + "loss": 2.0505, + "step": 12451500 + }, + { + "epoch": 36.04, + "learning_rate": 3.1984777348091886e-05, + "loss": 2.0622, + "step": 12452000 + }, + { + "epoch": 36.04, + "learning_rate": 3.198405370044461e-05, + "loss": 2.0598, + "step": 12452500 + }, + { + "epoch": 36.05, + "learning_rate": 3.198333005279733e-05, + "loss": 2.0451, + "step": 12453000 + }, + { + "epoch": 36.05, + "learning_rate": 3.198260640515006e-05, + "loss": 2.0541, + "step": 12453500 + }, + { + "epoch": 36.05, + "learning_rate": 3.198188275750278e-05, + "loss": 2.0547, + "step": 12454000 + }, + { + "epoch": 36.05, + "learning_rate": 3.1981159109855504e-05, + "loss": 2.0726, + "step": 12454500 + }, + { + "epoch": 36.05, + "learning_rate": 3.1980435462208226e-05, + "loss": 2.0486, + "step": 12455000 + }, + { + "epoch": 36.05, + "learning_rate": 3.197971181456095e-05, + "loss": 2.0548, + "step": 12455500 + }, + { + "epoch": 36.06, + "learning_rate": 3.197898816691368e-05, + "loss": 2.0418, + "step": 12456000 + }, + { + "epoch": 36.06, + "learning_rate": 3.19782645192664e-05, + "loss": 2.0776, + "step": 12456500 + }, + { + "epoch": 36.06, + "learning_rate": 3.197754087161912e-05, + "loss": 2.0645, + "step": 12457000 + }, + { + "epoch": 36.06, + "learning_rate": 3.1976817223971844e-05, + "loss": 2.0543, + "step": 12457500 + }, + { + "epoch": 36.06, + "learning_rate": 3.1976093576324566e-05, + "loss": 2.0534, + "step": 12458000 + }, + { + "epoch": 36.06, + "learning_rate": 3.197536992867729e-05, + "loss": 2.0317, + "step": 12458500 + }, + { + "epoch": 36.06, + "learning_rate": 3.197464772832531e-05, + "loss": 2.0799, + "step": 12459000 + }, + { + "epoch": 36.07, + "learning_rate": 3.197392408067803e-05, + "loss": 2.0623, + "step": 12459500 + }, + { + "epoch": 36.07, + "learning_rate": 3.197320188032605e-05, + "loss": 2.0328, + "step": 12460000 + }, + { + "epoch": 36.07, + "learning_rate": 3.197247823267877e-05, + "loss": 2.0482, + "step": 12460500 + }, + { + "epoch": 36.07, + "learning_rate": 3.197175458503149e-05, + "loss": 2.0596, + "step": 12461000 + }, + { + "epoch": 36.07, + "learning_rate": 3.1971030937384215e-05, + "loss": 2.0633, + "step": 12461500 + }, + { + "epoch": 36.07, + "learning_rate": 3.197030728973694e-05, + "loss": 2.0215, + "step": 12462000 + }, + { + "epoch": 36.07, + "learning_rate": 3.196958364208966e-05, + "loss": 2.0716, + "step": 12462500 + }, + { + "epoch": 36.08, + "learning_rate": 3.196885999444238e-05, + "loss": 2.0564, + "step": 12463000 + }, + { + "epoch": 36.08, + "learning_rate": 3.196813779409041e-05, + "loss": 2.065, + "step": 12463500 + }, + { + "epoch": 36.08, + "learning_rate": 3.196741414644313e-05, + "loss": 2.0426, + "step": 12464000 + }, + { + "epoch": 36.08, + "learning_rate": 3.1966690498795855e-05, + "loss": 2.0379, + "step": 12464500 + }, + { + "epoch": 36.08, + "learning_rate": 3.196596685114858e-05, + "loss": 2.075, + "step": 12465000 + }, + { + "epoch": 36.08, + "learning_rate": 3.19652432035013e-05, + "loss": 2.0741, + "step": 12465500 + }, + { + "epoch": 36.08, + "learning_rate": 3.196451955585402e-05, + "loss": 2.0446, + "step": 12466000 + }, + { + "epoch": 36.09, + "learning_rate": 3.196379735550204e-05, + "loss": 2.0782, + "step": 12466500 + }, + { + "epoch": 36.09, + "learning_rate": 3.196307370785476e-05, + "loss": 2.0721, + "step": 12467000 + }, + { + "epoch": 36.09, + "learning_rate": 3.196235006020749e-05, + "loss": 2.09, + "step": 12467500 + }, + { + "epoch": 36.09, + "learning_rate": 3.196162641256021e-05, + "loss": 2.0738, + "step": 12468000 + }, + { + "epoch": 36.09, + "learning_rate": 3.196090276491293e-05, + "loss": 2.051, + "step": 12468500 + }, + { + "epoch": 36.09, + "learning_rate": 3.1960179117265656e-05, + "loss": 2.0632, + "step": 12469000 + }, + { + "epoch": 36.09, + "learning_rate": 3.195945691691367e-05, + "loss": 2.0712, + "step": 12469500 + }, + { + "epoch": 36.1, + "learning_rate": 3.195873326926639e-05, + "loss": 2.0805, + "step": 12470000 + }, + { + "epoch": 36.1, + "learning_rate": 3.1958009621619116e-05, + "loss": 2.0623, + "step": 12470500 + }, + { + "epoch": 36.1, + "learning_rate": 3.1957285973971845e-05, + "loss": 2.0721, + "step": 12471000 + }, + { + "epoch": 36.1, + "learning_rate": 3.195656377361986e-05, + "loss": 2.0635, + "step": 12471500 + }, + { + "epoch": 36.1, + "learning_rate": 3.195584012597259e-05, + "loss": 2.0499, + "step": 12472000 + }, + { + "epoch": 36.1, + "learning_rate": 3.195511647832531e-05, + "loss": 2.0377, + "step": 12472500 + }, + { + "epoch": 36.1, + "learning_rate": 3.1954392830678034e-05, + "loss": 2.0515, + "step": 12473000 + }, + { + "epoch": 36.11, + "learning_rate": 3.1953669183030756e-05, + "loss": 2.0584, + "step": 12473500 + }, + { + "epoch": 36.11, + "learning_rate": 3.195294553538348e-05, + "loss": 2.099, + "step": 12474000 + }, + { + "epoch": 36.11, + "learning_rate": 3.19522218877362e-05, + "loss": 2.0449, + "step": 12474500 + }, + { + "epoch": 36.11, + "learning_rate": 3.195149824008892e-05, + "loss": 2.0751, + "step": 12475000 + }, + { + "epoch": 36.11, + "learning_rate": 3.1950774592441645e-05, + "loss": 2.0738, + "step": 12475500 + }, + { + "epoch": 36.11, + "learning_rate": 3.195005239208966e-05, + "loss": 2.0558, + "step": 12476000 + }, + { + "epoch": 36.11, + "learning_rate": 3.194932874444239e-05, + "loss": 2.0682, + "step": 12476500 + }, + { + "epoch": 36.12, + "learning_rate": 3.194860509679511e-05, + "loss": 2.0579, + "step": 12477000 + }, + { + "epoch": 36.12, + "learning_rate": 3.1947881449147834e-05, + "loss": 2.0891, + "step": 12477500 + }, + { + "epoch": 36.12, + "learning_rate": 3.194715924879585e-05, + "loss": 2.0719, + "step": 12478000 + }, + { + "epoch": 36.12, + "learning_rate": 3.194643560114858e-05, + "loss": 2.066, + "step": 12478500 + }, + { + "epoch": 36.12, + "learning_rate": 3.1945713400796594e-05, + "loss": 2.0694, + "step": 12479000 + }, + { + "epoch": 36.12, + "learning_rate": 3.1944989753149316e-05, + "loss": 2.0415, + "step": 12479500 + }, + { + "epoch": 36.12, + "learning_rate": 3.194426610550204e-05, + "loss": 2.0602, + "step": 12480000 + }, + { + "epoch": 36.13, + "learning_rate": 3.194354245785476e-05, + "loss": 2.0712, + "step": 12480500 + }, + { + "epoch": 36.13, + "learning_rate": 3.194281881020749e-05, + "loss": 2.0438, + "step": 12481000 + }, + { + "epoch": 36.13, + "learning_rate": 3.194209516256021e-05, + "loss": 2.0441, + "step": 12481500 + }, + { + "epoch": 36.13, + "learning_rate": 3.1941371514912934e-05, + "loss": 2.0848, + "step": 12482000 + }, + { + "epoch": 36.13, + "learning_rate": 3.1940647867265656e-05, + "loss": 2.0767, + "step": 12482500 + }, + { + "epoch": 36.13, + "learning_rate": 3.193992566691367e-05, + "loss": 2.0787, + "step": 12483000 + }, + { + "epoch": 36.13, + "learning_rate": 3.1939202019266394e-05, + "loss": 2.0593, + "step": 12483500 + }, + { + "epoch": 36.14, + "learning_rate": 3.1938478371619116e-05, + "loss": 2.0529, + "step": 12484000 + }, + { + "epoch": 36.14, + "learning_rate": 3.193775472397184e-05, + "loss": 2.0689, + "step": 12484500 + }, + { + "epoch": 36.14, + "learning_rate": 3.193703107632456e-05, + "loss": 2.068, + "step": 12485000 + }, + { + "epoch": 36.14, + "learning_rate": 3.193630887597258e-05, + "loss": 2.0508, + "step": 12485500 + }, + { + "epoch": 36.14, + "learning_rate": 3.193558522832531e-05, + "loss": 2.065, + "step": 12486000 + }, + { + "epoch": 36.14, + "learning_rate": 3.1934861580678034e-05, + "loss": 2.0539, + "step": 12486500 + }, + { + "epoch": 36.14, + "learning_rate": 3.193413793303076e-05, + "loss": 2.0388, + "step": 12487000 + }, + { + "epoch": 36.15, + "learning_rate": 3.193341428538348e-05, + "loss": 2.0539, + "step": 12487500 + }, + { + "epoch": 36.15, + "learning_rate": 3.19326906377362e-05, + "loss": 2.0467, + "step": 12488000 + }, + { + "epoch": 36.15, + "learning_rate": 3.193196699008892e-05, + "loss": 2.0355, + "step": 12488500 + }, + { + "epoch": 36.15, + "learning_rate": 3.1931243342441646e-05, + "loss": 2.0612, + "step": 12489000 + }, + { + "epoch": 36.15, + "learning_rate": 3.193051969479437e-05, + "loss": 2.0495, + "step": 12489500 + }, + { + "epoch": 36.15, + "learning_rate": 3.192979604714709e-05, + "loss": 2.0295, + "step": 12490000 + }, + { + "epoch": 36.15, + "learning_rate": 3.192907239949981e-05, + "loss": 2.0481, + "step": 12490500 + }, + { + "epoch": 36.16, + "learning_rate": 3.1928350199147835e-05, + "loss": 2.0497, + "step": 12491000 + }, + { + "epoch": 36.16, + "learning_rate": 3.192762655150056e-05, + "loss": 2.0675, + "step": 12491500 + }, + { + "epoch": 36.16, + "learning_rate": 3.192690290385328e-05, + "loss": 2.0984, + "step": 12492000 + }, + { + "epoch": 36.16, + "learning_rate": 3.1926179256206e-05, + "loss": 2.0355, + "step": 12492500 + }, + { + "epoch": 36.16, + "learning_rate": 3.192545560855873e-05, + "loss": 2.0512, + "step": 12493000 + }, + { + "epoch": 36.16, + "learning_rate": 3.1924733408206746e-05, + "loss": 2.0467, + "step": 12493500 + }, + { + "epoch": 36.17, + "learning_rate": 3.192400976055947e-05, + "loss": 2.0507, + "step": 12494000 + }, + { + "epoch": 36.17, + "learning_rate": 3.192328611291219e-05, + "loss": 2.0522, + "step": 12494500 + }, + { + "epoch": 36.17, + "learning_rate": 3.192256246526491e-05, + "loss": 2.0341, + "step": 12495000 + }, + { + "epoch": 36.17, + "learning_rate": 3.1921840264912935e-05, + "loss": 2.078, + "step": 12495500 + }, + { + "epoch": 36.17, + "learning_rate": 3.192111661726566e-05, + "loss": 2.0741, + "step": 12496000 + }, + { + "epoch": 36.17, + "learning_rate": 3.192039296961838e-05, + "loss": 2.06, + "step": 12496500 + }, + { + "epoch": 36.17, + "learning_rate": 3.19196693219711e-05, + "loss": 2.0527, + "step": 12497000 + }, + { + "epoch": 36.18, + "learning_rate": 3.1918945674323824e-05, + "loss": 2.0608, + "step": 12497500 + }, + { + "epoch": 36.18, + "learning_rate": 3.1918222026676546e-05, + "loss": 2.0688, + "step": 12498000 + }, + { + "epoch": 36.18, + "learning_rate": 3.191749837902927e-05, + "loss": 2.0853, + "step": 12498500 + }, + { + "epoch": 36.18, + "learning_rate": 3.1916777625972584e-05, + "loss": 2.0431, + "step": 12499000 + }, + { + "epoch": 36.18, + "learning_rate": 3.1916053978325306e-05, + "loss": 2.0694, + "step": 12499500 + }, + { + "epoch": 36.18, + "learning_rate": 3.191533033067803e-05, + "loss": 2.0751, + "step": 12500000 + }, + { + "epoch": 36.18, + "learning_rate": 3.191460668303075e-05, + "loss": 2.0665, + "step": 12500500 + }, + { + "epoch": 36.19, + "learning_rate": 3.191388303538348e-05, + "loss": 2.0632, + "step": 12501000 + }, + { + "epoch": 36.19, + "learning_rate": 3.19131593877362e-05, + "loss": 2.0771, + "step": 12501500 + }, + { + "epoch": 36.19, + "learning_rate": 3.1912435740088924e-05, + "loss": 2.0455, + "step": 12502000 + }, + { + "epoch": 36.19, + "learning_rate": 3.191171353973694e-05, + "loss": 2.0751, + "step": 12502500 + }, + { + "epoch": 36.19, + "learning_rate": 3.191098989208967e-05, + "loss": 2.0598, + "step": 12503000 + }, + { + "epoch": 36.19, + "learning_rate": 3.191026624444239e-05, + "loss": 2.0535, + "step": 12503500 + }, + { + "epoch": 36.19, + "learning_rate": 3.190954259679511e-05, + "loss": 2.0529, + "step": 12504000 + }, + { + "epoch": 36.2, + "learning_rate": 3.1908818949147835e-05, + "loss": 2.0678, + "step": 12504500 + }, + { + "epoch": 36.2, + "learning_rate": 3.190809530150056e-05, + "loss": 2.0713, + "step": 12505000 + }, + { + "epoch": 36.2, + "learning_rate": 3.190737165385328e-05, + "loss": 2.0488, + "step": 12505500 + }, + { + "epoch": 36.2, + "learning_rate": 3.1906648006206e-05, + "loss": 2.0585, + "step": 12506000 + }, + { + "epoch": 36.2, + "learning_rate": 3.190592580585402e-05, + "loss": 2.0721, + "step": 12506500 + }, + { + "epoch": 36.2, + "learning_rate": 3.190520215820674e-05, + "loss": 2.0435, + "step": 12507000 + }, + { + "epoch": 36.2, + "learning_rate": 3.190447851055947e-05, + "loss": 2.05, + "step": 12507500 + }, + { + "epoch": 36.21, + "learning_rate": 3.190375486291219e-05, + "loss": 2.0783, + "step": 12508000 + }, + { + "epoch": 36.21, + "learning_rate": 3.190303121526492e-05, + "loss": 2.0287, + "step": 12508500 + }, + { + "epoch": 36.21, + "learning_rate": 3.190231046220823e-05, + "loss": 2.0612, + "step": 12509000 + }, + { + "epoch": 36.21, + "learning_rate": 3.190158681456095e-05, + "loss": 2.0493, + "step": 12509500 + }, + { + "epoch": 36.21, + "learning_rate": 3.1900863166913673e-05, + "loss": 2.0914, + "step": 12510000 + }, + { + "epoch": 36.21, + "learning_rate": 3.1900139519266396e-05, + "loss": 2.0358, + "step": 12510500 + }, + { + "epoch": 36.21, + "learning_rate": 3.189941587161912e-05, + "loss": 2.0578, + "step": 12511000 + }, + { + "epoch": 36.22, + "learning_rate": 3.189869222397184e-05, + "loss": 2.056, + "step": 12511500 + }, + { + "epoch": 36.22, + "learning_rate": 3.189796857632457e-05, + "loss": 2.067, + "step": 12512000 + }, + { + "epoch": 36.22, + "learning_rate": 3.189724492867729e-05, + "loss": 2.08, + "step": 12512500 + }, + { + "epoch": 36.22, + "learning_rate": 3.189652272832531e-05, + "loss": 2.0568, + "step": 12513000 + }, + { + "epoch": 36.22, + "learning_rate": 3.189580052797332e-05, + "loss": 2.0492, + "step": 12513500 + }, + { + "epoch": 36.22, + "learning_rate": 3.1895076880326045e-05, + "loss": 2.0488, + "step": 12514000 + }, + { + "epoch": 36.22, + "learning_rate": 3.189435323267877e-05, + "loss": 2.069, + "step": 12514500 + }, + { + "epoch": 36.23, + "learning_rate": 3.189362958503149e-05, + "loss": 2.0782, + "step": 12515000 + }, + { + "epoch": 36.23, + "learning_rate": 3.189290593738422e-05, + "loss": 2.0644, + "step": 12515500 + }, + { + "epoch": 36.23, + "learning_rate": 3.1892183737032234e-05, + "loss": 2.0795, + "step": 12516000 + }, + { + "epoch": 36.23, + "learning_rate": 3.189146008938496e-05, + "loss": 2.0847, + "step": 12516500 + }, + { + "epoch": 36.23, + "learning_rate": 3.189073788903298e-05, + "loss": 2.0753, + "step": 12517000 + }, + { + "epoch": 36.23, + "learning_rate": 3.18900142413857e-05, + "loss": 2.0615, + "step": 12517500 + }, + { + "epoch": 36.23, + "learning_rate": 3.188929059373842e-05, + "loss": 2.0735, + "step": 12518000 + }, + { + "epoch": 36.24, + "learning_rate": 3.1888566946091145e-05, + "loss": 2.0576, + "step": 12518500 + }, + { + "epoch": 36.24, + "learning_rate": 3.188784329844387e-05, + "loss": 2.0499, + "step": 12519000 + }, + { + "epoch": 36.24, + "learning_rate": 3.1887119650796596e-05, + "loss": 2.0407, + "step": 12519500 + }, + { + "epoch": 36.24, + "learning_rate": 3.188639600314932e-05, + "loss": 2.0644, + "step": 12520000 + }, + { + "epoch": 36.24, + "learning_rate": 3.188567235550204e-05, + "loss": 2.0722, + "step": 12520500 + }, + { + "epoch": 36.24, + "learning_rate": 3.188494870785476e-05, + "loss": 2.0538, + "step": 12521000 + }, + { + "epoch": 36.24, + "learning_rate": 3.1884225060207485e-05, + "loss": 2.0547, + "step": 12521500 + }, + { + "epoch": 36.25, + "learning_rate": 3.188350141256021e-05, + "loss": 2.0647, + "step": 12522000 + }, + { + "epoch": 36.25, + "learning_rate": 3.188277776491293e-05, + "loss": 2.064, + "step": 12522500 + }, + { + "epoch": 36.25, + "learning_rate": 3.188205411726565e-05, + "loss": 2.0364, + "step": 12523000 + }, + { + "epoch": 36.25, + "learning_rate": 3.188133046961838e-05, + "loss": 2.0598, + "step": 12523500 + }, + { + "epoch": 36.25, + "learning_rate": 3.18806068219711e-05, + "loss": 2.0673, + "step": 12524000 + }, + { + "epoch": 36.25, + "learning_rate": 3.1879883174323825e-05, + "loss": 2.0524, + "step": 12524500 + }, + { + "epoch": 36.25, + "learning_rate": 3.187915952667655e-05, + "loss": 2.0609, + "step": 12525000 + }, + { + "epoch": 36.26, + "learning_rate": 3.187843732632457e-05, + "loss": 2.0602, + "step": 12525500 + }, + { + "epoch": 36.26, + "learning_rate": 3.187771367867729e-05, + "loss": 2.0545, + "step": 12526000 + }, + { + "epoch": 36.26, + "learning_rate": 3.187699147832531e-05, + "loss": 2.0826, + "step": 12526500 + }, + { + "epoch": 36.26, + "learning_rate": 3.187626783067803e-05, + "loss": 2.0555, + "step": 12527000 + }, + { + "epoch": 36.26, + "learning_rate": 3.187554418303075e-05, + "loss": 2.0643, + "step": 12527500 + }, + { + "epoch": 36.26, + "learning_rate": 3.1874820535383474e-05, + "loss": 2.0474, + "step": 12528000 + }, + { + "epoch": 36.26, + "learning_rate": 3.18740968877362e-05, + "loss": 2.0573, + "step": 12528500 + }, + { + "epoch": 36.27, + "learning_rate": 3.187337324008892e-05, + "loss": 2.0887, + "step": 12529000 + }, + { + "epoch": 36.27, + "learning_rate": 3.187264959244165e-05, + "loss": 2.0556, + "step": 12529500 + }, + { + "epoch": 36.27, + "learning_rate": 3.187192594479437e-05, + "loss": 2.0809, + "step": 12530000 + }, + { + "epoch": 36.27, + "learning_rate": 3.187120229714709e-05, + "loss": 2.0732, + "step": 12530500 + }, + { + "epoch": 36.27, + "learning_rate": 3.1870480096795115e-05, + "loss": 2.0548, + "step": 12531000 + }, + { + "epoch": 36.27, + "learning_rate": 3.186975789644313e-05, + "loss": 2.081, + "step": 12531500 + }, + { + "epoch": 36.28, + "learning_rate": 3.186903424879585e-05, + "loss": 2.054, + "step": 12532000 + }, + { + "epoch": 36.28, + "learning_rate": 3.1868310601148575e-05, + "loss": 2.0834, + "step": 12532500 + }, + { + "epoch": 36.28, + "learning_rate": 3.18675869535013e-05, + "loss": 2.0576, + "step": 12533000 + }, + { + "epoch": 36.28, + "learning_rate": 3.186686330585402e-05, + "loss": 2.0484, + "step": 12533500 + }, + { + "epoch": 36.28, + "learning_rate": 3.186614110550204e-05, + "loss": 2.0445, + "step": 12534000 + }, + { + "epoch": 36.28, + "learning_rate": 3.1865417457854764e-05, + "loss": 2.0685, + "step": 12534500 + }, + { + "epoch": 36.28, + "learning_rate": 3.1864693810207486e-05, + "loss": 2.0674, + "step": 12535000 + }, + { + "epoch": 36.29, + "learning_rate": 3.186397016256021e-05, + "loss": 2.078, + "step": 12535500 + }, + { + "epoch": 36.29, + "learning_rate": 3.186324651491293e-05, + "loss": 2.0461, + "step": 12536000 + }, + { + "epoch": 36.29, + "learning_rate": 3.1862524314560946e-05, + "loss": 2.082, + "step": 12536500 + }, + { + "epoch": 36.29, + "learning_rate": 3.186180066691367e-05, + "loss": 2.0629, + "step": 12537000 + }, + { + "epoch": 36.29, + "learning_rate": 3.18610770192664e-05, + "loss": 2.0693, + "step": 12537500 + }, + { + "epoch": 36.29, + "learning_rate": 3.186035337161912e-05, + "loss": 2.0524, + "step": 12538000 + }, + { + "epoch": 36.29, + "learning_rate": 3.185962972397185e-05, + "loss": 2.0835, + "step": 12538500 + }, + { + "epoch": 36.3, + "learning_rate": 3.1858907523619864e-05, + "loss": 2.0698, + "step": 12539000 + }, + { + "epoch": 36.3, + "learning_rate": 3.1858183875972586e-05, + "loss": 2.0731, + "step": 12539500 + }, + { + "epoch": 36.3, + "learning_rate": 3.185746022832531e-05, + "loss": 2.0697, + "step": 12540000 + }, + { + "epoch": 36.3, + "learning_rate": 3.185673658067803e-05, + "loss": 2.0506, + "step": 12540500 + }, + { + "epoch": 36.3, + "learning_rate": 3.185601293303075e-05, + "loss": 2.0571, + "step": 12541000 + }, + { + "epoch": 36.3, + "learning_rate": 3.1855289285383475e-05, + "loss": 2.0708, + "step": 12541500 + }, + { + "epoch": 36.3, + "learning_rate": 3.18545656377362e-05, + "loss": 2.0597, + "step": 12542000 + }, + { + "epoch": 36.31, + "learning_rate": 3.185384343738422e-05, + "loss": 2.0473, + "step": 12542500 + }, + { + "epoch": 36.31, + "learning_rate": 3.185311978973694e-05, + "loss": 2.0741, + "step": 12543000 + }, + { + "epoch": 36.31, + "learning_rate": 3.1852396142089664e-05, + "loss": 2.0739, + "step": 12543500 + }, + { + "epoch": 36.31, + "learning_rate": 3.1851672494442387e-05, + "loss": 2.058, + "step": 12544000 + }, + { + "epoch": 36.31, + "learning_rate": 3.185094884679511e-05, + "loss": 2.0827, + "step": 12544500 + }, + { + "epoch": 36.31, + "learning_rate": 3.1850226646443124e-05, + "loss": 2.0693, + "step": 12545000 + }, + { + "epoch": 36.31, + "learning_rate": 3.1849502998795847e-05, + "loss": 2.0824, + "step": 12545500 + }, + { + "epoch": 36.32, + "learning_rate": 3.1848779351148576e-05, + "loss": 2.0731, + "step": 12546000 + }, + { + "epoch": 36.32, + "learning_rate": 3.18480557035013e-05, + "loss": 2.0742, + "step": 12546500 + }, + { + "epoch": 36.32, + "learning_rate": 3.184733350314932e-05, + "loss": 2.0738, + "step": 12547000 + }, + { + "epoch": 36.32, + "learning_rate": 3.184660985550204e-05, + "loss": 2.0807, + "step": 12547500 + }, + { + "epoch": 36.32, + "learning_rate": 3.1845886207854765e-05, + "loss": 2.0725, + "step": 12548000 + }, + { + "epoch": 36.32, + "learning_rate": 3.184516256020749e-05, + "loss": 2.0557, + "step": 12548500 + }, + { + "epoch": 36.32, + "learning_rate": 3.18444403598555e-05, + "loss": 2.0655, + "step": 12549000 + }, + { + "epoch": 36.33, + "learning_rate": 3.1843716712208225e-05, + "loss": 2.0949, + "step": 12549500 + }, + { + "epoch": 36.33, + "learning_rate": 3.184299306456095e-05, + "loss": 2.0792, + "step": 12550000 + }, + { + "epoch": 36.33, + "learning_rate": 3.1842269416913676e-05, + "loss": 2.0602, + "step": 12550500 + }, + { + "epoch": 36.33, + "learning_rate": 3.18415457692664e-05, + "loss": 2.0678, + "step": 12551000 + }, + { + "epoch": 36.33, + "learning_rate": 3.184082212161912e-05, + "loss": 2.0731, + "step": 12551500 + }, + { + "epoch": 36.33, + "learning_rate": 3.184009847397184e-05, + "loss": 2.0799, + "step": 12552000 + }, + { + "epoch": 36.33, + "learning_rate": 3.1839374826324565e-05, + "loss": 2.0524, + "step": 12552500 + }, + { + "epoch": 36.34, + "learning_rate": 3.183865117867729e-05, + "loss": 2.0685, + "step": 12553000 + }, + { + "epoch": 36.34, + "learning_rate": 3.1837927531030016e-05, + "loss": 2.0711, + "step": 12553500 + }, + { + "epoch": 36.34, + "learning_rate": 3.183720533067803e-05, + "loss": 2.0388, + "step": 12554000 + }, + { + "epoch": 36.34, + "learning_rate": 3.1836481683030754e-05, + "loss": 2.0613, + "step": 12554500 + }, + { + "epoch": 36.34, + "learning_rate": 3.1835758035383476e-05, + "loss": 2.0601, + "step": 12555000 + }, + { + "epoch": 36.34, + "learning_rate": 3.18350343877362e-05, + "loss": 2.0549, + "step": 12555500 + }, + { + "epoch": 36.34, + "learning_rate": 3.183431218738422e-05, + "loss": 2.067, + "step": 12556000 + }, + { + "epoch": 36.35, + "learning_rate": 3.1833589987032236e-05, + "loss": 2.0604, + "step": 12556500 + }, + { + "epoch": 36.35, + "learning_rate": 3.183286633938496e-05, + "loss": 2.0924, + "step": 12557000 + }, + { + "epoch": 36.35, + "learning_rate": 3.1832144139032974e-05, + "loss": 2.0479, + "step": 12557500 + }, + { + "epoch": 36.35, + "learning_rate": 3.1831420491385696e-05, + "loss": 2.0861, + "step": 12558000 + }, + { + "epoch": 36.35, + "learning_rate": 3.1830696843738425e-05, + "loss": 2.0665, + "step": 12558500 + }, + { + "epoch": 36.35, + "learning_rate": 3.182997319609115e-05, + "loss": 2.07, + "step": 12559000 + }, + { + "epoch": 36.35, + "learning_rate": 3.182924954844387e-05, + "loss": 2.0512, + "step": 12559500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182852590079659e-05, + "loss": 2.0528, + "step": 12560000 + }, + { + "epoch": 36.36, + "learning_rate": 3.1827802253149314e-05, + "loss": 2.0628, + "step": 12560500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182707860550204e-05, + "loss": 2.0573, + "step": 12561000 + }, + { + "epoch": 36.36, + "learning_rate": 3.1826354957854765e-05, + "loss": 2.0932, + "step": 12561500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182563131020749e-05, + "loss": 2.067, + "step": 12562000 + }, + { + "epoch": 36.36, + "learning_rate": 3.182490766256021e-05, + "loss": 2.0702, + "step": 12562500 + }, + { + "epoch": 36.36, + "learning_rate": 3.182418401491293e-05, + "loss": 2.0744, + "step": 12563000 + }, + { + "epoch": 36.37, + "learning_rate": 3.1823460367265654e-05, + "loss": 2.0666, + "step": 12563500 + }, + { + "epoch": 36.37, + "learning_rate": 3.182273816691368e-05, + "loss": 2.0565, + "step": 12564000 + }, + { + "epoch": 36.37, + "learning_rate": 3.18220145192664e-05, + "loss": 2.0499, + "step": 12564500 + }, + { + "epoch": 36.37, + "learning_rate": 3.182129087161912e-05, + "loss": 2.0701, + "step": 12565000 + }, + { + "epoch": 36.37, + "learning_rate": 3.182056722397184e-05, + "loss": 2.0617, + "step": 12565500 + }, + { + "epoch": 36.37, + "learning_rate": 3.1819843576324566e-05, + "loss": 2.0708, + "step": 12566000 + }, + { + "epoch": 36.37, + "learning_rate": 3.181911992867729e-05, + "loss": 2.0718, + "step": 12566500 + }, + { + "epoch": 36.38, + "learning_rate": 3.18183977283253e-05, + "loss": 2.0884, + "step": 12567000 + }, + { + "epoch": 36.38, + "learning_rate": 3.1817674080678026e-05, + "loss": 2.0645, + "step": 12567500 + }, + { + "epoch": 36.38, + "learning_rate": 3.181695043303075e-05, + "loss": 2.0532, + "step": 12568000 + }, + { + "epoch": 36.38, + "learning_rate": 3.181622678538348e-05, + "loss": 2.0727, + "step": 12568500 + }, + { + "epoch": 36.38, + "learning_rate": 3.18155031377362e-05, + "loss": 2.0658, + "step": 12569000 + }, + { + "epoch": 36.38, + "learning_rate": 3.181477949008893e-05, + "loss": 2.053, + "step": 12569500 + }, + { + "epoch": 36.39, + "learning_rate": 3.181405584244165e-05, + "loss": 2.0572, + "step": 12570000 + }, + { + "epoch": 36.39, + "learning_rate": 3.181333219479437e-05, + "loss": 2.0814, + "step": 12570500 + }, + { + "epoch": 36.39, + "learning_rate": 3.1812608547147095e-05, + "loss": 2.035, + "step": 12571000 + }, + { + "epoch": 36.39, + "learning_rate": 3.181188634679511e-05, + "loss": 2.0591, + "step": 12571500 + }, + { + "epoch": 36.39, + "learning_rate": 3.1811164146443126e-05, + "loss": 2.0613, + "step": 12572000 + }, + { + "epoch": 36.39, + "learning_rate": 3.181044049879585e-05, + "loss": 2.058, + "step": 12572500 + }, + { + "epoch": 36.39, + "learning_rate": 3.180971685114858e-05, + "loss": 2.0695, + "step": 12573000 + }, + { + "epoch": 36.4, + "learning_rate": 3.18089932035013e-05, + "loss": 2.1201, + "step": 12573500 + }, + { + "epoch": 36.4, + "learning_rate": 3.180826955585402e-05, + "loss": 2.0596, + "step": 12574000 + }, + { + "epoch": 36.4, + "learning_rate": 3.180754735550204e-05, + "loss": 2.0749, + "step": 12574500 + }, + { + "epoch": 36.4, + "learning_rate": 3.180682370785476e-05, + "loss": 2.0636, + "step": 12575000 + }, + { + "epoch": 36.4, + "learning_rate": 3.180610006020748e-05, + "loss": 2.0635, + "step": 12575500 + }, + { + "epoch": 36.4, + "learning_rate": 3.180537641256021e-05, + "loss": 2.0504, + "step": 12576000 + }, + { + "epoch": 36.4, + "learning_rate": 3.180465276491293e-05, + "loss": 2.0673, + "step": 12576500 + }, + { + "epoch": 36.41, + "learning_rate": 3.1803929117265655e-05, + "loss": 2.0859, + "step": 12577000 + }, + { + "epoch": 36.41, + "learning_rate": 3.180320546961838e-05, + "loss": 2.0704, + "step": 12577500 + }, + { + "epoch": 36.41, + "learning_rate": 3.18024832692664e-05, + "loss": 2.0792, + "step": 12578000 + }, + { + "epoch": 36.41, + "learning_rate": 3.180175962161912e-05, + "loss": 2.0506, + "step": 12578500 + }, + { + "epoch": 36.41, + "learning_rate": 3.180103742126714e-05, + "loss": 2.0946, + "step": 12579000 + }, + { + "epoch": 36.41, + "learning_rate": 3.180031377361986e-05, + "loss": 2.0607, + "step": 12579500 + }, + { + "epoch": 36.41, + "learning_rate": 3.179959012597258e-05, + "loss": 2.0874, + "step": 12580000 + }, + { + "epoch": 36.42, + "learning_rate": 3.1798866478325304e-05, + "loss": 2.042, + "step": 12580500 + }, + { + "epoch": 36.42, + "learning_rate": 3.1798142830678026e-05, + "loss": 2.0639, + "step": 12581000 + }, + { + "epoch": 36.42, + "learning_rate": 3.1797419183030755e-05, + "loss": 2.0736, + "step": 12581500 + }, + { + "epoch": 36.42, + "learning_rate": 3.179669553538348e-05, + "loss": 2.0681, + "step": 12582000 + }, + { + "epoch": 36.42, + "learning_rate": 3.17959718877362e-05, + "loss": 2.0664, + "step": 12582500 + }, + { + "epoch": 36.42, + "learning_rate": 3.179524824008892e-05, + "loss": 2.0801, + "step": 12583000 + }, + { + "epoch": 36.42, + "learning_rate": 3.179452459244165e-05, + "loss": 2.0522, + "step": 12583500 + }, + { + "epoch": 36.43, + "learning_rate": 3.179380094479437e-05, + "loss": 2.0806, + "step": 12584000 + }, + { + "epoch": 36.43, + "learning_rate": 3.1793077297147096e-05, + "loss": 2.0561, + "step": 12584500 + }, + { + "epoch": 36.43, + "learning_rate": 3.179235364949982e-05, + "loss": 2.0504, + "step": 12585000 + }, + { + "epoch": 36.43, + "learning_rate": 3.1791631449147833e-05, + "loss": 2.0542, + "step": 12585500 + }, + { + "epoch": 36.43, + "learning_rate": 3.1790907801500556e-05, + "loss": 2.0479, + "step": 12586000 + }, + { + "epoch": 36.43, + "learning_rate": 3.179018415385328e-05, + "loss": 2.0858, + "step": 12586500 + }, + { + "epoch": 36.43, + "learning_rate": 3.178946050620601e-05, + "loss": 2.0499, + "step": 12587000 + }, + { + "epoch": 36.44, + "learning_rate": 3.178873685855873e-05, + "loss": 2.0841, + "step": 12587500 + }, + { + "epoch": 36.44, + "learning_rate": 3.178801321091145e-05, + "loss": 2.0682, + "step": 12588000 + }, + { + "epoch": 36.44, + "learning_rate": 3.1787289563264174e-05, + "loss": 2.0732, + "step": 12588500 + }, + { + "epoch": 36.44, + "learning_rate": 3.1786565915616896e-05, + "loss": 2.0696, + "step": 12589000 + }, + { + "epoch": 36.44, + "learning_rate": 3.178584371526491e-05, + "loss": 2.073, + "step": 12589500 + }, + { + "epoch": 36.44, + "learning_rate": 3.1785120067617634e-05, + "loss": 2.0417, + "step": 12590000 + }, + { + "epoch": 36.44, + "learning_rate": 3.178439641997036e-05, + "loss": 2.0772, + "step": 12590500 + }, + { + "epoch": 36.45, + "learning_rate": 3.1783672772323085e-05, + "loss": 2.0733, + "step": 12591000 + }, + { + "epoch": 36.45, + "learning_rate": 3.178295057197111e-05, + "loss": 2.0526, + "step": 12591500 + }, + { + "epoch": 36.45, + "learning_rate": 3.178222692432383e-05, + "loss": 2.0773, + "step": 12592000 + }, + { + "epoch": 36.45, + "learning_rate": 3.178150327667655e-05, + "loss": 2.0613, + "step": 12592500 + }, + { + "epoch": 36.45, + "learning_rate": 3.1780779629029274e-05, + "loss": 2.0955, + "step": 12593000 + }, + { + "epoch": 36.45, + "learning_rate": 3.178005742867729e-05, + "loss": 2.0782, + "step": 12593500 + }, + { + "epoch": 36.45, + "learning_rate": 3.177933378103001e-05, + "loss": 2.0618, + "step": 12594000 + }, + { + "epoch": 36.46, + "learning_rate": 3.177861158067803e-05, + "loss": 2.051, + "step": 12594500 + }, + { + "epoch": 36.46, + "learning_rate": 3.177788938032605e-05, + "loss": 2.051, + "step": 12595000 + }, + { + "epoch": 36.46, + "learning_rate": 3.177716573267877e-05, + "loss": 2.0485, + "step": 12595500 + }, + { + "epoch": 36.46, + "learning_rate": 3.1776442085031494e-05, + "loss": 2.0578, + "step": 12596000 + }, + { + "epoch": 36.46, + "learning_rate": 3.1775718437384216e-05, + "loss": 2.0655, + "step": 12596500 + }, + { + "epoch": 36.46, + "learning_rate": 3.177499478973694e-05, + "loss": 2.0603, + "step": 12597000 + }, + { + "epoch": 36.46, + "learning_rate": 3.177427114208966e-05, + "loss": 2.0762, + "step": 12597500 + }, + { + "epoch": 36.47, + "learning_rate": 3.177354749444238e-05, + "loss": 2.0693, + "step": 12598000 + }, + { + "epoch": 36.47, + "learning_rate": 3.177282384679511e-05, + "loss": 2.0849, + "step": 12598500 + }, + { + "epoch": 36.47, + "learning_rate": 3.1772100199147834e-05, + "loss": 2.0501, + "step": 12599000 + }, + { + "epoch": 36.47, + "learning_rate": 3.1771376551500556e-05, + "loss": 2.0663, + "step": 12599500 + }, + { + "epoch": 36.47, + "learning_rate": 3.177065290385328e-05, + "loss": 2.0592, + "step": 12600000 + }, + { + "epoch": 36.47, + "learning_rate": 3.176992925620601e-05, + "loss": 2.0939, + "step": 12600500 + }, + { + "epoch": 36.47, + "learning_rate": 3.176920560855873e-05, + "loss": 2.063, + "step": 12601000 + }, + { + "epoch": 36.48, + "learning_rate": 3.176848196091145e-05, + "loss": 2.056, + "step": 12601500 + }, + { + "epoch": 36.48, + "learning_rate": 3.176775976055947e-05, + "loss": 2.0547, + "step": 12602000 + }, + { + "epoch": 36.48, + "learning_rate": 3.176703611291219e-05, + "loss": 2.0505, + "step": 12602500 + }, + { + "epoch": 36.48, + "learning_rate": 3.176631246526491e-05, + "loss": 2.0755, + "step": 12603000 + }, + { + "epoch": 36.48, + "learning_rate": 3.1765588817617634e-05, + "loss": 2.0679, + "step": 12603500 + }, + { + "epoch": 36.48, + "learning_rate": 3.176486661726566e-05, + "loss": 2.0531, + "step": 12604000 + }, + { + "epoch": 36.48, + "learning_rate": 3.176414296961838e-05, + "loss": 2.0535, + "step": 12604500 + }, + { + "epoch": 36.49, + "learning_rate": 3.17634193219711e-05, + "loss": 2.0584, + "step": 12605000 + }, + { + "epoch": 36.49, + "learning_rate": 3.1762695674323823e-05, + "loss": 2.0739, + "step": 12605500 + }, + { + "epoch": 36.49, + "learning_rate": 3.176197202667655e-05, + "loss": 2.059, + "step": 12606000 + }, + { + "epoch": 36.49, + "learning_rate": 3.1761248379029275e-05, + "loss": 2.0676, + "step": 12606500 + }, + { + "epoch": 36.49, + "learning_rate": 3.1760524731382e-05, + "loss": 2.0699, + "step": 12607000 + }, + { + "epoch": 36.49, + "learning_rate": 3.175980253103001e-05, + "loss": 2.0665, + "step": 12607500 + }, + { + "epoch": 36.49, + "learning_rate": 3.1759078883382735e-05, + "loss": 2.0838, + "step": 12608000 + }, + { + "epoch": 36.5, + "learning_rate": 3.175835523573546e-05, + "loss": 2.0707, + "step": 12608500 + }, + { + "epoch": 36.5, + "learning_rate": 3.175763158808818e-05, + "loss": 2.0458, + "step": 12609000 + }, + { + "epoch": 36.5, + "learning_rate": 3.175690794044091e-05, + "loss": 2.0703, + "step": 12609500 + }, + { + "epoch": 36.5, + "learning_rate": 3.1756185740088924e-05, + "loss": 2.0733, + "step": 12610000 + }, + { + "epoch": 36.5, + "learning_rate": 3.1755462092441646e-05, + "loss": 2.0484, + "step": 12610500 + }, + { + "epoch": 36.5, + "learning_rate": 3.175473844479437e-05, + "loss": 2.0336, + "step": 12611000 + }, + { + "epoch": 36.51, + "learning_rate": 3.175401479714709e-05, + "loss": 2.062, + "step": 12611500 + }, + { + "epoch": 36.51, + "learning_rate": 3.175329114949981e-05, + "loss": 2.0709, + "step": 12612000 + }, + { + "epoch": 36.51, + "learning_rate": 3.1752567501852535e-05, + "loss": 2.0859, + "step": 12612500 + }, + { + "epoch": 36.51, + "learning_rate": 3.1751843854205264e-05, + "loss": 2.0608, + "step": 12613000 + }, + { + "epoch": 36.51, + "learning_rate": 3.1751120206557986e-05, + "loss": 2.0506, + "step": 12613500 + }, + { + "epoch": 36.51, + "learning_rate": 3.175039655891071e-05, + "loss": 2.0451, + "step": 12614000 + }, + { + "epoch": 36.51, + "learning_rate": 3.174967291126343e-05, + "loss": 2.0358, + "step": 12614500 + }, + { + "epoch": 36.52, + "learning_rate": 3.174895071091145e-05, + "loss": 2.0668, + "step": 12615000 + }, + { + "epoch": 36.52, + "learning_rate": 3.1748227063264175e-05, + "loss": 2.0542, + "step": 12615500 + }, + { + "epoch": 36.52, + "learning_rate": 3.174750486291219e-05, + "loss": 2.0573, + "step": 12616000 + }, + { + "epoch": 36.52, + "learning_rate": 3.174678121526491e-05, + "loss": 2.0668, + "step": 12616500 + }, + { + "epoch": 36.52, + "learning_rate": 3.1746057567617635e-05, + "loss": 2.0519, + "step": 12617000 + }, + { + "epoch": 36.52, + "learning_rate": 3.174533391997036e-05, + "loss": 2.0695, + "step": 12617500 + }, + { + "epoch": 36.52, + "learning_rate": 3.1744610272323086e-05, + "loss": 2.0605, + "step": 12618000 + }, + { + "epoch": 36.53, + "learning_rate": 3.174388662467581e-05, + "loss": 2.0551, + "step": 12618500 + }, + { + "epoch": 36.53, + "learning_rate": 3.174316297702853e-05, + "loss": 2.0726, + "step": 12619000 + }, + { + "epoch": 36.53, + "learning_rate": 3.174243932938125e-05, + "loss": 2.0758, + "step": 12619500 + }, + { + "epoch": 36.53, + "learning_rate": 3.174171712902927e-05, + "loss": 2.0716, + "step": 12620000 + }, + { + "epoch": 36.53, + "learning_rate": 3.1740993481382e-05, + "loss": 2.0688, + "step": 12620500 + }, + { + "epoch": 36.53, + "learning_rate": 3.174026983373472e-05, + "loss": 2.052, + "step": 12621000 + }, + { + "epoch": 36.53, + "learning_rate": 3.173954618608744e-05, + "loss": 2.05, + "step": 12621500 + }, + { + "epoch": 36.54, + "learning_rate": 3.173882398573546e-05, + "loss": 2.0581, + "step": 12622000 + }, + { + "epoch": 36.54, + "learning_rate": 3.173810033808819e-05, + "loss": 2.0521, + "step": 12622500 + }, + { + "epoch": 36.54, + "learning_rate": 3.173737669044091e-05, + "loss": 2.0562, + "step": 12623000 + }, + { + "epoch": 36.54, + "learning_rate": 3.173665304279363e-05, + "loss": 2.0772, + "step": 12623500 + }, + { + "epoch": 36.54, + "learning_rate": 3.1735929395146353e-05, + "loss": 2.0666, + "step": 12624000 + }, + { + "epoch": 36.54, + "learning_rate": 3.1735205747499076e-05, + "loss": 2.0754, + "step": 12624500 + }, + { + "epoch": 36.54, + "learning_rate": 3.17344820998518e-05, + "loss": 2.0739, + "step": 12625000 + }, + { + "epoch": 36.55, + "learning_rate": 3.1733759899499813e-05, + "loss": 2.0664, + "step": 12625500 + }, + { + "epoch": 36.55, + "learning_rate": 3.1733036251852536e-05, + "loss": 2.0523, + "step": 12626000 + }, + { + "epoch": 36.55, + "learning_rate": 3.173231260420526e-05, + "loss": 2.0627, + "step": 12626500 + }, + { + "epoch": 36.55, + "learning_rate": 3.173159040385328e-05, + "loss": 2.0682, + "step": 12627000 + }, + { + "epoch": 36.55, + "learning_rate": 3.1730866756206e-05, + "loss": 2.0834, + "step": 12627500 + }, + { + "epoch": 36.55, + "learning_rate": 3.173014310855873e-05, + "loss": 2.0681, + "step": 12628000 + }, + { + "epoch": 36.55, + "learning_rate": 3.1729419460911454e-05, + "loss": 2.0664, + "step": 12628500 + }, + { + "epoch": 36.56, + "learning_rate": 3.172869726055947e-05, + "loss": 2.0528, + "step": 12629000 + }, + { + "epoch": 36.56, + "learning_rate": 3.172797361291219e-05, + "loss": 2.0527, + "step": 12629500 + }, + { + "epoch": 36.56, + "learning_rate": 3.1727249965264914e-05, + "loss": 2.0463, + "step": 12630000 + }, + { + "epoch": 36.56, + "learning_rate": 3.1726526317617636e-05, + "loss": 2.0894, + "step": 12630500 + }, + { + "epoch": 36.56, + "learning_rate": 3.172580411726566e-05, + "loss": 2.0656, + "step": 12631000 + }, + { + "epoch": 36.56, + "learning_rate": 3.172508046961838e-05, + "loss": 2.0634, + "step": 12631500 + }, + { + "epoch": 36.56, + "learning_rate": 3.17243568219711e-05, + "loss": 2.0582, + "step": 12632000 + }, + { + "epoch": 36.57, + "learning_rate": 3.1723633174323825e-05, + "loss": 2.0577, + "step": 12632500 + }, + { + "epoch": 36.57, + "learning_rate": 3.172291097397184e-05, + "loss": 2.0799, + "step": 12633000 + }, + { + "epoch": 36.57, + "learning_rate": 3.172218732632456e-05, + "loss": 2.0579, + "step": 12633500 + }, + { + "epoch": 36.57, + "learning_rate": 3.1721463678677285e-05, + "loss": 2.0683, + "step": 12634000 + }, + { + "epoch": 36.57, + "learning_rate": 3.1720740031030014e-05, + "loss": 2.0944, + "step": 12634500 + }, + { + "epoch": 36.57, + "learning_rate": 3.1720016383382736e-05, + "loss": 2.0434, + "step": 12635000 + }, + { + "epoch": 36.57, + "learning_rate": 3.171929273573546e-05, + "loss": 2.056, + "step": 12635500 + }, + { + "epoch": 36.58, + "learning_rate": 3.171856908808819e-05, + "loss": 2.0588, + "step": 12636000 + }, + { + "epoch": 36.58, + "learning_rate": 3.17178468877362e-05, + "loss": 2.0727, + "step": 12636500 + }, + { + "epoch": 36.58, + "learning_rate": 3.1717123240088925e-05, + "loss": 2.0743, + "step": 12637000 + }, + { + "epoch": 36.58, + "learning_rate": 3.171639959244165e-05, + "loss": 2.0527, + "step": 12637500 + }, + { + "epoch": 36.58, + "learning_rate": 3.171567594479437e-05, + "loss": 2.0651, + "step": 12638000 + }, + { + "epoch": 36.58, + "learning_rate": 3.171495229714709e-05, + "loss": 2.0901, + "step": 12638500 + }, + { + "epoch": 36.58, + "learning_rate": 3.1714230096795114e-05, + "loss": 2.0665, + "step": 12639000 + }, + { + "epoch": 36.59, + "learning_rate": 3.1713506449147837e-05, + "loss": 2.0571, + "step": 12639500 + }, + { + "epoch": 36.59, + "learning_rate": 3.171278280150056e-05, + "loss": 2.0418, + "step": 12640000 + }, + { + "epoch": 36.59, + "learning_rate": 3.171205915385328e-05, + "loss": 2.0524, + "step": 12640500 + }, + { + "epoch": 36.59, + "learning_rate": 3.1711335506206e-05, + "loss": 2.0844, + "step": 12641000 + }, + { + "epoch": 36.59, + "learning_rate": 3.1710611858558726e-05, + "loss": 2.0399, + "step": 12641500 + }, + { + "epoch": 36.59, + "learning_rate": 3.170988821091145e-05, + "loss": 2.0801, + "step": 12642000 + }, + { + "epoch": 36.59, + "learning_rate": 3.170916456326417e-05, + "loss": 2.0806, + "step": 12642500 + }, + { + "epoch": 36.6, + "learning_rate": 3.17084409156169e-05, + "loss": 2.0713, + "step": 12643000 + }, + { + "epoch": 36.6, + "learning_rate": 3.170771726796962e-05, + "loss": 2.0473, + "step": 12643500 + }, + { + "epoch": 36.6, + "learning_rate": 3.170699506761764e-05, + "loss": 2.0453, + "step": 12644000 + }, + { + "epoch": 36.6, + "learning_rate": 3.1706271419970366e-05, + "loss": 2.063, + "step": 12644500 + }, + { + "epoch": 36.6, + "learning_rate": 3.170554777232309e-05, + "loss": 2.0704, + "step": 12645000 + }, + { + "epoch": 36.6, + "learning_rate": 3.170482412467581e-05, + "loss": 2.0672, + "step": 12645500 + }, + { + "epoch": 36.6, + "learning_rate": 3.1704101924323826e-05, + "loss": 2.0567, + "step": 12646000 + }, + { + "epoch": 36.61, + "learning_rate": 3.170337972397184e-05, + "loss": 2.0655, + "step": 12646500 + }, + { + "epoch": 36.61, + "learning_rate": 3.1702656076324564e-05, + "loss": 2.0744, + "step": 12647000 + }, + { + "epoch": 36.61, + "learning_rate": 3.1701932428677286e-05, + "loss": 2.0839, + "step": 12647500 + }, + { + "epoch": 36.61, + "learning_rate": 3.1701208781030015e-05, + "loss": 2.0484, + "step": 12648000 + }, + { + "epoch": 36.61, + "learning_rate": 3.170048658067803e-05, + "loss": 2.0819, + "step": 12648500 + }, + { + "epoch": 36.61, + "learning_rate": 3.169976293303075e-05, + "loss": 2.0769, + "step": 12649000 + }, + { + "epoch": 36.62, + "learning_rate": 3.1699039285383475e-05, + "loss": 2.0761, + "step": 12649500 + }, + { + "epoch": 36.62, + "learning_rate": 3.16983156377362e-05, + "loss": 2.0794, + "step": 12650000 + }, + { + "epoch": 36.62, + "learning_rate": 3.169759199008892e-05, + "loss": 2.0877, + "step": 12650500 + }, + { + "epoch": 36.62, + "learning_rate": 3.169686834244165e-05, + "loss": 2.0623, + "step": 12651000 + }, + { + "epoch": 36.62, + "learning_rate": 3.169614469479437e-05, + "loss": 2.0667, + "step": 12651500 + }, + { + "epoch": 36.62, + "learning_rate": 3.169542104714709e-05, + "loss": 2.0622, + "step": 12652000 + }, + { + "epoch": 36.62, + "learning_rate": 3.1694697399499815e-05, + "loss": 2.0746, + "step": 12652500 + }, + { + "epoch": 36.63, + "learning_rate": 3.169397375185254e-05, + "loss": 2.0573, + "step": 12653000 + }, + { + "epoch": 36.63, + "learning_rate": 3.1693250104205266e-05, + "loss": 2.0526, + "step": 12653500 + }, + { + "epoch": 36.63, + "learning_rate": 3.169252645655799e-05, + "loss": 2.0641, + "step": 12654000 + }, + { + "epoch": 36.63, + "learning_rate": 3.169180280891071e-05, + "loss": 2.0551, + "step": 12654500 + }, + { + "epoch": 36.63, + "learning_rate": 3.169107916126343e-05, + "loss": 2.0704, + "step": 12655000 + }, + { + "epoch": 36.63, + "learning_rate": 3.1690355513616155e-05, + "loss": 2.067, + "step": 12655500 + }, + { + "epoch": 36.63, + "learning_rate": 3.168963186596888e-05, + "loss": 2.0509, + "step": 12656000 + }, + { + "epoch": 36.64, + "learning_rate": 3.168890966561689e-05, + "loss": 2.0597, + "step": 12656500 + }, + { + "epoch": 36.64, + "learning_rate": 3.1688186017969615e-05, + "loss": 2.0444, + "step": 12657000 + }, + { + "epoch": 36.64, + "learning_rate": 3.168746237032234e-05, + "loss": 2.0595, + "step": 12657500 + }, + { + "epoch": 36.64, + "learning_rate": 3.1686738722675066e-05, + "loss": 2.0894, + "step": 12658000 + }, + { + "epoch": 36.64, + "learning_rate": 3.168601507502779e-05, + "loss": 2.0656, + "step": 12658500 + }, + { + "epoch": 36.64, + "learning_rate": 3.168529287467581e-05, + "loss": 2.0933, + "step": 12659000 + }, + { + "epoch": 36.64, + "learning_rate": 3.1684570674323827e-05, + "loss": 2.0664, + "step": 12659500 + }, + { + "epoch": 36.65, + "learning_rate": 3.168384702667655e-05, + "loss": 2.0482, + "step": 12660000 + }, + { + "epoch": 36.65, + "learning_rate": 3.1683124826324564e-05, + "loss": 2.0539, + "step": 12660500 + }, + { + "epoch": 36.65, + "learning_rate": 3.168240262597259e-05, + "loss": 2.0585, + "step": 12661000 + }, + { + "epoch": 36.65, + "learning_rate": 3.168167897832531e-05, + "loss": 2.0746, + "step": 12661500 + }, + { + "epoch": 36.65, + "learning_rate": 3.168095533067803e-05, + "loss": 2.0833, + "step": 12662000 + }, + { + "epoch": 36.65, + "learning_rate": 3.1680231683030753e-05, + "loss": 2.0802, + "step": 12662500 + }, + { + "epoch": 36.65, + "learning_rate": 3.1679508035383476e-05, + "loss": 2.0608, + "step": 12663000 + }, + { + "epoch": 36.66, + "learning_rate": 3.16787843877362e-05, + "loss": 2.0587, + "step": 12663500 + }, + { + "epoch": 36.66, + "learning_rate": 3.167806074008892e-05, + "loss": 2.0797, + "step": 12664000 + }, + { + "epoch": 36.66, + "learning_rate": 3.167733709244164e-05, + "loss": 2.082, + "step": 12664500 + }, + { + "epoch": 36.66, + "learning_rate": 3.1676613444794365e-05, + "loss": 2.0864, + "step": 12665000 + }, + { + "epoch": 36.66, + "learning_rate": 3.167589124444239e-05, + "loss": 2.0772, + "step": 12665500 + }, + { + "epoch": 36.66, + "learning_rate": 3.1675167596795116e-05, + "loss": 2.067, + "step": 12666000 + }, + { + "epoch": 36.66, + "learning_rate": 3.167444394914784e-05, + "loss": 2.0584, + "step": 12666500 + }, + { + "epoch": 36.67, + "learning_rate": 3.167372030150056e-05, + "loss": 2.0672, + "step": 12667000 + }, + { + "epoch": 36.67, + "learning_rate": 3.167299665385328e-05, + "loss": 2.0786, + "step": 12667500 + }, + { + "epoch": 36.67, + "learning_rate": 3.1672273006206005e-05, + "loss": 2.0602, + "step": 12668000 + }, + { + "epoch": 36.67, + "learning_rate": 3.167155080585402e-05, + "loss": 2.0625, + "step": 12668500 + }, + { + "epoch": 36.67, + "learning_rate": 3.167082860550204e-05, + "loss": 2.0763, + "step": 12669000 + }, + { + "epoch": 36.67, + "learning_rate": 3.1670104957854765e-05, + "loss": 2.1129, + "step": 12669500 + }, + { + "epoch": 36.67, + "learning_rate": 3.166938131020749e-05, + "loss": 2.074, + "step": 12670000 + }, + { + "epoch": 36.68, + "learning_rate": 3.166865766256021e-05, + "loss": 2.0682, + "step": 12670500 + }, + { + "epoch": 36.68, + "learning_rate": 3.166793401491293e-05, + "loss": 2.0589, + "step": 12671000 + }, + { + "epoch": 36.68, + "learning_rate": 3.1667210367265654e-05, + "loss": 2.0605, + "step": 12671500 + }, + { + "epoch": 36.68, + "learning_rate": 3.1666486719618376e-05, + "loss": 2.0575, + "step": 12672000 + }, + { + "epoch": 36.68, + "learning_rate": 3.16657630719711e-05, + "loss": 2.0631, + "step": 12672500 + }, + { + "epoch": 36.68, + "learning_rate": 3.166503942432382e-05, + "loss": 2.067, + "step": 12673000 + }, + { + "epoch": 36.68, + "learning_rate": 3.166431722397184e-05, + "loss": 2.0627, + "step": 12673500 + }, + { + "epoch": 36.69, + "learning_rate": 3.1663593576324565e-05, + "loss": 2.0724, + "step": 12674000 + }, + { + "epoch": 36.69, + "learning_rate": 3.1662869928677294e-05, + "loss": 2.0721, + "step": 12674500 + }, + { + "epoch": 36.69, + "learning_rate": 3.1662146281030016e-05, + "loss": 2.0515, + "step": 12675000 + }, + { + "epoch": 36.69, + "learning_rate": 3.166142263338274e-05, + "loss": 2.074, + "step": 12675500 + }, + { + "epoch": 36.69, + "learning_rate": 3.166069898573546e-05, + "loss": 2.0665, + "step": 12676000 + }, + { + "epoch": 36.69, + "learning_rate": 3.165997533808818e-05, + "loss": 2.0734, + "step": 12676500 + }, + { + "epoch": 36.69, + "learning_rate": 3.1659251690440905e-05, + "loss": 2.0627, + "step": 12677000 + }, + { + "epoch": 36.7, + "learning_rate": 3.165852804279363e-05, + "loss": 2.0897, + "step": 12677500 + }, + { + "epoch": 36.7, + "learning_rate": 3.165780584244164e-05, + "loss": 2.0763, + "step": 12678000 + }, + { + "epoch": 36.7, + "learning_rate": 3.1657082194794365e-05, + "loss": 2.0686, + "step": 12678500 + }, + { + "epoch": 36.7, + "learning_rate": 3.1656358547147094e-05, + "loss": 2.0831, + "step": 12679000 + }, + { + "epoch": 36.7, + "learning_rate": 3.165563634679511e-05, + "loss": 2.0902, + "step": 12679500 + }, + { + "epoch": 36.7, + "learning_rate": 3.165491269914783e-05, + "loss": 2.0512, + "step": 12680000 + }, + { + "epoch": 36.7, + "learning_rate": 3.1654189051500554e-05, + "loss": 2.068, + "step": 12680500 + }, + { + "epoch": 36.71, + "learning_rate": 3.1653465403853283e-05, + "loss": 2.0822, + "step": 12681000 + }, + { + "epoch": 36.71, + "learning_rate": 3.1652741756206006e-05, + "loss": 2.0591, + "step": 12681500 + }, + { + "epoch": 36.71, + "learning_rate": 3.165201810855873e-05, + "loss": 2.0488, + "step": 12682000 + }, + { + "epoch": 36.71, + "learning_rate": 3.165129446091145e-05, + "loss": 2.0934, + "step": 12682500 + }, + { + "epoch": 36.71, + "learning_rate": 3.165057081326417e-05, + "loss": 2.0733, + "step": 12683000 + }, + { + "epoch": 36.71, + "learning_rate": 3.1649848612912195e-05, + "loss": 2.0722, + "step": 12683500 + }, + { + "epoch": 36.71, + "learning_rate": 3.164912496526492e-05, + "loss": 2.0554, + "step": 12684000 + }, + { + "epoch": 36.72, + "learning_rate": 3.164840131761764e-05, + "loss": 2.0537, + "step": 12684500 + }, + { + "epoch": 36.72, + "learning_rate": 3.164767766997036e-05, + "loss": 2.0735, + "step": 12685000 + }, + { + "epoch": 36.72, + "learning_rate": 3.1646954022323084e-05, + "loss": 2.0683, + "step": 12685500 + }, + { + "epoch": 36.72, + "learning_rate": 3.1646230374675806e-05, + "loss": 2.0938, + "step": 12686000 + }, + { + "epoch": 36.72, + "learning_rate": 3.164550817432382e-05, + "loss": 2.0483, + "step": 12686500 + }, + { + "epoch": 36.72, + "learning_rate": 3.1644784526676544e-05, + "loss": 2.0652, + "step": 12687000 + }, + { + "epoch": 36.73, + "learning_rate": 3.1644060879029266e-05, + "loss": 2.0851, + "step": 12687500 + }, + { + "epoch": 36.73, + "learning_rate": 3.1643337231381995e-05, + "loss": 2.0602, + "step": 12688000 + }, + { + "epoch": 36.73, + "learning_rate": 3.164261647832531e-05, + "loss": 2.0715, + "step": 12688500 + }, + { + "epoch": 36.73, + "learning_rate": 3.164189283067803e-05, + "loss": 2.0778, + "step": 12689000 + }, + { + "epoch": 36.73, + "learning_rate": 3.164117063032605e-05, + "loss": 2.0604, + "step": 12689500 + }, + { + "epoch": 36.73, + "learning_rate": 3.164044698267877e-05, + "loss": 2.055, + "step": 12690000 + }, + { + "epoch": 36.73, + "learning_rate": 3.163972333503149e-05, + "loss": 2.0502, + "step": 12690500 + }, + { + "epoch": 36.74, + "learning_rate": 3.163899968738422e-05, + "loss": 2.0592, + "step": 12691000 + }, + { + "epoch": 36.74, + "learning_rate": 3.1638276039736944e-05, + "loss": 2.0644, + "step": 12691500 + }, + { + "epoch": 36.74, + "learning_rate": 3.1637552392089666e-05, + "loss": 2.098, + "step": 12692000 + }, + { + "epoch": 36.74, + "learning_rate": 3.163682874444239e-05, + "loss": 2.0938, + "step": 12692500 + }, + { + "epoch": 36.74, + "learning_rate": 3.163610509679511e-05, + "loss": 2.0596, + "step": 12693000 + }, + { + "epoch": 36.74, + "learning_rate": 3.1635382896443126e-05, + "loss": 2.096, + "step": 12693500 + }, + { + "epoch": 36.74, + "learning_rate": 3.163465924879585e-05, + "loss": 2.0509, + "step": 12694000 + }, + { + "epoch": 36.75, + "learning_rate": 3.163393560114857e-05, + "loss": 2.0958, + "step": 12694500 + }, + { + "epoch": 36.75, + "learning_rate": 3.163321195350129e-05, + "loss": 2.0719, + "step": 12695000 + }, + { + "epoch": 36.75, + "learning_rate": 3.163248830585402e-05, + "loss": 2.0543, + "step": 12695500 + }, + { + "epoch": 36.75, + "learning_rate": 3.1631766105502044e-05, + "loss": 2.0775, + "step": 12696000 + }, + { + "epoch": 36.75, + "learning_rate": 3.163104390515006e-05, + "loss": 2.0577, + "step": 12696500 + }, + { + "epoch": 36.75, + "learning_rate": 3.163032025750278e-05, + "loss": 2.0538, + "step": 12697000 + }, + { + "epoch": 36.75, + "learning_rate": 3.1629596609855504e-05, + "loss": 2.0926, + "step": 12697500 + }, + { + "epoch": 36.76, + "learning_rate": 3.1628872962208227e-05, + "loss": 2.0751, + "step": 12698000 + }, + { + "epoch": 36.76, + "learning_rate": 3.162815076185625e-05, + "loss": 2.0637, + "step": 12698500 + }, + { + "epoch": 36.76, + "learning_rate": 3.162742711420897e-05, + "loss": 2.0587, + "step": 12699000 + }, + { + "epoch": 36.76, + "learning_rate": 3.1626703466561693e-05, + "loss": 2.0477, + "step": 12699500 + }, + { + "epoch": 36.76, + "learning_rate": 3.1625979818914416e-05, + "loss": 2.1098, + "step": 12700000 + }, + { + "epoch": 36.76, + "learning_rate": 3.162525617126714e-05, + "loss": 2.0774, + "step": 12700500 + }, + { + "epoch": 36.76, + "learning_rate": 3.1624533970915153e-05, + "loss": 2.0933, + "step": 12701000 + }, + { + "epoch": 36.77, + "learning_rate": 3.1623810323267876e-05, + "loss": 2.0576, + "step": 12701500 + }, + { + "epoch": 36.77, + "learning_rate": 3.16230866756206e-05, + "loss": 2.0706, + "step": 12702000 + }, + { + "epoch": 36.77, + "learning_rate": 3.162236302797332e-05, + "loss": 2.0675, + "step": 12702500 + }, + { + "epoch": 36.77, + "learning_rate": 3.162163938032605e-05, + "loss": 2.0766, + "step": 12703000 + }, + { + "epoch": 36.77, + "learning_rate": 3.162091573267877e-05, + "loss": 2.052, + "step": 12703500 + }, + { + "epoch": 36.77, + "learning_rate": 3.1620193532326794e-05, + "loss": 2.0706, + "step": 12704000 + }, + { + "epoch": 36.77, + "learning_rate": 3.1619469884679516e-05, + "loss": 2.0435, + "step": 12704500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161874623703224e-05, + "loss": 2.088, + "step": 12705000 + }, + { + "epoch": 36.78, + "learning_rate": 3.161802258938496e-05, + "loss": 2.0638, + "step": 12705500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161729894173768e-05, + "loss": 2.0671, + "step": 12706000 + }, + { + "epoch": 36.78, + "learning_rate": 3.1616575294090405e-05, + "loss": 2.0562, + "step": 12706500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161585164644313e-05, + "loss": 2.0838, + "step": 12707000 + }, + { + "epoch": 36.78, + "learning_rate": 3.161512799879585e-05, + "loss": 2.0816, + "step": 12707500 + }, + { + "epoch": 36.78, + "learning_rate": 3.161440435114857e-05, + "loss": 2.0669, + "step": 12708000 + }, + { + "epoch": 36.79, + "learning_rate": 3.1613680703501294e-05, + "loss": 2.0566, + "step": 12708500 + }, + { + "epoch": 36.79, + "learning_rate": 3.161295705585402e-05, + "loss": 2.0692, + "step": 12709000 + }, + { + "epoch": 36.79, + "learning_rate": 3.1612233408206745e-05, + "loss": 2.048, + "step": 12709500 + }, + { + "epoch": 36.79, + "learning_rate": 3.161151120785476e-05, + "loss": 2.0985, + "step": 12710000 + }, + { + "epoch": 36.79, + "learning_rate": 3.1610789007502776e-05, + "loss": 2.0501, + "step": 12710500 + }, + { + "epoch": 36.79, + "learning_rate": 3.1610065359855505e-05, + "loss": 2.0688, + "step": 12711000 + }, + { + "epoch": 36.79, + "learning_rate": 3.160934315950352e-05, + "loss": 2.0719, + "step": 12711500 + }, + { + "epoch": 36.8, + "learning_rate": 3.160861951185625e-05, + "loss": 2.0785, + "step": 12712000 + }, + { + "epoch": 36.8, + "learning_rate": 3.160789586420897e-05, + "loss": 2.0934, + "step": 12712500 + }, + { + "epoch": 36.8, + "learning_rate": 3.1607172216561694e-05, + "loss": 2.0412, + "step": 12713000 + }, + { + "epoch": 36.8, + "learning_rate": 3.1606448568914416e-05, + "loss": 2.0508, + "step": 12713500 + }, + { + "epoch": 36.8, + "learning_rate": 3.160572492126714e-05, + "loss": 2.0788, + "step": 12714000 + }, + { + "epoch": 36.8, + "learning_rate": 3.160500127361986e-05, + "loss": 2.0706, + "step": 12714500 + }, + { + "epoch": 36.8, + "learning_rate": 3.160427762597258e-05, + "loss": 2.0825, + "step": 12715000 + }, + { + "epoch": 36.81, + "learning_rate": 3.1603553978325305e-05, + "loss": 2.0819, + "step": 12715500 + }, + { + "epoch": 36.81, + "learning_rate": 3.160283033067803e-05, + "loss": 2.0483, + "step": 12716000 + }, + { + "epoch": 36.81, + "learning_rate": 3.160210668303075e-05, + "loss": 2.0716, + "step": 12716500 + }, + { + "epoch": 36.81, + "learning_rate": 3.160138303538347e-05, + "loss": 2.0915, + "step": 12717000 + }, + { + "epoch": 36.81, + "learning_rate": 3.16006593877362e-05, + "loss": 2.0612, + "step": 12717500 + }, + { + "epoch": 36.81, + "learning_rate": 3.159993718738422e-05, + "loss": 2.0707, + "step": 12718000 + }, + { + "epoch": 36.81, + "learning_rate": 3.1599213539736946e-05, + "loss": 2.0501, + "step": 12718500 + }, + { + "epoch": 36.82, + "learning_rate": 3.159848989208967e-05, + "loss": 2.066, + "step": 12719000 + }, + { + "epoch": 36.82, + "learning_rate": 3.159776624444239e-05, + "loss": 2.0651, + "step": 12719500 + }, + { + "epoch": 36.82, + "learning_rate": 3.159704259679511e-05, + "loss": 2.0551, + "step": 12720000 + }, + { + "epoch": 36.82, + "learning_rate": 3.1596318949147835e-05, + "loss": 2.106, + "step": 12720500 + }, + { + "epoch": 36.82, + "learning_rate": 3.159559530150056e-05, + "loss": 2.0619, + "step": 12721000 + }, + { + "epoch": 36.82, + "learning_rate": 3.159487310114857e-05, + "loss": 2.1086, + "step": 12721500 + }, + { + "epoch": 36.82, + "learning_rate": 3.1594150900796595e-05, + "loss": 2.0686, + "step": 12722000 + }, + { + "epoch": 36.83, + "learning_rate": 3.159342725314932e-05, + "loss": 2.0675, + "step": 12722500 + }, + { + "epoch": 36.83, + "learning_rate": 3.159270360550204e-05, + "loss": 2.0759, + "step": 12723000 + }, + { + "epoch": 36.83, + "learning_rate": 3.159197995785476e-05, + "loss": 2.0939, + "step": 12723500 + }, + { + "epoch": 36.83, + "learning_rate": 3.1591256310207484e-05, + "loss": 2.0779, + "step": 12724000 + }, + { + "epoch": 36.83, + "learning_rate": 3.1590532662560206e-05, + "loss": 2.0706, + "step": 12724500 + }, + { + "epoch": 36.83, + "learning_rate": 3.158980901491293e-05, + "loss": 2.0687, + "step": 12725000 + }, + { + "epoch": 36.84, + "learning_rate": 3.158908536726565e-05, + "loss": 2.0627, + "step": 12725500 + }, + { + "epoch": 36.84, + "learning_rate": 3.158836171961838e-05, + "loss": 2.0562, + "step": 12726000 + }, + { + "epoch": 36.84, + "learning_rate": 3.15876395192664e-05, + "loss": 2.0708, + "step": 12726500 + }, + { + "epoch": 36.84, + "learning_rate": 3.1586915871619124e-05, + "loss": 2.08, + "step": 12727000 + }, + { + "epoch": 36.84, + "learning_rate": 3.1586192223971846e-05, + "loss": 2.0824, + "step": 12727500 + }, + { + "epoch": 36.84, + "learning_rate": 3.158546857632457e-05, + "loss": 2.0634, + "step": 12728000 + }, + { + "epoch": 36.84, + "learning_rate": 3.1584746375972584e-05, + "loss": 2.0767, + "step": 12728500 + }, + { + "epoch": 36.85, + "learning_rate": 3.15840241756206e-05, + "loss": 2.0821, + "step": 12729000 + }, + { + "epoch": 36.85, + "learning_rate": 3.158330052797333e-05, + "loss": 2.0752, + "step": 12729500 + }, + { + "epoch": 36.85, + "learning_rate": 3.1582578327621344e-05, + "loss": 2.0675, + "step": 12730000 + }, + { + "epoch": 36.85, + "learning_rate": 3.1581854679974066e-05, + "loss": 2.0592, + "step": 12730500 + }, + { + "epoch": 36.85, + "learning_rate": 3.158113103232679e-05, + "loss": 2.0781, + "step": 12731000 + }, + { + "epoch": 36.85, + "learning_rate": 3.158040738467951e-05, + "loss": 2.055, + "step": 12731500 + }, + { + "epoch": 36.85, + "learning_rate": 3.157968373703223e-05, + "loss": 2.0775, + "step": 12732000 + }, + { + "epoch": 36.86, + "learning_rate": 3.1578960089384955e-05, + "loss": 2.0791, + "step": 12732500 + }, + { + "epoch": 36.86, + "learning_rate": 3.157823644173768e-05, + "loss": 2.0744, + "step": 12733000 + }, + { + "epoch": 36.86, + "learning_rate": 3.1577512794090406e-05, + "loss": 2.0881, + "step": 12733500 + }, + { + "epoch": 36.86, + "learning_rate": 3.157678914644313e-05, + "loss": 2.0961, + "step": 12734000 + }, + { + "epoch": 36.86, + "learning_rate": 3.157606549879585e-05, + "loss": 2.0858, + "step": 12734500 + }, + { + "epoch": 36.86, + "learning_rate": 3.157534185114858e-05, + "loss": 2.0725, + "step": 12735000 + }, + { + "epoch": 36.86, + "learning_rate": 3.15746182035013e-05, + "loss": 2.073, + "step": 12735500 + }, + { + "epoch": 36.87, + "learning_rate": 3.1573894555854024e-05, + "loss": 2.0817, + "step": 12736000 + }, + { + "epoch": 36.87, + "learning_rate": 3.157317090820675e-05, + "loss": 2.0584, + "step": 12736500 + }, + { + "epoch": 36.87, + "learning_rate": 3.157244726055947e-05, + "loss": 2.0604, + "step": 12737000 + }, + { + "epoch": 36.87, + "learning_rate": 3.1571725060207484e-05, + "loss": 2.0869, + "step": 12737500 + }, + { + "epoch": 36.87, + "learning_rate": 3.157100141256021e-05, + "loss": 2.0552, + "step": 12738000 + }, + { + "epoch": 36.87, + "learning_rate": 3.157027776491293e-05, + "loss": 2.0627, + "step": 12738500 + }, + { + "epoch": 36.87, + "learning_rate": 3.156955411726565e-05, + "loss": 2.037, + "step": 12739000 + }, + { + "epoch": 36.88, + "learning_rate": 3.156883046961838e-05, + "loss": 2.0663, + "step": 12739500 + }, + { + "epoch": 36.88, + "learning_rate": 3.1568108269266396e-05, + "loss": 2.0965, + "step": 12740000 + }, + { + "epoch": 36.88, + "learning_rate": 3.156738462161912e-05, + "loss": 2.0379, + "step": 12740500 + }, + { + "epoch": 36.88, + "learning_rate": 3.156666097397185e-05, + "loss": 2.0607, + "step": 12741000 + }, + { + "epoch": 36.88, + "learning_rate": 3.156593732632457e-05, + "loss": 2.0599, + "step": 12741500 + }, + { + "epoch": 36.88, + "learning_rate": 3.156521367867729e-05, + "loss": 2.0616, + "step": 12742000 + }, + { + "epoch": 36.88, + "learning_rate": 3.1564490031030014e-05, + "loss": 2.0908, + "step": 12742500 + }, + { + "epoch": 36.89, + "learning_rate": 3.1563766383382736e-05, + "loss": 2.054, + "step": 12743000 + }, + { + "epoch": 36.89, + "learning_rate": 3.156304273573546e-05, + "loss": 2.0677, + "step": 12743500 + }, + { + "epoch": 36.89, + "learning_rate": 3.156231908808818e-05, + "loss": 2.0786, + "step": 12744000 + }, + { + "epoch": 36.89, + "learning_rate": 3.15615954404409e-05, + "loss": 2.1018, + "step": 12744500 + }, + { + "epoch": 36.89, + "learning_rate": 3.1560871792793625e-05, + "loss": 2.0509, + "step": 12745000 + }, + { + "epoch": 36.89, + "learning_rate": 3.1560148145146354e-05, + "loss": 2.0691, + "step": 12745500 + }, + { + "epoch": 36.89, + "learning_rate": 3.1559424497499076e-05, + "loss": 2.0662, + "step": 12746000 + }, + { + "epoch": 36.9, + "learning_rate": 3.155870229714709e-05, + "loss": 2.0704, + "step": 12746500 + }, + { + "epoch": 36.9, + "learning_rate": 3.1557978649499814e-05, + "loss": 2.0782, + "step": 12747000 + }, + { + "epoch": 36.9, + "learning_rate": 3.1557255001852536e-05, + "loss": 2.069, + "step": 12747500 + }, + { + "epoch": 36.9, + "learning_rate": 3.1556531354205265e-05, + "loss": 2.11, + "step": 12748000 + }, + { + "epoch": 36.9, + "learning_rate": 3.155580915385328e-05, + "loss": 2.0636, + "step": 12748500 + }, + { + "epoch": 36.9, + "learning_rate": 3.1555085506206e-05, + "loss": 2.0516, + "step": 12749000 + }, + { + "epoch": 36.9, + "learning_rate": 3.155436185855873e-05, + "loss": 2.0641, + "step": 12749500 + }, + { + "epoch": 36.91, + "learning_rate": 3.1553638210911454e-05, + "loss": 2.0642, + "step": 12750000 + }, + { + "epoch": 36.91, + "learning_rate": 3.155291601055947e-05, + "loss": 2.0694, + "step": 12750500 + }, + { + "epoch": 36.91, + "learning_rate": 3.155219236291219e-05, + "loss": 2.049, + "step": 12751000 + }, + { + "epoch": 36.91, + "learning_rate": 3.1551468715264914e-05, + "loss": 2.0668, + "step": 12751500 + }, + { + "epoch": 36.91, + "learning_rate": 3.1550745067617636e-05, + "loss": 2.0779, + "step": 12752000 + }, + { + "epoch": 36.91, + "learning_rate": 3.155002141997036e-05, + "loss": 2.0885, + "step": 12752500 + }, + { + "epoch": 36.91, + "learning_rate": 3.154929921961838e-05, + "loss": 2.0349, + "step": 12753000 + }, + { + "epoch": 36.92, + "learning_rate": 3.15485755719711e-05, + "loss": 2.0686, + "step": 12753500 + }, + { + "epoch": 36.92, + "learning_rate": 3.154785337161912e-05, + "loss": 2.0688, + "step": 12754000 + }, + { + "epoch": 36.92, + "learning_rate": 3.1547131171267134e-05, + "loss": 2.0775, + "step": 12754500 + }, + { + "epoch": 36.92, + "learning_rate": 3.1546407523619857e-05, + "loss": 2.0653, + "step": 12755000 + }, + { + "epoch": 36.92, + "learning_rate": 3.154568387597258e-05, + "loss": 2.0572, + "step": 12755500 + }, + { + "epoch": 36.92, + "learning_rate": 3.154496022832531e-05, + "loss": 2.0532, + "step": 12756000 + }, + { + "epoch": 36.92, + "learning_rate": 3.154423658067803e-05, + "loss": 2.0693, + "step": 12756500 + }, + { + "epoch": 36.93, + "learning_rate": 3.154351293303075e-05, + "loss": 2.0566, + "step": 12757000 + }, + { + "epoch": 36.93, + "learning_rate": 3.154278928538348e-05, + "loss": 2.068, + "step": 12757500 + }, + { + "epoch": 36.93, + "learning_rate": 3.1542065637736203e-05, + "loss": 2.0328, + "step": 12758000 + }, + { + "epoch": 36.93, + "learning_rate": 3.1541341990088926e-05, + "loss": 2.0955, + "step": 12758500 + }, + { + "epoch": 36.93, + "learning_rate": 3.154061834244165e-05, + "loss": 2.0796, + "step": 12759000 + }, + { + "epoch": 36.93, + "learning_rate": 3.153989469479437e-05, + "loss": 2.0867, + "step": 12759500 + }, + { + "epoch": 36.93, + "learning_rate": 3.153917104714709e-05, + "loss": 2.0888, + "step": 12760000 + }, + { + "epoch": 36.94, + "learning_rate": 3.1538447399499815e-05, + "loss": 2.0613, + "step": 12760500 + }, + { + "epoch": 36.94, + "learning_rate": 3.153772375185254e-05, + "loss": 2.0667, + "step": 12761000 + }, + { + "epoch": 36.94, + "learning_rate": 3.153700010420526e-05, + "loss": 2.0723, + "step": 12761500 + }, + { + "epoch": 36.94, + "learning_rate": 3.153627645655798e-05, + "loss": 2.0533, + "step": 12762000 + }, + { + "epoch": 36.94, + "learning_rate": 3.1535552808910704e-05, + "loss": 2.0645, + "step": 12762500 + }, + { + "epoch": 36.94, + "learning_rate": 3.153482916126343e-05, + "loss": 2.0566, + "step": 12763000 + }, + { + "epoch": 36.95, + "learning_rate": 3.1534106960911455e-05, + "loss": 2.0647, + "step": 12763500 + }, + { + "epoch": 36.95, + "learning_rate": 3.1533386207854764e-05, + "loss": 2.0874, + "step": 12764000 + }, + { + "epoch": 36.95, + "learning_rate": 3.1532662560207486e-05, + "loss": 2.0741, + "step": 12764500 + }, + { + "epoch": 36.95, + "learning_rate": 3.153193891256021e-05, + "loss": 2.0715, + "step": 12765000 + }, + { + "epoch": 36.95, + "learning_rate": 3.153121526491293e-05, + "loss": 2.0716, + "step": 12765500 + }, + { + "epoch": 36.95, + "learning_rate": 3.153049161726566e-05, + "loss": 2.0678, + "step": 12766000 + }, + { + "epoch": 36.95, + "learning_rate": 3.152976796961838e-05, + "loss": 2.0927, + "step": 12766500 + }, + { + "epoch": 36.96, + "learning_rate": 3.1529044321971104e-05, + "loss": 2.0689, + "step": 12767000 + }, + { + "epoch": 36.96, + "learning_rate": 3.1528320674323826e-05, + "loss": 2.0645, + "step": 12767500 + }, + { + "epoch": 36.96, + "learning_rate": 3.1527599921267135e-05, + "loss": 2.0771, + "step": 12768000 + }, + { + "epoch": 36.96, + "learning_rate": 3.152687627361986e-05, + "loss": 2.032, + "step": 12768500 + }, + { + "epoch": 36.96, + "learning_rate": 3.152615262597258e-05, + "loss": 2.0926, + "step": 12769000 + }, + { + "epoch": 36.96, + "learning_rate": 3.152542897832531e-05, + "loss": 2.0896, + "step": 12769500 + }, + { + "epoch": 36.96, + "learning_rate": 3.152470533067803e-05, + "loss": 2.0903, + "step": 12770000 + }, + { + "epoch": 36.97, + "learning_rate": 3.152398168303075e-05, + "loss": 2.0673, + "step": 12770500 + }, + { + "epoch": 36.97, + "learning_rate": 3.1523259482678775e-05, + "loss": 2.0557, + "step": 12771000 + }, + { + "epoch": 36.97, + "learning_rate": 3.15225358350315e-05, + "loss": 2.072, + "step": 12771500 + }, + { + "epoch": 36.97, + "learning_rate": 3.152181363467951e-05, + "loss": 2.0714, + "step": 12772000 + }, + { + "epoch": 36.97, + "learning_rate": 3.1521089987032235e-05, + "loss": 2.0792, + "step": 12772500 + }, + { + "epoch": 36.97, + "learning_rate": 3.152036633938496e-05, + "loss": 2.0703, + "step": 12773000 + }, + { + "epoch": 36.97, + "learning_rate": 3.151964269173768e-05, + "loss": 2.0501, + "step": 12773500 + }, + { + "epoch": 36.98, + "learning_rate": 3.151891904409041e-05, + "loss": 2.0818, + "step": 12774000 + }, + { + "epoch": 36.98, + "learning_rate": 3.151819539644313e-05, + "loss": 2.108, + "step": 12774500 + }, + { + "epoch": 36.98, + "learning_rate": 3.151747319609115e-05, + "loss": 2.0914, + "step": 12775000 + }, + { + "epoch": 36.98, + "learning_rate": 3.151674954844387e-05, + "loss": 2.0618, + "step": 12775500 + }, + { + "epoch": 36.98, + "learning_rate": 3.151602590079659e-05, + "loss": 2.0905, + "step": 12776000 + }, + { + "epoch": 36.98, + "learning_rate": 3.151530225314931e-05, + "loss": 2.0843, + "step": 12776500 + }, + { + "epoch": 36.98, + "learning_rate": 3.1514578605502036e-05, + "loss": 2.0828, + "step": 12777000 + }, + { + "epoch": 36.99, + "learning_rate": 3.151385495785476e-05, + "loss": 2.0532, + "step": 12777500 + }, + { + "epoch": 36.99, + "learning_rate": 3.151313131020748e-05, + "loss": 2.0684, + "step": 12778000 + }, + { + "epoch": 36.99, + "learning_rate": 3.151240766256021e-05, + "loss": 2.0754, + "step": 12778500 + }, + { + "epoch": 36.99, + "learning_rate": 3.151168401491293e-05, + "loss": 2.0595, + "step": 12779000 + }, + { + "epoch": 36.99, + "learning_rate": 3.151096036726566e-05, + "loss": 2.0687, + "step": 12779500 + }, + { + "epoch": 36.99, + "learning_rate": 3.151023671961838e-05, + "loss": 2.0687, + "step": 12780000 + }, + { + "epoch": 36.99, + "learning_rate": 3.15095145192664e-05, + "loss": 2.0732, + "step": 12780500 + }, + { + "epoch": 37.0, + "learning_rate": 3.150879087161912e-05, + "loss": 2.1001, + "step": 12781000 + }, + { + "epoch": 37.0, + "learning_rate": 3.1508068671267136e-05, + "loss": 2.0559, + "step": 12781500 + }, + { + "epoch": 37.0, + "learning_rate": 3.150734502361986e-05, + "loss": 2.0639, + "step": 12782000 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.6695824526693803, + "eval_accuracy_mlm": 0.6348121816080246, + "eval_accuracy_nsp": 0.8559096762957565, + "eval_loss": 2.1710124015808105, + "eval_runtime": 331.6656, + "eval_samples_per_second": 1315.741, + "eval_steps_per_second": 54.823, + "step": 12782464 + }, + { + "epoch": 37.0, + "learning_rate": 3.150662137597258e-05, + "loss": 2.0733, + "step": 12782500 + }, + { + "epoch": 37.0, + "learning_rate": 3.150589772832531e-05, + "loss": 2.0443, + "step": 12783000 + }, + { + "epoch": 37.0, + "learning_rate": 3.150517408067803e-05, + "loss": 2.0477, + "step": 12783500 + }, + { + "epoch": 37.0, + "learning_rate": 3.1504450433030754e-05, + "loss": 2.0526, + "step": 12784000 + }, + { + "epoch": 37.01, + "learning_rate": 3.1503726785383476e-05, + "loss": 2.0618, + "step": 12784500 + }, + { + "epoch": 37.01, + "learning_rate": 3.15030031377362e-05, + "loss": 2.0671, + "step": 12785000 + }, + { + "epoch": 37.01, + "learning_rate": 3.150227949008892e-05, + "loss": 2.0591, + "step": 12785500 + }, + { + "epoch": 37.01, + "learning_rate": 3.150155584244165e-05, + "loss": 2.0296, + "step": 12786000 + }, + { + "epoch": 37.01, + "learning_rate": 3.150083219479437e-05, + "loss": 2.075, + "step": 12786500 + }, + { + "epoch": 37.01, + "learning_rate": 3.1500108547147094e-05, + "loss": 2.0501, + "step": 12787000 + }, + { + "epoch": 37.01, + "learning_rate": 3.1499384899499816e-05, + "loss": 2.0698, + "step": 12787500 + }, + { + "epoch": 37.02, + "learning_rate": 3.149866125185254e-05, + "loss": 2.057, + "step": 12788000 + }, + { + "epoch": 37.02, + "learning_rate": 3.149793905150056e-05, + "loss": 2.0676, + "step": 12788500 + }, + { + "epoch": 37.02, + "learning_rate": 3.149721540385328e-05, + "loss": 2.0529, + "step": 12789000 + }, + { + "epoch": 37.02, + "learning_rate": 3.1496491756206005e-05, + "loss": 2.0617, + "step": 12789500 + }, + { + "epoch": 37.02, + "learning_rate": 3.149576810855873e-05, + "loss": 2.0428, + "step": 12790000 + }, + { + "epoch": 37.02, + "learning_rate": 3.149504590820674e-05, + "loss": 2.0492, + "step": 12790500 + }, + { + "epoch": 37.02, + "learning_rate": 3.1494322260559465e-05, + "loss": 2.0331, + "step": 12791000 + }, + { + "epoch": 37.03, + "learning_rate": 3.149360006020749e-05, + "loss": 2.0371, + "step": 12791500 + }, + { + "epoch": 37.03, + "learning_rate": 3.149287641256021e-05, + "loss": 2.0314, + "step": 12792000 + }, + { + "epoch": 37.03, + "learning_rate": 3.149215276491293e-05, + "loss": 2.0745, + "step": 12792500 + }, + { + "epoch": 37.03, + "learning_rate": 3.1491429117265654e-05, + "loss": 2.0652, + "step": 12793000 + }, + { + "epoch": 37.03, + "learning_rate": 3.149070546961838e-05, + "loss": 2.0504, + "step": 12793500 + }, + { + "epoch": 37.03, + "learning_rate": 3.14899832692664e-05, + "loss": 2.0866, + "step": 12794000 + }, + { + "epoch": 37.03, + "learning_rate": 3.148925962161912e-05, + "loss": 2.0443, + "step": 12794500 + }, + { + "epoch": 37.04, + "learning_rate": 3.148853597397184e-05, + "loss": 2.0399, + "step": 12795000 + }, + { + "epoch": 37.04, + "learning_rate": 3.1487812326324566e-05, + "loss": 2.0583, + "step": 12795500 + }, + { + "epoch": 37.04, + "learning_rate": 3.148708867867729e-05, + "loss": 2.0441, + "step": 12796000 + }, + { + "epoch": 37.04, + "learning_rate": 3.148636503103001e-05, + "loss": 2.061, + "step": 12796500 + }, + { + "epoch": 37.04, + "learning_rate": 3.148564138338274e-05, + "loss": 2.0219, + "step": 12797000 + }, + { + "epoch": 37.04, + "learning_rate": 3.1484919183030755e-05, + "loss": 2.0553, + "step": 12797500 + }, + { + "epoch": 37.04, + "learning_rate": 3.148419553538348e-05, + "loss": 2.0748, + "step": 12798000 + }, + { + "epoch": 37.05, + "learning_rate": 3.14834718877362e-05, + "loss": 2.0494, + "step": 12798500 + }, + { + "epoch": 37.05, + "learning_rate": 3.148274824008892e-05, + "loss": 2.0528, + "step": 12799000 + }, + { + "epoch": 37.05, + "learning_rate": 3.1482024592441644e-05, + "loss": 2.0329, + "step": 12799500 + }, + { + "epoch": 37.05, + "learning_rate": 3.1481300944794366e-05, + "loss": 2.0571, + "step": 12800000 + }, + { + "epoch": 37.05, + "learning_rate": 3.148057729714709e-05, + "loss": 2.0598, + "step": 12800500 + }, + { + "epoch": 37.05, + "learning_rate": 3.147985364949982e-05, + "loss": 2.0591, + "step": 12801000 + }, + { + "epoch": 37.06, + "learning_rate": 3.147913000185254e-05, + "loss": 2.0435, + "step": 12801500 + }, + { + "epoch": 37.06, + "learning_rate": 3.147840780150056e-05, + "loss": 2.0701, + "step": 12802000 + }, + { + "epoch": 37.06, + "learning_rate": 3.1477684153853284e-05, + "loss": 2.0745, + "step": 12802500 + }, + { + "epoch": 37.06, + "learning_rate": 3.1476960506206006e-05, + "loss": 2.0636, + "step": 12803000 + }, + { + "epoch": 37.06, + "learning_rate": 3.147623685855873e-05, + "loss": 2.0603, + "step": 12803500 + }, + { + "epoch": 37.06, + "learning_rate": 3.147551321091145e-05, + "loss": 2.0392, + "step": 12804000 + }, + { + "epoch": 37.06, + "learning_rate": 3.1474791010559466e-05, + "loss": 2.0594, + "step": 12804500 + }, + { + "epoch": 37.07, + "learning_rate": 3.147406736291219e-05, + "loss": 2.0611, + "step": 12805000 + }, + { + "epoch": 37.07, + "learning_rate": 3.147334371526491e-05, + "loss": 2.0382, + "step": 12805500 + }, + { + "epoch": 37.07, + "learning_rate": 3.147262006761764e-05, + "loss": 2.027, + "step": 12806000 + }, + { + "epoch": 37.07, + "learning_rate": 3.1471897867265655e-05, + "loss": 2.0509, + "step": 12806500 + }, + { + "epoch": 37.07, + "learning_rate": 3.147117421961838e-05, + "loss": 2.0347, + "step": 12807000 + }, + { + "epoch": 37.07, + "learning_rate": 3.147045201926639e-05, + "loss": 2.05, + "step": 12807500 + }, + { + "epoch": 37.07, + "learning_rate": 3.1469728371619115e-05, + "loss": 2.0598, + "step": 12808000 + }, + { + "epoch": 37.08, + "learning_rate": 3.1469004723971844e-05, + "loss": 2.056, + "step": 12808500 + }, + { + "epoch": 37.08, + "learning_rate": 3.146828252361986e-05, + "loss": 2.0344, + "step": 12809000 + }, + { + "epoch": 37.08, + "learning_rate": 3.146755887597259e-05, + "loss": 2.0352, + "step": 12809500 + }, + { + "epoch": 37.08, + "learning_rate": 3.146683522832531e-05, + "loss": 2.0678, + "step": 12810000 + }, + { + "epoch": 37.08, + "learning_rate": 3.146611158067803e-05, + "loss": 2.0444, + "step": 12810500 + }, + { + "epoch": 37.08, + "learning_rate": 3.1465387933030755e-05, + "loss": 2.0615, + "step": 12811000 + }, + { + "epoch": 37.08, + "learning_rate": 3.146466428538348e-05, + "loss": 2.0288, + "step": 12811500 + }, + { + "epoch": 37.09, + "learning_rate": 3.14639406377362e-05, + "loss": 2.0388, + "step": 12812000 + }, + { + "epoch": 37.09, + "learning_rate": 3.146321699008892e-05, + "loss": 2.0561, + "step": 12812500 + }, + { + "epoch": 37.09, + "learning_rate": 3.1462493342441644e-05, + "loss": 2.0349, + "step": 12813000 + }, + { + "epoch": 37.09, + "learning_rate": 3.1461769694794367e-05, + "loss": 2.0255, + "step": 12813500 + }, + { + "epoch": 37.09, + "learning_rate": 3.146104604714709e-05, + "loss": 2.0367, + "step": 12814000 + }, + { + "epoch": 37.09, + "learning_rate": 3.146032239949981e-05, + "loss": 2.0356, + "step": 12814500 + }, + { + "epoch": 37.09, + "learning_rate": 3.145960019914783e-05, + "loss": 2.0546, + "step": 12815000 + }, + { + "epoch": 37.1, + "learning_rate": 3.1458876551500556e-05, + "loss": 2.068, + "step": 12815500 + }, + { + "epoch": 37.1, + "learning_rate": 3.1458152903853285e-05, + "loss": 2.0404, + "step": 12816000 + }, + { + "epoch": 37.1, + "learning_rate": 3.145742925620601e-05, + "loss": 2.0273, + "step": 12816500 + }, + { + "epoch": 37.1, + "learning_rate": 3.145670560855873e-05, + "loss": 2.0524, + "step": 12817000 + }, + { + "epoch": 37.1, + "learning_rate": 3.1455983408206745e-05, + "loss": 2.0588, + "step": 12817500 + }, + { + "epoch": 37.1, + "learning_rate": 3.145525976055947e-05, + "loss": 2.0481, + "step": 12818000 + }, + { + "epoch": 37.1, + "learning_rate": 3.145453611291219e-05, + "loss": 2.0685, + "step": 12818500 + }, + { + "epoch": 37.11, + "learning_rate": 3.145381246526491e-05, + "loss": 2.0495, + "step": 12819000 + }, + { + "epoch": 37.11, + "learning_rate": 3.145308881761764e-05, + "loss": 2.0588, + "step": 12819500 + }, + { + "epoch": 37.11, + "learning_rate": 3.145236516997036e-05, + "loss": 2.0462, + "step": 12820000 + }, + { + "epoch": 37.11, + "learning_rate": 3.1451641522323085e-05, + "loss": 2.0389, + "step": 12820500 + }, + { + "epoch": 37.11, + "learning_rate": 3.145091787467581e-05, + "loss": 2.0441, + "step": 12821000 + }, + { + "epoch": 37.11, + "learning_rate": 3.145019422702853e-05, + "loss": 2.036, + "step": 12821500 + }, + { + "epoch": 37.11, + "learning_rate": 3.1449472026676545e-05, + "loss": 2.0632, + "step": 12822000 + }, + { + "epoch": 37.12, + "learning_rate": 3.144874837902927e-05, + "loss": 2.0653, + "step": 12822500 + }, + { + "epoch": 37.12, + "learning_rate": 3.1448024731381996e-05, + "loss": 2.044, + "step": 12823000 + }, + { + "epoch": 37.12, + "learning_rate": 3.144730108373472e-05, + "loss": 2.049, + "step": 12823500 + }, + { + "epoch": 37.12, + "learning_rate": 3.144657743608744e-05, + "loss": 2.0516, + "step": 12824000 + }, + { + "epoch": 37.12, + "learning_rate": 3.144585378844016e-05, + "loss": 2.0379, + "step": 12824500 + }, + { + "epoch": 37.12, + "learning_rate": 3.144513014079289e-05, + "loss": 2.0465, + "step": 12825000 + }, + { + "epoch": 37.12, + "learning_rate": 3.1444406493145614e-05, + "loss": 2.0307, + "step": 12825500 + }, + { + "epoch": 37.13, + "learning_rate": 3.1443682845498336e-05, + "loss": 2.0547, + "step": 12826000 + }, + { + "epoch": 37.13, + "learning_rate": 3.1442962092441645e-05, + "loss": 2.0525, + "step": 12826500 + }, + { + "epoch": 37.13, + "learning_rate": 3.144223844479437e-05, + "loss": 2.0422, + "step": 12827000 + }, + { + "epoch": 37.13, + "learning_rate": 3.144151479714709e-05, + "loss": 2.0376, + "step": 12827500 + }, + { + "epoch": 37.13, + "learning_rate": 3.144079259679511e-05, + "loss": 2.0835, + "step": 12828000 + }, + { + "epoch": 37.13, + "learning_rate": 3.1440068949147834e-05, + "loss": 2.0259, + "step": 12828500 + }, + { + "epoch": 37.13, + "learning_rate": 3.143934674879585e-05, + "loss": 2.0629, + "step": 12829000 + }, + { + "epoch": 37.14, + "learning_rate": 3.143862310114857e-05, + "loss": 2.053, + "step": 12829500 + }, + { + "epoch": 37.14, + "learning_rate": 3.143790090079659e-05, + "loss": 2.0567, + "step": 12830000 + }, + { + "epoch": 37.14, + "learning_rate": 3.1437177253149317e-05, + "loss": 2.0595, + "step": 12830500 + }, + { + "epoch": 37.14, + "learning_rate": 3.143645360550204e-05, + "loss": 2.0538, + "step": 12831000 + }, + { + "epoch": 37.14, + "learning_rate": 3.143572995785477e-05, + "loss": 2.0499, + "step": 12831500 + }, + { + "epoch": 37.14, + "learning_rate": 3.143500631020749e-05, + "loss": 2.0701, + "step": 12832000 + }, + { + "epoch": 37.14, + "learning_rate": 3.143428266256021e-05, + "loss": 2.0489, + "step": 12832500 + }, + { + "epoch": 37.15, + "learning_rate": 3.1433559014912934e-05, + "loss": 2.0404, + "step": 12833000 + }, + { + "epoch": 37.15, + "learning_rate": 3.143283536726566e-05, + "loss": 2.0546, + "step": 12833500 + }, + { + "epoch": 37.15, + "learning_rate": 3.143211171961838e-05, + "loss": 2.0572, + "step": 12834000 + }, + { + "epoch": 37.15, + "learning_rate": 3.14313880719711e-05, + "loss": 2.0383, + "step": 12834500 + }, + { + "epoch": 37.15, + "learning_rate": 3.1430664424323823e-05, + "loss": 2.0599, + "step": 12835000 + }, + { + "epoch": 37.15, + "learning_rate": 3.1429940776676546e-05, + "loss": 2.0561, + "step": 12835500 + }, + { + "epoch": 37.15, + "learning_rate": 3.142921712902927e-05, + "loss": 2.0625, + "step": 12836000 + }, + { + "epoch": 37.16, + "learning_rate": 3.142849348138199e-05, + "loss": 2.0657, + "step": 12836500 + }, + { + "epoch": 37.16, + "learning_rate": 3.142776983373472e-05, + "loss": 2.0629, + "step": 12837000 + }, + { + "epoch": 37.16, + "learning_rate": 3.142704618608744e-05, + "loss": 2.0451, + "step": 12837500 + }, + { + "epoch": 37.16, + "learning_rate": 3.142632253844017e-05, + "loss": 2.0775, + "step": 12838000 + }, + { + "epoch": 37.16, + "learning_rate": 3.1425600338088186e-05, + "loss": 2.0539, + "step": 12838500 + }, + { + "epoch": 37.16, + "learning_rate": 3.142487669044091e-05, + "loss": 2.0889, + "step": 12839000 + }, + { + "epoch": 37.17, + "learning_rate": 3.142415304279363e-05, + "loss": 2.0352, + "step": 12839500 + }, + { + "epoch": 37.17, + "learning_rate": 3.142342939514635e-05, + "loss": 2.0453, + "step": 12840000 + }, + { + "epoch": 37.17, + "learning_rate": 3.1422705747499075e-05, + "loss": 2.061, + "step": 12840500 + }, + { + "epoch": 37.17, + "learning_rate": 3.14219820998518e-05, + "loss": 2.0602, + "step": 12841000 + }, + { + "epoch": 37.17, + "learning_rate": 3.142125845220452e-05, + "loss": 2.0656, + "step": 12841500 + }, + { + "epoch": 37.17, + "learning_rate": 3.142053480455724e-05, + "loss": 2.0397, + "step": 12842000 + }, + { + "epoch": 37.17, + "learning_rate": 3.1419812604205264e-05, + "loss": 2.0388, + "step": 12842500 + }, + { + "epoch": 37.18, + "learning_rate": 3.141909040385328e-05, + "loss": 2.0363, + "step": 12843000 + }, + { + "epoch": 37.18, + "learning_rate": 3.1418368203501295e-05, + "loss": 2.054, + "step": 12843500 + }, + { + "epoch": 37.18, + "learning_rate": 3.141764600314932e-05, + "loss": 2.0775, + "step": 12844000 + }, + { + "epoch": 37.18, + "learning_rate": 3.141692235550204e-05, + "loss": 2.0386, + "step": 12844500 + }, + { + "epoch": 37.18, + "learning_rate": 3.141619870785476e-05, + "loss": 2.0678, + "step": 12845000 + }, + { + "epoch": 37.18, + "learning_rate": 3.1415475060207484e-05, + "loss": 2.041, + "step": 12845500 + }, + { + "epoch": 37.18, + "learning_rate": 3.141475141256021e-05, + "loss": 2.0508, + "step": 12846000 + }, + { + "epoch": 37.19, + "learning_rate": 3.1414027764912935e-05, + "loss": 2.0469, + "step": 12846500 + }, + { + "epoch": 37.19, + "learning_rate": 3.141330411726566e-05, + "loss": 2.0586, + "step": 12847000 + }, + { + "epoch": 37.19, + "learning_rate": 3.141258046961838e-05, + "loss": 2.0609, + "step": 12847500 + }, + { + "epoch": 37.19, + "learning_rate": 3.14118568219711e-05, + "loss": 2.0562, + "step": 12848000 + }, + { + "epoch": 37.19, + "learning_rate": 3.1411133174323824e-05, + "loss": 2.0672, + "step": 12848500 + }, + { + "epoch": 37.19, + "learning_rate": 3.1410409526676546e-05, + "loss": 2.0222, + "step": 12849000 + }, + { + "epoch": 37.19, + "learning_rate": 3.140968587902927e-05, + "loss": 2.0456, + "step": 12849500 + }, + { + "epoch": 37.2, + "learning_rate": 3.1408965125972584e-05, + "loss": 2.0156, + "step": 12850000 + }, + { + "epoch": 37.2, + "learning_rate": 3.1408241478325307e-05, + "loss": 2.0696, + "step": 12850500 + }, + { + "epoch": 37.2, + "learning_rate": 3.140751783067803e-05, + "loss": 2.0446, + "step": 12851000 + }, + { + "epoch": 37.2, + "learning_rate": 3.140679418303075e-05, + "loss": 2.0562, + "step": 12851500 + }, + { + "epoch": 37.2, + "learning_rate": 3.140607053538347e-05, + "loss": 2.0444, + "step": 12852000 + }, + { + "epoch": 37.2, + "learning_rate": 3.1405346887736195e-05, + "loss": 2.0644, + "step": 12852500 + }, + { + "epoch": 37.2, + "learning_rate": 3.140462324008892e-05, + "loss": 2.0397, + "step": 12853000 + }, + { + "epoch": 37.21, + "learning_rate": 3.140389959244165e-05, + "loss": 2.0498, + "step": 12853500 + }, + { + "epoch": 37.21, + "learning_rate": 3.140317594479437e-05, + "loss": 2.0677, + "step": 12854000 + }, + { + "epoch": 37.21, + "learning_rate": 3.14024522971471e-05, + "loss": 2.0474, + "step": 12854500 + }, + { + "epoch": 37.21, + "learning_rate": 3.1401730096795114e-05, + "loss": 2.0337, + "step": 12855000 + }, + { + "epoch": 37.21, + "learning_rate": 3.1401006449147836e-05, + "loss": 2.0331, + "step": 12855500 + }, + { + "epoch": 37.21, + "learning_rate": 3.140028424879585e-05, + "loss": 2.0735, + "step": 12856000 + }, + { + "epoch": 37.21, + "learning_rate": 3.139956204844387e-05, + "loss": 2.0488, + "step": 12856500 + }, + { + "epoch": 37.22, + "learning_rate": 3.1398838400796596e-05, + "loss": 2.0594, + "step": 12857000 + }, + { + "epoch": 37.22, + "learning_rate": 3.139811475314932e-05, + "loss": 2.0403, + "step": 12857500 + }, + { + "epoch": 37.22, + "learning_rate": 3.139739110550204e-05, + "loss": 2.053, + "step": 12858000 + }, + { + "epoch": 37.22, + "learning_rate": 3.139666745785476e-05, + "loss": 2.0542, + "step": 12858500 + }, + { + "epoch": 37.22, + "learning_rate": 3.1395943810207485e-05, + "loss": 2.0581, + "step": 12859000 + }, + { + "epoch": 37.22, + "learning_rate": 3.13952216098555e-05, + "loss": 2.027, + "step": 12859500 + }, + { + "epoch": 37.22, + "learning_rate": 3.139449796220822e-05, + "loss": 2.0397, + "step": 12860000 + }, + { + "epoch": 37.23, + "learning_rate": 3.1393775761856245e-05, + "loss": 2.0382, + "step": 12860500 + }, + { + "epoch": 37.23, + "learning_rate": 3.139305211420897e-05, + "loss": 2.0236, + "step": 12861000 + }, + { + "epoch": 37.23, + "learning_rate": 3.1392328466561696e-05, + "loss": 2.0529, + "step": 12861500 + }, + { + "epoch": 37.23, + "learning_rate": 3.139160481891442e-05, + "loss": 2.0593, + "step": 12862000 + }, + { + "epoch": 37.23, + "learning_rate": 3.139088117126714e-05, + "loss": 2.0597, + "step": 12862500 + }, + { + "epoch": 37.23, + "learning_rate": 3.139015752361986e-05, + "loss": 2.0431, + "step": 12863000 + }, + { + "epoch": 37.23, + "learning_rate": 3.1389433875972585e-05, + "loss": 2.0572, + "step": 12863500 + }, + { + "epoch": 37.24, + "learning_rate": 3.138871022832531e-05, + "loss": 2.0598, + "step": 12864000 + }, + { + "epoch": 37.24, + "learning_rate": 3.138798658067803e-05, + "loss": 2.0615, + "step": 12864500 + }, + { + "epoch": 37.24, + "learning_rate": 3.138726293303075e-05, + "loss": 2.0538, + "step": 12865000 + }, + { + "epoch": 37.24, + "learning_rate": 3.1386539285383474e-05, + "loss": 2.0478, + "step": 12865500 + }, + { + "epoch": 37.24, + "learning_rate": 3.1385815637736196e-05, + "loss": 2.0511, + "step": 12866000 + }, + { + "epoch": 37.24, + "learning_rate": 3.138509199008892e-05, + "loss": 2.0737, + "step": 12866500 + }, + { + "epoch": 37.24, + "learning_rate": 3.138436978973694e-05, + "loss": 2.0632, + "step": 12867000 + }, + { + "epoch": 37.25, + "learning_rate": 3.138364614208966e-05, + "loss": 2.0562, + "step": 12867500 + }, + { + "epoch": 37.25, + "learning_rate": 3.1382922494442385e-05, + "loss": 2.0515, + "step": 12868000 + }, + { + "epoch": 37.25, + "learning_rate": 3.1382198846795114e-05, + "loss": 2.0701, + "step": 12868500 + }, + { + "epoch": 37.25, + "learning_rate": 3.1381475199147837e-05, + "loss": 2.0683, + "step": 12869000 + }, + { + "epoch": 37.25, + "learning_rate": 3.138075299879585e-05, + "loss": 2.0691, + "step": 12869500 + }, + { + "epoch": 37.25, + "learning_rate": 3.1380029351148574e-05, + "loss": 2.0651, + "step": 12870000 + }, + { + "epoch": 37.25, + "learning_rate": 3.13793071507966e-05, + "loss": 2.0578, + "step": 12870500 + }, + { + "epoch": 37.26, + "learning_rate": 3.137858350314932e-05, + "loss": 2.0752, + "step": 12871000 + }, + { + "epoch": 37.26, + "learning_rate": 3.137785985550204e-05, + "loss": 2.0624, + "step": 12871500 + }, + { + "epoch": 37.26, + "learning_rate": 3.137713620785476e-05, + "loss": 2.0585, + "step": 12872000 + }, + { + "epoch": 37.26, + "learning_rate": 3.1376412560207486e-05, + "loss": 2.0778, + "step": 12872500 + }, + { + "epoch": 37.26, + "learning_rate": 3.137568891256021e-05, + "loss": 2.0747, + "step": 12873000 + }, + { + "epoch": 37.26, + "learning_rate": 3.137496526491293e-05, + "loss": 2.0462, + "step": 12873500 + }, + { + "epoch": 37.26, + "learning_rate": 3.1374243064560946e-05, + "loss": 2.0707, + "step": 12874000 + }, + { + "epoch": 37.27, + "learning_rate": 3.1373519416913675e-05, + "loss": 2.0582, + "step": 12874500 + }, + { + "epoch": 37.27, + "learning_rate": 3.13727957692664e-05, + "loss": 2.0685, + "step": 12875000 + }, + { + "epoch": 37.27, + "learning_rate": 3.137207212161912e-05, + "loss": 2.0375, + "step": 12875500 + }, + { + "epoch": 37.27, + "learning_rate": 3.137134847397185e-05, + "loss": 2.0823, + "step": 12876000 + }, + { + "epoch": 37.27, + "learning_rate": 3.137062482632457e-05, + "loss": 2.0763, + "step": 12876500 + }, + { + "epoch": 37.27, + "learning_rate": 3.136990117867729e-05, + "loss": 2.0604, + "step": 12877000 + }, + { + "epoch": 37.28, + "learning_rate": 3.1369177531030015e-05, + "loss": 2.061, + "step": 12877500 + }, + { + "epoch": 37.28, + "learning_rate": 3.136845388338274e-05, + "loss": 2.0807, + "step": 12878000 + }, + { + "epoch": 37.28, + "learning_rate": 3.136773023573546e-05, + "loss": 2.0394, + "step": 12878500 + }, + { + "epoch": 37.28, + "learning_rate": 3.136700658808818e-05, + "loss": 2.0419, + "step": 12879000 + }, + { + "epoch": 37.28, + "learning_rate": 3.1366282940440904e-05, + "loss": 2.045, + "step": 12879500 + }, + { + "epoch": 37.28, + "learning_rate": 3.1365559292793626e-05, + "loss": 2.0743, + "step": 12880000 + }, + { + "epoch": 37.28, + "learning_rate": 3.136483564514635e-05, + "loss": 2.0666, + "step": 12880500 + }, + { + "epoch": 37.29, + "learning_rate": 3.136411199749907e-05, + "loss": 2.0324, + "step": 12881000 + }, + { + "epoch": 37.29, + "learning_rate": 3.136338979714709e-05, + "loss": 2.0815, + "step": 12881500 + }, + { + "epoch": 37.29, + "learning_rate": 3.1362666149499815e-05, + "loss": 2.0679, + "step": 12882000 + }, + { + "epoch": 37.29, + "learning_rate": 3.136194394914783e-05, + "loss": 2.0357, + "step": 12882500 + }, + { + "epoch": 37.29, + "learning_rate": 3.136122030150055e-05, + "loss": 2.0576, + "step": 12883000 + }, + { + "epoch": 37.29, + "learning_rate": 3.136049665385328e-05, + "loss": 2.0622, + "step": 12883500 + }, + { + "epoch": 37.29, + "learning_rate": 3.1359773006206004e-05, + "loss": 2.0647, + "step": 12884000 + }, + { + "epoch": 37.3, + "learning_rate": 3.1359049358558726e-05, + "loss": 2.0793, + "step": 12884500 + }, + { + "epoch": 37.3, + "learning_rate": 3.135832715820675e-05, + "loss": 2.0534, + "step": 12885000 + }, + { + "epoch": 37.3, + "learning_rate": 3.135760351055947e-05, + "loss": 2.0466, + "step": 12885500 + }, + { + "epoch": 37.3, + "learning_rate": 3.135687986291219e-05, + "loss": 2.0615, + "step": 12886000 + }, + { + "epoch": 37.3, + "learning_rate": 3.1356156215264915e-05, + "loss": 2.0571, + "step": 12886500 + }, + { + "epoch": 37.3, + "learning_rate": 3.135543256761764e-05, + "loss": 2.0773, + "step": 12887000 + }, + { + "epoch": 37.3, + "learning_rate": 3.135470891997036e-05, + "loss": 2.0507, + "step": 12887500 + }, + { + "epoch": 37.31, + "learning_rate": 3.135398527232308e-05, + "loss": 2.0288, + "step": 12888000 + }, + { + "epoch": 37.31, + "learning_rate": 3.1353261624675804e-05, + "loss": 2.0455, + "step": 12888500 + }, + { + "epoch": 37.31, + "learning_rate": 3.1352537977028526e-05, + "loss": 2.0661, + "step": 12889000 + }, + { + "epoch": 37.31, + "learning_rate": 3.135181432938125e-05, + "loss": 2.0501, + "step": 12889500 + }, + { + "epoch": 37.31, + "learning_rate": 3.135109068173398e-05, + "loss": 2.0811, + "step": 12890000 + }, + { + "epoch": 37.31, + "learning_rate": 3.13503670340867e-05, + "loss": 2.045, + "step": 12890500 + }, + { + "epoch": 37.31, + "learning_rate": 3.134964483373472e-05, + "loss": 2.0582, + "step": 12891000 + }, + { + "epoch": 37.32, + "learning_rate": 3.1348921186087445e-05, + "loss": 2.0642, + "step": 12891500 + }, + { + "epoch": 37.32, + "learning_rate": 3.134819753844017e-05, + "loss": 2.061, + "step": 12892000 + }, + { + "epoch": 37.32, + "learning_rate": 3.134747389079289e-05, + "loss": 2.0654, + "step": 12892500 + }, + { + "epoch": 37.32, + "learning_rate": 3.134675024314561e-05, + "loss": 2.0475, + "step": 12893000 + }, + { + "epoch": 37.32, + "learning_rate": 3.1346026595498333e-05, + "loss": 2.0303, + "step": 12893500 + }, + { + "epoch": 37.32, + "learning_rate": 3.134530439514635e-05, + "loss": 2.0547, + "step": 12894000 + }, + { + "epoch": 37.32, + "learning_rate": 3.134458074749908e-05, + "loss": 2.0492, + "step": 12894500 + }, + { + "epoch": 37.33, + "learning_rate": 3.13438570998518e-05, + "loss": 2.0527, + "step": 12895000 + }, + { + "epoch": 37.33, + "learning_rate": 3.1343134899499816e-05, + "loss": 2.0729, + "step": 12895500 + }, + { + "epoch": 37.33, + "learning_rate": 3.134241125185254e-05, + "loss": 2.0566, + "step": 12896000 + }, + { + "epoch": 37.33, + "learning_rate": 3.1341689051500554e-05, + "loss": 2.0602, + "step": 12896500 + }, + { + "epoch": 37.33, + "learning_rate": 3.1340965403853276e-05, + "loss": 2.0541, + "step": 12897000 + }, + { + "epoch": 37.33, + "learning_rate": 3.1340241756206e-05, + "loss": 2.0517, + "step": 12897500 + }, + { + "epoch": 37.33, + "learning_rate": 3.133951810855873e-05, + "loss": 2.0539, + "step": 12898000 + }, + { + "epoch": 37.34, + "learning_rate": 3.133879446091145e-05, + "loss": 2.0384, + "step": 12898500 + }, + { + "epoch": 37.34, + "learning_rate": 3.133807081326418e-05, + "loss": 2.0552, + "step": 12899000 + }, + { + "epoch": 37.34, + "learning_rate": 3.13373471656169e-05, + "loss": 2.062, + "step": 12899500 + }, + { + "epoch": 37.34, + "learning_rate": 3.133662351796962e-05, + "loss": 2.0334, + "step": 12900000 + }, + { + "epoch": 37.34, + "learning_rate": 3.1335899870322345e-05, + "loss": 2.0572, + "step": 12900500 + }, + { + "epoch": 37.34, + "learning_rate": 3.133517622267507e-05, + "loss": 2.0613, + "step": 12901000 + }, + { + "epoch": 37.34, + "learning_rate": 3.133445402232308e-05, + "loss": 2.0381, + "step": 12901500 + }, + { + "epoch": 37.35, + "learning_rate": 3.1333730374675805e-05, + "loss": 2.0464, + "step": 12902000 + }, + { + "epoch": 37.35, + "learning_rate": 3.133300672702853e-05, + "loss": 2.0224, + "step": 12902500 + }, + { + "epoch": 37.35, + "learning_rate": 3.133228307938125e-05, + "loss": 2.029, + "step": 12903000 + }, + { + "epoch": 37.35, + "learning_rate": 3.133155943173398e-05, + "loss": 2.05, + "step": 12903500 + }, + { + "epoch": 37.35, + "learning_rate": 3.1330837231381994e-05, + "loss": 2.0893, + "step": 12904000 + }, + { + "epoch": 37.35, + "learning_rate": 3.1330113583734716e-05, + "loss": 2.0721, + "step": 12904500 + }, + { + "epoch": 37.35, + "learning_rate": 3.132939138338273e-05, + "loss": 2.0575, + "step": 12905000 + }, + { + "epoch": 37.36, + "learning_rate": 3.1328667735735454e-05, + "loss": 2.0882, + "step": 12905500 + }, + { + "epoch": 37.36, + "learning_rate": 3.132794408808818e-05, + "loss": 2.0648, + "step": 12906000 + }, + { + "epoch": 37.36, + "learning_rate": 3.1327220440440905e-05, + "loss": 2.0655, + "step": 12906500 + }, + { + "epoch": 37.36, + "learning_rate": 3.132649824008893e-05, + "loss": 2.0813, + "step": 12907000 + }, + { + "epoch": 37.36, + "learning_rate": 3.132577459244165e-05, + "loss": 2.0641, + "step": 12907500 + }, + { + "epoch": 37.36, + "learning_rate": 3.132505094479437e-05, + "loss": 2.0575, + "step": 12908000 + }, + { + "epoch": 37.36, + "learning_rate": 3.1324327297147094e-05, + "loss": 2.0594, + "step": 12908500 + }, + { + "epoch": 37.37, + "learning_rate": 3.1323603649499817e-05, + "loss": 2.0893, + "step": 12909000 + }, + { + "epoch": 37.37, + "learning_rate": 3.132288000185254e-05, + "loss": 2.0487, + "step": 12909500 + }, + { + "epoch": 37.37, + "learning_rate": 3.132215635420526e-05, + "loss": 2.0791, + "step": 12910000 + }, + { + "epoch": 37.37, + "learning_rate": 3.132143270655798e-05, + "loss": 2.0554, + "step": 12910500 + }, + { + "epoch": 37.37, + "learning_rate": 3.1320709058910706e-05, + "loss": 2.0522, + "step": 12911000 + }, + { + "epoch": 37.37, + "learning_rate": 3.131998541126343e-05, + "loss": 2.0863, + "step": 12911500 + }, + { + "epoch": 37.37, + "learning_rate": 3.131926176361615e-05, + "loss": 2.0742, + "step": 12912000 + }, + { + "epoch": 37.38, + "learning_rate": 3.131853811596888e-05, + "loss": 2.0438, + "step": 12912500 + }, + { + "epoch": 37.38, + "learning_rate": 3.131781736291219e-05, + "loss": 2.0639, + "step": 12913000 + }, + { + "epoch": 37.38, + "learning_rate": 3.131709371526492e-05, + "loss": 2.0705, + "step": 12913500 + }, + { + "epoch": 37.38, + "learning_rate": 3.131637006761764e-05, + "loss": 2.053, + "step": 12914000 + }, + { + "epoch": 37.38, + "learning_rate": 3.131564641997036e-05, + "loss": 2.0696, + "step": 12914500 + }, + { + "epoch": 37.38, + "learning_rate": 3.1314922772323084e-05, + "loss": 2.0758, + "step": 12915000 + }, + { + "epoch": 37.39, + "learning_rate": 3.1314199124675806e-05, + "loss": 2.0698, + "step": 12915500 + }, + { + "epoch": 37.39, + "learning_rate": 3.131347547702853e-05, + "loss": 2.0601, + "step": 12916000 + }, + { + "epoch": 37.39, + "learning_rate": 3.131275182938126e-05, + "loss": 2.0536, + "step": 12916500 + }, + { + "epoch": 37.39, + "learning_rate": 3.131202818173398e-05, + "loss": 2.0587, + "step": 12917000 + }, + { + "epoch": 37.39, + "learning_rate": 3.13113045340867e-05, + "loss": 2.0739, + "step": 12917500 + }, + { + "epoch": 37.39, + "learning_rate": 3.131058233373472e-05, + "loss": 2.0908, + "step": 12918000 + }, + { + "epoch": 37.39, + "learning_rate": 3.130986013338273e-05, + "loss": 2.0574, + "step": 12918500 + }, + { + "epoch": 37.4, + "learning_rate": 3.1309136485735455e-05, + "loss": 2.077, + "step": 12919000 + }, + { + "epoch": 37.4, + "learning_rate": 3.130841283808818e-05, + "loss": 2.0645, + "step": 12919500 + }, + { + "epoch": 37.4, + "learning_rate": 3.1307689190440906e-05, + "loss": 2.0395, + "step": 12920000 + }, + { + "epoch": 37.4, + "learning_rate": 3.130696554279363e-05, + "loss": 2.0849, + "step": 12920500 + }, + { + "epoch": 37.4, + "learning_rate": 3.130624189514636e-05, + "loss": 2.0505, + "step": 12921000 + }, + { + "epoch": 37.4, + "learning_rate": 3.130551969479437e-05, + "loss": 2.0597, + "step": 12921500 + }, + { + "epoch": 37.4, + "learning_rate": 3.1304796047147095e-05, + "loss": 2.0682, + "step": 12922000 + }, + { + "epoch": 37.41, + "learning_rate": 3.130407239949982e-05, + "loss": 2.0476, + "step": 12922500 + }, + { + "epoch": 37.41, + "learning_rate": 3.130334875185254e-05, + "loss": 2.0432, + "step": 12923000 + }, + { + "epoch": 37.41, + "learning_rate": 3.130262510420526e-05, + "loss": 2.0715, + "step": 12923500 + }, + { + "epoch": 37.41, + "learning_rate": 3.1301901456557984e-05, + "loss": 2.0566, + "step": 12924000 + }, + { + "epoch": 37.41, + "learning_rate": 3.1301179256206006e-05, + "loss": 2.068, + "step": 12924500 + }, + { + "epoch": 37.41, + "learning_rate": 3.130045560855873e-05, + "loss": 2.0501, + "step": 12925000 + }, + { + "epoch": 37.41, + "learning_rate": 3.129973196091145e-05, + "loss": 2.0386, + "step": 12925500 + }, + { + "epoch": 37.42, + "learning_rate": 3.129900831326417e-05, + "loss": 2.0582, + "step": 12926000 + }, + { + "epoch": 37.42, + "learning_rate": 3.129828611291219e-05, + "loss": 2.0804, + "step": 12926500 + }, + { + "epoch": 37.42, + "learning_rate": 3.129756246526491e-05, + "loss": 2.0418, + "step": 12927000 + }, + { + "epoch": 37.42, + "learning_rate": 3.129684026491293e-05, + "loss": 2.0449, + "step": 12927500 + }, + { + "epoch": 37.42, + "learning_rate": 3.129611806456095e-05, + "loss": 2.04, + "step": 12928000 + }, + { + "epoch": 37.42, + "learning_rate": 3.129539441691368e-05, + "loss": 2.0667, + "step": 12928500 + }, + { + "epoch": 37.42, + "learning_rate": 3.12946707692664e-05, + "loss": 2.0602, + "step": 12929000 + }, + { + "epoch": 37.43, + "learning_rate": 3.129394712161912e-05, + "loss": 2.0692, + "step": 12929500 + }, + { + "epoch": 37.43, + "learning_rate": 3.1293223473971845e-05, + "loss": 2.0568, + "step": 12930000 + }, + { + "epoch": 37.43, + "learning_rate": 3.129250127361986e-05, + "loss": 2.0477, + "step": 12930500 + }, + { + "epoch": 37.43, + "learning_rate": 3.129177762597258e-05, + "loss": 2.0329, + "step": 12931000 + }, + { + "epoch": 37.43, + "learning_rate": 3.1291053978325305e-05, + "loss": 2.0705, + "step": 12931500 + }, + { + "epoch": 37.43, + "learning_rate": 3.1290330330678034e-05, + "loss": 2.0621, + "step": 12932000 + }, + { + "epoch": 37.43, + "learning_rate": 3.1289606683030756e-05, + "loss": 2.0655, + "step": 12932500 + }, + { + "epoch": 37.44, + "learning_rate": 3.128888303538348e-05, + "loss": 2.0733, + "step": 12933000 + }, + { + "epoch": 37.44, + "learning_rate": 3.12881593877362e-05, + "loss": 2.0578, + "step": 12933500 + }, + { + "epoch": 37.44, + "learning_rate": 3.128743574008892e-05, + "loss": 2.0701, + "step": 12934000 + }, + { + "epoch": 37.44, + "learning_rate": 3.1286712092441645e-05, + "loss": 2.0704, + "step": 12934500 + }, + { + "epoch": 37.44, + "learning_rate": 3.128598844479437e-05, + "loss": 2.0413, + "step": 12935000 + }, + { + "epoch": 37.44, + "learning_rate": 3.128526479714709e-05, + "loss": 2.0599, + "step": 12935500 + }, + { + "epoch": 37.44, + "learning_rate": 3.128454114949982e-05, + "loss": 2.0868, + "step": 12936000 + }, + { + "epoch": 37.45, + "learning_rate": 3.1283818949147834e-05, + "loss": 2.0592, + "step": 12936500 + }, + { + "epoch": 37.45, + "learning_rate": 3.1283095301500556e-05, + "loss": 2.0408, + "step": 12937000 + }, + { + "epoch": 37.45, + "learning_rate": 3.1282371653853285e-05, + "loss": 2.0478, + "step": 12937500 + }, + { + "epoch": 37.45, + "learning_rate": 3.128164800620601e-05, + "loss": 2.0526, + "step": 12938000 + }, + { + "epoch": 37.45, + "learning_rate": 3.128092435855873e-05, + "loss": 2.0834, + "step": 12938500 + }, + { + "epoch": 37.45, + "learning_rate": 3.1280202158206745e-05, + "loss": 2.0592, + "step": 12939000 + }, + { + "epoch": 37.45, + "learning_rate": 3.127947851055947e-05, + "loss": 2.0545, + "step": 12939500 + }, + { + "epoch": 37.46, + "learning_rate": 3.127875486291219e-05, + "loss": 2.0677, + "step": 12940000 + }, + { + "epoch": 37.46, + "learning_rate": 3.127803121526491e-05, + "loss": 2.0807, + "step": 12940500 + }, + { + "epoch": 37.46, + "learning_rate": 3.1277309014912934e-05, + "loss": 2.0602, + "step": 12941000 + }, + { + "epoch": 37.46, + "learning_rate": 3.1276585367265656e-05, + "loss": 2.0448, + "step": 12941500 + }, + { + "epoch": 37.46, + "learning_rate": 3.127586171961838e-05, + "loss": 2.0497, + "step": 12942000 + }, + { + "epoch": 37.46, + "learning_rate": 3.1275139519266394e-05, + "loss": 2.0583, + "step": 12942500 + }, + { + "epoch": 37.46, + "learning_rate": 3.1274415871619116e-05, + "loss": 2.0252, + "step": 12943000 + }, + { + "epoch": 37.47, + "learning_rate": 3.1273692223971845e-05, + "loss": 2.0688, + "step": 12943500 + }, + { + "epoch": 37.47, + "learning_rate": 3.127296857632457e-05, + "loss": 2.0565, + "step": 12944000 + }, + { + "epoch": 37.47, + "learning_rate": 3.127224492867729e-05, + "loss": 2.0713, + "step": 12944500 + }, + { + "epoch": 37.47, + "learning_rate": 3.127152128103001e-05, + "loss": 2.066, + "step": 12945000 + }, + { + "epoch": 37.47, + "learning_rate": 3.1270797633382734e-05, + "loss": 2.0758, + "step": 12945500 + }, + { + "epoch": 37.47, + "learning_rate": 3.1270073985735456e-05, + "loss": 2.0659, + "step": 12946000 + }, + { + "epoch": 37.47, + "learning_rate": 3.1269350338088185e-05, + "loss": 2.0594, + "step": 12946500 + }, + { + "epoch": 37.48, + "learning_rate": 3.12686281377362e-05, + "loss": 2.0371, + "step": 12947000 + }, + { + "epoch": 37.48, + "learning_rate": 3.126790449008892e-05, + "loss": 2.0485, + "step": 12947500 + }, + { + "epoch": 37.48, + "learning_rate": 3.1267180842441646e-05, + "loss": 2.0671, + "step": 12948000 + }, + { + "epoch": 37.48, + "learning_rate": 3.126645864208966e-05, + "loss": 2.0744, + "step": 12948500 + }, + { + "epoch": 37.48, + "learning_rate": 3.126573499444238e-05, + "loss": 2.0475, + "step": 12949000 + }, + { + "epoch": 37.48, + "learning_rate": 3.1265011346795106e-05, + "loss": 2.0244, + "step": 12949500 + }, + { + "epoch": 37.48, + "learning_rate": 3.126428914644313e-05, + "loss": 2.0682, + "step": 12950000 + }, + { + "epoch": 37.49, + "learning_rate": 3.126356549879585e-05, + "loss": 2.0616, + "step": 12950500 + }, + { + "epoch": 37.49, + "learning_rate": 3.126284185114858e-05, + "loss": 2.0552, + "step": 12951000 + }, + { + "epoch": 37.49, + "learning_rate": 3.1262119650796595e-05, + "loss": 2.0635, + "step": 12951500 + }, + { + "epoch": 37.49, + "learning_rate": 3.126139600314932e-05, + "loss": 2.0865, + "step": 12952000 + }, + { + "epoch": 37.49, + "learning_rate": 3.126067235550204e-05, + "loss": 2.0461, + "step": 12952500 + }, + { + "epoch": 37.49, + "learning_rate": 3.125995015515006e-05, + "loss": 2.0444, + "step": 12953000 + }, + { + "epoch": 37.5, + "learning_rate": 3.1259226507502784e-05, + "loss": 2.0739, + "step": 12953500 + }, + { + "epoch": 37.5, + "learning_rate": 3.1258502859855506e-05, + "loss": 2.025, + "step": 12954000 + }, + { + "epoch": 37.5, + "learning_rate": 3.125777921220823e-05, + "loss": 2.0528, + "step": 12954500 + }, + { + "epoch": 37.5, + "learning_rate": 3.1257057011856244e-05, + "loss": 2.0711, + "step": 12955000 + }, + { + "epoch": 37.5, + "learning_rate": 3.1256333364208966e-05, + "loss": 2.0575, + "step": 12955500 + }, + { + "epoch": 37.5, + "learning_rate": 3.125560971656169e-05, + "loss": 2.053, + "step": 12956000 + }, + { + "epoch": 37.5, + "learning_rate": 3.125488606891441e-05, + "loss": 2.0497, + "step": 12956500 + }, + { + "epoch": 37.51, + "learning_rate": 3.125416242126713e-05, + "loss": 2.0575, + "step": 12957000 + }, + { + "epoch": 37.51, + "learning_rate": 3.125343877361986e-05, + "loss": 2.0728, + "step": 12957500 + }, + { + "epoch": 37.51, + "learning_rate": 3.1252715125972584e-05, + "loss": 2.0736, + "step": 12958000 + }, + { + "epoch": 37.51, + "learning_rate": 3.125199147832531e-05, + "loss": 2.0505, + "step": 12958500 + }, + { + "epoch": 37.51, + "learning_rate": 3.1251267830678035e-05, + "loss": 2.0499, + "step": 12959000 + }, + { + "epoch": 37.51, + "learning_rate": 3.125054418303076e-05, + "loss": 2.0535, + "step": 12959500 + }, + { + "epoch": 37.51, + "learning_rate": 3.124982053538348e-05, + "loss": 2.0633, + "step": 12960000 + }, + { + "epoch": 37.52, + "learning_rate": 3.12490968877362e-05, + "loss": 2.0805, + "step": 12960500 + }, + { + "epoch": 37.52, + "learning_rate": 3.1248373240088924e-05, + "loss": 2.037, + "step": 12961000 + }, + { + "epoch": 37.52, + "learning_rate": 3.1247649592441646e-05, + "loss": 2.0465, + "step": 12961500 + }, + { + "epoch": 37.52, + "learning_rate": 3.124692594479437e-05, + "loss": 2.039, + "step": 12962000 + }, + { + "epoch": 37.52, + "learning_rate": 3.124620229714709e-05, + "loss": 2.0435, + "step": 12962500 + }, + { + "epoch": 37.52, + "learning_rate": 3.124547864949981e-05, + "loss": 2.0831, + "step": 12963000 + }, + { + "epoch": 37.52, + "learning_rate": 3.1244756449147835e-05, + "loss": 2.096, + "step": 12963500 + }, + { + "epoch": 37.53, + "learning_rate": 3.124403280150056e-05, + "loss": 2.0609, + "step": 12964000 + }, + { + "epoch": 37.53, + "learning_rate": 3.124330915385328e-05, + "loss": 2.057, + "step": 12964500 + }, + { + "epoch": 37.53, + "learning_rate": 3.1242585506206e-05, + "loss": 2.0515, + "step": 12965000 + }, + { + "epoch": 37.53, + "learning_rate": 3.124186185855873e-05, + "loss": 2.0721, + "step": 12965500 + }, + { + "epoch": 37.53, + "learning_rate": 3.124113821091145e-05, + "loss": 2.0519, + "step": 12966000 + }, + { + "epoch": 37.53, + "learning_rate": 3.1240414563264176e-05, + "loss": 2.0615, + "step": 12966500 + }, + { + "epoch": 37.53, + "learning_rate": 3.12396909156169e-05, + "loss": 2.0499, + "step": 12967000 + }, + { + "epoch": 37.54, + "learning_rate": 3.1238970162560213e-05, + "loss": 2.064, + "step": 12967500 + }, + { + "epoch": 37.54, + "learning_rate": 3.1238246514912936e-05, + "loss": 2.0364, + "step": 12968000 + }, + { + "epoch": 37.54, + "learning_rate": 3.123752286726566e-05, + "loss": 2.0707, + "step": 12968500 + }, + { + "epoch": 37.54, + "learning_rate": 3.123679921961838e-05, + "loss": 2.0493, + "step": 12969000 + }, + { + "epoch": 37.54, + "learning_rate": 3.12360755719711e-05, + "loss": 2.0695, + "step": 12969500 + }, + { + "epoch": 37.54, + "learning_rate": 3.1235351924323825e-05, + "loss": 2.0385, + "step": 12970000 + }, + { + "epoch": 37.54, + "learning_rate": 3.123462827667655e-05, + "loss": 2.0549, + "step": 12970500 + }, + { + "epoch": 37.55, + "learning_rate": 3.123390462902927e-05, + "loss": 2.0601, + "step": 12971000 + }, + { + "epoch": 37.55, + "learning_rate": 3.123318098138199e-05, + "loss": 2.0542, + "step": 12971500 + }, + { + "epoch": 37.55, + "learning_rate": 3.1232458781030014e-05, + "loss": 2.0738, + "step": 12972000 + }, + { + "epoch": 37.55, + "learning_rate": 3.1231735133382736e-05, + "loss": 2.0707, + "step": 12972500 + }, + { + "epoch": 37.55, + "learning_rate": 3.123101148573546e-05, + "loss": 2.069, + "step": 12973000 + }, + { + "epoch": 37.55, + "learning_rate": 3.123028783808819e-05, + "loss": 2.0603, + "step": 12973500 + }, + { + "epoch": 37.55, + "learning_rate": 3.12295656377362e-05, + "loss": 2.0437, + "step": 12974000 + }, + { + "epoch": 37.56, + "learning_rate": 3.1228841990088925e-05, + "loss": 2.0795, + "step": 12974500 + }, + { + "epoch": 37.56, + "learning_rate": 3.122811834244165e-05, + "loss": 2.0322, + "step": 12975000 + }, + { + "epoch": 37.56, + "learning_rate": 3.122739469479437e-05, + "loss": 2.0521, + "step": 12975500 + }, + { + "epoch": 37.56, + "learning_rate": 3.122667104714709e-05, + "loss": 2.0425, + "step": 12976000 + }, + { + "epoch": 37.56, + "learning_rate": 3.1225948846795114e-05, + "loss": 2.0516, + "step": 12976500 + }, + { + "epoch": 37.56, + "learning_rate": 3.1225225199147836e-05, + "loss": 2.0721, + "step": 12977000 + }, + { + "epoch": 37.56, + "learning_rate": 3.122450155150056e-05, + "loss": 2.0462, + "step": 12977500 + }, + { + "epoch": 37.57, + "learning_rate": 3.122377790385328e-05, + "loss": 2.0781, + "step": 12978000 + }, + { + "epoch": 37.57, + "learning_rate": 3.1223054256206e-05, + "loss": 2.0781, + "step": 12978500 + }, + { + "epoch": 37.57, + "learning_rate": 3.1222330608558725e-05, + "loss": 2.0595, + "step": 12979000 + }, + { + "epoch": 37.57, + "learning_rate": 3.122160696091145e-05, + "loss": 2.0636, + "step": 12979500 + }, + { + "epoch": 37.57, + "learning_rate": 3.122088331326417e-05, + "loss": 2.0642, + "step": 12980000 + }, + { + "epoch": 37.57, + "learning_rate": 3.12201596656169e-05, + "loss": 2.0513, + "step": 12980500 + }, + { + "epoch": 37.57, + "learning_rate": 3.1219437465264914e-05, + "loss": 2.0417, + "step": 12981000 + }, + { + "epoch": 37.58, + "learning_rate": 3.1218713817617636e-05, + "loss": 2.0468, + "step": 12981500 + }, + { + "epoch": 37.58, + "learning_rate": 3.1217990169970365e-05, + "loss": 2.0397, + "step": 12982000 + }, + { + "epoch": 37.58, + "learning_rate": 3.121726796961838e-05, + "loss": 2.0677, + "step": 12982500 + }, + { + "epoch": 37.58, + "learning_rate": 3.12165443219711e-05, + "loss": 2.088, + "step": 12983000 + }, + { + "epoch": 37.58, + "learning_rate": 3.1215820674323825e-05, + "loss": 2.0451, + "step": 12983500 + }, + { + "epoch": 37.58, + "learning_rate": 3.121509702667655e-05, + "loss": 2.0528, + "step": 12984000 + }, + { + "epoch": 37.58, + "learning_rate": 3.121437337902927e-05, + "loss": 2.0534, + "step": 12984500 + }, + { + "epoch": 37.59, + "learning_rate": 3.121364973138199e-05, + "loss": 2.0491, + "step": 12985000 + }, + { + "epoch": 37.59, + "learning_rate": 3.1212926083734714e-05, + "loss": 2.0669, + "step": 12985500 + }, + { + "epoch": 37.59, + "learning_rate": 3.1212202436087437e-05, + "loss": 2.0712, + "step": 12986000 + }, + { + "epoch": 37.59, + "learning_rate": 3.121148023573546e-05, + "loss": 2.0734, + "step": 12986500 + }, + { + "epoch": 37.59, + "learning_rate": 3.1210758035383474e-05, + "loss": 2.0759, + "step": 12987000 + }, + { + "epoch": 37.59, + "learning_rate": 3.12100343877362e-05, + "loss": 2.0645, + "step": 12987500 + }, + { + "epoch": 37.59, + "learning_rate": 3.120931074008892e-05, + "loss": 2.0643, + "step": 12988000 + }, + { + "epoch": 37.6, + "learning_rate": 3.120858709244165e-05, + "loss": 2.0725, + "step": 12988500 + }, + { + "epoch": 37.6, + "learning_rate": 3.120786344479437e-05, + "loss": 2.0572, + "step": 12989000 + }, + { + "epoch": 37.6, + "learning_rate": 3.120713979714709e-05, + "loss": 2.0993, + "step": 12989500 + }, + { + "epoch": 37.6, + "learning_rate": 3.1206416149499815e-05, + "loss": 2.0685, + "step": 12990000 + }, + { + "epoch": 37.6, + "learning_rate": 3.1205692501852544e-05, + "loss": 2.0974, + "step": 12990500 + }, + { + "epoch": 37.6, + "learning_rate": 3.120497030150056e-05, + "loss": 2.0513, + "step": 12991000 + }, + { + "epoch": 37.61, + "learning_rate": 3.1204248101148575e-05, + "loss": 2.0729, + "step": 12991500 + }, + { + "epoch": 37.61, + "learning_rate": 3.12035244535013e-05, + "loss": 2.0614, + "step": 12992000 + }, + { + "epoch": 37.61, + "learning_rate": 3.120280080585402e-05, + "loss": 2.0464, + "step": 12992500 + }, + { + "epoch": 37.61, + "learning_rate": 3.120207715820674e-05, + "loss": 2.0649, + "step": 12993000 + }, + { + "epoch": 37.61, + "learning_rate": 3.1201353510559464e-05, + "loss": 2.0765, + "step": 12993500 + }, + { + "epoch": 37.61, + "learning_rate": 3.120062986291219e-05, + "loss": 2.0598, + "step": 12994000 + }, + { + "epoch": 37.61, + "learning_rate": 3.1199906215264915e-05, + "loss": 2.0928, + "step": 12994500 + }, + { + "epoch": 37.62, + "learning_rate": 3.119918401491293e-05, + "loss": 2.0184, + "step": 12995000 + }, + { + "epoch": 37.62, + "learning_rate": 3.1198461814560946e-05, + "loss": 2.0607, + "step": 12995500 + }, + { + "epoch": 37.62, + "learning_rate": 3.119773961420897e-05, + "loss": 2.0571, + "step": 12996000 + }, + { + "epoch": 37.62, + "learning_rate": 3.119701596656169e-05, + "loss": 2.0721, + "step": 12996500 + }, + { + "epoch": 37.62, + "learning_rate": 3.119629231891442e-05, + "loss": 2.0784, + "step": 12997000 + }, + { + "epoch": 37.62, + "learning_rate": 3.119556867126714e-05, + "loss": 2.067, + "step": 12997500 + }, + { + "epoch": 37.62, + "learning_rate": 3.119484647091516e-05, + "loss": 2.079, + "step": 12998000 + }, + { + "epoch": 37.63, + "learning_rate": 3.119412282326788e-05, + "loss": 2.0682, + "step": 12998500 + }, + { + "epoch": 37.63, + "learning_rate": 3.11933991756206e-05, + "loss": 2.0483, + "step": 12999000 + }, + { + "epoch": 37.63, + "learning_rate": 3.1192675527973324e-05, + "loss": 2.0239, + "step": 12999500 + }, + { + "epoch": 37.63, + "learning_rate": 3.1191951880326046e-05, + "loss": 2.072, + "step": 13000000 + }, + { + "epoch": 37.63, + "learning_rate": 3.119122823267877e-05, + "loss": 2.0434, + "step": 13000500 + }, + { + "epoch": 37.63, + "learning_rate": 3.119050458503149e-05, + "loss": 2.0682, + "step": 13001000 + }, + { + "epoch": 37.63, + "learning_rate": 3.118978093738422e-05, + "loss": 2.0724, + "step": 13001500 + }, + { + "epoch": 37.64, + "learning_rate": 3.118905728973694e-05, + "loss": 2.0547, + "step": 13002000 + }, + { + "epoch": 37.64, + "learning_rate": 3.1188333642089664e-05, + "loss": 2.0732, + "step": 13002500 + }, + { + "epoch": 37.64, + "learning_rate": 3.1187609994442386e-05, + "loss": 2.0517, + "step": 13003000 + }, + { + "epoch": 37.64, + "learning_rate": 3.1186886346795115e-05, + "loss": 2.0773, + "step": 13003500 + }, + { + "epoch": 37.64, + "learning_rate": 3.118616269914784e-05, + "loss": 2.0505, + "step": 13004000 + }, + { + "epoch": 37.64, + "learning_rate": 3.118543905150056e-05, + "loss": 2.0826, + "step": 13004500 + }, + { + "epoch": 37.64, + "learning_rate": 3.1184716851148576e-05, + "loss": 2.0816, + "step": 13005000 + }, + { + "epoch": 37.65, + "learning_rate": 3.11839932035013e-05, + "loss": 2.0768, + "step": 13005500 + }, + { + "epoch": 37.65, + "learning_rate": 3.118326955585402e-05, + "loss": 2.0453, + "step": 13006000 + }, + { + "epoch": 37.65, + "learning_rate": 3.118254590820674e-05, + "loss": 2.0612, + "step": 13006500 + }, + { + "epoch": 37.65, + "learning_rate": 3.118182226055947e-05, + "loss": 2.0591, + "step": 13007000 + }, + { + "epoch": 37.65, + "learning_rate": 3.1181098612912193e-05, + "loss": 2.0704, + "step": 13007500 + }, + { + "epoch": 37.65, + "learning_rate": 3.1180374965264916e-05, + "loss": 2.0676, + "step": 13008000 + }, + { + "epoch": 37.65, + "learning_rate": 3.117965131761764e-05, + "loss": 2.0484, + "step": 13008500 + }, + { + "epoch": 37.66, + "learning_rate": 3.117892766997036e-05, + "loss": 2.0778, + "step": 13009000 + }, + { + "epoch": 37.66, + "learning_rate": 3.117820402232308e-05, + "loss": 2.0581, + "step": 13009500 + }, + { + "epoch": 37.66, + "learning_rate": 3.11774818219711e-05, + "loss": 2.0618, + "step": 13010000 + }, + { + "epoch": 37.66, + "learning_rate": 3.117675817432382e-05, + "loss": 2.0848, + "step": 13010500 + }, + { + "epoch": 37.66, + "learning_rate": 3.117603452667655e-05, + "loss": 2.084, + "step": 13011000 + }, + { + "epoch": 37.66, + "learning_rate": 3.117531087902927e-05, + "loss": 2.0389, + "step": 13011500 + }, + { + "epoch": 37.66, + "learning_rate": 3.1174587231381994e-05, + "loss": 2.0614, + "step": 13012000 + }, + { + "epoch": 37.67, + "learning_rate": 3.1173863583734716e-05, + "loss": 2.0797, + "step": 13012500 + }, + { + "epoch": 37.67, + "learning_rate": 3.1173139936087445e-05, + "loss": 2.055, + "step": 13013000 + }, + { + "epoch": 37.67, + "learning_rate": 3.117241628844017e-05, + "loss": 2.0407, + "step": 13013500 + }, + { + "epoch": 37.67, + "learning_rate": 3.117169264079289e-05, + "loss": 2.0679, + "step": 13014000 + }, + { + "epoch": 37.67, + "learning_rate": 3.117096899314561e-05, + "loss": 2.0674, + "step": 13014500 + }, + { + "epoch": 37.67, + "learning_rate": 3.1170245345498334e-05, + "loss": 2.0371, + "step": 13015000 + }, + { + "epoch": 37.67, + "learning_rate": 3.1169521697851056e-05, + "loss": 2.0629, + "step": 13015500 + }, + { + "epoch": 37.68, + "learning_rate": 3.116879805020378e-05, + "loss": 2.0723, + "step": 13016000 + }, + { + "epoch": 37.68, + "learning_rate": 3.11680744025565e-05, + "loss": 2.0338, + "step": 13016500 + }, + { + "epoch": 37.68, + "learning_rate": 3.116735075490922e-05, + "loss": 2.0541, + "step": 13017000 + }, + { + "epoch": 37.68, + "learning_rate": 3.116662710726195e-05, + "loss": 2.0581, + "step": 13017500 + }, + { + "epoch": 37.68, + "learning_rate": 3.116590635420527e-05, + "loss": 2.0729, + "step": 13018000 + }, + { + "epoch": 37.68, + "learning_rate": 3.116518270655799e-05, + "loss": 2.0544, + "step": 13018500 + }, + { + "epoch": 37.68, + "learning_rate": 3.116445905891071e-05, + "loss": 2.0492, + "step": 13019000 + }, + { + "epoch": 37.69, + "learning_rate": 3.1163735411263434e-05, + "loss": 2.0652, + "step": 13019500 + }, + { + "epoch": 37.69, + "learning_rate": 3.1163011763616156e-05, + "loss": 2.0614, + "step": 13020000 + }, + { + "epoch": 37.69, + "learning_rate": 3.116228956326417e-05, + "loss": 2.0669, + "step": 13020500 + }, + { + "epoch": 37.69, + "learning_rate": 3.1161565915616894e-05, + "loss": 2.0642, + "step": 13021000 + }, + { + "epoch": 37.69, + "learning_rate": 3.116084226796962e-05, + "loss": 2.0572, + "step": 13021500 + }, + { + "epoch": 37.69, + "learning_rate": 3.1160118620322345e-05, + "loss": 2.046, + "step": 13022000 + }, + { + "epoch": 37.69, + "learning_rate": 3.115939497267507e-05, + "loss": 2.0786, + "step": 13022500 + }, + { + "epoch": 37.7, + "learning_rate": 3.115867132502779e-05, + "loss": 2.0744, + "step": 13023000 + }, + { + "epoch": 37.7, + "learning_rate": 3.1157949124675805e-05, + "loss": 2.0651, + "step": 13023500 + }, + { + "epoch": 37.7, + "learning_rate": 3.115722547702853e-05, + "loss": 2.0604, + "step": 13024000 + }, + { + "epoch": 37.7, + "learning_rate": 3.115650182938125e-05, + "loss": 2.068, + "step": 13024500 + }, + { + "epoch": 37.7, + "learning_rate": 3.115577818173397e-05, + "loss": 2.0445, + "step": 13025000 + }, + { + "epoch": 37.7, + "learning_rate": 3.11550545340867e-05, + "loss": 2.0803, + "step": 13025500 + }, + { + "epoch": 37.7, + "learning_rate": 3.115433088643942e-05, + "loss": 2.0443, + "step": 13026000 + }, + { + "epoch": 37.71, + "learning_rate": 3.1153607238792146e-05, + "loss": 2.0782, + "step": 13026500 + }, + { + "epoch": 37.71, + "learning_rate": 3.1152883591144875e-05, + "loss": 2.0559, + "step": 13027000 + }, + { + "epoch": 37.71, + "learning_rate": 3.11521599434976e-05, + "loss": 2.068, + "step": 13027500 + }, + { + "epoch": 37.71, + "learning_rate": 3.115143774314561e-05, + "loss": 2.0674, + "step": 13028000 + }, + { + "epoch": 37.71, + "learning_rate": 3.1150714095498335e-05, + "loss": 2.0528, + "step": 13028500 + }, + { + "epoch": 37.71, + "learning_rate": 3.114999044785106e-05, + "loss": 2.0493, + "step": 13029000 + }, + { + "epoch": 37.72, + "learning_rate": 3.114926680020378e-05, + "loss": 2.0774, + "step": 13029500 + }, + { + "epoch": 37.72, + "learning_rate": 3.11485431525565e-05, + "loss": 2.0528, + "step": 13030000 + }, + { + "epoch": 37.72, + "learning_rate": 3.1147820952204524e-05, + "loss": 2.0843, + "step": 13030500 + }, + { + "epoch": 37.72, + "learning_rate": 3.114710019914783e-05, + "loss": 2.074, + "step": 13031000 + }, + { + "epoch": 37.72, + "learning_rate": 3.1146376551500555e-05, + "loss": 2.0806, + "step": 13031500 + }, + { + "epoch": 37.72, + "learning_rate": 3.114565290385328e-05, + "loss": 2.0679, + "step": 13032000 + }, + { + "epoch": 37.72, + "learning_rate": 3.1144929256206e-05, + "loss": 2.0684, + "step": 13032500 + }, + { + "epoch": 37.73, + "learning_rate": 3.114420705585402e-05, + "loss": 2.0601, + "step": 13033000 + }, + { + "epoch": 37.73, + "learning_rate": 3.114348340820675e-05, + "loss": 2.0453, + "step": 13033500 + }, + { + "epoch": 37.73, + "learning_rate": 3.1142761207854766e-05, + "loss": 2.0596, + "step": 13034000 + }, + { + "epoch": 37.73, + "learning_rate": 3.114203756020749e-05, + "loss": 2.0577, + "step": 13034500 + }, + { + "epoch": 37.73, + "learning_rate": 3.1141315359855504e-05, + "loss": 2.0553, + "step": 13035000 + }, + { + "epoch": 37.73, + "learning_rate": 3.1140591712208226e-05, + "loss": 2.0602, + "step": 13035500 + }, + { + "epoch": 37.73, + "learning_rate": 3.113986806456095e-05, + "loss": 2.0892, + "step": 13036000 + }, + { + "epoch": 37.74, + "learning_rate": 3.113914441691367e-05, + "loss": 2.0753, + "step": 13036500 + }, + { + "epoch": 37.74, + "learning_rate": 3.11384207692664e-05, + "loss": 2.0805, + "step": 13037000 + }, + { + "epoch": 37.74, + "learning_rate": 3.113769712161912e-05, + "loss": 2.0836, + "step": 13037500 + }, + { + "epoch": 37.74, + "learning_rate": 3.1136973473971844e-05, + "loss": 2.0609, + "step": 13038000 + }, + { + "epoch": 37.74, + "learning_rate": 3.1136249826324566e-05, + "loss": 2.0647, + "step": 13038500 + }, + { + "epoch": 37.74, + "learning_rate": 3.113552617867729e-05, + "loss": 2.0672, + "step": 13039000 + }, + { + "epoch": 37.74, + "learning_rate": 3.113480253103001e-05, + "loss": 2.0502, + "step": 13039500 + }, + { + "epoch": 37.75, + "learning_rate": 3.113407888338273e-05, + "loss": 2.0691, + "step": 13040000 + }, + { + "epoch": 37.75, + "learning_rate": 3.1133355235735455e-05, + "loss": 2.0763, + "step": 13040500 + }, + { + "epoch": 37.75, + "learning_rate": 3.113263303538348e-05, + "loss": 2.0577, + "step": 13041000 + }, + { + "epoch": 37.75, + "learning_rate": 3.11319093877362e-05, + "loss": 2.0579, + "step": 13041500 + }, + { + "epoch": 37.75, + "learning_rate": 3.113118574008892e-05, + "loss": 2.064, + "step": 13042000 + }, + { + "epoch": 37.75, + "learning_rate": 3.113046209244165e-05, + "loss": 2.0682, + "step": 13042500 + }, + { + "epoch": 37.75, + "learning_rate": 3.112973844479437e-05, + "loss": 2.0798, + "step": 13043000 + }, + { + "epoch": 37.76, + "learning_rate": 3.1129014797147096e-05, + "loss": 2.0705, + "step": 13043500 + }, + { + "epoch": 37.76, + "learning_rate": 3.112829114949982e-05, + "loss": 2.0457, + "step": 13044000 + }, + { + "epoch": 37.76, + "learning_rate": 3.112756750185254e-05, + "loss": 2.05, + "step": 13044500 + }, + { + "epoch": 37.76, + "learning_rate": 3.112684385420526e-05, + "loss": 2.0612, + "step": 13045000 + }, + { + "epoch": 37.76, + "learning_rate": 3.112612165385328e-05, + "loss": 2.0359, + "step": 13045500 + }, + { + "epoch": 37.76, + "learning_rate": 3.1125398006206e-05, + "loss": 2.0482, + "step": 13046000 + }, + { + "epoch": 37.76, + "learning_rate": 3.112467435855872e-05, + "loss": 2.087, + "step": 13046500 + }, + { + "epoch": 37.77, + "learning_rate": 3.112395071091145e-05, + "loss": 2.0763, + "step": 13047000 + }, + { + "epoch": 37.77, + "learning_rate": 3.1123227063264174e-05, + "loss": 2.0689, + "step": 13047500 + }, + { + "epoch": 37.77, + "learning_rate": 3.11225034156169e-05, + "loss": 2.0445, + "step": 13048000 + }, + { + "epoch": 37.77, + "learning_rate": 3.1121779767969625e-05, + "loss": 2.0461, + "step": 13048500 + }, + { + "epoch": 37.77, + "learning_rate": 3.112105612032235e-05, + "loss": 2.0613, + "step": 13049000 + }, + { + "epoch": 37.77, + "learning_rate": 3.112033391997036e-05, + "loss": 2.0532, + "step": 13049500 + }, + { + "epoch": 37.77, + "learning_rate": 3.1119610272323085e-05, + "loss": 2.0528, + "step": 13050000 + }, + { + "epoch": 37.78, + "learning_rate": 3.111888662467581e-05, + "loss": 2.0489, + "step": 13050500 + }, + { + "epoch": 37.78, + "learning_rate": 3.111816442432382e-05, + "loss": 2.0627, + "step": 13051000 + }, + { + "epoch": 37.78, + "learning_rate": 3.111744077667655e-05, + "loss": 2.0741, + "step": 13051500 + }, + { + "epoch": 37.78, + "learning_rate": 3.1116717129029274e-05, + "loss": 2.0807, + "step": 13052000 + }, + { + "epoch": 37.78, + "learning_rate": 3.1115993481381996e-05, + "loss": 2.0447, + "step": 13052500 + }, + { + "epoch": 37.78, + "learning_rate": 3.111526983373472e-05, + "loss": 2.0637, + "step": 13053000 + }, + { + "epoch": 37.78, + "learning_rate": 3.111454618608744e-05, + "loss": 2.0359, + "step": 13053500 + }, + { + "epoch": 37.79, + "learning_rate": 3.111382253844016e-05, + "loss": 2.0793, + "step": 13054000 + }, + { + "epoch": 37.79, + "learning_rate": 3.1113098890792885e-05, + "loss": 2.0547, + "step": 13054500 + }, + { + "epoch": 37.79, + "learning_rate": 3.111237524314561e-05, + "loss": 2.0743, + "step": 13055000 + }, + { + "epoch": 37.79, + "learning_rate": 3.1111651595498336e-05, + "loss": 2.0598, + "step": 13055500 + }, + { + "epoch": 37.79, + "learning_rate": 3.111092794785106e-05, + "loss": 2.0667, + "step": 13056000 + }, + { + "epoch": 37.79, + "learning_rate": 3.111020430020378e-05, + "loss": 2.0702, + "step": 13056500 + }, + { + "epoch": 37.79, + "learning_rate": 3.11094820998518e-05, + "loss": 2.0662, + "step": 13057000 + }, + { + "epoch": 37.8, + "learning_rate": 3.1108758452204525e-05, + "loss": 2.0537, + "step": 13057500 + }, + { + "epoch": 37.8, + "learning_rate": 3.110803480455725e-05, + "loss": 2.0857, + "step": 13058000 + }, + { + "epoch": 37.8, + "learning_rate": 3.110731260420526e-05, + "loss": 2.0733, + "step": 13058500 + }, + { + "epoch": 37.8, + "learning_rate": 3.1106588956557985e-05, + "loss": 2.0607, + "step": 13059000 + }, + { + "epoch": 37.8, + "learning_rate": 3.110586530891071e-05, + "loss": 2.0756, + "step": 13059500 + }, + { + "epoch": 37.8, + "learning_rate": 3.110514166126343e-05, + "loss": 2.039, + "step": 13060000 + }, + { + "epoch": 37.8, + "learning_rate": 3.110441801361615e-05, + "loss": 2.0757, + "step": 13060500 + }, + { + "epoch": 37.81, + "learning_rate": 3.1103694365968874e-05, + "loss": 2.0881, + "step": 13061000 + }, + { + "epoch": 37.81, + "learning_rate": 3.11029707183216e-05, + "loss": 2.0675, + "step": 13061500 + }, + { + "epoch": 37.81, + "learning_rate": 3.1102247070674325e-05, + "loss": 2.0592, + "step": 13062000 + }, + { + "epoch": 37.81, + "learning_rate": 3.110152342302705e-05, + "loss": 2.0589, + "step": 13062500 + }, + { + "epoch": 37.81, + "learning_rate": 3.110080122267507e-05, + "loss": 2.0908, + "step": 13063000 + }, + { + "epoch": 37.81, + "learning_rate": 3.110007757502779e-05, + "loss": 2.0524, + "step": 13063500 + }, + { + "epoch": 37.81, + "learning_rate": 3.1099353927380514e-05, + "loss": 2.0789, + "step": 13064000 + }, + { + "epoch": 37.82, + "learning_rate": 3.109863027973324e-05, + "loss": 2.089, + "step": 13064500 + }, + { + "epoch": 37.82, + "learning_rate": 3.109790807938125e-05, + "loss": 2.0471, + "step": 13065000 + }, + { + "epoch": 37.82, + "learning_rate": 3.1097184431733975e-05, + "loss": 2.0857, + "step": 13065500 + }, + { + "epoch": 37.82, + "learning_rate": 3.1096460784086704e-05, + "loss": 2.0523, + "step": 13066000 + }, + { + "epoch": 37.82, + "learning_rate": 3.1095737136439426e-05, + "loss": 2.0631, + "step": 13066500 + }, + { + "epoch": 37.82, + "learning_rate": 3.109501348879215e-05, + "loss": 2.0495, + "step": 13067000 + }, + { + "epoch": 37.83, + "learning_rate": 3.109428984114487e-05, + "loss": 2.0886, + "step": 13067500 + }, + { + "epoch": 37.83, + "learning_rate": 3.109356619349759e-05, + "loss": 2.0564, + "step": 13068000 + }, + { + "epoch": 37.83, + "learning_rate": 3.1092842545850315e-05, + "loss": 2.0927, + "step": 13068500 + }, + { + "epoch": 37.83, + "learning_rate": 3.109211889820304e-05, + "loss": 2.0418, + "step": 13069000 + }, + { + "epoch": 37.83, + "learning_rate": 3.109139525055576e-05, + "loss": 2.0646, + "step": 13069500 + }, + { + "epoch": 37.83, + "learning_rate": 3.109067160290849e-05, + "loss": 2.0707, + "step": 13070000 + }, + { + "epoch": 37.83, + "learning_rate": 3.1089949402556504e-05, + "loss": 2.0763, + "step": 13070500 + }, + { + "epoch": 37.84, + "learning_rate": 3.1089227202204526e-05, + "loss": 2.0855, + "step": 13071000 + }, + { + "epoch": 37.84, + "learning_rate": 3.108850355455725e-05, + "loss": 2.0744, + "step": 13071500 + }, + { + "epoch": 37.84, + "learning_rate": 3.108777990690997e-05, + "loss": 2.0608, + "step": 13072000 + }, + { + "epoch": 37.84, + "learning_rate": 3.108705625926269e-05, + "loss": 2.0469, + "step": 13072500 + }, + { + "epoch": 37.84, + "learning_rate": 3.108633405891071e-05, + "loss": 2.0781, + "step": 13073000 + }, + { + "epoch": 37.84, + "learning_rate": 3.108561041126343e-05, + "loss": 2.0749, + "step": 13073500 + }, + { + "epoch": 37.84, + "learning_rate": 3.108488676361615e-05, + "loss": 2.0825, + "step": 13074000 + }, + { + "epoch": 37.85, + "learning_rate": 3.108416311596888e-05, + "loss": 2.0708, + "step": 13074500 + }, + { + "epoch": 37.85, + "learning_rate": 3.1083439468321604e-05, + "loss": 2.0712, + "step": 13075000 + }, + { + "epoch": 37.85, + "learning_rate": 3.108271726796962e-05, + "loss": 2.0474, + "step": 13075500 + }, + { + "epoch": 37.85, + "learning_rate": 3.108199362032234e-05, + "loss": 2.0654, + "step": 13076000 + }, + { + "epoch": 37.85, + "learning_rate": 3.1081269972675064e-05, + "loss": 2.0559, + "step": 13076500 + }, + { + "epoch": 37.85, + "learning_rate": 3.1080546325027786e-05, + "loss": 2.0528, + "step": 13077000 + }, + { + "epoch": 37.85, + "learning_rate": 3.107982267738051e-05, + "loss": 2.0515, + "step": 13077500 + }, + { + "epoch": 37.86, + "learning_rate": 3.107909902973324e-05, + "loss": 2.0816, + "step": 13078000 + }, + { + "epoch": 37.86, + "learning_rate": 3.107837538208596e-05, + "loss": 2.0847, + "step": 13078500 + }, + { + "epoch": 37.86, + "learning_rate": 3.107765173443868e-05, + "loss": 2.072, + "step": 13079000 + }, + { + "epoch": 37.86, + "learning_rate": 3.1076928086791404e-05, + "loss": 2.0768, + "step": 13079500 + }, + { + "epoch": 37.86, + "learning_rate": 3.107620733373472e-05, + "loss": 2.0708, + "step": 13080000 + }, + { + "epoch": 37.86, + "learning_rate": 3.107548368608744e-05, + "loss": 2.0646, + "step": 13080500 + }, + { + "epoch": 37.86, + "learning_rate": 3.1074760038440164e-05, + "loss": 2.0765, + "step": 13081000 + }, + { + "epoch": 37.87, + "learning_rate": 3.1074036390792887e-05, + "loss": 2.0481, + "step": 13081500 + }, + { + "epoch": 37.87, + "learning_rate": 3.107331274314561e-05, + "loss": 2.0584, + "step": 13082000 + }, + { + "epoch": 37.87, + "learning_rate": 3.107258909549833e-05, + "loss": 2.0704, + "step": 13082500 + }, + { + "epoch": 37.87, + "learning_rate": 3.107186544785105e-05, + "loss": 2.04, + "step": 13083000 + }, + { + "epoch": 37.87, + "learning_rate": 3.107114180020378e-05, + "loss": 2.0568, + "step": 13083500 + }, + { + "epoch": 37.87, + "learning_rate": 3.1070418152556505e-05, + "loss": 2.0814, + "step": 13084000 + }, + { + "epoch": 37.87, + "learning_rate": 3.106969450490923e-05, + "loss": 2.0688, + "step": 13084500 + }, + { + "epoch": 37.88, + "learning_rate": 3.1068970857261956e-05, + "loss": 2.0679, + "step": 13085000 + }, + { + "epoch": 37.88, + "learning_rate": 3.106824720961468e-05, + "loss": 2.0804, + "step": 13085500 + }, + { + "epoch": 37.88, + "learning_rate": 3.10675235619674e-05, + "loss": 2.074, + "step": 13086000 + }, + { + "epoch": 37.88, + "learning_rate": 3.106679991432012e-05, + "loss": 2.073, + "step": 13086500 + }, + { + "epoch": 37.88, + "learning_rate": 3.1066076266672845e-05, + "loss": 2.062, + "step": 13087000 + }, + { + "epoch": 37.88, + "learning_rate": 3.106535261902557e-05, + "loss": 2.0791, + "step": 13087500 + }, + { + "epoch": 37.88, + "learning_rate": 3.106462897137829e-05, + "loss": 2.0748, + "step": 13088000 + }, + { + "epoch": 37.89, + "learning_rate": 3.106390532373101e-05, + "loss": 2.068, + "step": 13088500 + }, + { + "epoch": 37.89, + "learning_rate": 3.1063183123379034e-05, + "loss": 2.0415, + "step": 13089000 + }, + { + "epoch": 37.89, + "learning_rate": 3.1062459475731756e-05, + "loss": 2.073, + "step": 13089500 + }, + { + "epoch": 37.89, + "learning_rate": 3.106173582808448e-05, + "loss": 2.0688, + "step": 13090000 + }, + { + "epoch": 37.89, + "learning_rate": 3.1061013627732494e-05, + "loss": 2.0263, + "step": 13090500 + }, + { + "epoch": 37.89, + "learning_rate": 3.1060289980085216e-05, + "loss": 2.0741, + "step": 13091000 + }, + { + "epoch": 37.89, + "learning_rate": 3.105956633243794e-05, + "loss": 2.0441, + "step": 13091500 + }, + { + "epoch": 37.9, + "learning_rate": 3.105884268479066e-05, + "loss": 2.0585, + "step": 13092000 + }, + { + "epoch": 37.9, + "learning_rate": 3.105811903714339e-05, + "loss": 2.0773, + "step": 13092500 + }, + { + "epoch": 37.9, + "learning_rate": 3.105739538949611e-05, + "loss": 2.1005, + "step": 13093000 + }, + { + "epoch": 37.9, + "learning_rate": 3.1056671741848834e-05, + "loss": 2.0548, + "step": 13093500 + }, + { + "epoch": 37.9, + "learning_rate": 3.1055948094201556e-05, + "loss": 2.0559, + "step": 13094000 + }, + { + "epoch": 37.9, + "learning_rate": 3.1055224446554285e-05, + "loss": 2.0522, + "step": 13094500 + }, + { + "epoch": 37.9, + "learning_rate": 3.10545022462023e-05, + "loss": 2.0576, + "step": 13095000 + }, + { + "epoch": 37.91, + "learning_rate": 3.105377859855502e-05, + "loss": 2.0774, + "step": 13095500 + }, + { + "epoch": 37.91, + "learning_rate": 3.105305639820304e-05, + "loss": 2.0454, + "step": 13096000 + }, + { + "epoch": 37.91, + "learning_rate": 3.105233275055576e-05, + "loss": 2.0919, + "step": 13096500 + }, + { + "epoch": 37.91, + "learning_rate": 3.105160910290848e-05, + "loss": 2.0861, + "step": 13097000 + }, + { + "epoch": 37.91, + "learning_rate": 3.1050885455261205e-05, + "loss": 2.0446, + "step": 13097500 + }, + { + "epoch": 37.91, + "learning_rate": 3.1050161807613934e-05, + "loss": 2.0686, + "step": 13098000 + }, + { + "epoch": 37.91, + "learning_rate": 3.104943960726195e-05, + "loss": 2.0686, + "step": 13098500 + }, + { + "epoch": 37.92, + "learning_rate": 3.104871595961467e-05, + "loss": 2.0757, + "step": 13099000 + }, + { + "epoch": 37.92, + "learning_rate": 3.104799375926269e-05, + "loss": 2.063, + "step": 13099500 + }, + { + "epoch": 37.92, + "learning_rate": 3.104727011161541e-05, + "loss": 2.0573, + "step": 13100000 + }, + { + "epoch": 37.92, + "learning_rate": 3.104654646396814e-05, + "loss": 2.0646, + "step": 13100500 + }, + { + "epoch": 37.92, + "learning_rate": 3.104582281632086e-05, + "loss": 2.0649, + "step": 13101000 + }, + { + "epoch": 37.92, + "learning_rate": 3.104509916867358e-05, + "loss": 2.0574, + "step": 13101500 + }, + { + "epoch": 37.92, + "learning_rate": 3.1044375521026306e-05, + "loss": 2.0667, + "step": 13102000 + }, + { + "epoch": 37.93, + "learning_rate": 3.104365332067433e-05, + "loss": 2.031, + "step": 13102500 + }, + { + "epoch": 37.93, + "learning_rate": 3.104292967302705e-05, + "loss": 2.0743, + "step": 13103000 + }, + { + "epoch": 37.93, + "learning_rate": 3.104220602537977e-05, + "loss": 2.0723, + "step": 13103500 + }, + { + "epoch": 37.93, + "learning_rate": 3.1041482377732495e-05, + "loss": 2.0787, + "step": 13104000 + }, + { + "epoch": 37.93, + "learning_rate": 3.104075873008522e-05, + "loss": 2.0749, + "step": 13104500 + }, + { + "epoch": 37.93, + "learning_rate": 3.104003508243794e-05, + "loss": 2.0696, + "step": 13105000 + }, + { + "epoch": 37.94, + "learning_rate": 3.103931143479066e-05, + "loss": 2.0562, + "step": 13105500 + }, + { + "epoch": 37.94, + "learning_rate": 3.1038587787143383e-05, + "loss": 2.0592, + "step": 13106000 + }, + { + "epoch": 37.94, + "learning_rate": 3.1037865586791406e-05, + "loss": 2.0658, + "step": 13106500 + }, + { + "epoch": 37.94, + "learning_rate": 3.103714193914413e-05, + "loss": 2.0603, + "step": 13107000 + }, + { + "epoch": 37.94, + "learning_rate": 3.103641829149686e-05, + "loss": 2.0739, + "step": 13107500 + }, + { + "epoch": 37.94, + "learning_rate": 3.103569464384958e-05, + "loss": 2.0598, + "step": 13108000 + }, + { + "epoch": 37.94, + "learning_rate": 3.10349709962023e-05, + "loss": 2.0548, + "step": 13108500 + }, + { + "epoch": 37.95, + "learning_rate": 3.1034247348555024e-05, + "loss": 2.0726, + "step": 13109000 + }, + { + "epoch": 37.95, + "learning_rate": 3.103352514820304e-05, + "loss": 2.052, + "step": 13109500 + }, + { + "epoch": 37.95, + "learning_rate": 3.103280150055576e-05, + "loss": 2.0629, + "step": 13110000 + }, + { + "epoch": 37.95, + "learning_rate": 3.1032077852908484e-05, + "loss": 2.0712, + "step": 13110500 + }, + { + "epoch": 37.95, + "learning_rate": 3.1031355652556506e-05, + "loss": 2.0754, + "step": 13111000 + }, + { + "epoch": 37.95, + "learning_rate": 3.103063200490923e-05, + "loss": 2.0396, + "step": 13111500 + }, + { + "epoch": 37.95, + "learning_rate": 3.1029909804557244e-05, + "loss": 2.0422, + "step": 13112000 + }, + { + "epoch": 37.96, + "learning_rate": 3.1029186156909966e-05, + "loss": 2.0361, + "step": 13112500 + }, + { + "epoch": 37.96, + "learning_rate": 3.102846250926269e-05, + "loss": 2.0856, + "step": 13113000 + }, + { + "epoch": 37.96, + "learning_rate": 3.102773886161541e-05, + "loss": 2.0545, + "step": 13113500 + }, + { + "epoch": 37.96, + "learning_rate": 3.102701521396813e-05, + "loss": 2.0788, + "step": 13114000 + }, + { + "epoch": 37.96, + "learning_rate": 3.102629156632086e-05, + "loss": 2.0677, + "step": 13114500 + }, + { + "epoch": 37.96, + "learning_rate": 3.102556936596888e-05, + "loss": 2.0607, + "step": 13115000 + }, + { + "epoch": 37.96, + "learning_rate": 3.1024845718321606e-05, + "loss": 2.0728, + "step": 13115500 + }, + { + "epoch": 37.97, + "learning_rate": 3.102412207067433e-05, + "loss": 2.0563, + "step": 13116000 + }, + { + "epoch": 37.97, + "learning_rate": 3.102339842302705e-05, + "loss": 2.0363, + "step": 13116500 + }, + { + "epoch": 37.97, + "learning_rate": 3.1022676222675066e-05, + "loss": 2.0633, + "step": 13117000 + }, + { + "epoch": 37.97, + "learning_rate": 3.102195257502779e-05, + "loss": 2.0755, + "step": 13117500 + }, + { + "epoch": 37.97, + "learning_rate": 3.102122892738051e-05, + "loss": 2.0742, + "step": 13118000 + }, + { + "epoch": 37.97, + "learning_rate": 3.102050527973323e-05, + "loss": 2.0685, + "step": 13118500 + }, + { + "epoch": 37.97, + "learning_rate": 3.101978163208596e-05, + "loss": 2.0674, + "step": 13119000 + }, + { + "epoch": 37.98, + "learning_rate": 3.1019057984438684e-05, + "loss": 2.0524, + "step": 13119500 + }, + { + "epoch": 37.98, + "learning_rate": 3.10183357840867e-05, + "loss": 2.0875, + "step": 13120000 + }, + { + "epoch": 37.98, + "learning_rate": 3.101761213643942e-05, + "loss": 2.0385, + "step": 13120500 + }, + { + "epoch": 37.98, + "learning_rate": 3.1016888488792144e-05, + "loss": 2.0654, + "step": 13121000 + }, + { + "epoch": 37.98, + "learning_rate": 3.101616484114487e-05, + "loss": 2.0788, + "step": 13121500 + }, + { + "epoch": 37.98, + "learning_rate": 3.101544119349759e-05, + "loss": 2.063, + "step": 13122000 + }, + { + "epoch": 37.98, + "learning_rate": 3.101471754585032e-05, + "loss": 2.0616, + "step": 13122500 + }, + { + "epoch": 37.99, + "learning_rate": 3.101399389820304e-05, + "loss": 2.0683, + "step": 13123000 + }, + { + "epoch": 37.99, + "learning_rate": 3.101327025055576e-05, + "loss": 2.056, + "step": 13123500 + }, + { + "epoch": 37.99, + "learning_rate": 3.1012546602908485e-05, + "loss": 2.0578, + "step": 13124000 + }, + { + "epoch": 37.99, + "learning_rate": 3.101182440255651e-05, + "loss": 2.0557, + "step": 13124500 + }, + { + "epoch": 37.99, + "learning_rate": 3.101110075490923e-05, + "loss": 2.0658, + "step": 13125000 + }, + { + "epoch": 37.99, + "learning_rate": 3.101037710726195e-05, + "loss": 2.0562, + "step": 13125500 + }, + { + "epoch": 37.99, + "learning_rate": 3.1009653459614674e-05, + "loss": 2.0643, + "step": 13126000 + }, + { + "epoch": 38.0, + "learning_rate": 3.1008929811967396e-05, + "loss": 2.0631, + "step": 13126500 + }, + { + "epoch": 38.0, + "learning_rate": 3.100820761161541e-05, + "loss": 2.0552, + "step": 13127000 + }, + { + "epoch": 38.0, + "learning_rate": 3.1007483963968134e-05, + "loss": 2.068, + "step": 13127500 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.6700079034069664, + "eval_accuracy_mlm": 0.6351199675846458, + "eval_accuracy_nsp": 0.857055450908141, + "eval_loss": 2.176142454147339, + "eval_runtime": 331.6481, + "eval_samples_per_second": 1315.81, + "eval_steps_per_second": 54.826, + "step": 13127936 + }, + { + "epoch": 38.0, + "learning_rate": 3.100676031632086e-05, + "loss": 2.0675, + "step": 13128000 + }, + { + "epoch": 38.0, + "learning_rate": 3.1006036668673585e-05, + "loss": 2.0444, + "step": 13128500 + }, + { + "epoch": 38.0, + "learning_rate": 3.100531302102631e-05, + "loss": 2.0409, + "step": 13129000 + }, + { + "epoch": 38.0, + "learning_rate": 3.100459082067432e-05, + "loss": 2.0686, + "step": 13129500 + }, + { + "epoch": 38.01, + "learning_rate": 3.1003867173027045e-05, + "loss": 2.0486, + "step": 13130000 + }, + { + "epoch": 38.01, + "learning_rate": 3.1003143525379774e-05, + "loss": 2.0344, + "step": 13130500 + }, + { + "epoch": 38.01, + "learning_rate": 3.1002419877732496e-05, + "loss": 2.0368, + "step": 13131000 + }, + { + "epoch": 38.01, + "learning_rate": 3.100169623008522e-05, + "loss": 2.0158, + "step": 13131500 + }, + { + "epoch": 38.01, + "learning_rate": 3.100097258243794e-05, + "loss": 2.0411, + "step": 13132000 + }, + { + "epoch": 38.01, + "learning_rate": 3.100025038208596e-05, + "loss": 2.0327, + "step": 13132500 + }, + { + "epoch": 38.01, + "learning_rate": 3.0999526734438685e-05, + "loss": 2.0615, + "step": 13133000 + }, + { + "epoch": 38.02, + "learning_rate": 3.099880308679141e-05, + "loss": 2.03, + "step": 13133500 + }, + { + "epoch": 38.02, + "learning_rate": 3.099807943914413e-05, + "loss": 2.0321, + "step": 13134000 + }, + { + "epoch": 38.02, + "learning_rate": 3.099735579149685e-05, + "loss": 2.0642, + "step": 13134500 + }, + { + "epoch": 38.02, + "learning_rate": 3.0996632143849574e-05, + "loss": 2.0562, + "step": 13135000 + }, + { + "epoch": 38.02, + "learning_rate": 3.0995908496202296e-05, + "loss": 2.0545, + "step": 13135500 + }, + { + "epoch": 38.02, + "learning_rate": 3.099518629585031e-05, + "loss": 2.038, + "step": 13136000 + }, + { + "epoch": 38.02, + "learning_rate": 3.099446264820304e-05, + "loss": 2.0516, + "step": 13136500 + }, + { + "epoch": 38.03, + "learning_rate": 3.099373900055576e-05, + "loss": 2.0788, + "step": 13137000 + }, + { + "epoch": 38.03, + "learning_rate": 3.099301535290849e-05, + "loss": 2.0427, + "step": 13137500 + }, + { + "epoch": 38.03, + "learning_rate": 3.099229315255651e-05, + "loss": 2.0485, + "step": 13138000 + }, + { + "epoch": 38.03, + "learning_rate": 3.099156950490923e-05, + "loss": 2.0627, + "step": 13138500 + }, + { + "epoch": 38.03, + "learning_rate": 3.099084585726195e-05, + "loss": 2.0255, + "step": 13139000 + }, + { + "epoch": 38.03, + "learning_rate": 3.0990122209614674e-05, + "loss": 2.0424, + "step": 13139500 + }, + { + "epoch": 38.03, + "learning_rate": 3.09893985619674e-05, + "loss": 2.039, + "step": 13140000 + }, + { + "epoch": 38.04, + "learning_rate": 3.098867491432012e-05, + "loss": 2.0338, + "step": 13140500 + }, + { + "epoch": 38.04, + "learning_rate": 3.098795126667284e-05, + "loss": 2.051, + "step": 13141000 + }, + { + "epoch": 38.04, + "learning_rate": 3.098722761902556e-05, + "loss": 2.0356, + "step": 13141500 + }, + { + "epoch": 38.04, + "learning_rate": 3.098650397137829e-05, + "loss": 2.0686, + "step": 13142000 + }, + { + "epoch": 38.04, + "learning_rate": 3.0985780323731015e-05, + "loss": 2.0545, + "step": 13142500 + }, + { + "epoch": 38.04, + "learning_rate": 3.098505667608374e-05, + "loss": 2.0543, + "step": 13143000 + }, + { + "epoch": 38.05, + "learning_rate": 3.098433302843646e-05, + "loss": 2.0651, + "step": 13143500 + }, + { + "epoch": 38.05, + "learning_rate": 3.098360938078918e-05, + "loss": 2.0674, + "step": 13144000 + }, + { + "epoch": 38.05, + "learning_rate": 3.0982885733141904e-05, + "loss": 2.0535, + "step": 13144500 + }, + { + "epoch": 38.05, + "learning_rate": 3.098216208549463e-05, + "loss": 2.0751, + "step": 13145000 + }, + { + "epoch": 38.05, + "learning_rate": 3.098143988514265e-05, + "loss": 2.0583, + "step": 13145500 + }, + { + "epoch": 38.05, + "learning_rate": 3.098071623749537e-05, + "loss": 2.0376, + "step": 13146000 + }, + { + "epoch": 38.05, + "learning_rate": 3.097999258984809e-05, + "loss": 2.0326, + "step": 13146500 + }, + { + "epoch": 38.06, + "learning_rate": 3.0979268942200815e-05, + "loss": 2.0381, + "step": 13147000 + }, + { + "epoch": 38.06, + "learning_rate": 3.097854529455354e-05, + "loss": 2.0569, + "step": 13147500 + }, + { + "epoch": 38.06, + "learning_rate": 3.0977821646906266e-05, + "loss": 2.0758, + "step": 13148000 + }, + { + "epoch": 38.06, + "learning_rate": 3.097709799925899e-05, + "loss": 2.0258, + "step": 13148500 + }, + { + "epoch": 38.06, + "learning_rate": 3.097637435161171e-05, + "loss": 2.076, + "step": 13149000 + }, + { + "epoch": 38.06, + "learning_rate": 3.097565070396443e-05, + "loss": 2.0588, + "step": 13149500 + }, + { + "epoch": 38.06, + "learning_rate": 3.0974927056317155e-05, + "loss": 2.0564, + "step": 13150000 + }, + { + "epoch": 38.07, + "learning_rate": 3.097420485596517e-05, + "loss": 2.0389, + "step": 13150500 + }, + { + "epoch": 38.07, + "learning_rate": 3.097348265561319e-05, + "loss": 2.0427, + "step": 13151000 + }, + { + "epoch": 38.07, + "learning_rate": 3.0972759007965915e-05, + "loss": 2.0358, + "step": 13151500 + }, + { + "epoch": 38.07, + "learning_rate": 3.097203536031864e-05, + "loss": 2.0283, + "step": 13152000 + }, + { + "epoch": 38.07, + "learning_rate": 3.097131315996666e-05, + "loss": 2.0518, + "step": 13152500 + }, + { + "epoch": 38.07, + "learning_rate": 3.097058951231938e-05, + "loss": 2.0432, + "step": 13153000 + }, + { + "epoch": 38.07, + "learning_rate": 3.0969865864672104e-05, + "loss": 2.0553, + "step": 13153500 + }, + { + "epoch": 38.08, + "learning_rate": 3.0969142217024826e-05, + "loss": 2.0477, + "step": 13154000 + }, + { + "epoch": 38.08, + "learning_rate": 3.096842001667284e-05, + "loss": 2.0603, + "step": 13154500 + }, + { + "epoch": 38.08, + "learning_rate": 3.0967696369025564e-05, + "loss": 2.0692, + "step": 13155000 + }, + { + "epoch": 38.08, + "learning_rate": 3.096697272137829e-05, + "loss": 2.0532, + "step": 13155500 + }, + { + "epoch": 38.08, + "learning_rate": 3.0966249073731015e-05, + "loss": 2.0524, + "step": 13156000 + }, + { + "epoch": 38.08, + "learning_rate": 3.096552542608374e-05, + "loss": 2.0634, + "step": 13156500 + }, + { + "epoch": 38.08, + "learning_rate": 3.096480177843646e-05, + "loss": 2.0622, + "step": 13157000 + }, + { + "epoch": 38.09, + "learning_rate": 3.0964079578084475e-05, + "loss": 2.0458, + "step": 13157500 + }, + { + "epoch": 38.09, + "learning_rate": 3.09633559304372e-05, + "loss": 2.0406, + "step": 13158000 + }, + { + "epoch": 38.09, + "learning_rate": 3.096263228278992e-05, + "loss": 2.0522, + "step": 13158500 + }, + { + "epoch": 38.09, + "learning_rate": 3.096190863514264e-05, + "loss": 2.0351, + "step": 13159000 + }, + { + "epoch": 38.09, + "learning_rate": 3.0961184987495364e-05, + "loss": 2.0601, + "step": 13159500 + }, + { + "epoch": 38.09, + "learning_rate": 3.096046133984809e-05, + "loss": 2.0637, + "step": 13160000 + }, + { + "epoch": 38.09, + "learning_rate": 3.0959737692200816e-05, + "loss": 2.0692, + "step": 13160500 + }, + { + "epoch": 38.1, + "learning_rate": 3.0959014044553545e-05, + "loss": 2.039, + "step": 13161000 + }, + { + "epoch": 38.1, + "learning_rate": 3.095829039690627e-05, + "loss": 2.041, + "step": 13161500 + }, + { + "epoch": 38.1, + "learning_rate": 3.095756674925899e-05, + "loss": 2.0246, + "step": 13162000 + }, + { + "epoch": 38.1, + "learning_rate": 3.095684310161171e-05, + "loss": 2.0709, + "step": 13162500 + }, + { + "epoch": 38.1, + "learning_rate": 3.095612090125973e-05, + "loss": 2.0565, + "step": 13163000 + }, + { + "epoch": 38.1, + "learning_rate": 3.095539725361245e-05, + "loss": 2.0677, + "step": 13163500 + }, + { + "epoch": 38.1, + "learning_rate": 3.095467360596517e-05, + "loss": 2.0613, + "step": 13164000 + }, + { + "epoch": 38.11, + "learning_rate": 3.0953949958317894e-05, + "loss": 2.0654, + "step": 13164500 + }, + { + "epoch": 38.11, + "learning_rate": 3.0953226310670616e-05, + "loss": 2.0533, + "step": 13165000 + }, + { + "epoch": 38.11, + "learning_rate": 3.0952502663023345e-05, + "loss": 2.0604, + "step": 13165500 + }, + { + "epoch": 38.11, + "learning_rate": 3.095177901537607e-05, + "loss": 2.0331, + "step": 13166000 + }, + { + "epoch": 38.11, + "learning_rate": 3.095105536772879e-05, + "loss": 2.0613, + "step": 13166500 + }, + { + "epoch": 38.11, + "learning_rate": 3.095033172008152e-05, + "loss": 2.0579, + "step": 13167000 + }, + { + "epoch": 38.11, + "learning_rate": 3.0949609519729534e-05, + "loss": 2.0455, + "step": 13167500 + }, + { + "epoch": 38.12, + "learning_rate": 3.0948885872082256e-05, + "loss": 2.0396, + "step": 13168000 + }, + { + "epoch": 38.12, + "learning_rate": 3.094816222443498e-05, + "loss": 2.071, + "step": 13168500 + }, + { + "epoch": 38.12, + "learning_rate": 3.09474385767877e-05, + "loss": 2.0295, + "step": 13169000 + }, + { + "epoch": 38.12, + "learning_rate": 3.094671492914042e-05, + "loss": 2.0484, + "step": 13169500 + }, + { + "epoch": 38.12, + "learning_rate": 3.0945991281493145e-05, + "loss": 2.0289, + "step": 13170000 + }, + { + "epoch": 38.12, + "learning_rate": 3.094526763384587e-05, + "loss": 2.0656, + "step": 13170500 + }, + { + "epoch": 38.12, + "learning_rate": 3.0944543986198596e-05, + "loss": 2.0672, + "step": 13171000 + }, + { + "epoch": 38.13, + "learning_rate": 3.094382178584661e-05, + "loss": 2.04, + "step": 13171500 + }, + { + "epoch": 38.13, + "learning_rate": 3.0943098138199334e-05, + "loss": 2.0497, + "step": 13172000 + }, + { + "epoch": 38.13, + "learning_rate": 3.0942374490552056e-05, + "loss": 2.0488, + "step": 13172500 + }, + { + "epoch": 38.13, + "learning_rate": 3.094165084290478e-05, + "loss": 2.0531, + "step": 13173000 + }, + { + "epoch": 38.13, + "learning_rate": 3.0940928642552794e-05, + "loss": 2.0341, + "step": 13173500 + }, + { + "epoch": 38.13, + "learning_rate": 3.0940206442200816e-05, + "loss": 2.0768, + "step": 13174000 + }, + { + "epoch": 38.13, + "learning_rate": 3.0939482794553545e-05, + "loss": 2.0569, + "step": 13174500 + }, + { + "epoch": 38.14, + "learning_rate": 3.093875914690627e-05, + "loss": 2.0517, + "step": 13175000 + }, + { + "epoch": 38.14, + "learning_rate": 3.093803549925899e-05, + "loss": 2.0544, + "step": 13175500 + }, + { + "epoch": 38.14, + "learning_rate": 3.093731185161171e-05, + "loss": 2.0465, + "step": 13176000 + }, + { + "epoch": 38.14, + "learning_rate": 3.0936588203964434e-05, + "loss": 2.0513, + "step": 13176500 + }, + { + "epoch": 38.14, + "learning_rate": 3.0935864556317157e-05, + "loss": 2.0594, + "step": 13177000 + }, + { + "epoch": 38.14, + "learning_rate": 3.093514090866988e-05, + "loss": 2.0627, + "step": 13177500 + }, + { + "epoch": 38.14, + "learning_rate": 3.09344172610226e-05, + "loss": 2.0459, + "step": 13178000 + }, + { + "epoch": 38.15, + "learning_rate": 3.093369361337532e-05, + "loss": 2.0642, + "step": 13178500 + }, + { + "epoch": 38.15, + "learning_rate": 3.0932969965728045e-05, + "loss": 2.05, + "step": 13179000 + }, + { + "epoch": 38.15, + "learning_rate": 3.093224631808077e-05, + "loss": 2.0168, + "step": 13179500 + }, + { + "epoch": 38.15, + "learning_rate": 3.09315226704335e-05, + "loss": 2.0754, + "step": 13180000 + }, + { + "epoch": 38.15, + "learning_rate": 3.093079902278622e-05, + "loss": 2.0535, + "step": 13180500 + }, + { + "epoch": 38.15, + "learning_rate": 3.0930076822434235e-05, + "loss": 2.0654, + "step": 13181000 + }, + { + "epoch": 38.16, + "learning_rate": 3.092935317478696e-05, + "loss": 2.0595, + "step": 13181500 + }, + { + "epoch": 38.16, + "learning_rate": 3.0928629527139686e-05, + "loss": 2.0687, + "step": 13182000 + }, + { + "epoch": 38.16, + "learning_rate": 3.092790587949241e-05, + "loss": 2.0496, + "step": 13182500 + }, + { + "epoch": 38.16, + "learning_rate": 3.0927183679140424e-05, + "loss": 2.0673, + "step": 13183000 + }, + { + "epoch": 38.16, + "learning_rate": 3.0926461478788446e-05, + "loss": 2.036, + "step": 13183500 + }, + { + "epoch": 38.16, + "learning_rate": 3.092573783114117e-05, + "loss": 2.0326, + "step": 13184000 + }, + { + "epoch": 38.16, + "learning_rate": 3.092501418349389e-05, + "loss": 2.0425, + "step": 13184500 + }, + { + "epoch": 38.17, + "learning_rate": 3.092429053584661e-05, + "loss": 2.0667, + "step": 13185000 + }, + { + "epoch": 38.17, + "learning_rate": 3.0923566888199335e-05, + "loss": 2.0627, + "step": 13185500 + }, + { + "epoch": 38.17, + "learning_rate": 3.092284324055206e-05, + "loss": 2.0512, + "step": 13186000 + }, + { + "epoch": 38.17, + "learning_rate": 3.092211959290478e-05, + "loss": 2.0288, + "step": 13186500 + }, + { + "epoch": 38.17, + "learning_rate": 3.09213959452575e-05, + "loss": 2.0557, + "step": 13187000 + }, + { + "epoch": 38.17, + "learning_rate": 3.0920672297610224e-05, + "loss": 2.0422, + "step": 13187500 + }, + { + "epoch": 38.17, + "learning_rate": 3.0919948649962946e-05, + "loss": 2.0551, + "step": 13188000 + }, + { + "epoch": 38.18, + "learning_rate": 3.091922500231567e-05, + "loss": 2.0567, + "step": 13188500 + }, + { + "epoch": 38.18, + "learning_rate": 3.091850280196369e-05, + "loss": 2.0354, + "step": 13189000 + }, + { + "epoch": 38.18, + "learning_rate": 3.091777915431642e-05, + "loss": 2.0652, + "step": 13189500 + }, + { + "epoch": 38.18, + "learning_rate": 3.0917056953964435e-05, + "loss": 2.0318, + "step": 13190000 + }, + { + "epoch": 38.18, + "learning_rate": 3.091633330631716e-05, + "loss": 2.0602, + "step": 13190500 + }, + { + "epoch": 38.18, + "learning_rate": 3.091560965866988e-05, + "loss": 2.063, + "step": 13191000 + }, + { + "epoch": 38.18, + "learning_rate": 3.09148860110226e-05, + "loss": 2.0532, + "step": 13191500 + }, + { + "epoch": 38.19, + "learning_rate": 3.0914162363375324e-05, + "loss": 2.0853, + "step": 13192000 + }, + { + "epoch": 38.19, + "learning_rate": 3.0913440163023346e-05, + "loss": 2.0645, + "step": 13192500 + }, + { + "epoch": 38.19, + "learning_rate": 3.091271796267136e-05, + "loss": 2.0321, + "step": 13193000 + }, + { + "epoch": 38.19, + "learning_rate": 3.0911994315024084e-05, + "loss": 2.0609, + "step": 13193500 + }, + { + "epoch": 38.19, + "learning_rate": 3.0911270667376806e-05, + "loss": 2.0393, + "step": 13194000 + }, + { + "epoch": 38.19, + "learning_rate": 3.091054701972953e-05, + "loss": 2.0503, + "step": 13194500 + }, + { + "epoch": 38.19, + "learning_rate": 3.090982337208225e-05, + "loss": 2.0394, + "step": 13195000 + }, + { + "epoch": 38.2, + "learning_rate": 3.090909972443497e-05, + "loss": 2.0653, + "step": 13195500 + }, + { + "epoch": 38.2, + "learning_rate": 3.0908377524082995e-05, + "loss": 2.0732, + "step": 13196000 + }, + { + "epoch": 38.2, + "learning_rate": 3.090765387643572e-05, + "loss": 2.0613, + "step": 13196500 + }, + { + "epoch": 38.2, + "learning_rate": 3.090693022878845e-05, + "loss": 2.0329, + "step": 13197000 + }, + { + "epoch": 38.2, + "learning_rate": 3.090620802843646e-05, + "loss": 2.0427, + "step": 13197500 + }, + { + "epoch": 38.2, + "learning_rate": 3.0905484380789184e-05, + "loss": 2.0611, + "step": 13198000 + }, + { + "epoch": 38.2, + "learning_rate": 3.090476073314191e-05, + "loss": 2.064, + "step": 13198500 + }, + { + "epoch": 38.21, + "learning_rate": 3.090403708549463e-05, + "loss": 2.032, + "step": 13199000 + }, + { + "epoch": 38.21, + "learning_rate": 3.090331343784735e-05, + "loss": 2.027, + "step": 13199500 + }, + { + "epoch": 38.21, + "learning_rate": 3.0902589790200073e-05, + "loss": 2.0515, + "step": 13200000 + }, + { + "epoch": 38.21, + "learning_rate": 3.0901866142552796e-05, + "loss": 2.0472, + "step": 13200500 + }, + { + "epoch": 38.21, + "learning_rate": 3.0901142494905525e-05, + "loss": 2.0527, + "step": 13201000 + }, + { + "epoch": 38.21, + "learning_rate": 3.090041884725825e-05, + "loss": 2.0758, + "step": 13201500 + }, + { + "epoch": 38.21, + "learning_rate": 3.0899698094201556e-05, + "loss": 2.0322, + "step": 13202000 + }, + { + "epoch": 38.22, + "learning_rate": 3.089897444655428e-05, + "loss": 2.085, + "step": 13202500 + }, + { + "epoch": 38.22, + "learning_rate": 3.0898250798907e-05, + "loss": 2.0562, + "step": 13203000 + }, + { + "epoch": 38.22, + "learning_rate": 3.089752715125972e-05, + "loss": 2.0361, + "step": 13203500 + }, + { + "epoch": 38.22, + "learning_rate": 3.089680350361245e-05, + "loss": 2.0541, + "step": 13204000 + }, + { + "epoch": 38.22, + "learning_rate": 3.0896079855965174e-05, + "loss": 2.0667, + "step": 13204500 + }, + { + "epoch": 38.22, + "learning_rate": 3.08953562083179e-05, + "loss": 2.0458, + "step": 13205000 + }, + { + "epoch": 38.22, + "learning_rate": 3.0894632560670625e-05, + "loss": 2.0516, + "step": 13205500 + }, + { + "epoch": 38.23, + "learning_rate": 3.089390891302335e-05, + "loss": 2.0569, + "step": 13206000 + }, + { + "epoch": 38.23, + "learning_rate": 3.089318526537607e-05, + "loss": 2.0643, + "step": 13206500 + }, + { + "epoch": 38.23, + "learning_rate": 3.089246161772879e-05, + "loss": 2.0314, + "step": 13207000 + }, + { + "epoch": 38.23, + "learning_rate": 3.0891737970081514e-05, + "loss": 2.0496, + "step": 13207500 + }, + { + "epoch": 38.23, + "learning_rate": 3.0891014322434236e-05, + "loss": 2.0572, + "step": 13208000 + }, + { + "epoch": 38.23, + "learning_rate": 3.089029067478696e-05, + "loss": 2.0461, + "step": 13208500 + }, + { + "epoch": 38.23, + "learning_rate": 3.088956702713968e-05, + "loss": 2.0688, + "step": 13209000 + }, + { + "epoch": 38.24, + "learning_rate": 3.08888433794924e-05, + "loss": 2.0458, + "step": 13209500 + }, + { + "epoch": 38.24, + "learning_rate": 3.0888119731845125e-05, + "loss": 2.0616, + "step": 13210000 + }, + { + "epoch": 38.24, + "learning_rate": 3.088739897878844e-05, + "loss": 2.0691, + "step": 13210500 + }, + { + "epoch": 38.24, + "learning_rate": 3.088667533114116e-05, + "loss": 2.0724, + "step": 13211000 + }, + { + "epoch": 38.24, + "learning_rate": 3.0885951683493885e-05, + "loss": 2.0821, + "step": 13211500 + }, + { + "epoch": 38.24, + "learning_rate": 3.08852294831419e-05, + "loss": 2.0733, + "step": 13212000 + }, + { + "epoch": 38.24, + "learning_rate": 3.088450583549463e-05, + "loss": 2.0852, + "step": 13212500 + }, + { + "epoch": 38.25, + "learning_rate": 3.088378218784735e-05, + "loss": 2.0623, + "step": 13213000 + }, + { + "epoch": 38.25, + "learning_rate": 3.0883059987495374e-05, + "loss": 2.062, + "step": 13213500 + }, + { + "epoch": 38.25, + "learning_rate": 3.0882336339848097e-05, + "loss": 2.0368, + "step": 13214000 + }, + { + "epoch": 38.25, + "learning_rate": 3.088161269220082e-05, + "loss": 2.0602, + "step": 13214500 + }, + { + "epoch": 38.25, + "learning_rate": 3.088088904455354e-05, + "loss": 2.0582, + "step": 13215000 + }, + { + "epoch": 38.25, + "learning_rate": 3.088016539690626e-05, + "loss": 2.0544, + "step": 13215500 + }, + { + "epoch": 38.25, + "learning_rate": 3.0879441749258985e-05, + "loss": 2.0493, + "step": 13216000 + }, + { + "epoch": 38.26, + "learning_rate": 3.087871810161171e-05, + "loss": 2.0494, + "step": 13216500 + }, + { + "epoch": 38.26, + "learning_rate": 3.087799445396443e-05, + "loss": 2.0367, + "step": 13217000 + }, + { + "epoch": 38.26, + "learning_rate": 3.087727080631715e-05, + "loss": 2.0437, + "step": 13217500 + }, + { + "epoch": 38.26, + "learning_rate": 3.0876547158669874e-05, + "loss": 2.0385, + "step": 13218000 + }, + { + "epoch": 38.26, + "learning_rate": 3.08758249583179e-05, + "loss": 2.0361, + "step": 13218500 + }, + { + "epoch": 38.26, + "learning_rate": 3.087510131067062e-05, + "loss": 2.0354, + "step": 13219000 + }, + { + "epoch": 38.27, + "learning_rate": 3.087437766302335e-05, + "loss": 2.0591, + "step": 13219500 + }, + { + "epoch": 38.27, + "learning_rate": 3.0873655462671364e-05, + "loss": 2.0355, + "step": 13220000 + }, + { + "epoch": 38.27, + "learning_rate": 3.0872931815024086e-05, + "loss": 2.0462, + "step": 13220500 + }, + { + "epoch": 38.27, + "learning_rate": 3.087220816737681e-05, + "loss": 2.0587, + "step": 13221000 + }, + { + "epoch": 38.27, + "learning_rate": 3.087148451972953e-05, + "loss": 2.0351, + "step": 13221500 + }, + { + "epoch": 38.27, + "learning_rate": 3.087076087208225e-05, + "loss": 2.0438, + "step": 13222000 + }, + { + "epoch": 38.27, + "learning_rate": 3.0870037224434975e-05, + "loss": 2.0852, + "step": 13222500 + }, + { + "epoch": 38.28, + "learning_rate": 3.0869313576787704e-05, + "loss": 2.064, + "step": 13223000 + }, + { + "epoch": 38.28, + "learning_rate": 3.0868589929140426e-05, + "loss": 2.0496, + "step": 13223500 + }, + { + "epoch": 38.28, + "learning_rate": 3.086786628149315e-05, + "loss": 2.063, + "step": 13224000 + }, + { + "epoch": 38.28, + "learning_rate": 3.086714263384587e-05, + "loss": 2.0544, + "step": 13224500 + }, + { + "epoch": 38.28, + "learning_rate": 3.0866420433493886e-05, + "loss": 2.0686, + "step": 13225000 + }, + { + "epoch": 38.28, + "learning_rate": 3.086569678584661e-05, + "loss": 2.0512, + "step": 13225500 + }, + { + "epoch": 38.28, + "learning_rate": 3.086497313819933e-05, + "loss": 2.0555, + "step": 13226000 + }, + { + "epoch": 38.29, + "learning_rate": 3.086424949055205e-05, + "loss": 2.0608, + "step": 13226500 + }, + { + "epoch": 38.29, + "learning_rate": 3.086352584290478e-05, + "loss": 2.0683, + "step": 13227000 + }, + { + "epoch": 38.29, + "learning_rate": 3.0862803642552804e-05, + "loss": 2.0672, + "step": 13227500 + }, + { + "epoch": 38.29, + "learning_rate": 3.0862079994905526e-05, + "loss": 2.0568, + "step": 13228000 + }, + { + "epoch": 38.29, + "learning_rate": 3.086135634725825e-05, + "loss": 2.0552, + "step": 13228500 + }, + { + "epoch": 38.29, + "learning_rate": 3.086063269961097e-05, + "loss": 2.0666, + "step": 13229000 + }, + { + "epoch": 38.29, + "learning_rate": 3.085990905196369e-05, + "loss": 2.0701, + "step": 13229500 + }, + { + "epoch": 38.3, + "learning_rate": 3.0859185404316415e-05, + "loss": 2.0415, + "step": 13230000 + }, + { + "epoch": 38.3, + "learning_rate": 3.085846175666914e-05, + "loss": 2.0781, + "step": 13230500 + }, + { + "epoch": 38.3, + "learning_rate": 3.085773810902186e-05, + "loss": 2.0327, + "step": 13231000 + }, + { + "epoch": 38.3, + "learning_rate": 3.085701446137458e-05, + "loss": 2.0478, + "step": 13231500 + }, + { + "epoch": 38.3, + "learning_rate": 3.0856292261022604e-05, + "loss": 2.0515, + "step": 13232000 + }, + { + "epoch": 38.3, + "learning_rate": 3.0855568613375326e-05, + "loss": 2.0453, + "step": 13232500 + }, + { + "epoch": 38.3, + "learning_rate": 3.085484496572805e-05, + "loss": 2.0562, + "step": 13233000 + }, + { + "epoch": 38.31, + "learning_rate": 3.085412131808077e-05, + "loss": 2.0693, + "step": 13233500 + }, + { + "epoch": 38.31, + "learning_rate": 3.085339767043349e-05, + "loss": 2.05, + "step": 13234000 + }, + { + "epoch": 38.31, + "learning_rate": 3.0852675470081515e-05, + "loss": 2.0636, + "step": 13234500 + }, + { + "epoch": 38.31, + "learning_rate": 3.085195182243424e-05, + "loss": 2.0556, + "step": 13235000 + }, + { + "epoch": 38.31, + "learning_rate": 3.085122817478696e-05, + "loss": 2.0486, + "step": 13235500 + }, + { + "epoch": 38.31, + "learning_rate": 3.085050597443498e-05, + "loss": 2.0879, + "step": 13236000 + }, + { + "epoch": 38.31, + "learning_rate": 3.0849782326787704e-05, + "loss": 2.0543, + "step": 13236500 + }, + { + "epoch": 38.32, + "learning_rate": 3.084905867914043e-05, + "loss": 2.0437, + "step": 13237000 + }, + { + "epoch": 38.32, + "learning_rate": 3.084833503149315e-05, + "loss": 2.0697, + "step": 13237500 + }, + { + "epoch": 38.32, + "learning_rate": 3.084761138384587e-05, + "loss": 2.0279, + "step": 13238000 + }, + { + "epoch": 38.32, + "learning_rate": 3.0846887736198593e-05, + "loss": 2.0518, + "step": 13238500 + }, + { + "epoch": 38.32, + "learning_rate": 3.0846164088551316e-05, + "loss": 2.0708, + "step": 13239000 + }, + { + "epoch": 38.32, + "learning_rate": 3.084544044090404e-05, + "loss": 2.0497, + "step": 13239500 + }, + { + "epoch": 38.32, + "learning_rate": 3.0844718240552053e-05, + "loss": 2.0397, + "step": 13240000 + }, + { + "epoch": 38.33, + "learning_rate": 3.084399459290478e-05, + "loss": 2.0493, + "step": 13240500 + }, + { + "epoch": 38.33, + "learning_rate": 3.0843270945257505e-05, + "loss": 2.0672, + "step": 13241000 + }, + { + "epoch": 38.33, + "learning_rate": 3.084254729761023e-05, + "loss": 2.0457, + "step": 13241500 + }, + { + "epoch": 38.33, + "learning_rate": 3.0841823649962956e-05, + "loss": 2.0482, + "step": 13242000 + }, + { + "epoch": 38.33, + "learning_rate": 3.084110000231568e-05, + "loss": 2.0619, + "step": 13242500 + }, + { + "epoch": 38.33, + "learning_rate": 3.08403763546684e-05, + "loss": 2.0684, + "step": 13243000 + }, + { + "epoch": 38.33, + "learning_rate": 3.083965270702112e-05, + "loss": 2.0272, + "step": 13243500 + }, + { + "epoch": 38.34, + "learning_rate": 3.0838929059373845e-05, + "loss": 2.0715, + "step": 13244000 + }, + { + "epoch": 38.34, + "learning_rate": 3.083820541172657e-05, + "loss": 2.0691, + "step": 13244500 + }, + { + "epoch": 38.34, + "learning_rate": 3.083748176407929e-05, + "loss": 2.0417, + "step": 13245000 + }, + { + "epoch": 38.34, + "learning_rate": 3.0836759563727305e-05, + "loss": 2.0654, + "step": 13245500 + }, + { + "epoch": 38.34, + "learning_rate": 3.083603591608003e-05, + "loss": 2.0686, + "step": 13246000 + }, + { + "epoch": 38.34, + "learning_rate": 3.083531371572805e-05, + "loss": 2.0601, + "step": 13246500 + }, + { + "epoch": 38.34, + "learning_rate": 3.083459006808077e-05, + "loss": 2.0734, + "step": 13247000 + }, + { + "epoch": 38.35, + "learning_rate": 3.0833866420433494e-05, + "loss": 2.0459, + "step": 13247500 + }, + { + "epoch": 38.35, + "learning_rate": 3.0833142772786216e-05, + "loss": 2.0708, + "step": 13248000 + }, + { + "epoch": 38.35, + "learning_rate": 3.083242057243423e-05, + "loss": 2.0681, + "step": 13248500 + }, + { + "epoch": 38.35, + "learning_rate": 3.0831696924786954e-05, + "loss": 2.0499, + "step": 13249000 + }, + { + "epoch": 38.35, + "learning_rate": 3.083097327713968e-05, + "loss": 2.035, + "step": 13249500 + }, + { + "epoch": 38.35, + "learning_rate": 3.0830249629492405e-05, + "loss": 2.0272, + "step": 13250000 + }, + { + "epoch": 38.35, + "learning_rate": 3.0829525981845134e-05, + "loss": 2.0333, + "step": 13250500 + }, + { + "epoch": 38.36, + "learning_rate": 3.0828802334197856e-05, + "loss": 2.0619, + "step": 13251000 + }, + { + "epoch": 38.36, + "learning_rate": 3.082807868655058e-05, + "loss": 2.0427, + "step": 13251500 + }, + { + "epoch": 38.36, + "learning_rate": 3.0827356486198594e-05, + "loss": 2.0532, + "step": 13252000 + }, + { + "epoch": 38.36, + "learning_rate": 3.0826632838551316e-05, + "loss": 2.0505, + "step": 13252500 + }, + { + "epoch": 38.36, + "learning_rate": 3.082590919090404e-05, + "loss": 2.0603, + "step": 13253000 + }, + { + "epoch": 38.36, + "learning_rate": 3.0825188437847354e-05, + "loss": 2.0683, + "step": 13253500 + }, + { + "epoch": 38.36, + "learning_rate": 3.0824464790200077e-05, + "loss": 2.0352, + "step": 13254000 + }, + { + "epoch": 38.37, + "learning_rate": 3.08237411425528e-05, + "loss": 2.049, + "step": 13254500 + }, + { + "epoch": 38.37, + "learning_rate": 3.082301749490552e-05, + "loss": 2.052, + "step": 13255000 + }, + { + "epoch": 38.37, + "learning_rate": 3.082229384725824e-05, + "loss": 2.0657, + "step": 13255500 + }, + { + "epoch": 38.37, + "learning_rate": 3.0821570199610966e-05, + "loss": 2.0643, + "step": 13256000 + }, + { + "epoch": 38.37, + "learning_rate": 3.082084655196369e-05, + "loss": 2.0702, + "step": 13256500 + }, + { + "epoch": 38.37, + "learning_rate": 3.082012290431642e-05, + "loss": 2.0555, + "step": 13257000 + }, + { + "epoch": 38.38, + "learning_rate": 3.081939925666914e-05, + "loss": 2.0724, + "step": 13257500 + }, + { + "epoch": 38.38, + "learning_rate": 3.081867560902186e-05, + "loss": 2.0567, + "step": 13258000 + }, + { + "epoch": 38.38, + "learning_rate": 3.0817951961374583e-05, + "loss": 2.0504, + "step": 13258500 + }, + { + "epoch": 38.38, + "learning_rate": 3.0817228313727306e-05, + "loss": 2.0539, + "step": 13259000 + }, + { + "epoch": 38.38, + "learning_rate": 3.0816504666080035e-05, + "loss": 2.0519, + "step": 13259500 + }, + { + "epoch": 38.38, + "learning_rate": 3.081578101843276e-05, + "loss": 2.0304, + "step": 13260000 + }, + { + "epoch": 38.38, + "learning_rate": 3.081505737078548e-05, + "loss": 2.0753, + "step": 13260500 + }, + { + "epoch": 38.39, + "learning_rate": 3.08143337231382e-05, + "loss": 2.0772, + "step": 13261000 + }, + { + "epoch": 38.39, + "learning_rate": 3.0813610075490924e-05, + "loss": 2.0915, + "step": 13261500 + }, + { + "epoch": 38.39, + "learning_rate": 3.0812886427843646e-05, + "loss": 2.0526, + "step": 13262000 + }, + { + "epoch": 38.39, + "learning_rate": 3.081216278019637e-05, + "loss": 2.0552, + "step": 13262500 + }, + { + "epoch": 38.39, + "learning_rate": 3.081143913254909e-05, + "loss": 2.0381, + "step": 13263000 + }, + { + "epoch": 38.39, + "learning_rate": 3.081071548490181e-05, + "loss": 2.0515, + "step": 13263500 + }, + { + "epoch": 38.39, + "learning_rate": 3.080999183725454e-05, + "loss": 2.0301, + "step": 13264000 + }, + { + "epoch": 38.4, + "learning_rate": 3.080926963690256e-05, + "loss": 2.0425, + "step": 13264500 + }, + { + "epoch": 38.4, + "learning_rate": 3.0808545989255286e-05, + "loss": 2.0313, + "step": 13265000 + }, + { + "epoch": 38.4, + "learning_rate": 3.080782234160801e-05, + "loss": 2.0483, + "step": 13265500 + }, + { + "epoch": 38.4, + "learning_rate": 3.080709869396073e-05, + "loss": 2.0788, + "step": 13266000 + }, + { + "epoch": 38.4, + "learning_rate": 3.080637504631345e-05, + "loss": 2.0464, + "step": 13266500 + }, + { + "epoch": 38.4, + "learning_rate": 3.0805651398666175e-05, + "loss": 2.0619, + "step": 13267000 + }, + { + "epoch": 38.4, + "learning_rate": 3.08049277510189e-05, + "loss": 2.0564, + "step": 13267500 + }, + { + "epoch": 38.41, + "learning_rate": 3.080420410337162e-05, + "loss": 2.0287, + "step": 13268000 + }, + { + "epoch": 38.41, + "learning_rate": 3.080348045572434e-05, + "loss": 2.0649, + "step": 13268500 + }, + { + "epoch": 38.41, + "learning_rate": 3.080275970266766e-05, + "loss": 2.0546, + "step": 13269000 + }, + { + "epoch": 38.41, + "learning_rate": 3.080203605502038e-05, + "loss": 2.0475, + "step": 13269500 + }, + { + "epoch": 38.41, + "learning_rate": 3.08013124073731e-05, + "loss": 2.0733, + "step": 13270000 + }, + { + "epoch": 38.41, + "learning_rate": 3.0800588759725824e-05, + "loss": 2.0732, + "step": 13270500 + }, + { + "epoch": 38.41, + "learning_rate": 3.0799865112078546e-05, + "loss": 2.0585, + "step": 13271000 + }, + { + "epoch": 38.42, + "learning_rate": 3.079914435902186e-05, + "loss": 2.0621, + "step": 13271500 + }, + { + "epoch": 38.42, + "learning_rate": 3.0798420711374584e-05, + "loss": 2.0497, + "step": 13272000 + }, + { + "epoch": 38.42, + "learning_rate": 3.079769706372731e-05, + "loss": 2.0607, + "step": 13272500 + }, + { + "epoch": 38.42, + "learning_rate": 3.0796973416080035e-05, + "loss": 2.0391, + "step": 13273000 + }, + { + "epoch": 38.42, + "learning_rate": 3.079624976843276e-05, + "loss": 2.08, + "step": 13273500 + }, + { + "epoch": 38.42, + "learning_rate": 3.079552612078548e-05, + "loss": 2.0456, + "step": 13274000 + }, + { + "epoch": 38.42, + "learning_rate": 3.07948024731382e-05, + "loss": 2.0453, + "step": 13274500 + }, + { + "epoch": 38.43, + "learning_rate": 3.0794078825490924e-05, + "loss": 2.0495, + "step": 13275000 + }, + { + "epoch": 38.43, + "learning_rate": 3.079335662513894e-05, + "loss": 2.0519, + "step": 13275500 + }, + { + "epoch": 38.43, + "learning_rate": 3.079263297749166e-05, + "loss": 2.0603, + "step": 13276000 + }, + { + "epoch": 38.43, + "learning_rate": 3.0791910777139685e-05, + "loss": 2.0596, + "step": 13276500 + }, + { + "epoch": 38.43, + "learning_rate": 3.079118712949241e-05, + "loss": 2.0757, + "step": 13277000 + }, + { + "epoch": 38.43, + "learning_rate": 3.079046348184513e-05, + "loss": 2.0742, + "step": 13277500 + }, + { + "epoch": 38.43, + "learning_rate": 3.078973983419785e-05, + "loss": 2.0715, + "step": 13278000 + }, + { + "epoch": 38.44, + "learning_rate": 3.0789016186550573e-05, + "loss": 2.0789, + "step": 13278500 + }, + { + "epoch": 38.44, + "learning_rate": 3.07882925389033e-05, + "loss": 2.0439, + "step": 13279000 + }, + { + "epoch": 38.44, + "learning_rate": 3.0787568891256025e-05, + "loss": 2.0533, + "step": 13279500 + }, + { + "epoch": 38.44, + "learning_rate": 3.078684524360875e-05, + "loss": 2.0731, + "step": 13280000 + }, + { + "epoch": 38.44, + "learning_rate": 3.078612159596147e-05, + "loss": 2.0751, + "step": 13280500 + }, + { + "epoch": 38.44, + "learning_rate": 3.078539794831419e-05, + "loss": 2.0428, + "step": 13281000 + }, + { + "epoch": 38.44, + "learning_rate": 3.0784674300666914e-05, + "loss": 2.0797, + "step": 13281500 + }, + { + "epoch": 38.45, + "learning_rate": 3.0783950653019636e-05, + "loss": 2.08, + "step": 13282000 + }, + { + "epoch": 38.45, + "learning_rate": 3.078322845266766e-05, + "loss": 2.0331, + "step": 13282500 + }, + { + "epoch": 38.45, + "learning_rate": 3.078250480502038e-05, + "loss": 2.0424, + "step": 13283000 + }, + { + "epoch": 38.45, + "learning_rate": 3.07817811573731e-05, + "loss": 2.0764, + "step": 13283500 + }, + { + "epoch": 38.45, + "learning_rate": 3.078105895702112e-05, + "loss": 2.064, + "step": 13284000 + }, + { + "epoch": 38.45, + "learning_rate": 3.078033530937384e-05, + "loss": 2.069, + "step": 13284500 + }, + { + "epoch": 38.45, + "learning_rate": 3.077961166172656e-05, + "loss": 2.0596, + "step": 13285000 + }, + { + "epoch": 38.46, + "learning_rate": 3.0778888014079285e-05, + "loss": 2.0456, + "step": 13285500 + }, + { + "epoch": 38.46, + "learning_rate": 3.0778164366432014e-05, + "loss": 2.0444, + "step": 13286000 + }, + { + "epoch": 38.46, + "learning_rate": 3.0777440718784736e-05, + "loss": 2.0559, + "step": 13286500 + }, + { + "epoch": 38.46, + "learning_rate": 3.0776717071137465e-05, + "loss": 2.0447, + "step": 13287000 + }, + { + "epoch": 38.46, + "learning_rate": 3.077599342349019e-05, + "loss": 2.0494, + "step": 13287500 + }, + { + "epoch": 38.46, + "learning_rate": 3.0775272670433496e-05, + "loss": 2.0521, + "step": 13288000 + }, + { + "epoch": 38.46, + "learning_rate": 3.077454902278622e-05, + "loss": 2.0433, + "step": 13288500 + }, + { + "epoch": 38.47, + "learning_rate": 3.077382537513894e-05, + "loss": 2.0397, + "step": 13289000 + }, + { + "epoch": 38.47, + "learning_rate": 3.077310172749166e-05, + "loss": 2.0406, + "step": 13289500 + }, + { + "epoch": 38.47, + "learning_rate": 3.0772378079844385e-05, + "loss": 2.0451, + "step": 13290000 + }, + { + "epoch": 38.47, + "learning_rate": 3.077165587949241e-05, + "loss": 2.0591, + "step": 13290500 + }, + { + "epoch": 38.47, + "learning_rate": 3.077093223184513e-05, + "loss": 2.049, + "step": 13291000 + }, + { + "epoch": 38.47, + "learning_rate": 3.077020858419785e-05, + "loss": 2.0776, + "step": 13291500 + }, + { + "epoch": 38.47, + "learning_rate": 3.0769484936550574e-05, + "loss": 2.0652, + "step": 13292000 + }, + { + "epoch": 38.48, + "learning_rate": 3.0768761288903297e-05, + "loss": 2.0747, + "step": 13292500 + }, + { + "epoch": 38.48, + "learning_rate": 3.076803764125602e-05, + "loss": 2.056, + "step": 13293000 + }, + { + "epoch": 38.48, + "learning_rate": 3.076731399360874e-05, + "loss": 2.0539, + "step": 13293500 + }, + { + "epoch": 38.48, + "learning_rate": 3.076659034596147e-05, + "loss": 2.05, + "step": 13294000 + }, + { + "epoch": 38.48, + "learning_rate": 3.0765868145609486e-05, + "loss": 2.0765, + "step": 13294500 + }, + { + "epoch": 38.48, + "learning_rate": 3.0765144497962215e-05, + "loss": 2.0531, + "step": 13295000 + }, + { + "epoch": 38.49, + "learning_rate": 3.076442085031494e-05, + "loss": 2.0293, + "step": 13295500 + }, + { + "epoch": 38.49, + "learning_rate": 3.076369720266766e-05, + "loss": 2.0571, + "step": 13296000 + }, + { + "epoch": 38.49, + "learning_rate": 3.076297355502038e-05, + "loss": 2.0532, + "step": 13296500 + }, + { + "epoch": 38.49, + "learning_rate": 3.0762249907373103e-05, + "loss": 2.0564, + "step": 13297000 + }, + { + "epoch": 38.49, + "learning_rate": 3.076152770702112e-05, + "loss": 2.0338, + "step": 13297500 + }, + { + "epoch": 38.49, + "learning_rate": 3.076080405937384e-05, + "loss": 2.0317, + "step": 13298000 + }, + { + "epoch": 38.49, + "learning_rate": 3.0760080411726564e-05, + "loss": 2.052, + "step": 13298500 + }, + { + "epoch": 38.5, + "learning_rate": 3.0759356764079286e-05, + "loss": 2.0607, + "step": 13299000 + }, + { + "epoch": 38.5, + "learning_rate": 3.0758633116432015e-05, + "loss": 2.0648, + "step": 13299500 + }, + { + "epoch": 38.5, + "learning_rate": 3.075790946878474e-05, + "loss": 2.0747, + "step": 13300000 + }, + { + "epoch": 38.5, + "learning_rate": 3.075718726843275e-05, + "loss": 2.0699, + "step": 13300500 + }, + { + "epoch": 38.5, + "learning_rate": 3.0756463620785475e-05, + "loss": 2.0846, + "step": 13301000 + }, + { + "epoch": 38.5, + "learning_rate": 3.0755739973138204e-05, + "loss": 2.059, + "step": 13301500 + }, + { + "epoch": 38.5, + "learning_rate": 3.0755016325490926e-05, + "loss": 2.09, + "step": 13302000 + }, + { + "epoch": 38.51, + "learning_rate": 3.075429267784365e-05, + "loss": 2.0614, + "step": 13302500 + }, + { + "epoch": 38.51, + "learning_rate": 3.075356903019637e-05, + "loss": 2.0398, + "step": 13303000 + }, + { + "epoch": 38.51, + "learning_rate": 3.075284538254909e-05, + "loss": 2.0585, + "step": 13303500 + }, + { + "epoch": 38.51, + "learning_rate": 3.0752121734901815e-05, + "loss": 2.0643, + "step": 13304000 + }, + { + "epoch": 38.51, + "learning_rate": 3.075139808725454e-05, + "loss": 2.0609, + "step": 13304500 + }, + { + "epoch": 38.51, + "learning_rate": 3.0750674439607266e-05, + "loss": 2.0429, + "step": 13305000 + }, + { + "epoch": 38.51, + "learning_rate": 3.074995079195999e-05, + "loss": 2.0607, + "step": 13305500 + }, + { + "epoch": 38.52, + "learning_rate": 3.0749228591608004e-05, + "loss": 2.0784, + "step": 13306000 + }, + { + "epoch": 38.52, + "learning_rate": 3.0748504943960726e-05, + "loss": 2.0561, + "step": 13306500 + }, + { + "epoch": 38.52, + "learning_rate": 3.074778129631345e-05, + "loss": 2.0799, + "step": 13307000 + }, + { + "epoch": 38.52, + "learning_rate": 3.074705764866617e-05, + "loss": 2.0671, + "step": 13307500 + }, + { + "epoch": 38.52, + "learning_rate": 3.074633400101889e-05, + "loss": 2.06, + "step": 13308000 + }, + { + "epoch": 38.52, + "learning_rate": 3.074561035337162e-05, + "loss": 2.0535, + "step": 13308500 + }, + { + "epoch": 38.52, + "learning_rate": 3.074488815301964e-05, + "loss": 2.0417, + "step": 13309000 + }, + { + "epoch": 38.53, + "learning_rate": 3.0744164505372367e-05, + "loss": 2.0841, + "step": 13309500 + }, + { + "epoch": 38.53, + "learning_rate": 3.074344085772509e-05, + "loss": 2.0737, + "step": 13310000 + }, + { + "epoch": 38.53, + "learning_rate": 3.074271721007781e-05, + "loss": 2.0857, + "step": 13310500 + }, + { + "epoch": 38.53, + "learning_rate": 3.074199356243053e-05, + "loss": 2.0534, + "step": 13311000 + }, + { + "epoch": 38.53, + "learning_rate": 3.074127136207855e-05, + "loss": 2.0517, + "step": 13311500 + }, + { + "epoch": 38.53, + "learning_rate": 3.074054771443127e-05, + "loss": 2.0632, + "step": 13312000 + }, + { + "epoch": 38.53, + "learning_rate": 3.073982406678399e-05, + "loss": 2.0564, + "step": 13312500 + }, + { + "epoch": 38.54, + "learning_rate": 3.0739100419136715e-05, + "loss": 2.0527, + "step": 13313000 + }, + { + "epoch": 38.54, + "learning_rate": 3.073837677148944e-05, + "loss": 2.0491, + "step": 13313500 + }, + { + "epoch": 38.54, + "learning_rate": 3.073765312384217e-05, + "loss": 2.0512, + "step": 13314000 + }, + { + "epoch": 38.54, + "learning_rate": 3.073692947619489e-05, + "loss": 2.044, + "step": 13314500 + }, + { + "epoch": 38.54, + "learning_rate": 3.0736207275842904e-05, + "loss": 2.0395, + "step": 13315000 + }, + { + "epoch": 38.54, + "learning_rate": 3.073548507549092e-05, + "loss": 2.0561, + "step": 13315500 + }, + { + "epoch": 38.54, + "learning_rate": 3.073476287513894e-05, + "loss": 2.0429, + "step": 13316000 + }, + { + "epoch": 38.55, + "learning_rate": 3.0734039227491665e-05, + "loss": 2.0684, + "step": 13316500 + }, + { + "epoch": 38.55, + "learning_rate": 3.0733315579844394e-05, + "loss": 2.0406, + "step": 13317000 + }, + { + "epoch": 38.55, + "learning_rate": 3.0732591932197116e-05, + "loss": 2.058, + "step": 13317500 + }, + { + "epoch": 38.55, + "learning_rate": 3.073186828454984e-05, + "loss": 2.056, + "step": 13318000 + }, + { + "epoch": 38.55, + "learning_rate": 3.073114463690256e-05, + "loss": 2.059, + "step": 13318500 + }, + { + "epoch": 38.55, + "learning_rate": 3.0730422436550576e-05, + "loss": 2.0307, + "step": 13319000 + }, + { + "epoch": 38.55, + "learning_rate": 3.07296987889033e-05, + "loss": 2.0742, + "step": 13319500 + }, + { + "epoch": 38.56, + "learning_rate": 3.072897514125602e-05, + "loss": 2.0551, + "step": 13320000 + }, + { + "epoch": 38.56, + "learning_rate": 3.072825149360874e-05, + "loss": 2.0496, + "step": 13320500 + }, + { + "epoch": 38.56, + "learning_rate": 3.0727527845961465e-05, + "loss": 2.0521, + "step": 13321000 + }, + { + "epoch": 38.56, + "learning_rate": 3.0726804198314194e-05, + "loss": 2.0481, + "step": 13321500 + }, + { + "epoch": 38.56, + "learning_rate": 3.0726080550666916e-05, + "loss": 2.0861, + "step": 13322000 + }, + { + "epoch": 38.56, + "learning_rate": 3.072535835031493e-05, + "loss": 2.0653, + "step": 13322500 + }, + { + "epoch": 38.56, + "learning_rate": 3.0724634702667654e-05, + "loss": 2.0816, + "step": 13323000 + }, + { + "epoch": 38.57, + "learning_rate": 3.0723911055020376e-05, + "loss": 2.0799, + "step": 13323500 + }, + { + "epoch": 38.57, + "learning_rate": 3.0723187407373105e-05, + "loss": 2.0555, + "step": 13324000 + }, + { + "epoch": 38.57, + "learning_rate": 3.072246520702112e-05, + "loss": 2.0456, + "step": 13324500 + }, + { + "epoch": 38.57, + "learning_rate": 3.072174155937384e-05, + "loss": 2.052, + "step": 13325000 + }, + { + "epoch": 38.57, + "learning_rate": 3.0721017911726565e-05, + "loss": 2.0454, + "step": 13325500 + }, + { + "epoch": 38.57, + "learning_rate": 3.0720294264079294e-05, + "loss": 2.068, + "step": 13326000 + }, + { + "epoch": 38.57, + "learning_rate": 3.0719570616432016e-05, + "loss": 2.0844, + "step": 13326500 + }, + { + "epoch": 38.58, + "learning_rate": 3.071884696878474e-05, + "loss": 2.075, + "step": 13327000 + }, + { + "epoch": 38.58, + "learning_rate": 3.071812332113746e-05, + "loss": 2.0452, + "step": 13327500 + }, + { + "epoch": 38.58, + "learning_rate": 3.071739967349018e-05, + "loss": 2.042, + "step": 13328000 + }, + { + "epoch": 38.58, + "learning_rate": 3.0716676025842905e-05, + "loss": 2.0775, + "step": 13328500 + }, + { + "epoch": 38.58, + "learning_rate": 3.071595382549092e-05, + "loss": 2.0476, + "step": 13329000 + }, + { + "epoch": 38.58, + "learning_rate": 3.071523017784364e-05, + "loss": 2.0604, + "step": 13329500 + }, + { + "epoch": 38.58, + "learning_rate": 3.0714506530196365e-05, + "loss": 2.0683, + "step": 13330000 + }, + { + "epoch": 38.59, + "learning_rate": 3.0713782882549094e-05, + "loss": 2.0844, + "step": 13330500 + }, + { + "epoch": 38.59, + "learning_rate": 3.0713059234901817e-05, + "loss": 2.0661, + "step": 13331000 + }, + { + "epoch": 38.59, + "learning_rate": 3.0712335587254546e-05, + "loss": 2.0787, + "step": 13331500 + }, + { + "epoch": 38.59, + "learning_rate": 3.071161338690256e-05, + "loss": 2.0716, + "step": 13332000 + }, + { + "epoch": 38.59, + "learning_rate": 3.071089118655058e-05, + "loss": 2.0437, + "step": 13332500 + }, + { + "epoch": 38.59, + "learning_rate": 3.07101675389033e-05, + "loss": 2.0636, + "step": 13333000 + }, + { + "epoch": 38.6, + "learning_rate": 3.070944389125602e-05, + "loss": 2.0619, + "step": 13333500 + }, + { + "epoch": 38.6, + "learning_rate": 3.070872024360874e-05, + "loss": 2.0577, + "step": 13334000 + }, + { + "epoch": 38.6, + "learning_rate": 3.070799659596147e-05, + "loss": 2.0594, + "step": 13334500 + }, + { + "epoch": 38.6, + "learning_rate": 3.0707272948314195e-05, + "loss": 2.0463, + "step": 13335000 + }, + { + "epoch": 38.6, + "learning_rate": 3.070654930066692e-05, + "loss": 2.0843, + "step": 13335500 + }, + { + "epoch": 38.6, + "learning_rate": 3.070582565301964e-05, + "loss": 2.0713, + "step": 13336000 + }, + { + "epoch": 38.6, + "learning_rate": 3.070510200537236e-05, + "loss": 2.0505, + "step": 13336500 + }, + { + "epoch": 38.61, + "learning_rate": 3.0704378357725084e-05, + "loss": 2.0519, + "step": 13337000 + }, + { + "epoch": 38.61, + "learning_rate": 3.0703654710077806e-05, + "loss": 2.0589, + "step": 13337500 + }, + { + "epoch": 38.61, + "learning_rate": 3.070293106243053e-05, + "loss": 2.0445, + "step": 13338000 + }, + { + "epoch": 38.61, + "learning_rate": 3.070220741478326e-05, + "loss": 2.0442, + "step": 13338500 + }, + { + "epoch": 38.61, + "learning_rate": 3.070148376713598e-05, + "loss": 2.0602, + "step": 13339000 + }, + { + "epoch": 38.61, + "learning_rate": 3.07007601194887e-05, + "loss": 2.068, + "step": 13339500 + }, + { + "epoch": 38.61, + "learning_rate": 3.0700036471841424e-05, + "loss": 2.0673, + "step": 13340000 + }, + { + "epoch": 38.62, + "learning_rate": 3.0699314271489446e-05, + "loss": 2.0824, + "step": 13340500 + }, + { + "epoch": 38.62, + "learning_rate": 3.069859062384217e-05, + "loss": 2.0991, + "step": 13341000 + }, + { + "epoch": 38.62, + "learning_rate": 3.069786697619489e-05, + "loss": 2.0638, + "step": 13341500 + }, + { + "epoch": 38.62, + "learning_rate": 3.069714332854761e-05, + "loss": 2.0481, + "step": 13342000 + }, + { + "epoch": 38.62, + "learning_rate": 3.0696419680900335e-05, + "loss": 2.0697, + "step": 13342500 + }, + { + "epoch": 38.62, + "learning_rate": 3.069569603325306e-05, + "loss": 2.0409, + "step": 13343000 + }, + { + "epoch": 38.62, + "learning_rate": 3.069497238560578e-05, + "loss": 2.0568, + "step": 13343500 + }, + { + "epoch": 38.63, + "learning_rate": 3.06942487379585e-05, + "loss": 2.0711, + "step": 13344000 + }, + { + "epoch": 38.63, + "learning_rate": 3.0693525090311224e-05, + "loss": 2.0633, + "step": 13344500 + }, + { + "epoch": 38.63, + "learning_rate": 3.0692801442663946e-05, + "loss": 2.0586, + "step": 13345000 + }, + { + "epoch": 38.63, + "learning_rate": 3.069207779501667e-05, + "loss": 2.0658, + "step": 13345500 + }, + { + "epoch": 38.63, + "learning_rate": 3.06913555946647e-05, + "loss": 2.0636, + "step": 13346000 + }, + { + "epoch": 38.63, + "learning_rate": 3.069063194701742e-05, + "loss": 2.0782, + "step": 13346500 + }, + { + "epoch": 38.63, + "learning_rate": 3.068990829937014e-05, + "loss": 2.0658, + "step": 13347000 + }, + { + "epoch": 38.64, + "learning_rate": 3.0689184651722864e-05, + "loss": 2.0522, + "step": 13347500 + }, + { + "epoch": 38.64, + "learning_rate": 3.0688461004075586e-05, + "loss": 2.0667, + "step": 13348000 + }, + { + "epoch": 38.64, + "learning_rate": 3.068773735642831e-05, + "loss": 2.0709, + "step": 13348500 + }, + { + "epoch": 38.64, + "learning_rate": 3.068701370878103e-05, + "loss": 2.0724, + "step": 13349000 + }, + { + "epoch": 38.64, + "learning_rate": 3.068629006113375e-05, + "loss": 2.0489, + "step": 13349500 + }, + { + "epoch": 38.64, + "learning_rate": 3.068556786078177e-05, + "loss": 2.0258, + "step": 13350000 + }, + { + "epoch": 38.64, + "learning_rate": 3.068484566042979e-05, + "loss": 2.072, + "step": 13350500 + }, + { + "epoch": 38.65, + "learning_rate": 3.068412201278251e-05, + "loss": 2.0442, + "step": 13351000 + }, + { + "epoch": 38.65, + "learning_rate": 3.0683398365135235e-05, + "loss": 2.08, + "step": 13351500 + }, + { + "epoch": 38.65, + "learning_rate": 3.068267471748796e-05, + "loss": 2.0274, + "step": 13352000 + }, + { + "epoch": 38.65, + "learning_rate": 3.068195106984068e-05, + "loss": 2.0581, + "step": 13352500 + }, + { + "epoch": 38.65, + "learning_rate": 3.06812274221934e-05, + "loss": 2.0515, + "step": 13353000 + }, + { + "epoch": 38.65, + "learning_rate": 3.0680505221841425e-05, + "loss": 2.0643, + "step": 13353500 + }, + { + "epoch": 38.65, + "learning_rate": 3.067978157419415e-05, + "loss": 2.0556, + "step": 13354000 + }, + { + "epoch": 38.66, + "learning_rate": 3.0679057926546876e-05, + "loss": 2.0379, + "step": 13354500 + }, + { + "epoch": 38.66, + "learning_rate": 3.06783342788996e-05, + "loss": 2.0551, + "step": 13355000 + }, + { + "epoch": 38.66, + "learning_rate": 3.067761063125232e-05, + "loss": 2.0355, + "step": 13355500 + }, + { + "epoch": 38.66, + "learning_rate": 3.067688698360504e-05, + "loss": 2.0577, + "step": 13356000 + }, + { + "epoch": 38.66, + "learning_rate": 3.0676163335957765e-05, + "loss": 2.0598, + "step": 13356500 + }, + { + "epoch": 38.66, + "learning_rate": 3.067543968831049e-05, + "loss": 2.0515, + "step": 13357000 + }, + { + "epoch": 38.66, + "learning_rate": 3.067471604066321e-05, + "loss": 2.0491, + "step": 13357500 + }, + { + "epoch": 38.67, + "learning_rate": 3.0673993840311225e-05, + "loss": 2.0634, + "step": 13358000 + }, + { + "epoch": 38.67, + "learning_rate": 3.067327163995925e-05, + "loss": 2.0648, + "step": 13358500 + }, + { + "epoch": 38.67, + "learning_rate": 3.067254799231197e-05, + "loss": 2.0815, + "step": 13359000 + }, + { + "epoch": 38.67, + "learning_rate": 3.067182434466469e-05, + "loss": 2.065, + "step": 13359500 + }, + { + "epoch": 38.67, + "learning_rate": 3.0671100697017414e-05, + "loss": 2.0336, + "step": 13360000 + }, + { + "epoch": 38.67, + "learning_rate": 3.067037849666543e-05, + "loss": 2.0702, + "step": 13360500 + }, + { + "epoch": 38.67, + "learning_rate": 3.066965484901816e-05, + "loss": 2.0538, + "step": 13361000 + }, + { + "epoch": 38.68, + "learning_rate": 3.066893120137088e-05, + "loss": 2.0539, + "step": 13361500 + }, + { + "epoch": 38.68, + "learning_rate": 3.06682075537236e-05, + "loss": 2.0563, + "step": 13362000 + }, + { + "epoch": 38.68, + "learning_rate": 3.0667483906076325e-05, + "loss": 2.0446, + "step": 13362500 + }, + { + "epoch": 38.68, + "learning_rate": 3.066676025842905e-05, + "loss": 2.0498, + "step": 13363000 + }, + { + "epoch": 38.68, + "learning_rate": 3.0666036610781776e-05, + "loss": 2.0612, + "step": 13363500 + }, + { + "epoch": 38.68, + "learning_rate": 3.06653129631345e-05, + "loss": 2.0389, + "step": 13364000 + }, + { + "epoch": 38.68, + "learning_rate": 3.066458931548722e-05, + "loss": 2.0357, + "step": 13364500 + }, + { + "epoch": 38.69, + "learning_rate": 3.066386566783994e-05, + "loss": 2.0613, + "step": 13365000 + }, + { + "epoch": 38.69, + "learning_rate": 3.066314491478325e-05, + "loss": 2.0441, + "step": 13365500 + }, + { + "epoch": 38.69, + "learning_rate": 3.0662421267135974e-05, + "loss": 2.073, + "step": 13366000 + }, + { + "epoch": 38.69, + "learning_rate": 3.0661697619488696e-05, + "loss": 2.052, + "step": 13366500 + }, + { + "epoch": 38.69, + "learning_rate": 3.0660973971841425e-05, + "loss": 2.0683, + "step": 13367000 + }, + { + "epoch": 38.69, + "learning_rate": 3.066025032419415e-05, + "loss": 2.0435, + "step": 13367500 + }, + { + "epoch": 38.69, + "learning_rate": 3.065952667654687e-05, + "loss": 2.0527, + "step": 13368000 + }, + { + "epoch": 38.7, + "learning_rate": 3.065880447619489e-05, + "loss": 2.0305, + "step": 13368500 + }, + { + "epoch": 38.7, + "learning_rate": 3.0658080828547614e-05, + "loss": 2.0688, + "step": 13369000 + }, + { + "epoch": 38.7, + "learning_rate": 3.0657357180900337e-05, + "loss": 2.0767, + "step": 13369500 + }, + { + "epoch": 38.7, + "learning_rate": 3.065663498054835e-05, + "loss": 2.0627, + "step": 13370000 + }, + { + "epoch": 38.7, + "learning_rate": 3.0655911332901074e-05, + "loss": 2.0416, + "step": 13370500 + }, + { + "epoch": 38.7, + "learning_rate": 3.06551876852538e-05, + "loss": 2.0779, + "step": 13371000 + }, + { + "epoch": 38.71, + "learning_rate": 3.0654464037606526e-05, + "loss": 2.0519, + "step": 13371500 + }, + { + "epoch": 38.71, + "learning_rate": 3.065374038995925e-05, + "loss": 2.0567, + "step": 13372000 + }, + { + "epoch": 38.71, + "learning_rate": 3.065301674231197e-05, + "loss": 2.066, + "step": 13372500 + }, + { + "epoch": 38.71, + "learning_rate": 3.065229309466469e-05, + "loss": 2.0439, + "step": 13373000 + }, + { + "epoch": 38.71, + "learning_rate": 3.0651569447017415e-05, + "loss": 2.0605, + "step": 13373500 + }, + { + "epoch": 38.71, + "learning_rate": 3.065084579937014e-05, + "loss": 2.047, + "step": 13374000 + }, + { + "epoch": 38.71, + "learning_rate": 3.065012215172286e-05, + "loss": 2.0521, + "step": 13374500 + }, + { + "epoch": 38.72, + "learning_rate": 3.064939850407558e-05, + "loss": 2.0632, + "step": 13375000 + }, + { + "epoch": 38.72, + "learning_rate": 3.0648676303723604e-05, + "loss": 2.0725, + "step": 13375500 + }, + { + "epoch": 38.72, + "learning_rate": 3.0647952656076326e-05, + "loss": 2.0763, + "step": 13376000 + }, + { + "epoch": 38.72, + "learning_rate": 3.064722900842905e-05, + "loss": 2.0506, + "step": 13376500 + }, + { + "epoch": 38.72, + "learning_rate": 3.064650536078178e-05, + "loss": 2.0585, + "step": 13377000 + }, + { + "epoch": 38.72, + "learning_rate": 3.06457817131345e-05, + "loss": 2.032, + "step": 13377500 + }, + { + "epoch": 38.72, + "learning_rate": 3.064505806548722e-05, + "loss": 2.0603, + "step": 13378000 + }, + { + "epoch": 38.73, + "learning_rate": 3.0644334417839944e-05, + "loss": 2.0777, + "step": 13378500 + }, + { + "epoch": 38.73, + "learning_rate": 3.0643610770192666e-05, + "loss": 2.0752, + "step": 13379000 + }, + { + "epoch": 38.73, + "learning_rate": 3.064288712254539e-05, + "loss": 2.0585, + "step": 13379500 + }, + { + "epoch": 38.73, + "learning_rate": 3.0642164922193404e-05, + "loss": 2.0523, + "step": 13380000 + }, + { + "epoch": 38.73, + "learning_rate": 3.0641441274546126e-05, + "loss": 2.0607, + "step": 13380500 + }, + { + "epoch": 38.73, + "learning_rate": 3.064071762689885e-05, + "loss": 2.0686, + "step": 13381000 + }, + { + "epoch": 38.73, + "learning_rate": 3.063999397925158e-05, + "loss": 2.0713, + "step": 13381500 + }, + { + "epoch": 38.74, + "learning_rate": 3.06392703316043e-05, + "loss": 2.0735, + "step": 13382000 + }, + { + "epoch": 38.74, + "learning_rate": 3.063854668395702e-05, + "loss": 2.066, + "step": 13382500 + }, + { + "epoch": 38.74, + "learning_rate": 3.063782303630975e-05, + "loss": 2.0782, + "step": 13383000 + }, + { + "epoch": 38.74, + "learning_rate": 3.0637100835957766e-05, + "loss": 2.0647, + "step": 13383500 + }, + { + "epoch": 38.74, + "learning_rate": 3.063637718831049e-05, + "loss": 2.0673, + "step": 13384000 + }, + { + "epoch": 38.74, + "learning_rate": 3.063565354066321e-05, + "loss": 2.0723, + "step": 13384500 + }, + { + "epoch": 38.74, + "learning_rate": 3.063492989301593e-05, + "loss": 2.0579, + "step": 13385000 + }, + { + "epoch": 38.75, + "learning_rate": 3.0634206245368655e-05, + "loss": 2.0686, + "step": 13385500 + }, + { + "epoch": 38.75, + "learning_rate": 3.063348259772138e-05, + "loss": 2.0435, + "step": 13386000 + }, + { + "epoch": 38.75, + "learning_rate": 3.06327589500741e-05, + "loss": 2.0718, + "step": 13386500 + }, + { + "epoch": 38.75, + "learning_rate": 3.063203530242683e-05, + "loss": 2.0637, + "step": 13387000 + }, + { + "epoch": 38.75, + "learning_rate": 3.0631313102074844e-05, + "loss": 2.0764, + "step": 13387500 + }, + { + "epoch": 38.75, + "learning_rate": 3.063059234901815e-05, + "loss": 2.0644, + "step": 13388000 + }, + { + "epoch": 38.75, + "learning_rate": 3.0629868701370875e-05, + "loss": 2.0564, + "step": 13388500 + }, + { + "epoch": 38.76, + "learning_rate": 3.06291465010189e-05, + "loss": 2.0602, + "step": 13389000 + }, + { + "epoch": 38.76, + "learning_rate": 3.062842285337162e-05, + "loss": 2.0628, + "step": 13389500 + }, + { + "epoch": 38.76, + "learning_rate": 3.062769920572434e-05, + "loss": 2.0628, + "step": 13390000 + }, + { + "epoch": 38.76, + "learning_rate": 3.0626975558077064e-05, + "loss": 2.0665, + "step": 13390500 + }, + { + "epoch": 38.76, + "learning_rate": 3.0626251910429793e-05, + "loss": 2.067, + "step": 13391000 + }, + { + "epoch": 38.76, + "learning_rate": 3.0625528262782516e-05, + "loss": 2.0661, + "step": 13391500 + }, + { + "epoch": 38.76, + "learning_rate": 3.062480461513524e-05, + "loss": 2.0535, + "step": 13392000 + }, + { + "epoch": 38.77, + "learning_rate": 3.062408096748796e-05, + "loss": 2.0737, + "step": 13392500 + }, + { + "epoch": 38.77, + "learning_rate": 3.062335731984068e-05, + "loss": 2.1037, + "step": 13393000 + }, + { + "epoch": 38.77, + "learning_rate": 3.0622633672193405e-05, + "loss": 2.0591, + "step": 13393500 + }, + { + "epoch": 38.77, + "learning_rate": 3.062191002454613e-05, + "loss": 2.0623, + "step": 13394000 + }, + { + "epoch": 38.77, + "learning_rate": 3.0621186376898856e-05, + "loss": 2.0556, + "step": 13394500 + }, + { + "epoch": 38.77, + "learning_rate": 3.062046272925158e-05, + "loss": 2.0579, + "step": 13395000 + }, + { + "epoch": 38.77, + "learning_rate": 3.06197390816043e-05, + "loss": 2.0707, + "step": 13395500 + }, + { + "epoch": 38.78, + "learning_rate": 3.061901543395702e-05, + "loss": 2.0692, + "step": 13396000 + }, + { + "epoch": 38.78, + "learning_rate": 3.0618291786309745e-05, + "loss": 2.0577, + "step": 13396500 + }, + { + "epoch": 38.78, + "learning_rate": 3.061756813866247e-05, + "loss": 2.0556, + "step": 13397000 + }, + { + "epoch": 38.78, + "learning_rate": 3.061684449101519e-05, + "loss": 2.0594, + "step": 13397500 + }, + { + "epoch": 38.78, + "learning_rate": 3.061612084336792e-05, + "loss": 2.0434, + "step": 13398000 + }, + { + "epoch": 38.78, + "learning_rate": 3.061539719572064e-05, + "loss": 2.0713, + "step": 13398500 + }, + { + "epoch": 38.78, + "learning_rate": 3.0614674995368656e-05, + "loss": 2.0428, + "step": 13399000 + }, + { + "epoch": 38.79, + "learning_rate": 3.061395279501668e-05, + "loss": 2.0674, + "step": 13399500 + }, + { + "epoch": 38.79, + "learning_rate": 3.06132291473694e-05, + "loss": 2.0491, + "step": 13400000 + }, + { + "epoch": 38.79, + "learning_rate": 3.0612506947017416e-05, + "loss": 2.0754, + "step": 13400500 + }, + { + "epoch": 38.79, + "learning_rate": 3.061178329937014e-05, + "loss": 2.046, + "step": 13401000 + }, + { + "epoch": 38.79, + "learning_rate": 3.061105965172286e-05, + "loss": 2.0644, + "step": 13401500 + }, + { + "epoch": 38.79, + "learning_rate": 3.061033600407558e-05, + "loss": 2.0762, + "step": 13402000 + }, + { + "epoch": 38.79, + "learning_rate": 3.0609612356428305e-05, + "loss": 2.0738, + "step": 13402500 + }, + { + "epoch": 38.8, + "learning_rate": 3.060888870878103e-05, + "loss": 2.0544, + "step": 13403000 + }, + { + "epoch": 38.8, + "learning_rate": 3.0608165061133756e-05, + "loss": 2.0888, + "step": 13403500 + }, + { + "epoch": 38.8, + "learning_rate": 3.060744141348648e-05, + "loss": 2.0842, + "step": 13404000 + }, + { + "epoch": 38.8, + "learning_rate": 3.0606719213134494e-05, + "loss": 2.0739, + "step": 13404500 + }, + { + "epoch": 38.8, + "learning_rate": 3.0605995565487216e-05, + "loss": 2.0733, + "step": 13405000 + }, + { + "epoch": 38.8, + "learning_rate": 3.060527191783994e-05, + "loss": 2.0834, + "step": 13405500 + }, + { + "epoch": 38.8, + "learning_rate": 3.060454827019267e-05, + "loss": 2.0404, + "step": 13406000 + }, + { + "epoch": 38.81, + "learning_rate": 3.060382462254539e-05, + "loss": 2.061, + "step": 13406500 + }, + { + "epoch": 38.81, + "learning_rate": 3.060310097489811e-05, + "loss": 2.0695, + "step": 13407000 + }, + { + "epoch": 38.81, + "learning_rate": 3.0602377327250834e-05, + "loss": 2.0769, + "step": 13407500 + }, + { + "epoch": 38.81, + "learning_rate": 3.0601653679603557e-05, + "loss": 2.055, + "step": 13408000 + }, + { + "epoch": 38.81, + "learning_rate": 3.060093003195628e-05, + "loss": 2.065, + "step": 13408500 + }, + { + "epoch": 38.81, + "learning_rate": 3.060020638430901e-05, + "loss": 2.0527, + "step": 13409000 + }, + { + "epoch": 38.82, + "learning_rate": 3.059948273666173e-05, + "loss": 2.0642, + "step": 13409500 + }, + { + "epoch": 38.82, + "learning_rate": 3.0598760536309746e-05, + "loss": 2.0539, + "step": 13410000 + }, + { + "epoch": 38.82, + "learning_rate": 3.059803688866247e-05, + "loss": 2.0639, + "step": 13410500 + }, + { + "epoch": 38.82, + "learning_rate": 3.059731324101519e-05, + "loss": 2.0612, + "step": 13411000 + }, + { + "epoch": 38.82, + "learning_rate": 3.0596591040663206e-05, + "loss": 2.0314, + "step": 13411500 + }, + { + "epoch": 38.82, + "learning_rate": 3.059586739301593e-05, + "loss": 2.0619, + "step": 13412000 + }, + { + "epoch": 38.82, + "learning_rate": 3.059514374536866e-05, + "loss": 2.0707, + "step": 13412500 + }, + { + "epoch": 38.83, + "learning_rate": 3.059442009772138e-05, + "loss": 2.0615, + "step": 13413000 + }, + { + "epoch": 38.83, + "learning_rate": 3.059369645007411e-05, + "loss": 2.0606, + "step": 13413500 + }, + { + "epoch": 38.83, + "learning_rate": 3.059297280242683e-05, + "loss": 2.0612, + "step": 13414000 + }, + { + "epoch": 38.83, + "learning_rate": 3.059224915477955e-05, + "loss": 2.0758, + "step": 13414500 + }, + { + "epoch": 38.83, + "learning_rate": 3.0591525507132275e-05, + "loss": 2.0699, + "step": 13415000 + }, + { + "epoch": 38.83, + "learning_rate": 3.059080330678029e-05, + "loss": 2.06, + "step": 13415500 + }, + { + "epoch": 38.83, + "learning_rate": 3.059007965913301e-05, + "loss": 2.0431, + "step": 13416000 + }, + { + "epoch": 38.84, + "learning_rate": 3.0589357458781035e-05, + "loss": 2.0779, + "step": 13416500 + }, + { + "epoch": 38.84, + "learning_rate": 3.058863381113376e-05, + "loss": 2.0723, + "step": 13417000 + }, + { + "epoch": 38.84, + "learning_rate": 3.058791016348648e-05, + "loss": 2.0556, + "step": 13417500 + }, + { + "epoch": 38.84, + "learning_rate": 3.05871865158392e-05, + "loss": 2.0642, + "step": 13418000 + }, + { + "epoch": 38.84, + "learning_rate": 3.0586462868191924e-05, + "loss": 2.0555, + "step": 13418500 + }, + { + "epoch": 38.84, + "learning_rate": 3.0585739220544646e-05, + "loss": 2.0786, + "step": 13419000 + }, + { + "epoch": 38.84, + "learning_rate": 3.058501702019266e-05, + "loss": 2.0398, + "step": 13419500 + }, + { + "epoch": 38.85, + "learning_rate": 3.0584293372545384e-05, + "loss": 2.0641, + "step": 13420000 + }, + { + "epoch": 38.85, + "learning_rate": 3.058356972489811e-05, + "loss": 2.0487, + "step": 13420500 + }, + { + "epoch": 38.85, + "learning_rate": 3.0582846077250835e-05, + "loss": 2.0748, + "step": 13421000 + }, + { + "epoch": 38.85, + "learning_rate": 3.058212387689886e-05, + "loss": 2.0593, + "step": 13421500 + }, + { + "epoch": 38.85, + "learning_rate": 3.058140022925158e-05, + "loss": 2.0644, + "step": 13422000 + }, + { + "epoch": 38.85, + "learning_rate": 3.0580678028899595e-05, + "loss": 2.0489, + "step": 13422500 + }, + { + "epoch": 38.85, + "learning_rate": 3.057995438125232e-05, + "loss": 2.0609, + "step": 13423000 + }, + { + "epoch": 38.86, + "learning_rate": 3.057923073360504e-05, + "loss": 2.0646, + "step": 13423500 + }, + { + "epoch": 38.86, + "learning_rate": 3.0578508533253055e-05, + "loss": 2.0458, + "step": 13424000 + }, + { + "epoch": 38.86, + "learning_rate": 3.0577784885605784e-05, + "loss": 2.0459, + "step": 13424500 + }, + { + "epoch": 38.86, + "learning_rate": 3.0577061237958506e-05, + "loss": 2.0729, + "step": 13425000 + }, + { + "epoch": 38.86, + "learning_rate": 3.057633759031123e-05, + "loss": 2.0748, + "step": 13425500 + }, + { + "epoch": 38.86, + "learning_rate": 3.057561394266395e-05, + "loss": 2.0495, + "step": 13426000 + }, + { + "epoch": 38.86, + "learning_rate": 3.057489029501667e-05, + "loss": 2.0574, + "step": 13426500 + }, + { + "epoch": 38.87, + "learning_rate": 3.0574166647369395e-05, + "loss": 2.0593, + "step": 13427000 + }, + { + "epoch": 38.87, + "learning_rate": 3.057344299972212e-05, + "loss": 2.0513, + "step": 13427500 + }, + { + "epoch": 38.87, + "learning_rate": 3.057271935207485e-05, + "loss": 2.0602, + "step": 13428000 + }, + { + "epoch": 38.87, + "learning_rate": 3.057199570442757e-05, + "loss": 2.1, + "step": 13428500 + }, + { + "epoch": 38.87, + "learning_rate": 3.057127205678029e-05, + "loss": 2.0673, + "step": 13429000 + }, + { + "epoch": 38.87, + "learning_rate": 3.057054985642831e-05, + "loss": 2.0414, + "step": 13429500 + }, + { + "epoch": 38.87, + "learning_rate": 3.0569826208781036e-05, + "loss": 2.0775, + "step": 13430000 + }, + { + "epoch": 38.88, + "learning_rate": 3.056910400842905e-05, + "loss": 2.0351, + "step": 13430500 + }, + { + "epoch": 38.88, + "learning_rate": 3.0568380360781773e-05, + "loss": 2.0714, + "step": 13431000 + }, + { + "epoch": 38.88, + "learning_rate": 3.0567656713134496e-05, + "loss": 2.0858, + "step": 13431500 + }, + { + "epoch": 38.88, + "learning_rate": 3.056693306548722e-05, + "loss": 2.0657, + "step": 13432000 + }, + { + "epoch": 38.88, + "learning_rate": 3.056620941783994e-05, + "loss": 2.0344, + "step": 13432500 + }, + { + "epoch": 38.88, + "learning_rate": 3.056548577019266e-05, + "loss": 2.0396, + "step": 13433000 + }, + { + "epoch": 38.88, + "learning_rate": 3.0564762122545385e-05, + "loss": 2.0637, + "step": 13433500 + }, + { + "epoch": 38.89, + "learning_rate": 3.056403992219341e-05, + "loss": 2.0691, + "step": 13434000 + }, + { + "epoch": 38.89, + "learning_rate": 3.056331627454613e-05, + "loss": 2.0481, + "step": 13434500 + }, + { + "epoch": 38.89, + "learning_rate": 3.056259262689885e-05, + "loss": 2.068, + "step": 13435000 + }, + { + "epoch": 38.89, + "learning_rate": 3.056186897925158e-05, + "loss": 2.0777, + "step": 13435500 + }, + { + "epoch": 38.89, + "learning_rate": 3.05611453316043e-05, + "loss": 2.0628, + "step": 13436000 + }, + { + "epoch": 38.89, + "learning_rate": 3.0560421683957025e-05, + "loss": 2.0459, + "step": 13436500 + }, + { + "epoch": 38.89, + "learning_rate": 3.055969803630975e-05, + "loss": 2.0672, + "step": 13437000 + }, + { + "epoch": 38.9, + "learning_rate": 3.055897438866247e-05, + "loss": 2.0674, + "step": 13437500 + }, + { + "epoch": 38.9, + "learning_rate": 3.055825074101519e-05, + "loss": 2.0755, + "step": 13438000 + }, + { + "epoch": 38.9, + "learning_rate": 3.0557527093367914e-05, + "loss": 2.0703, + "step": 13438500 + }, + { + "epoch": 38.9, + "learning_rate": 3.0556803445720636e-05, + "loss": 2.0767, + "step": 13439000 + }, + { + "epoch": 38.9, + "learning_rate": 3.055607979807336e-05, + "loss": 2.0619, + "step": 13439500 + }, + { + "epoch": 38.9, + "learning_rate": 3.055535615042609e-05, + "loss": 2.058, + "step": 13440000 + }, + { + "epoch": 38.9, + "learning_rate": 3.055463250277881e-05, + "loss": 2.0372, + "step": 13440500 + }, + { + "epoch": 38.91, + "learning_rate": 3.055390885513153e-05, + "loss": 2.0554, + "step": 13441000 + }, + { + "epoch": 38.91, + "learning_rate": 3.0553185207484254e-05, + "loss": 2.0502, + "step": 13441500 + }, + { + "epoch": 38.91, + "learning_rate": 3.0552461559836976e-05, + "loss": 2.066, + "step": 13442000 + }, + { + "epoch": 38.91, + "learning_rate": 3.055173935948499e-05, + "loss": 2.0376, + "step": 13442500 + }, + { + "epoch": 38.91, + "learning_rate": 3.055101571183772e-05, + "loss": 2.0557, + "step": 13443000 + }, + { + "epoch": 38.91, + "learning_rate": 3.055029206419044e-05, + "loss": 2.0859, + "step": 13443500 + }, + { + "epoch": 38.91, + "learning_rate": 3.0549568416543165e-05, + "loss": 2.0752, + "step": 13444000 + }, + { + "epoch": 38.92, + "learning_rate": 3.054884476889589e-05, + "loss": 2.0856, + "step": 13444500 + }, + { + "epoch": 38.92, + "learning_rate": 3.054812112124861e-05, + "loss": 2.0807, + "step": 13445000 + }, + { + "epoch": 38.92, + "learning_rate": 3.054739892089663e-05, + "loss": 2.0709, + "step": 13445500 + }, + { + "epoch": 38.92, + "learning_rate": 3.0546675273249354e-05, + "loss": 2.0738, + "step": 13446000 + }, + { + "epoch": 38.92, + "learning_rate": 3.054595307289737e-05, + "loss": 2.0543, + "step": 13446500 + }, + { + "epoch": 38.92, + "learning_rate": 3.054522942525009e-05, + "loss": 2.0774, + "step": 13447000 + }, + { + "epoch": 38.93, + "learning_rate": 3.0544505777602814e-05, + "loss": 2.0466, + "step": 13447500 + }, + { + "epoch": 38.93, + "learning_rate": 3.0543782129955537e-05, + "loss": 2.0429, + "step": 13448000 + }, + { + "epoch": 38.93, + "learning_rate": 3.054305848230826e-05, + "loss": 2.0772, + "step": 13448500 + }, + { + "epoch": 38.93, + "learning_rate": 3.054233483466099e-05, + "loss": 2.0737, + "step": 13449000 + }, + { + "epoch": 38.93, + "learning_rate": 3.054161118701371e-05, + "loss": 2.0603, + "step": 13449500 + }, + { + "epoch": 38.93, + "learning_rate": 3.054088753936644e-05, + "loss": 2.0852, + "step": 13450000 + }, + { + "epoch": 38.93, + "learning_rate": 3.054016389171916e-05, + "loss": 2.0691, + "step": 13450500 + }, + { + "epoch": 38.94, + "learning_rate": 3.0539440244071884e-05, + "loss": 2.0429, + "step": 13451000 + }, + { + "epoch": 38.94, + "learning_rate": 3.05387180437199e-05, + "loss": 2.0535, + "step": 13451500 + }, + { + "epoch": 38.94, + "learning_rate": 3.053799439607262e-05, + "loss": 2.0606, + "step": 13452000 + }, + { + "epoch": 38.94, + "learning_rate": 3.0537270748425344e-05, + "loss": 2.0684, + "step": 13452500 + }, + { + "epoch": 38.94, + "learning_rate": 3.0536547100778066e-05, + "loss": 2.0523, + "step": 13453000 + }, + { + "epoch": 38.94, + "learning_rate": 3.053582490042609e-05, + "loss": 2.0418, + "step": 13453500 + }, + { + "epoch": 38.94, + "learning_rate": 3.0535102700074104e-05, + "loss": 2.0794, + "step": 13454000 + }, + { + "epoch": 38.95, + "learning_rate": 3.0534379052426826e-05, + "loss": 2.0526, + "step": 13454500 + }, + { + "epoch": 38.95, + "learning_rate": 3.053365540477955e-05, + "loss": 2.0535, + "step": 13455000 + }, + { + "epoch": 38.95, + "learning_rate": 3.053293175713227e-05, + "loss": 2.0471, + "step": 13455500 + }, + { + "epoch": 38.95, + "learning_rate": 3.053220810948499e-05, + "loss": 2.0699, + "step": 13456000 + }, + { + "epoch": 38.95, + "learning_rate": 3.0531484461837715e-05, + "loss": 2.0878, + "step": 13456500 + }, + { + "epoch": 38.95, + "learning_rate": 3.053076081419044e-05, + "loss": 2.0693, + "step": 13457000 + }, + { + "epoch": 38.95, + "learning_rate": 3.0530037166543166e-05, + "loss": 2.0726, + "step": 13457500 + }, + { + "epoch": 38.96, + "learning_rate": 3.052931496619119e-05, + "loss": 2.056, + "step": 13458000 + }, + { + "epoch": 38.96, + "learning_rate": 3.052859131854391e-05, + "loss": 2.0871, + "step": 13458500 + }, + { + "epoch": 38.96, + "learning_rate": 3.0527869118191926e-05, + "loss": 2.0695, + "step": 13459000 + }, + { + "epoch": 38.96, + "learning_rate": 3.052714547054465e-05, + "loss": 2.0558, + "step": 13459500 + }, + { + "epoch": 38.96, + "learning_rate": 3.052642182289737e-05, + "loss": 2.0603, + "step": 13460000 + }, + { + "epoch": 38.96, + "learning_rate": 3.052569817525009e-05, + "loss": 2.0449, + "step": 13460500 + }, + { + "epoch": 38.96, + "learning_rate": 3.0524974527602815e-05, + "loss": 2.0627, + "step": 13461000 + }, + { + "epoch": 38.97, + "learning_rate": 3.052425087995554e-05, + "loss": 2.0594, + "step": 13461500 + }, + { + "epoch": 38.97, + "learning_rate": 3.052352867960356e-05, + "loss": 2.0689, + "step": 13462000 + }, + { + "epoch": 38.97, + "learning_rate": 3.0522806479251575e-05, + "loss": 2.083, + "step": 13462500 + }, + { + "epoch": 38.97, + "learning_rate": 3.05220828316043e-05, + "loss": 2.0561, + "step": 13463000 + }, + { + "epoch": 38.97, + "learning_rate": 3.052135918395702e-05, + "loss": 2.0645, + "step": 13463500 + }, + { + "epoch": 38.97, + "learning_rate": 3.052063553630974e-05, + "loss": 2.0761, + "step": 13464000 + }, + { + "epoch": 38.97, + "learning_rate": 3.0519911888662464e-05, + "loss": 2.0684, + "step": 13464500 + }, + { + "epoch": 38.98, + "learning_rate": 3.0519188241015186e-05, + "loss": 2.0678, + "step": 13465000 + }, + { + "epoch": 38.98, + "learning_rate": 3.0518464593367915e-05, + "loss": 2.0667, + "step": 13465500 + }, + { + "epoch": 38.98, + "learning_rate": 3.051774094572064e-05, + "loss": 2.0659, + "step": 13466000 + }, + { + "epoch": 38.98, + "learning_rate": 3.0517017298073363e-05, + "loss": 2.0566, + "step": 13466500 + }, + { + "epoch": 38.98, + "learning_rate": 3.0516293650426086e-05, + "loss": 2.0433, + "step": 13467000 + }, + { + "epoch": 38.98, + "learning_rate": 3.0515571450074104e-05, + "loss": 2.0649, + "step": 13467500 + }, + { + "epoch": 38.98, + "learning_rate": 3.0514847802426827e-05, + "loss": 2.0619, + "step": 13468000 + }, + { + "epoch": 38.99, + "learning_rate": 3.051412415477955e-05, + "loss": 2.0805, + "step": 13468500 + }, + { + "epoch": 38.99, + "learning_rate": 3.0513401954427568e-05, + "loss": 2.065, + "step": 13469000 + }, + { + "epoch": 38.99, + "learning_rate": 3.051267830678029e-05, + "loss": 2.0444, + "step": 13469500 + }, + { + "epoch": 38.99, + "learning_rate": 3.0511954659133012e-05, + "loss": 2.0311, + "step": 13470000 + }, + { + "epoch": 38.99, + "learning_rate": 3.0511231011485735e-05, + "loss": 2.069, + "step": 13470500 + }, + { + "epoch": 38.99, + "learning_rate": 3.051050736383846e-05, + "loss": 2.0379, + "step": 13471000 + }, + { + "epoch": 38.99, + "learning_rate": 3.0509785163486476e-05, + "loss": 2.0784, + "step": 13471500 + }, + { + "epoch": 39.0, + "learning_rate": 3.0509061515839198e-05, + "loss": 2.074, + "step": 13472000 + }, + { + "epoch": 39.0, + "learning_rate": 3.050833786819192e-05, + "loss": 2.0628, + "step": 13472500 + }, + { + "epoch": 39.0, + "learning_rate": 3.050761566783994e-05, + "loss": 2.0457, + "step": 13473000 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.670671881118002, + "eval_accuracy_mlm": 0.6356850853404965, + "eval_accuracy_nsp": 0.8582585142511446, + "eval_loss": 2.165445327758789, + "eval_runtime": 331.7981, + "eval_samples_per_second": 1315.216, + "eval_steps_per_second": 54.801, + "step": 13473408 + }, + { + "epoch": 39.0, + "learning_rate": 3.0506892020192668e-05, + "loss": 2.067, + "step": 13473500 + }, + { + "epoch": 39.0, + "learning_rate": 3.050616837254539e-05, + "loss": 2.0359, + "step": 13474000 + }, + { + "epoch": 39.0, + "learning_rate": 3.0505444724898113e-05, + "loss": 2.0454, + "step": 13474500 + }, + { + "epoch": 39.0, + "learning_rate": 3.0504721077250835e-05, + "loss": 2.0357, + "step": 13475000 + }, + { + "epoch": 39.01, + "learning_rate": 3.050399742960356e-05, + "loss": 2.0514, + "step": 13475500 + }, + { + "epoch": 39.01, + "learning_rate": 3.0503273781956283e-05, + "loss": 2.0459, + "step": 13476000 + }, + { + "epoch": 39.01, + "learning_rate": 3.0502550134309005e-05, + "loss": 2.0284, + "step": 13476500 + }, + { + "epoch": 39.01, + "learning_rate": 3.0501826486661727e-05, + "loss": 2.0664, + "step": 13477000 + }, + { + "epoch": 39.01, + "learning_rate": 3.050110283901445e-05, + "loss": 2.0234, + "step": 13477500 + }, + { + "epoch": 39.01, + "learning_rate": 3.050037919136717e-05, + "loss": 2.0301, + "step": 13478000 + }, + { + "epoch": 39.01, + "learning_rate": 3.0499655543719897e-05, + "loss": 2.0341, + "step": 13478500 + }, + { + "epoch": 39.02, + "learning_rate": 3.049893189607262e-05, + "loss": 2.0509, + "step": 13479000 + }, + { + "epoch": 39.02, + "learning_rate": 3.0498208248425342e-05, + "loss": 2.0225, + "step": 13479500 + }, + { + "epoch": 39.02, + "learning_rate": 3.049748604807336e-05, + "loss": 2.0345, + "step": 13480000 + }, + { + "epoch": 39.02, + "learning_rate": 3.0496762400426086e-05, + "loss": 2.0305, + "step": 13480500 + }, + { + "epoch": 39.02, + "learning_rate": 3.0496038752778812e-05, + "loss": 2.0365, + "step": 13481000 + }, + { + "epoch": 39.02, + "learning_rate": 3.0495315105131534e-05, + "loss": 2.049, + "step": 13481500 + }, + { + "epoch": 39.02, + "learning_rate": 3.0494591457484256e-05, + "loss": 2.0357, + "step": 13482000 + }, + { + "epoch": 39.03, + "learning_rate": 3.049386780983698e-05, + "loss": 2.0503, + "step": 13482500 + }, + { + "epoch": 39.03, + "learning_rate": 3.04931441621897e-05, + "loss": 2.0263, + "step": 13483000 + }, + { + "epoch": 39.03, + "learning_rate": 3.0492420514542423e-05, + "loss": 2.0413, + "step": 13483500 + }, + { + "epoch": 39.03, + "learning_rate": 3.0491698314190442e-05, + "loss": 2.0307, + "step": 13484000 + }, + { + "epoch": 39.03, + "learning_rate": 3.049097611383846e-05, + "loss": 2.0425, + "step": 13484500 + }, + { + "epoch": 39.03, + "learning_rate": 3.0490252466191183e-05, + "loss": 2.0416, + "step": 13485000 + }, + { + "epoch": 39.04, + "learning_rate": 3.0489528818543905e-05, + "loss": 2.0431, + "step": 13485500 + }, + { + "epoch": 39.04, + "learning_rate": 3.0488805170896628e-05, + "loss": 2.0321, + "step": 13486000 + }, + { + "epoch": 39.04, + "learning_rate": 3.048808152324935e-05, + "loss": 2.0438, + "step": 13486500 + }, + { + "epoch": 39.04, + "learning_rate": 3.0487357875602076e-05, + "loss": 2.0413, + "step": 13487000 + }, + { + "epoch": 39.04, + "learning_rate": 3.048663567525009e-05, + "loss": 2.0389, + "step": 13487500 + }, + { + "epoch": 39.04, + "learning_rate": 3.048591202760282e-05, + "loss": 2.0395, + "step": 13488000 + }, + { + "epoch": 39.04, + "learning_rate": 3.0485188379955542e-05, + "loss": 2.0539, + "step": 13488500 + }, + { + "epoch": 39.05, + "learning_rate": 3.0484464732308265e-05, + "loss": 2.0372, + "step": 13489000 + }, + { + "epoch": 39.05, + "learning_rate": 3.0483741084660987e-05, + "loss": 2.0397, + "step": 13489500 + }, + { + "epoch": 39.05, + "learning_rate": 3.0483017437013712e-05, + "loss": 2.0421, + "step": 13490000 + }, + { + "epoch": 39.05, + "learning_rate": 3.0482293789366435e-05, + "loss": 2.0502, + "step": 13490500 + }, + { + "epoch": 39.05, + "learning_rate": 3.0481570141719157e-05, + "loss": 2.0402, + "step": 13491000 + }, + { + "epoch": 39.05, + "learning_rate": 3.0480847941367176e-05, + "loss": 2.0459, + "step": 13491500 + }, + { + "epoch": 39.05, + "learning_rate": 3.0480124293719898e-05, + "loss": 2.0406, + "step": 13492000 + }, + { + "epoch": 39.06, + "learning_rate": 3.0479402093367914e-05, + "loss": 2.0685, + "step": 13492500 + }, + { + "epoch": 39.06, + "learning_rate": 3.047867844572064e-05, + "loss": 2.037, + "step": 13493000 + }, + { + "epoch": 39.06, + "learning_rate": 3.047795479807336e-05, + "loss": 2.0194, + "step": 13493500 + }, + { + "epoch": 39.06, + "learning_rate": 3.0477231150426084e-05, + "loss": 2.0433, + "step": 13494000 + }, + { + "epoch": 39.06, + "learning_rate": 3.0476507502778806e-05, + "loss": 2.0437, + "step": 13494500 + }, + { + "epoch": 39.06, + "learning_rate": 3.0475785302426825e-05, + "loss": 2.0862, + "step": 13495000 + }, + { + "epoch": 39.06, + "learning_rate": 3.047506165477955e-05, + "loss": 2.0431, + "step": 13495500 + }, + { + "epoch": 39.07, + "learning_rate": 3.047433945442757e-05, + "loss": 2.0625, + "step": 13496000 + }, + { + "epoch": 39.07, + "learning_rate": 3.0473615806780292e-05, + "loss": 2.0451, + "step": 13496500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0472892159133014e-05, + "loss": 2.0657, + "step": 13497000 + }, + { + "epoch": 39.07, + "learning_rate": 3.047216851148574e-05, + "loss": 2.062, + "step": 13497500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0471444863838462e-05, + "loss": 2.0422, + "step": 13498000 + }, + { + "epoch": 39.07, + "learning_rate": 3.0470721216191184e-05, + "loss": 2.038, + "step": 13498500 + }, + { + "epoch": 39.07, + "learning_rate": 3.0469997568543906e-05, + "loss": 2.0319, + "step": 13499000 + }, + { + "epoch": 39.08, + "learning_rate": 3.0469275368191925e-05, + "loss": 2.0661, + "step": 13499500 + }, + { + "epoch": 39.08, + "learning_rate": 3.0468551720544647e-05, + "loss": 2.0323, + "step": 13500000 + }, + { + "epoch": 39.08, + "learning_rate": 3.046782807289737e-05, + "loss": 2.0137, + "step": 13500500 + }, + { + "epoch": 39.08, + "learning_rate": 3.0467104425250092e-05, + "loss": 2.0332, + "step": 13501000 + }, + { + "epoch": 39.08, + "learning_rate": 3.0466380777602814e-05, + "loss": 2.0544, + "step": 13501500 + }, + { + "epoch": 39.08, + "learning_rate": 3.046565712995554e-05, + "loss": 2.0399, + "step": 13502000 + }, + { + "epoch": 39.08, + "learning_rate": 3.0464933482308262e-05, + "loss": 2.029, + "step": 13502500 + }, + { + "epoch": 39.09, + "learning_rate": 3.046420983466099e-05, + "loss": 2.0543, + "step": 13503000 + }, + { + "epoch": 39.09, + "learning_rate": 3.0463486187013713e-05, + "loss": 2.0439, + "step": 13503500 + }, + { + "epoch": 39.09, + "learning_rate": 3.0462762539366435e-05, + "loss": 2.0558, + "step": 13504000 + }, + { + "epoch": 39.09, + "learning_rate": 3.0462038891719158e-05, + "loss": 2.0375, + "step": 13504500 + }, + { + "epoch": 39.09, + "learning_rate": 3.0461316691367177e-05, + "loss": 2.0737, + "step": 13505000 + }, + { + "epoch": 39.09, + "learning_rate": 3.04605930437199e-05, + "loss": 2.0258, + "step": 13505500 + }, + { + "epoch": 39.09, + "learning_rate": 3.045986939607262e-05, + "loss": 2.0499, + "step": 13506000 + }, + { + "epoch": 39.1, + "learning_rate": 3.0459145748425343e-05, + "loss": 2.0614, + "step": 13506500 + }, + { + "epoch": 39.1, + "learning_rate": 3.0458422100778066e-05, + "loss": 2.0475, + "step": 13507000 + }, + { + "epoch": 39.1, + "learning_rate": 3.045769845313079e-05, + "loss": 2.0333, + "step": 13507500 + }, + { + "epoch": 39.1, + "learning_rate": 3.0456976252778807e-05, + "loss": 2.0399, + "step": 13508000 + }, + { + "epoch": 39.1, + "learning_rate": 3.0456254052426826e-05, + "loss": 2.0643, + "step": 13508500 + }, + { + "epoch": 39.1, + "learning_rate": 3.0455530404779548e-05, + "loss": 2.0295, + "step": 13509000 + }, + { + "epoch": 39.1, + "learning_rate": 3.045480675713227e-05, + "loss": 2.0685, + "step": 13509500 + }, + { + "epoch": 39.11, + "learning_rate": 3.0454083109484992e-05, + "loss": 2.0386, + "step": 13510000 + }, + { + "epoch": 39.11, + "learning_rate": 3.0453360909133015e-05, + "loss": 2.0454, + "step": 13510500 + }, + { + "epoch": 39.11, + "learning_rate": 3.045263726148574e-05, + "loss": 2.0556, + "step": 13511000 + }, + { + "epoch": 39.11, + "learning_rate": 3.0451913613838463e-05, + "loss": 2.0348, + "step": 13511500 + }, + { + "epoch": 39.11, + "learning_rate": 3.0451189966191185e-05, + "loss": 2.0393, + "step": 13512000 + }, + { + "epoch": 39.11, + "learning_rate": 3.0450466318543907e-05, + "loss": 2.0387, + "step": 13512500 + }, + { + "epoch": 39.11, + "learning_rate": 3.044974267089663e-05, + "loss": 2.06, + "step": 13513000 + }, + { + "epoch": 39.12, + "learning_rate": 3.0449019023249355e-05, + "loss": 2.0253, + "step": 13513500 + }, + { + "epoch": 39.12, + "learning_rate": 3.0448295375602077e-05, + "loss": 2.0646, + "step": 13514000 + }, + { + "epoch": 39.12, + "learning_rate": 3.04475717279548e-05, + "loss": 2.0578, + "step": 13514500 + }, + { + "epoch": 39.12, + "learning_rate": 3.044684808030752e-05, + "loss": 2.031, + "step": 13515000 + }, + { + "epoch": 39.12, + "learning_rate": 3.0446124432660244e-05, + "loss": 2.0369, + "step": 13515500 + }, + { + "epoch": 39.12, + "learning_rate": 3.0445400785012966e-05, + "loss": 2.0339, + "step": 13516000 + }, + { + "epoch": 39.12, + "learning_rate": 3.0444677137365692e-05, + "loss": 2.0486, + "step": 13516500 + }, + { + "epoch": 39.13, + "learning_rate": 3.0443954937013707e-05, + "loss": 2.0413, + "step": 13517000 + }, + { + "epoch": 39.13, + "learning_rate": 3.0443232736661726e-05, + "loss": 2.0709, + "step": 13517500 + }, + { + "epoch": 39.13, + "learning_rate": 3.044251053630975e-05, + "loss": 2.0424, + "step": 13518000 + }, + { + "epoch": 39.13, + "learning_rate": 3.044178688866247e-05, + "loss": 2.0397, + "step": 13518500 + }, + { + "epoch": 39.13, + "learning_rate": 3.0441063241015193e-05, + "loss": 2.0279, + "step": 13519000 + }, + { + "epoch": 39.13, + "learning_rate": 3.044033959336792e-05, + "loss": 2.0272, + "step": 13519500 + }, + { + "epoch": 39.13, + "learning_rate": 3.043961594572064e-05, + "loss": 2.0383, + "step": 13520000 + }, + { + "epoch": 39.14, + "learning_rate": 3.0438892298073363e-05, + "loss": 2.0444, + "step": 13520500 + }, + { + "epoch": 39.14, + "learning_rate": 3.0438168650426085e-05, + "loss": 2.0415, + "step": 13521000 + }, + { + "epoch": 39.14, + "learning_rate": 3.0437445002778808e-05, + "loss": 2.0605, + "step": 13521500 + }, + { + "epoch": 39.14, + "learning_rate": 3.043672135513153e-05, + "loss": 2.0253, + "step": 13522000 + }, + { + "epoch": 39.14, + "learning_rate": 3.0435997707484255e-05, + "loss": 2.0229, + "step": 13522500 + }, + { + "epoch": 39.14, + "learning_rate": 3.0435274059836978e-05, + "loss": 2.0311, + "step": 13523000 + }, + { + "epoch": 39.14, + "learning_rate": 3.04345504121897e-05, + "loss": 2.0445, + "step": 13523500 + }, + { + "epoch": 39.15, + "learning_rate": 3.0433826764542422e-05, + "loss": 2.0631, + "step": 13524000 + }, + { + "epoch": 39.15, + "learning_rate": 3.0433103116895144e-05, + "loss": 2.036, + "step": 13524500 + }, + { + "epoch": 39.15, + "learning_rate": 3.0432379469247873e-05, + "loss": 2.0316, + "step": 13525000 + }, + { + "epoch": 39.15, + "learning_rate": 3.0431655821600596e-05, + "loss": 2.0671, + "step": 13525500 + }, + { + "epoch": 39.15, + "learning_rate": 3.0430933621248615e-05, + "loss": 2.0393, + "step": 13526000 + }, + { + "epoch": 39.15, + "learning_rate": 3.0430209973601337e-05, + "loss": 2.0367, + "step": 13526500 + }, + { + "epoch": 39.16, + "learning_rate": 3.042948922054465e-05, + "loss": 2.0505, + "step": 13527000 + }, + { + "epoch": 39.16, + "learning_rate": 3.042876557289737e-05, + "loss": 2.0367, + "step": 13527500 + }, + { + "epoch": 39.16, + "learning_rate": 3.042804337254539e-05, + "loss": 2.0679, + "step": 13528000 + }, + { + "epoch": 39.16, + "learning_rate": 3.0427319724898112e-05, + "loss": 2.0318, + "step": 13528500 + }, + { + "epoch": 39.16, + "learning_rate": 3.0426596077250835e-05, + "loss": 2.0507, + "step": 13529000 + }, + { + "epoch": 39.16, + "learning_rate": 3.0425872429603557e-05, + "loss": 2.0602, + "step": 13529500 + }, + { + "epoch": 39.16, + "learning_rate": 3.042514878195628e-05, + "loss": 2.0265, + "step": 13530000 + }, + { + "epoch": 39.17, + "learning_rate": 3.0424426581604298e-05, + "loss": 2.0723, + "step": 13530500 + }, + { + "epoch": 39.17, + "learning_rate": 3.042370293395702e-05, + "loss": 2.0363, + "step": 13531000 + }, + { + "epoch": 39.17, + "learning_rate": 3.0422979286309743e-05, + "loss": 2.0536, + "step": 13531500 + }, + { + "epoch": 39.17, + "learning_rate": 3.0422255638662468e-05, + "loss": 2.0248, + "step": 13532000 + }, + { + "epoch": 39.17, + "learning_rate": 3.042153199101519e-05, + "loss": 2.0305, + "step": 13532500 + }, + { + "epoch": 39.17, + "learning_rate": 3.042080834336792e-05, + "loss": 2.0487, + "step": 13533000 + }, + { + "epoch": 39.17, + "learning_rate": 3.042008469572064e-05, + "loss": 2.0231, + "step": 13533500 + }, + { + "epoch": 39.18, + "learning_rate": 3.0419361048073364e-05, + "loss": 2.0644, + "step": 13534000 + }, + { + "epoch": 39.18, + "learning_rate": 3.0418637400426086e-05, + "loss": 2.0479, + "step": 13534500 + }, + { + "epoch": 39.18, + "learning_rate": 3.041791375277881e-05, + "loss": 2.0669, + "step": 13535000 + }, + { + "epoch": 39.18, + "learning_rate": 3.041719010513153e-05, + "loss": 2.0484, + "step": 13535500 + }, + { + "epoch": 39.18, + "learning_rate": 3.041646790477955e-05, + "loss": 2.0485, + "step": 13536000 + }, + { + "epoch": 39.18, + "learning_rate": 3.0415744257132272e-05, + "loss": 2.0443, + "step": 13536500 + }, + { + "epoch": 39.18, + "learning_rate": 3.0415020609484994e-05, + "loss": 2.0371, + "step": 13537000 + }, + { + "epoch": 39.19, + "learning_rate": 3.041429696183772e-05, + "loss": 2.0536, + "step": 13537500 + }, + { + "epoch": 39.19, + "learning_rate": 3.0413573314190442e-05, + "loss": 2.0338, + "step": 13538000 + }, + { + "epoch": 39.19, + "learning_rate": 3.0412849666543164e-05, + "loss": 2.045, + "step": 13538500 + }, + { + "epoch": 39.19, + "learning_rate": 3.0412127466191183e-05, + "loss": 2.0274, + "step": 13539000 + }, + { + "epoch": 39.19, + "learning_rate": 3.0411403818543905e-05, + "loss": 2.0597, + "step": 13539500 + }, + { + "epoch": 39.19, + "learning_rate": 3.0410680170896627e-05, + "loss": 2.0434, + "step": 13540000 + }, + { + "epoch": 39.19, + "learning_rate": 3.0409956523249356e-05, + "loss": 2.0411, + "step": 13540500 + }, + { + "epoch": 39.2, + "learning_rate": 3.040923287560208e-05, + "loss": 2.0563, + "step": 13541000 + }, + { + "epoch": 39.2, + "learning_rate": 3.04085092279548e-05, + "loss": 2.0552, + "step": 13541500 + }, + { + "epoch": 39.2, + "learning_rate": 3.0407785580307523e-05, + "loss": 2.0558, + "step": 13542000 + }, + { + "epoch": 39.2, + "learning_rate": 3.0407061932660245e-05, + "loss": 2.0372, + "step": 13542500 + }, + { + "epoch": 39.2, + "learning_rate": 3.040633828501297e-05, + "loss": 2.0438, + "step": 13543000 + }, + { + "epoch": 39.2, + "learning_rate": 3.0405614637365693e-05, + "loss": 2.0439, + "step": 13543500 + }, + { + "epoch": 39.2, + "learning_rate": 3.0404890989718416e-05, + "loss": 2.0479, + "step": 13544000 + }, + { + "epoch": 39.21, + "learning_rate": 3.0404168789366434e-05, + "loss": 2.0549, + "step": 13544500 + }, + { + "epoch": 39.21, + "learning_rate": 3.040344658901445e-05, + "loss": 2.0674, + "step": 13545000 + }, + { + "epoch": 39.21, + "learning_rate": 3.040272438866247e-05, + "loss": 2.0384, + "step": 13545500 + }, + { + "epoch": 39.21, + "learning_rate": 3.040200074101519e-05, + "loss": 2.0668, + "step": 13546000 + }, + { + "epoch": 39.21, + "learning_rate": 3.0401278540663207e-05, + "loss": 2.0592, + "step": 13546500 + }, + { + "epoch": 39.21, + "learning_rate": 3.0400554893015932e-05, + "loss": 2.0595, + "step": 13547000 + }, + { + "epoch": 39.21, + "learning_rate": 3.0399831245368655e-05, + "loss": 2.0357, + "step": 13547500 + }, + { + "epoch": 39.22, + "learning_rate": 3.0399107597721384e-05, + "loss": 2.0719, + "step": 13548000 + }, + { + "epoch": 39.22, + "learning_rate": 3.0398383950074106e-05, + "loss": 2.0378, + "step": 13548500 + }, + { + "epoch": 39.22, + "learning_rate": 3.0397660302426828e-05, + "loss": 2.0603, + "step": 13549000 + }, + { + "epoch": 39.22, + "learning_rate": 3.039693665477955e-05, + "loss": 2.0602, + "step": 13549500 + }, + { + "epoch": 39.22, + "learning_rate": 3.0396213007132273e-05, + "loss": 2.0576, + "step": 13550000 + }, + { + "epoch": 39.22, + "learning_rate": 3.0395489359484998e-05, + "loss": 2.0485, + "step": 13550500 + }, + { + "epoch": 39.22, + "learning_rate": 3.039476571183772e-05, + "loss": 2.052, + "step": 13551000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0394042064190443e-05, + "loss": 2.0601, + "step": 13551500 + }, + { + "epoch": 39.23, + "learning_rate": 3.0393319863838458e-05, + "loss": 2.0523, + "step": 13552000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0392596216191184e-05, + "loss": 2.0463, + "step": 13552500 + }, + { + "epoch": 39.23, + "learning_rate": 3.0391872568543906e-05, + "loss": 2.0385, + "step": 13553000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0391148920896628e-05, + "loss": 2.0348, + "step": 13553500 + }, + { + "epoch": 39.23, + "learning_rate": 3.039042527324935e-05, + "loss": 2.0455, + "step": 13554000 + }, + { + "epoch": 39.23, + "learning_rate": 3.0389701625602073e-05, + "loss": 2.0388, + "step": 13554500 + }, + { + "epoch": 39.24, + "learning_rate": 3.0388977977954802e-05, + "loss": 2.0338, + "step": 13555000 + }, + { + "epoch": 39.24, + "learning_rate": 3.0388254330307524e-05, + "loss": 2.0377, + "step": 13555500 + }, + { + "epoch": 39.24, + "learning_rate": 3.038753068266025e-05, + "loss": 2.0357, + "step": 13556000 + }, + { + "epoch": 39.24, + "learning_rate": 3.0386807035012972e-05, + "loss": 2.0475, + "step": 13556500 + }, + { + "epoch": 39.24, + "learning_rate": 3.0386083387365694e-05, + "loss": 2.0504, + "step": 13557000 + }, + { + "epoch": 39.24, + "learning_rate": 3.0385359739718416e-05, + "loss": 2.0359, + "step": 13557500 + }, + { + "epoch": 39.24, + "learning_rate": 3.038463609207114e-05, + "loss": 2.055, + "step": 13558000 + }, + { + "epoch": 39.25, + "learning_rate": 3.0383913891719157e-05, + "loss": 2.0547, + "step": 13558500 + }, + { + "epoch": 39.25, + "learning_rate": 3.038319024407188e-05, + "loss": 2.045, + "step": 13559000 + }, + { + "epoch": 39.25, + "learning_rate": 3.03824680437199e-05, + "loss": 2.0613, + "step": 13559500 + }, + { + "epoch": 39.25, + "learning_rate": 3.038174439607262e-05, + "loss": 2.0347, + "step": 13560000 + }, + { + "epoch": 39.25, + "learning_rate": 3.0381020748425343e-05, + "loss": 2.0447, + "step": 13560500 + }, + { + "epoch": 39.25, + "learning_rate": 3.0380297100778065e-05, + "loss": 2.0493, + "step": 13561000 + }, + { + "epoch": 39.25, + "learning_rate": 3.0379573453130788e-05, + "loss": 2.0416, + "step": 13561500 + }, + { + "epoch": 39.26, + "learning_rate": 3.037884980548351e-05, + "loss": 2.0547, + "step": 13562000 + }, + { + "epoch": 39.26, + "learning_rate": 3.037812615783624e-05, + "loss": 2.0542, + "step": 13562500 + }, + { + "epoch": 39.26, + "learning_rate": 3.0377403957484258e-05, + "loss": 2.0555, + "step": 13563000 + }, + { + "epoch": 39.26, + "learning_rate": 3.037668030983698e-05, + "loss": 2.0608, + "step": 13563500 + }, + { + "epoch": 39.26, + "learning_rate": 3.0375956662189702e-05, + "loss": 2.0355, + "step": 13564000 + }, + { + "epoch": 39.26, + "learning_rate": 3.0375233014542424e-05, + "loss": 2.0527, + "step": 13564500 + }, + { + "epoch": 39.27, + "learning_rate": 3.037450936689515e-05, + "loss": 2.0336, + "step": 13565000 + }, + { + "epoch": 39.27, + "learning_rate": 3.0373785719247872e-05, + "loss": 2.0376, + "step": 13565500 + }, + { + "epoch": 39.27, + "learning_rate": 3.0373062071600595e-05, + "loss": 2.0461, + "step": 13566000 + }, + { + "epoch": 39.27, + "learning_rate": 3.0372338423953317e-05, + "loss": 2.0568, + "step": 13566500 + }, + { + "epoch": 39.27, + "learning_rate": 3.037161477630604e-05, + "loss": 2.0285, + "step": 13567000 + }, + { + "epoch": 39.27, + "learning_rate": 3.037089112865876e-05, + "loss": 2.0646, + "step": 13567500 + }, + { + "epoch": 39.27, + "learning_rate": 3.0370167481011487e-05, + "loss": 2.035, + "step": 13568000 + }, + { + "epoch": 39.28, + "learning_rate": 3.036944383336421e-05, + "loss": 2.0536, + "step": 13568500 + }, + { + "epoch": 39.28, + "learning_rate": 3.0368721633012225e-05, + "loss": 2.0171, + "step": 13569000 + }, + { + "epoch": 39.28, + "learning_rate": 3.036799798536495e-05, + "loss": 2.0602, + "step": 13569500 + }, + { + "epoch": 39.28, + "learning_rate": 3.0367274337717676e-05, + "loss": 2.0534, + "step": 13570000 + }, + { + "epoch": 39.28, + "learning_rate": 3.03665506900704e-05, + "loss": 2.0448, + "step": 13570500 + }, + { + "epoch": 39.28, + "learning_rate": 3.0365827042423124e-05, + "loss": 2.0476, + "step": 13571000 + }, + { + "epoch": 39.28, + "learning_rate": 3.0365103394775846e-05, + "loss": 2.0513, + "step": 13571500 + }, + { + "epoch": 39.29, + "learning_rate": 3.036438119442386e-05, + "loss": 2.0524, + "step": 13572000 + }, + { + "epoch": 39.29, + "learning_rate": 3.0363657546776587e-05, + "loss": 2.0585, + "step": 13572500 + }, + { + "epoch": 39.29, + "learning_rate": 3.036293389912931e-05, + "loss": 2.0389, + "step": 13573000 + }, + { + "epoch": 39.29, + "learning_rate": 3.036221025148203e-05, + "loss": 2.0696, + "step": 13573500 + }, + { + "epoch": 39.29, + "learning_rate": 3.0361486603834754e-05, + "loss": 2.0436, + "step": 13574000 + }, + { + "epoch": 39.29, + "learning_rate": 3.0360765850778066e-05, + "loss": 2.0545, + "step": 13574500 + }, + { + "epoch": 39.29, + "learning_rate": 3.036004220313079e-05, + "loss": 2.0642, + "step": 13575000 + }, + { + "epoch": 39.3, + "learning_rate": 3.0359318555483514e-05, + "loss": 2.0497, + "step": 13575500 + }, + { + "epoch": 39.3, + "learning_rate": 3.0358594907836236e-05, + "loss": 2.0778, + "step": 13576000 + }, + { + "epoch": 39.3, + "learning_rate": 3.035787126018896e-05, + "loss": 2.0471, + "step": 13576500 + }, + { + "epoch": 39.3, + "learning_rate": 3.035714761254168e-05, + "loss": 2.0699, + "step": 13577000 + }, + { + "epoch": 39.3, + "learning_rate": 3.035642396489441e-05, + "loss": 2.048, + "step": 13577500 + }, + { + "epoch": 39.3, + "learning_rate": 3.0355701764542425e-05, + "loss": 2.0522, + "step": 13578000 + }, + { + "epoch": 39.3, + "learning_rate": 3.035497811689515e-05, + "loss": 2.0436, + "step": 13578500 + }, + { + "epoch": 39.31, + "learning_rate": 3.0354254469247873e-05, + "loss": 2.0499, + "step": 13579000 + }, + { + "epoch": 39.31, + "learning_rate": 3.0353530821600595e-05, + "loss": 2.0429, + "step": 13579500 + }, + { + "epoch": 39.31, + "learning_rate": 3.0352807173953318e-05, + "loss": 2.0255, + "step": 13580000 + }, + { + "epoch": 39.31, + "learning_rate": 3.035208352630604e-05, + "loss": 2.049, + "step": 13580500 + }, + { + "epoch": 39.31, + "learning_rate": 3.035136132595406e-05, + "loss": 2.0651, + "step": 13581000 + }, + { + "epoch": 39.31, + "learning_rate": 3.035063767830678e-05, + "loss": 2.0712, + "step": 13581500 + }, + { + "epoch": 39.31, + "learning_rate": 3.0349914030659503e-05, + "loss": 2.0539, + "step": 13582000 + }, + { + "epoch": 39.32, + "learning_rate": 3.0349190383012225e-05, + "loss": 2.0669, + "step": 13582500 + }, + { + "epoch": 39.32, + "learning_rate": 3.034846673536495e-05, + "loss": 2.0493, + "step": 13583000 + }, + { + "epoch": 39.32, + "learning_rate": 3.0347744535012967e-05, + "loss": 2.0554, + "step": 13583500 + }, + { + "epoch": 39.32, + "learning_rate": 3.034702088736569e-05, + "loss": 2.0468, + "step": 13584000 + }, + { + "epoch": 39.32, + "learning_rate": 3.0346297239718415e-05, + "loss": 2.0488, + "step": 13584500 + }, + { + "epoch": 39.32, + "learning_rate": 3.034557359207114e-05, + "loss": 2.0582, + "step": 13585000 + }, + { + "epoch": 39.32, + "learning_rate": 3.034485139171916e-05, + "loss": 2.0764, + "step": 13585500 + }, + { + "epoch": 39.33, + "learning_rate": 3.034412774407188e-05, + "loss": 2.0536, + "step": 13586000 + }, + { + "epoch": 39.33, + "learning_rate": 3.0343404096424604e-05, + "loss": 2.0244, + "step": 13586500 + }, + { + "epoch": 39.33, + "learning_rate": 3.034268044877733e-05, + "loss": 2.0689, + "step": 13587000 + }, + { + "epoch": 39.33, + "learning_rate": 3.034195680113005e-05, + "loss": 2.0549, + "step": 13587500 + }, + { + "epoch": 39.33, + "learning_rate": 3.0341233153482774e-05, + "loss": 2.0379, + "step": 13588000 + }, + { + "epoch": 39.33, + "learning_rate": 3.0340509505835496e-05, + "loss": 2.0471, + "step": 13588500 + }, + { + "epoch": 39.33, + "learning_rate": 3.0339787305483515e-05, + "loss": 2.0456, + "step": 13589000 + }, + { + "epoch": 39.34, + "learning_rate": 3.0339063657836237e-05, + "loss": 2.0388, + "step": 13589500 + }, + { + "epoch": 39.34, + "learning_rate": 3.033834001018896e-05, + "loss": 2.0565, + "step": 13590000 + }, + { + "epoch": 39.34, + "learning_rate": 3.033761636254168e-05, + "loss": 2.0467, + "step": 13590500 + }, + { + "epoch": 39.34, + "learning_rate": 3.0336892714894404e-05, + "loss": 2.0282, + "step": 13591000 + }, + { + "epoch": 39.34, + "learning_rate": 3.033616906724713e-05, + "loss": 2.0444, + "step": 13591500 + }, + { + "epoch": 39.34, + "learning_rate": 3.033544541959985e-05, + "loss": 2.0598, + "step": 13592000 + }, + { + "epoch": 39.34, + "learning_rate": 3.0334721771952577e-05, + "loss": 2.0655, + "step": 13592500 + }, + { + "epoch": 39.35, + "learning_rate": 3.0333998124305303e-05, + "loss": 2.0385, + "step": 13593000 + }, + { + "epoch": 39.35, + "learning_rate": 3.033327592395332e-05, + "loss": 2.0475, + "step": 13593500 + }, + { + "epoch": 39.35, + "learning_rate": 3.033255227630604e-05, + "loss": 2.071, + "step": 13594000 + }, + { + "epoch": 39.35, + "learning_rate": 3.033183007595406e-05, + "loss": 2.0423, + "step": 13594500 + }, + { + "epoch": 39.35, + "learning_rate": 3.0331106428306782e-05, + "loss": 2.0534, + "step": 13595000 + }, + { + "epoch": 39.35, + "learning_rate": 3.0330382780659504e-05, + "loss": 2.0352, + "step": 13595500 + }, + { + "epoch": 39.35, + "learning_rate": 3.032965913301223e-05, + "loss": 2.0534, + "step": 13596000 + }, + { + "epoch": 39.36, + "learning_rate": 3.0328935485364952e-05, + "loss": 2.053, + "step": 13596500 + }, + { + "epoch": 39.36, + "learning_rate": 3.0328211837717674e-05, + "loss": 2.0586, + "step": 13597000 + }, + { + "epoch": 39.36, + "learning_rate": 3.0327488190070396e-05, + "loss": 2.0464, + "step": 13597500 + }, + { + "epoch": 39.36, + "learning_rate": 3.032676454242312e-05, + "loss": 2.0724, + "step": 13598000 + }, + { + "epoch": 39.36, + "learning_rate": 3.0326042342071138e-05, + "loss": 2.0686, + "step": 13598500 + }, + { + "epoch": 39.36, + "learning_rate": 3.032531869442386e-05, + "loss": 2.0552, + "step": 13599000 + }, + { + "epoch": 39.36, + "learning_rate": 3.0324595046776582e-05, + "loss": 2.0301, + "step": 13599500 + }, + { + "epoch": 39.37, + "learning_rate": 3.032387139912931e-05, + "loss": 2.042, + "step": 13600000 + }, + { + "epoch": 39.37, + "learning_rate": 3.0323147751482033e-05, + "loss": 2.0598, + "step": 13600500 + }, + { + "epoch": 39.37, + "learning_rate": 3.0322425551130052e-05, + "loss": 2.033, + "step": 13601000 + }, + { + "epoch": 39.37, + "learning_rate": 3.0321703350778068e-05, + "loss": 2.0295, + "step": 13601500 + }, + { + "epoch": 39.37, + "learning_rate": 3.0320979703130793e-05, + "loss": 2.0363, + "step": 13602000 + }, + { + "epoch": 39.37, + "learning_rate": 3.0320256055483516e-05, + "loss": 2.0476, + "step": 13602500 + }, + { + "epoch": 39.38, + "learning_rate": 3.0319532407836238e-05, + "loss": 2.0525, + "step": 13603000 + }, + { + "epoch": 39.38, + "learning_rate": 3.031880876018896e-05, + "loss": 2.0729, + "step": 13603500 + }, + { + "epoch": 39.38, + "learning_rate": 3.0318085112541682e-05, + "loss": 2.0565, + "step": 13604000 + }, + { + "epoch": 39.38, + "learning_rate": 3.0317361464894405e-05, + "loss": 2.0689, + "step": 13604500 + }, + { + "epoch": 39.38, + "learning_rate": 3.031663781724713e-05, + "loss": 2.0834, + "step": 13605000 + }, + { + "epoch": 39.38, + "learning_rate": 3.0315914169599852e-05, + "loss": 2.0377, + "step": 13605500 + }, + { + "epoch": 39.38, + "learning_rate": 3.0315191969247868e-05, + "loss": 2.0466, + "step": 13606000 + }, + { + "epoch": 39.39, + "learning_rate": 3.0314468321600594e-05, + "loss": 2.0518, + "step": 13606500 + }, + { + "epoch": 39.39, + "learning_rate": 3.0313744673953316e-05, + "loss": 2.0336, + "step": 13607000 + }, + { + "epoch": 39.39, + "learning_rate": 3.0313021026306045e-05, + "loss": 2.0384, + "step": 13607500 + }, + { + "epoch": 39.39, + "learning_rate": 3.031229882595406e-05, + "loss": 2.0586, + "step": 13608000 + }, + { + "epoch": 39.39, + "learning_rate": 3.0311575178306783e-05, + "loss": 2.066, + "step": 13608500 + }, + { + "epoch": 39.39, + "learning_rate": 3.0310851530659505e-05, + "loss": 2.0432, + "step": 13609000 + }, + { + "epoch": 39.39, + "learning_rate": 3.031012788301223e-05, + "loss": 2.0442, + "step": 13609500 + }, + { + "epoch": 39.4, + "learning_rate": 3.0309404235364953e-05, + "loss": 2.0527, + "step": 13610000 + }, + { + "epoch": 39.4, + "learning_rate": 3.0308682035012968e-05, + "loss": 2.0526, + "step": 13610500 + }, + { + "epoch": 39.4, + "learning_rate": 3.0307958387365694e-05, + "loss": 2.0414, + "step": 13611000 + }, + { + "epoch": 39.4, + "learning_rate": 3.0307234739718416e-05, + "loss": 2.0252, + "step": 13611500 + }, + { + "epoch": 39.4, + "learning_rate": 3.030651109207114e-05, + "loss": 2.0355, + "step": 13612000 + }, + { + "epoch": 39.4, + "learning_rate": 3.030578744442386e-05, + "loss": 2.0518, + "step": 13612500 + }, + { + "epoch": 39.4, + "learning_rate": 3.0305063796776583e-05, + "loss": 2.0556, + "step": 13613000 + }, + { + "epoch": 39.41, + "learning_rate": 3.0304341596424602e-05, + "loss": 2.0421, + "step": 13613500 + }, + { + "epoch": 39.41, + "learning_rate": 3.0303617948777324e-05, + "loss": 2.0582, + "step": 13614000 + }, + { + "epoch": 39.41, + "learning_rate": 3.0302894301130046e-05, + "loss": 2.0427, + "step": 13614500 + }, + { + "epoch": 39.41, + "learning_rate": 3.0302170653482775e-05, + "loss": 2.0524, + "step": 13615000 + }, + { + "epoch": 39.41, + "learning_rate": 3.0301447005835497e-05, + "loss": 2.0416, + "step": 13615500 + }, + { + "epoch": 39.41, + "learning_rate": 3.0300724805483516e-05, + "loss": 2.0515, + "step": 13616000 + }, + { + "epoch": 39.41, + "learning_rate": 3.030000115783624e-05, + "loss": 2.0326, + "step": 13616500 + }, + { + "epoch": 39.42, + "learning_rate": 3.029927751018896e-05, + "loss": 2.0558, + "step": 13617000 + }, + { + "epoch": 39.42, + "learning_rate": 3.0298553862541683e-05, + "loss": 2.0582, + "step": 13617500 + }, + { + "epoch": 39.42, + "learning_rate": 3.0297831662189702e-05, + "loss": 2.0616, + "step": 13618000 + }, + { + "epoch": 39.42, + "learning_rate": 3.0297108014542424e-05, + "loss": 2.0451, + "step": 13618500 + }, + { + "epoch": 39.42, + "learning_rate": 3.0296384366895147e-05, + "loss": 2.0612, + "step": 13619000 + }, + { + "epoch": 39.42, + "learning_rate": 3.029566071924787e-05, + "loss": 2.0445, + "step": 13619500 + }, + { + "epoch": 39.42, + "learning_rate": 3.0294937071600594e-05, + "loss": 2.0662, + "step": 13620000 + }, + { + "epoch": 39.43, + "learning_rate": 3.0294213423953317e-05, + "loss": 2.0599, + "step": 13620500 + }, + { + "epoch": 39.43, + "learning_rate": 3.029348977630604e-05, + "loss": 2.0414, + "step": 13621000 + }, + { + "epoch": 39.43, + "learning_rate": 3.029276612865876e-05, + "loss": 2.0573, + "step": 13621500 + }, + { + "epoch": 39.43, + "learning_rate": 3.0292042481011483e-05, + "loss": 2.0239, + "step": 13622000 + }, + { + "epoch": 39.43, + "learning_rate": 3.0291318833364212e-05, + "loss": 2.043, + "step": 13622500 + }, + { + "epoch": 39.43, + "learning_rate": 3.0290598080307525e-05, + "loss": 2.0685, + "step": 13623000 + }, + { + "epoch": 39.43, + "learning_rate": 3.0289874432660247e-05, + "loss": 2.0441, + "step": 13623500 + }, + { + "epoch": 39.44, + "learning_rate": 3.0289150785012972e-05, + "loss": 2.0487, + "step": 13624000 + }, + { + "epoch": 39.44, + "learning_rate": 3.0288427137365695e-05, + "loss": 2.0342, + "step": 13624500 + }, + { + "epoch": 39.44, + "learning_rate": 3.028770493701371e-05, + "loss": 2.0286, + "step": 13625000 + }, + { + "epoch": 39.44, + "learning_rate": 3.0286981289366432e-05, + "loss": 2.0082, + "step": 13625500 + }, + { + "epoch": 39.44, + "learning_rate": 3.0286257641719158e-05, + "loss": 2.022, + "step": 13626000 + }, + { + "epoch": 39.44, + "learning_rate": 3.028553399407188e-05, + "loss": 2.0388, + "step": 13626500 + }, + { + "epoch": 39.44, + "learning_rate": 3.0284810346424603e-05, + "loss": 2.0221, + "step": 13627000 + }, + { + "epoch": 39.45, + "learning_rate": 3.0284086698777325e-05, + "loss": 2.0518, + "step": 13627500 + }, + { + "epoch": 39.45, + "learning_rate": 3.0283363051130047e-05, + "loss": 2.0633, + "step": 13628000 + }, + { + "epoch": 39.45, + "learning_rate": 3.0282639403482773e-05, + "loss": 2.029, + "step": 13628500 + }, + { + "epoch": 39.45, + "learning_rate": 3.0281917203130788e-05, + "loss": 2.0316, + "step": 13629000 + }, + { + "epoch": 39.45, + "learning_rate": 3.028119355548351e-05, + "loss": 2.0457, + "step": 13629500 + }, + { + "epoch": 39.45, + "learning_rate": 3.028046990783624e-05, + "loss": 2.0704, + "step": 13630000 + }, + { + "epoch": 39.45, + "learning_rate": 3.027974626018896e-05, + "loss": 2.0682, + "step": 13630500 + }, + { + "epoch": 39.46, + "learning_rate": 3.0279022612541684e-05, + "loss": 2.0457, + "step": 13631000 + }, + { + "epoch": 39.46, + "learning_rate": 3.0278301859484996e-05, + "loss": 2.0317, + "step": 13631500 + }, + { + "epoch": 39.46, + "learning_rate": 3.0277578211837722e-05, + "loss": 2.0449, + "step": 13632000 + }, + { + "epoch": 39.46, + "learning_rate": 3.0276854564190444e-05, + "loss": 2.0408, + "step": 13632500 + }, + { + "epoch": 39.46, + "learning_rate": 3.0276130916543166e-05, + "loss": 2.0607, + "step": 13633000 + }, + { + "epoch": 39.46, + "learning_rate": 3.0275408716191185e-05, + "loss": 2.0793, + "step": 13633500 + }, + { + "epoch": 39.46, + "learning_rate": 3.0274685068543907e-05, + "loss": 2.0515, + "step": 13634000 + }, + { + "epoch": 39.47, + "learning_rate": 3.027396142089663e-05, + "loss": 2.0483, + "step": 13634500 + }, + { + "epoch": 39.47, + "learning_rate": 3.0273237773249352e-05, + "loss": 2.0518, + "step": 13635000 + }, + { + "epoch": 39.47, + "learning_rate": 3.0272514125602074e-05, + "loss": 2.0527, + "step": 13635500 + }, + { + "epoch": 39.47, + "learning_rate": 3.0271790477954796e-05, + "loss": 2.0736, + "step": 13636000 + }, + { + "epoch": 39.47, + "learning_rate": 3.0271066830307522e-05, + "loss": 2.0561, + "step": 13636500 + }, + { + "epoch": 39.47, + "learning_rate": 3.0270343182660244e-05, + "loss": 2.0623, + "step": 13637000 + }, + { + "epoch": 39.47, + "learning_rate": 3.026962098230826e-05, + "loss": 2.0547, + "step": 13637500 + }, + { + "epoch": 39.48, + "learning_rate": 3.026889733466099e-05, + "loss": 2.0223, + "step": 13638000 + }, + { + "epoch": 39.48, + "learning_rate": 3.026817368701371e-05, + "loss": 2.0437, + "step": 13638500 + }, + { + "epoch": 39.48, + "learning_rate": 3.0267450039366437e-05, + "loss": 2.0585, + "step": 13639000 + }, + { + "epoch": 39.48, + "learning_rate": 3.026672639171916e-05, + "loss": 2.0743, + "step": 13639500 + }, + { + "epoch": 39.48, + "learning_rate": 3.026600274407188e-05, + "loss": 2.0545, + "step": 13640000 + }, + { + "epoch": 39.48, + "learning_rate": 3.0265280543719897e-05, + "loss": 2.0243, + "step": 13640500 + }, + { + "epoch": 39.49, + "learning_rate": 3.0264556896072622e-05, + "loss": 2.0414, + "step": 13641000 + }, + { + "epoch": 39.49, + "learning_rate": 3.0263833248425345e-05, + "loss": 2.0474, + "step": 13641500 + }, + { + "epoch": 39.49, + "learning_rate": 3.0263109600778067e-05, + "loss": 2.0397, + "step": 13642000 + }, + { + "epoch": 39.49, + "learning_rate": 3.026238595313079e-05, + "loss": 2.045, + "step": 13642500 + }, + { + "epoch": 39.49, + "learning_rate": 3.026166230548351e-05, + "loss": 2.0526, + "step": 13643000 + }, + { + "epoch": 39.49, + "learning_rate": 3.0260938657836237e-05, + "loss": 2.0616, + "step": 13643500 + }, + { + "epoch": 39.49, + "learning_rate": 3.026021501018896e-05, + "loss": 2.0591, + "step": 13644000 + }, + { + "epoch": 39.5, + "learning_rate": 3.025949136254168e-05, + "loss": 2.0612, + "step": 13644500 + }, + { + "epoch": 39.5, + "learning_rate": 3.025876771489441e-05, + "loss": 2.0501, + "step": 13645000 + }, + { + "epoch": 39.5, + "learning_rate": 3.0258044067247133e-05, + "loss": 2.0457, + "step": 13645500 + }, + { + "epoch": 39.5, + "learning_rate": 3.0257321866895148e-05, + "loss": 2.0516, + "step": 13646000 + }, + { + "epoch": 39.5, + "learning_rate": 3.0256598219247874e-05, + "loss": 2.0463, + "step": 13646500 + }, + { + "epoch": 39.5, + "learning_rate": 3.0255874571600596e-05, + "loss": 2.0763, + "step": 13647000 + }, + { + "epoch": 39.5, + "learning_rate": 3.0255150923953318e-05, + "loss": 2.0634, + "step": 13647500 + }, + { + "epoch": 39.51, + "learning_rate": 3.025442727630604e-05, + "loss": 2.0368, + "step": 13648000 + }, + { + "epoch": 39.51, + "learning_rate": 3.0253703628658763e-05, + "loss": 2.0761, + "step": 13648500 + }, + { + "epoch": 39.51, + "learning_rate": 3.0252979981011488e-05, + "loss": 2.0394, + "step": 13649000 + }, + { + "epoch": 39.51, + "learning_rate": 3.025225633336421e-05, + "loss": 2.0557, + "step": 13649500 + }, + { + "epoch": 39.51, + "learning_rate": 3.0251534133012226e-05, + "loss": 2.0434, + "step": 13650000 + }, + { + "epoch": 39.51, + "learning_rate": 3.025081337995554e-05, + "loss": 2.0481, + "step": 13650500 + }, + { + "epoch": 39.51, + "learning_rate": 3.025008973230826e-05, + "loss": 2.0571, + "step": 13651000 + }, + { + "epoch": 39.52, + "learning_rate": 3.0249366084660986e-05, + "loss": 2.0574, + "step": 13651500 + }, + { + "epoch": 39.52, + "learning_rate": 3.024864243701371e-05, + "loss": 2.0454, + "step": 13652000 + }, + { + "epoch": 39.52, + "learning_rate": 3.0247920236661724e-05, + "loss": 2.0788, + "step": 13652500 + }, + { + "epoch": 39.52, + "learning_rate": 3.0247196589014453e-05, + "loss": 2.0495, + "step": 13653000 + }, + { + "epoch": 39.52, + "learning_rate": 3.0246472941367175e-05, + "loss": 2.0395, + "step": 13653500 + }, + { + "epoch": 39.52, + "learning_rate": 3.02457492937199e-05, + "loss": 2.0243, + "step": 13654000 + }, + { + "epoch": 39.52, + "learning_rate": 3.0245025646072623e-05, + "loss": 2.0571, + "step": 13654500 + }, + { + "epoch": 39.53, + "learning_rate": 3.0244301998425345e-05, + "loss": 2.0565, + "step": 13655000 + }, + { + "epoch": 39.53, + "learning_rate": 3.0243578350778068e-05, + "loss": 2.0479, + "step": 13655500 + }, + { + "epoch": 39.53, + "learning_rate": 3.024285470313079e-05, + "loss": 2.0359, + "step": 13656000 + }, + { + "epoch": 39.53, + "learning_rate": 3.0242131055483512e-05, + "loss": 2.0444, + "step": 13656500 + }, + { + "epoch": 39.53, + "learning_rate": 3.0241407407836238e-05, + "loss": 2.0549, + "step": 13657000 + }, + { + "epoch": 39.53, + "learning_rate": 3.024068376018896e-05, + "loss": 2.0387, + "step": 13657500 + }, + { + "epoch": 39.53, + "learning_rate": 3.0239960112541682e-05, + "loss": 2.0752, + "step": 13658000 + }, + { + "epoch": 39.54, + "learning_rate": 3.02392379121897e-05, + "loss": 2.0787, + "step": 13658500 + }, + { + "epoch": 39.54, + "learning_rate": 3.0238514264542423e-05, + "loss": 2.0489, + "step": 13659000 + }, + { + "epoch": 39.54, + "learning_rate": 3.0237790616895146e-05, + "loss": 2.0379, + "step": 13659500 + }, + { + "epoch": 39.54, + "learning_rate": 3.0237066969247875e-05, + "loss": 2.0568, + "step": 13660000 + }, + { + "epoch": 39.54, + "learning_rate": 3.0236343321600597e-05, + "loss": 2.0679, + "step": 13660500 + }, + { + "epoch": 39.54, + "learning_rate": 3.023561967395332e-05, + "loss": 2.0428, + "step": 13661000 + }, + { + "epoch": 39.54, + "learning_rate": 3.023489602630604e-05, + "loss": 2.0529, + "step": 13661500 + }, + { + "epoch": 39.55, + "learning_rate": 3.0234172378658763e-05, + "loss": 2.0355, + "step": 13662000 + }, + { + "epoch": 39.55, + "learning_rate": 3.0233450178306782e-05, + "loss": 2.0726, + "step": 13662500 + }, + { + "epoch": 39.55, + "learning_rate": 3.0232726530659505e-05, + "loss": 2.0441, + "step": 13663000 + }, + { + "epoch": 39.55, + "learning_rate": 3.0232002883012227e-05, + "loss": 2.0284, + "step": 13663500 + }, + { + "epoch": 39.55, + "learning_rate": 3.0231279235364952e-05, + "loss": 2.0535, + "step": 13664000 + }, + { + "epoch": 39.55, + "learning_rate": 3.0230557035012968e-05, + "loss": 2.048, + "step": 13664500 + }, + { + "epoch": 39.55, + "learning_rate": 3.022983338736569e-05, + "loss": 2.0665, + "step": 13665000 + }, + { + "epoch": 39.56, + "learning_rate": 3.0229109739718413e-05, + "loss": 2.0609, + "step": 13665500 + }, + { + "epoch": 39.56, + "learning_rate": 3.022838753936643e-05, + "loss": 2.0495, + "step": 13666000 + }, + { + "epoch": 39.56, + "learning_rate": 3.0227663891719154e-05, + "loss": 2.0404, + "step": 13666500 + }, + { + "epoch": 39.56, + "learning_rate": 3.0226940244071876e-05, + "loss": 2.0411, + "step": 13667000 + }, + { + "epoch": 39.56, + "learning_rate": 3.0226216596424605e-05, + "loss": 2.0258, + "step": 13667500 + }, + { + "epoch": 39.56, + "learning_rate": 3.0225492948777327e-05, + "loss": 2.0681, + "step": 13668000 + }, + { + "epoch": 39.56, + "learning_rate": 3.0224769301130053e-05, + "loss": 2.0536, + "step": 13668500 + }, + { + "epoch": 39.57, + "learning_rate": 3.0224045653482775e-05, + "loss": 2.0712, + "step": 13669000 + }, + { + "epoch": 39.57, + "learning_rate": 3.0223322005835497e-05, + "loss": 2.0581, + "step": 13669500 + }, + { + "epoch": 39.57, + "learning_rate": 3.022259835818822e-05, + "loss": 2.0685, + "step": 13670000 + }, + { + "epoch": 39.57, + "learning_rate": 3.022187615783624e-05, + "loss": 2.0571, + "step": 13670500 + }, + { + "epoch": 39.57, + "learning_rate": 3.022115251018896e-05, + "loss": 2.0593, + "step": 13671000 + }, + { + "epoch": 39.57, + "learning_rate": 3.0220428862541683e-05, + "loss": 2.0456, + "step": 13671500 + }, + { + "epoch": 39.57, + "learning_rate": 3.0219705214894405e-05, + "loss": 2.0521, + "step": 13672000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0218981567247127e-05, + "loss": 2.0733, + "step": 13672500 + }, + { + "epoch": 39.58, + "learning_rate": 3.0218257919599853e-05, + "loss": 2.0551, + "step": 13673000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0217534271952575e-05, + "loss": 2.0261, + "step": 13673500 + }, + { + "epoch": 39.58, + "learning_rate": 3.0216810624305297e-05, + "loss": 2.0587, + "step": 13674000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0216086976658026e-05, + "loss": 2.0436, + "step": 13674500 + }, + { + "epoch": 39.58, + "learning_rate": 3.021536332901075e-05, + "loss": 2.0973, + "step": 13675000 + }, + { + "epoch": 39.58, + "learning_rate": 3.0214641128658768e-05, + "loss": 2.0221, + "step": 13675500 + }, + { + "epoch": 39.59, + "learning_rate": 3.021391748101149e-05, + "loss": 2.07, + "step": 13676000 + }, + { + "epoch": 39.59, + "learning_rate": 3.0213193833364212e-05, + "loss": 2.0494, + "step": 13676500 + }, + { + "epoch": 39.59, + "learning_rate": 3.0212470185716934e-05, + "loss": 2.0657, + "step": 13677000 + }, + { + "epoch": 39.59, + "learning_rate": 3.0211746538069657e-05, + "loss": 2.0572, + "step": 13677500 + }, + { + "epoch": 39.59, + "learning_rate": 3.021102289042238e-05, + "loss": 2.0581, + "step": 13678000 + }, + { + "epoch": 39.59, + "learning_rate": 3.0210299242775104e-05, + "loss": 2.0645, + "step": 13678500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0209575595127827e-05, + "loss": 2.0328, + "step": 13679000 + }, + { + "epoch": 39.6, + "learning_rate": 3.020885194748055e-05, + "loss": 2.0698, + "step": 13679500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0208129747128568e-05, + "loss": 2.0627, + "step": 13680000 + }, + { + "epoch": 39.6, + "learning_rate": 3.0207407546776583e-05, + "loss": 2.055, + "step": 13680500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0206683899129306e-05, + "loss": 2.0599, + "step": 13681000 + }, + { + "epoch": 39.6, + "learning_rate": 3.0205960251482028e-05, + "loss": 2.0349, + "step": 13681500 + }, + { + "epoch": 39.6, + "learning_rate": 3.0205238051130047e-05, + "loss": 2.0645, + "step": 13682000 + }, + { + "epoch": 39.61, + "learning_rate": 3.0204514403482776e-05, + "loss": 2.0492, + "step": 13682500 + }, + { + "epoch": 39.61, + "learning_rate": 3.020379220313079e-05, + "loss": 2.0427, + "step": 13683000 + }, + { + "epoch": 39.61, + "learning_rate": 3.0203068555483517e-05, + "loss": 2.0413, + "step": 13683500 + }, + { + "epoch": 39.61, + "learning_rate": 3.020234490783624e-05, + "loss": 2.0575, + "step": 13684000 + }, + { + "epoch": 39.61, + "learning_rate": 3.020162126018896e-05, + "loss": 2.0501, + "step": 13684500 + }, + { + "epoch": 39.61, + "learning_rate": 3.020089905983698e-05, + "loss": 2.0483, + "step": 13685000 + }, + { + "epoch": 39.61, + "learning_rate": 3.0200175412189703e-05, + "loss": 2.0336, + "step": 13685500 + }, + { + "epoch": 39.62, + "learning_rate": 3.0199451764542425e-05, + "loss": 2.0262, + "step": 13686000 + }, + { + "epoch": 39.62, + "learning_rate": 3.0198728116895147e-05, + "loss": 2.0406, + "step": 13686500 + }, + { + "epoch": 39.62, + "learning_rate": 3.019800446924787e-05, + "loss": 2.0149, + "step": 13687000 + }, + { + "epoch": 39.62, + "learning_rate": 3.019728082160059e-05, + "loss": 2.0811, + "step": 13687500 + }, + { + "epoch": 39.62, + "learning_rate": 3.0196557173953317e-05, + "loss": 2.0493, + "step": 13688000 + }, + { + "epoch": 39.62, + "learning_rate": 3.019583352630604e-05, + "loss": 2.0499, + "step": 13688500 + }, + { + "epoch": 39.62, + "learning_rate": 3.019510987865876e-05, + "loss": 2.0718, + "step": 13689000 + }, + { + "epoch": 39.63, + "learning_rate": 3.0194386231011484e-05, + "loss": 2.0569, + "step": 13689500 + }, + { + "epoch": 39.63, + "learning_rate": 3.0193662583364213e-05, + "loss": 2.0568, + "step": 13690000 + }, + { + "epoch": 39.63, + "learning_rate": 3.0192938935716935e-05, + "loss": 2.0452, + "step": 13690500 + }, + { + "epoch": 39.63, + "learning_rate": 3.0192216735364954e-05, + "loss": 2.0453, + "step": 13691000 + }, + { + "epoch": 39.63, + "learning_rate": 3.019149453501297e-05, + "loss": 2.0511, + "step": 13691500 + }, + { + "epoch": 39.63, + "learning_rate": 3.0190770887365695e-05, + "loss": 2.0402, + "step": 13692000 + }, + { + "epoch": 39.63, + "learning_rate": 3.019004868701371e-05, + "loss": 2.0575, + "step": 13692500 + }, + { + "epoch": 39.64, + "learning_rate": 3.0189325039366433e-05, + "loss": 2.0482, + "step": 13693000 + }, + { + "epoch": 39.64, + "learning_rate": 3.0188601391719155e-05, + "loss": 2.0625, + "step": 13693500 + }, + { + "epoch": 39.64, + "learning_rate": 3.018787774407188e-05, + "loss": 2.0696, + "step": 13694000 + }, + { + "epoch": 39.64, + "learning_rate": 3.0187154096424603e-05, + "loss": 2.0476, + "step": 13694500 + }, + { + "epoch": 39.64, + "learning_rate": 3.0186430448777325e-05, + "loss": 2.07, + "step": 13695000 + }, + { + "epoch": 39.64, + "learning_rate": 3.0185706801130048e-05, + "loss": 2.0452, + "step": 13695500 + }, + { + "epoch": 39.64, + "learning_rate": 3.018498315348277e-05, + "loss": 2.0586, + "step": 13696000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0184259505835495e-05, + "loss": 2.0492, + "step": 13696500 + }, + { + "epoch": 39.65, + "learning_rate": 3.0183535858188218e-05, + "loss": 2.0514, + "step": 13697000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0182812210540943e-05, + "loss": 2.0562, + "step": 13697500 + }, + { + "epoch": 39.65, + "learning_rate": 3.018208856289367e-05, + "loss": 2.0712, + "step": 13698000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0181366362541684e-05, + "loss": 2.0622, + "step": 13698500 + }, + { + "epoch": 39.65, + "learning_rate": 3.0180644162189703e-05, + "loss": 2.0478, + "step": 13699000 + }, + { + "epoch": 39.65, + "learning_rate": 3.0179920514542426e-05, + "loss": 2.0524, + "step": 13699500 + }, + { + "epoch": 39.66, + "learning_rate": 3.0179196866895148e-05, + "loss": 2.0614, + "step": 13700000 + }, + { + "epoch": 39.66, + "learning_rate": 3.0178474666543167e-05, + "loss": 2.0506, + "step": 13700500 + }, + { + "epoch": 39.66, + "learning_rate": 3.017775101889589e-05, + "loss": 2.0362, + "step": 13701000 + }, + { + "epoch": 39.66, + "learning_rate": 3.017702737124861e-05, + "loss": 2.0628, + "step": 13701500 + }, + { + "epoch": 39.66, + "learning_rate": 3.017630517089663e-05, + "loss": 2.0461, + "step": 13702000 + }, + { + "epoch": 39.66, + "learning_rate": 3.0175581523249352e-05, + "loss": 2.0537, + "step": 13702500 + }, + { + "epoch": 39.66, + "learning_rate": 3.0174857875602075e-05, + "loss": 2.0636, + "step": 13703000 + }, + { + "epoch": 39.67, + "learning_rate": 3.0174134227954797e-05, + "loss": 2.063, + "step": 13703500 + }, + { + "epoch": 39.67, + "learning_rate": 3.017341058030752e-05, + "loss": 2.0412, + "step": 13704000 + }, + { + "epoch": 39.67, + "learning_rate": 3.0172686932660245e-05, + "loss": 2.0472, + "step": 13704500 + }, + { + "epoch": 39.67, + "learning_rate": 3.017196328501297e-05, + "loss": 2.0674, + "step": 13705000 + }, + { + "epoch": 39.67, + "learning_rate": 3.0171239637365696e-05, + "loss": 2.0362, + "step": 13705500 + }, + { + "epoch": 39.67, + "learning_rate": 3.0170515989718418e-05, + "loss": 2.0616, + "step": 13706000 + }, + { + "epoch": 39.67, + "learning_rate": 3.016979234207114e-05, + "loss": 2.0448, + "step": 13706500 + }, + { + "epoch": 39.68, + "learning_rate": 3.0169068694423863e-05, + "loss": 2.0565, + "step": 13707000 + }, + { + "epoch": 39.68, + "learning_rate": 3.0168345046776585e-05, + "loss": 2.0572, + "step": 13707500 + }, + { + "epoch": 39.68, + "learning_rate": 3.0167621399129307e-05, + "loss": 2.0461, + "step": 13708000 + }, + { + "epoch": 39.68, + "learning_rate": 3.0166897751482033e-05, + "loss": 2.0424, + "step": 13708500 + }, + { + "epoch": 39.68, + "learning_rate": 3.0166174103834755e-05, + "loss": 2.0469, + "step": 13709000 + }, + { + "epoch": 39.68, + "learning_rate": 3.0165450456187477e-05, + "loss": 2.0424, + "step": 13709500 + }, + { + "epoch": 39.68, + "learning_rate": 3.01647268085402e-05, + "loss": 2.0461, + "step": 13710000 + }, + { + "epoch": 39.69, + "learning_rate": 3.016400460818822e-05, + "loss": 2.0473, + "step": 13710500 + }, + { + "epoch": 39.69, + "learning_rate": 3.0163282407836234e-05, + "loss": 2.0533, + "step": 13711000 + }, + { + "epoch": 39.69, + "learning_rate": 3.016255876018896e-05, + "loss": 2.0687, + "step": 13711500 + }, + { + "epoch": 39.69, + "learning_rate": 3.0161835112541682e-05, + "loss": 2.0482, + "step": 13712000 + }, + { + "epoch": 39.69, + "learning_rate": 3.016111146489441e-05, + "loss": 2.0454, + "step": 13712500 + }, + { + "epoch": 39.69, + "learning_rate": 3.0160389264542426e-05, + "loss": 2.0473, + "step": 13713000 + }, + { + "epoch": 39.69, + "learning_rate": 3.015966561689515e-05, + "loss": 2.0696, + "step": 13713500 + }, + { + "epoch": 39.7, + "learning_rate": 3.0158943416543168e-05, + "loss": 2.0453, + "step": 13714000 + }, + { + "epoch": 39.7, + "learning_rate": 3.015821976889589e-05, + "loss": 2.0533, + "step": 13714500 + }, + { + "epoch": 39.7, + "learning_rate": 3.0157496121248612e-05, + "loss": 2.0645, + "step": 13715000 + }, + { + "epoch": 39.7, + "learning_rate": 3.0156772473601334e-05, + "loss": 2.0718, + "step": 13715500 + }, + { + "epoch": 39.7, + "learning_rate": 3.015604882595406e-05, + "loss": 2.0466, + "step": 13716000 + }, + { + "epoch": 39.7, + "learning_rate": 3.0155325178306782e-05, + "loss": 2.0337, + "step": 13716500 + }, + { + "epoch": 39.71, + "learning_rate": 3.0154601530659504e-05, + "loss": 2.0442, + "step": 13717000 + }, + { + "epoch": 39.71, + "learning_rate": 3.0153877883012227e-05, + "loss": 2.0722, + "step": 13717500 + }, + { + "epoch": 39.71, + "learning_rate": 3.015315423536495e-05, + "loss": 2.0482, + "step": 13718000 + }, + { + "epoch": 39.71, + "learning_rate": 3.0152432035012968e-05, + "loss": 2.0397, + "step": 13718500 + }, + { + "epoch": 39.71, + "learning_rate": 3.015170838736569e-05, + "loss": 2.049, + "step": 13719000 + }, + { + "epoch": 39.71, + "learning_rate": 3.0150984739718412e-05, + "loss": 2.0454, + "step": 13719500 + }, + { + "epoch": 39.71, + "learning_rate": 3.015026109207114e-05, + "loss": 2.0836, + "step": 13720000 + }, + { + "epoch": 39.72, + "learning_rate": 3.014953889171916e-05, + "loss": 2.057, + "step": 13720500 + }, + { + "epoch": 39.72, + "learning_rate": 3.0148815244071883e-05, + "loss": 2.0813, + "step": 13721000 + }, + { + "epoch": 39.72, + "learning_rate": 3.0148091596424605e-05, + "loss": 2.0463, + "step": 13721500 + }, + { + "epoch": 39.72, + "learning_rate": 3.0147367948777327e-05, + "loss": 2.0317, + "step": 13722000 + }, + { + "epoch": 39.72, + "learning_rate": 3.014664430113005e-05, + "loss": 2.0703, + "step": 13722500 + }, + { + "epoch": 39.72, + "learning_rate": 3.0145920653482775e-05, + "loss": 2.0424, + "step": 13723000 + }, + { + "epoch": 39.72, + "learning_rate": 3.0145197005835497e-05, + "loss": 2.0494, + "step": 13723500 + }, + { + "epoch": 39.73, + "learning_rate": 3.014447335818822e-05, + "loss": 2.0561, + "step": 13724000 + }, + { + "epoch": 39.73, + "learning_rate": 3.014374971054094e-05, + "loss": 2.0504, + "step": 13724500 + }, + { + "epoch": 39.73, + "learning_rate": 3.0143026062893664e-05, + "loss": 2.0603, + "step": 13725000 + }, + { + "epoch": 39.73, + "learning_rate": 3.0142302415246386e-05, + "loss": 2.0517, + "step": 13725500 + }, + { + "epoch": 39.73, + "learning_rate": 3.014157876759911e-05, + "loss": 2.066, + "step": 13726000 + }, + { + "epoch": 39.73, + "learning_rate": 3.0140855119951834e-05, + "loss": 2.0545, + "step": 13726500 + }, + { + "epoch": 39.73, + "learning_rate": 3.0140131472304563e-05, + "loss": 2.0828, + "step": 13727000 + }, + { + "epoch": 39.74, + "learning_rate": 3.0139407824657285e-05, + "loss": 2.0596, + "step": 13727500 + }, + { + "epoch": 39.74, + "learning_rate": 3.01386856243053e-05, + "loss": 2.0505, + "step": 13728000 + }, + { + "epoch": 39.74, + "learning_rate": 3.0137961976658023e-05, + "loss": 2.045, + "step": 13728500 + }, + { + "epoch": 39.74, + "learning_rate": 3.013723832901075e-05, + "loss": 2.0467, + "step": 13729000 + }, + { + "epoch": 39.74, + "learning_rate": 3.013651468136347e-05, + "loss": 2.0589, + "step": 13729500 + }, + { + "epoch": 39.74, + "learning_rate": 3.0135791033716193e-05, + "loss": 2.0485, + "step": 13730000 + }, + { + "epoch": 39.74, + "learning_rate": 3.0135068833364212e-05, + "loss": 2.0648, + "step": 13730500 + }, + { + "epoch": 39.75, + "learning_rate": 3.0134345185716934e-05, + "loss": 2.0425, + "step": 13731000 + }, + { + "epoch": 39.75, + "learning_rate": 3.0133621538069656e-05, + "loss": 2.0805, + "step": 13731500 + }, + { + "epoch": 39.75, + "learning_rate": 3.013289789042238e-05, + "loss": 2.0398, + "step": 13732000 + }, + { + "epoch": 39.75, + "learning_rate": 3.01321742427751e-05, + "loss": 2.0479, + "step": 13732500 + }, + { + "epoch": 39.75, + "learning_rate": 3.0131450595127823e-05, + "loss": 2.0431, + "step": 13733000 + }, + { + "epoch": 39.75, + "learning_rate": 3.0130729842071135e-05, + "loss": 2.0371, + "step": 13733500 + }, + { + "epoch": 39.75, + "learning_rate": 3.013000619442386e-05, + "loss": 2.0449, + "step": 13734000 + }, + { + "epoch": 39.76, + "learning_rate": 3.0129282546776583e-05, + "loss": 2.0439, + "step": 13734500 + }, + { + "epoch": 39.76, + "learning_rate": 3.0128558899129312e-05, + "loss": 2.0607, + "step": 13735000 + }, + { + "epoch": 39.76, + "learning_rate": 3.0127835251482034e-05, + "loss": 2.0745, + "step": 13735500 + }, + { + "epoch": 39.76, + "learning_rate": 3.0127111603834757e-05, + "loss": 2.0807, + "step": 13736000 + }, + { + "epoch": 39.76, + "learning_rate": 3.012638795618748e-05, + "loss": 2.049, + "step": 13736500 + }, + { + "epoch": 39.76, + "learning_rate": 3.0125665755835498e-05, + "loss": 2.0525, + "step": 13737000 + }, + { + "epoch": 39.76, + "learning_rate": 3.012494210818822e-05, + "loss": 2.0582, + "step": 13737500 + }, + { + "epoch": 39.77, + "learning_rate": 3.0124218460540942e-05, + "loss": 2.0532, + "step": 13738000 + }, + { + "epoch": 39.77, + "learning_rate": 3.0123494812893665e-05, + "loss": 2.0567, + "step": 13738500 + }, + { + "epoch": 39.77, + "learning_rate": 3.0122771165246387e-05, + "loss": 2.0426, + "step": 13739000 + }, + { + "epoch": 39.77, + "learning_rate": 3.0122047517599112e-05, + "loss": 2.0665, + "step": 13739500 + }, + { + "epoch": 39.77, + "learning_rate": 3.0121325317247128e-05, + "loss": 2.0633, + "step": 13740000 + }, + { + "epoch": 39.77, + "learning_rate": 3.012060166959985e-05, + "loss": 2.0289, + "step": 13740500 + }, + { + "epoch": 39.77, + "learning_rate": 3.0119878021952576e-05, + "loss": 2.0387, + "step": 13741000 + }, + { + "epoch": 39.78, + "learning_rate": 3.011915582160059e-05, + "loss": 2.0418, + "step": 13741500 + }, + { + "epoch": 39.78, + "learning_rate": 3.0118432173953314e-05, + "loss": 2.041, + "step": 13742000 + }, + { + "epoch": 39.78, + "learning_rate": 3.0117708526306043e-05, + "loss": 2.0174, + "step": 13742500 + }, + { + "epoch": 39.78, + "learning_rate": 3.0116984878658765e-05, + "loss": 2.0437, + "step": 13743000 + }, + { + "epoch": 39.78, + "learning_rate": 3.011626123101149e-05, + "loss": 2.0643, + "step": 13743500 + }, + { + "epoch": 39.78, + "learning_rate": 3.0115537583364213e-05, + "loss": 2.0686, + "step": 13744000 + }, + { + "epoch": 39.78, + "learning_rate": 3.0114813935716935e-05, + "loss": 2.0469, + "step": 13744500 + }, + { + "epoch": 39.79, + "learning_rate": 3.011409173536495e-05, + "loss": 2.0761, + "step": 13745000 + }, + { + "epoch": 39.79, + "learning_rate": 3.0113368087717676e-05, + "loss": 2.0775, + "step": 13745500 + }, + { + "epoch": 39.79, + "learning_rate": 3.01126444400704e-05, + "loss": 2.0508, + "step": 13746000 + }, + { + "epoch": 39.79, + "learning_rate": 3.011192079242312e-05, + "loss": 2.0474, + "step": 13746500 + }, + { + "epoch": 39.79, + "learning_rate": 3.0111197144775843e-05, + "loss": 2.0491, + "step": 13747000 + }, + { + "epoch": 39.79, + "learning_rate": 3.0110473497128565e-05, + "loss": 2.0442, + "step": 13747500 + }, + { + "epoch": 39.79, + "learning_rate": 3.010974984948129e-05, + "loss": 2.0625, + "step": 13748000 + }, + { + "epoch": 39.8, + "learning_rate": 3.0109026201834013e-05, + "loss": 2.0459, + "step": 13748500 + }, + { + "epoch": 39.8, + "learning_rate": 3.010830400148203e-05, + "loss": 2.0337, + "step": 13749000 + }, + { + "epoch": 39.8, + "learning_rate": 3.010758035383475e-05, + "loss": 2.0451, + "step": 13749500 + }, + { + "epoch": 39.8, + "learning_rate": 3.010685670618748e-05, + "loss": 2.0486, + "step": 13750000 + }, + { + "epoch": 39.8, + "learning_rate": 3.0106133058540202e-05, + "loss": 2.0596, + "step": 13750500 + }, + { + "epoch": 39.8, + "learning_rate": 3.0105409410892928e-05, + "loss": 2.0416, + "step": 13751000 + }, + { + "epoch": 39.8, + "learning_rate": 3.010468576324565e-05, + "loss": 2.0561, + "step": 13751500 + }, + { + "epoch": 39.81, + "learning_rate": 3.0103962115598372e-05, + "loss": 2.0574, + "step": 13752000 + }, + { + "epoch": 39.81, + "learning_rate": 3.0103238467951094e-05, + "loss": 2.0529, + "step": 13752500 + }, + { + "epoch": 39.81, + "learning_rate": 3.0102514820303816e-05, + "loss": 2.0485, + "step": 13753000 + }, + { + "epoch": 39.81, + "learning_rate": 3.0101792619951835e-05, + "loss": 2.0502, + "step": 13753500 + }, + { + "epoch": 39.81, + "learning_rate": 3.0101068972304558e-05, + "loss": 2.0579, + "step": 13754000 + }, + { + "epoch": 39.81, + "learning_rate": 3.010034532465728e-05, + "loss": 2.0877, + "step": 13754500 + }, + { + "epoch": 39.82, + "learning_rate": 3.0099621677010002e-05, + "loss": 2.0587, + "step": 13755000 + }, + { + "epoch": 39.82, + "learning_rate": 3.009889947665802e-05, + "loss": 2.075, + "step": 13755500 + }, + { + "epoch": 39.82, + "learning_rate": 3.0098175829010743e-05, + "loss": 2.0603, + "step": 13756000 + }, + { + "epoch": 39.82, + "learning_rate": 3.0097452181363466e-05, + "loss": 2.0598, + "step": 13756500 + }, + { + "epoch": 39.82, + "learning_rate": 3.0096728533716195e-05, + "loss": 2.0419, + "step": 13757000 + }, + { + "epoch": 39.82, + "learning_rate": 3.0096004886068917e-05, + "loss": 2.0328, + "step": 13757500 + }, + { + "epoch": 39.82, + "learning_rate": 3.0095281238421642e-05, + "loss": 2.0643, + "step": 13758000 + }, + { + "epoch": 39.83, + "learning_rate": 3.0094557590774365e-05, + "loss": 2.0506, + "step": 13758500 + }, + { + "epoch": 39.83, + "learning_rate": 3.0093833943127087e-05, + "loss": 2.0588, + "step": 13759000 + }, + { + "epoch": 39.83, + "learning_rate": 3.009311029547981e-05, + "loss": 2.037, + "step": 13759500 + }, + { + "epoch": 39.83, + "learning_rate": 3.009238664783253e-05, + "loss": 2.0431, + "step": 13760000 + }, + { + "epoch": 39.83, + "learning_rate": 3.0091663000185254e-05, + "loss": 2.0282, + "step": 13760500 + }, + { + "epoch": 39.83, + "learning_rate": 3.009093935253798e-05, + "loss": 2.052, + "step": 13761000 + }, + { + "epoch": 39.83, + "learning_rate": 3.0090217152185995e-05, + "loss": 2.0635, + "step": 13761500 + }, + { + "epoch": 39.84, + "learning_rate": 3.0089494951834014e-05, + "loss": 2.078, + "step": 13762000 + }, + { + "epoch": 39.84, + "learning_rate": 3.0088771304186736e-05, + "loss": 2.0675, + "step": 13762500 + }, + { + "epoch": 39.84, + "learning_rate": 3.0088047656539458e-05, + "loss": 2.0581, + "step": 13763000 + }, + { + "epoch": 39.84, + "learning_rate": 3.008732400889218e-05, + "loss": 2.0869, + "step": 13763500 + }, + { + "epoch": 39.84, + "learning_rate": 3.0086600361244906e-05, + "loss": 2.0296, + "step": 13764000 + }, + { + "epoch": 39.84, + "learning_rate": 3.008587671359763e-05, + "loss": 2.0463, + "step": 13764500 + }, + { + "epoch": 39.84, + "learning_rate": 3.0085153065950354e-05, + "loss": 2.0711, + "step": 13765000 + }, + { + "epoch": 39.85, + "learning_rate": 3.008442941830308e-05, + "loss": 2.0339, + "step": 13765500 + }, + { + "epoch": 39.85, + "learning_rate": 3.0083707217951095e-05, + "loss": 2.0474, + "step": 13766000 + }, + { + "epoch": 39.85, + "learning_rate": 3.0082983570303817e-05, + "loss": 2.0473, + "step": 13766500 + }, + { + "epoch": 39.85, + "learning_rate": 3.0082259922656543e-05, + "loss": 2.0689, + "step": 13767000 + }, + { + "epoch": 39.85, + "learning_rate": 3.0081536275009265e-05, + "loss": 2.0479, + "step": 13767500 + }, + { + "epoch": 39.85, + "learning_rate": 3.0080812627361987e-05, + "loss": 2.0298, + "step": 13768000 + }, + { + "epoch": 39.85, + "learning_rate": 3.008008897971471e-05, + "loss": 2.0623, + "step": 13768500 + }, + { + "epoch": 39.86, + "learning_rate": 3.0079365332067432e-05, + "loss": 2.0611, + "step": 13769000 + }, + { + "epoch": 39.86, + "learning_rate": 3.0078641684420154e-05, + "loss": 2.0763, + "step": 13769500 + }, + { + "epoch": 39.86, + "learning_rate": 3.0077919484068173e-05, + "loss": 2.0557, + "step": 13770000 + }, + { + "epoch": 39.86, + "learning_rate": 3.0077197283716192e-05, + "loss": 2.0324, + "step": 13770500 + }, + { + "epoch": 39.86, + "learning_rate": 3.0076473636068914e-05, + "loss": 2.0462, + "step": 13771000 + }, + { + "epoch": 39.86, + "learning_rate": 3.0075749988421636e-05, + "loss": 2.0435, + "step": 13771500 + }, + { + "epoch": 39.86, + "learning_rate": 3.0075026340774365e-05, + "loss": 2.0594, + "step": 13772000 + }, + { + "epoch": 39.87, + "learning_rate": 3.0074302693127088e-05, + "loss": 2.0472, + "step": 13772500 + }, + { + "epoch": 39.87, + "learning_rate": 3.007357904547981e-05, + "loss": 2.0491, + "step": 13773000 + }, + { + "epoch": 39.87, + "learning_rate": 3.007285684512783e-05, + "loss": 2.06, + "step": 13773500 + }, + { + "epoch": 39.87, + "learning_rate": 3.007213319748055e-05, + "loss": 2.0596, + "step": 13774000 + }, + { + "epoch": 39.87, + "learning_rate": 3.0071409549833273e-05, + "loss": 2.0684, + "step": 13774500 + }, + { + "epoch": 39.87, + "learning_rate": 3.0070685902185996e-05, + "loss": 2.0829, + "step": 13775000 + }, + { + "epoch": 39.87, + "learning_rate": 3.0069962254538718e-05, + "loss": 2.0599, + "step": 13775500 + }, + { + "epoch": 39.88, + "learning_rate": 3.0069238606891443e-05, + "loss": 2.0512, + "step": 13776000 + }, + { + "epoch": 39.88, + "learning_rate": 3.006851640653946e-05, + "loss": 2.0426, + "step": 13776500 + }, + { + "epoch": 39.88, + "learning_rate": 3.006779275889218e-05, + "loss": 2.034, + "step": 13777000 + }, + { + "epoch": 39.88, + "learning_rate": 3.0067069111244907e-05, + "loss": 2.0528, + "step": 13777500 + }, + { + "epoch": 39.88, + "learning_rate": 3.006634546359763e-05, + "loss": 2.0671, + "step": 13778000 + }, + { + "epoch": 39.88, + "learning_rate": 3.0065623263245645e-05, + "loss": 2.0662, + "step": 13778500 + }, + { + "epoch": 39.88, + "learning_rate": 3.006489961559837e-05, + "loss": 2.0341, + "step": 13779000 + }, + { + "epoch": 39.89, + "learning_rate": 3.0064175967951096e-05, + "loss": 2.0566, + "step": 13779500 + }, + { + "epoch": 39.89, + "learning_rate": 3.006345232030382e-05, + "loss": 2.0755, + "step": 13780000 + }, + { + "epoch": 39.89, + "learning_rate": 3.0062730119951837e-05, + "loss": 2.0419, + "step": 13780500 + }, + { + "epoch": 39.89, + "learning_rate": 3.006200647230456e-05, + "loss": 2.0668, + "step": 13781000 + }, + { + "epoch": 39.89, + "learning_rate": 3.006128282465728e-05, + "loss": 2.0723, + "step": 13781500 + }, + { + "epoch": 39.89, + "learning_rate": 3.0060559177010007e-05, + "loss": 2.0432, + "step": 13782000 + }, + { + "epoch": 39.89, + "learning_rate": 3.005983552936273e-05, + "loss": 2.0508, + "step": 13782500 + }, + { + "epoch": 39.9, + "learning_rate": 3.005911188171545e-05, + "loss": 2.0564, + "step": 13783000 + }, + { + "epoch": 39.9, + "learning_rate": 3.005838968136347e-05, + "loss": 2.0695, + "step": 13783500 + }, + { + "epoch": 39.9, + "learning_rate": 3.0057666033716193e-05, + "loss": 2.0339, + "step": 13784000 + }, + { + "epoch": 39.9, + "learning_rate": 3.0056942386068915e-05, + "loss": 2.0665, + "step": 13784500 + }, + { + "epoch": 39.9, + "learning_rate": 3.0056218738421637e-05, + "loss": 2.0369, + "step": 13785000 + }, + { + "epoch": 39.9, + "learning_rate": 3.005549509077436e-05, + "loss": 2.081, + "step": 13785500 + }, + { + "epoch": 39.9, + "learning_rate": 3.0054771443127082e-05, + "loss": 2.0483, + "step": 13786000 + }, + { + "epoch": 39.91, + "learning_rate": 3.00540492427751e-05, + "loss": 2.0534, + "step": 13786500 + }, + { + "epoch": 39.91, + "learning_rate": 3.005332559512783e-05, + "loss": 2.0654, + "step": 13787000 + }, + { + "epoch": 39.91, + "learning_rate": 3.0052601947480552e-05, + "loss": 2.056, + "step": 13787500 + }, + { + "epoch": 39.91, + "learning_rate": 3.0051878299833274e-05, + "loss": 2.0607, + "step": 13788000 + }, + { + "epoch": 39.91, + "learning_rate": 3.0051154652185996e-05, + "loss": 2.0395, + "step": 13788500 + }, + { + "epoch": 39.91, + "learning_rate": 3.0050431004538722e-05, + "loss": 2.057, + "step": 13789000 + }, + { + "epoch": 39.91, + "learning_rate": 3.0049707356891444e-05, + "loss": 2.056, + "step": 13789500 + }, + { + "epoch": 39.92, + "learning_rate": 3.0048983709244166e-05, + "loss": 2.0287, + "step": 13790000 + }, + { + "epoch": 39.92, + "learning_rate": 3.0048261508892185e-05, + "loss": 2.0396, + "step": 13790500 + }, + { + "epoch": 39.92, + "learning_rate": 3.0047537861244908e-05, + "loss": 2.0423, + "step": 13791000 + }, + { + "epoch": 39.92, + "learning_rate": 3.004681421359763e-05, + "loss": 2.0727, + "step": 13791500 + }, + { + "epoch": 39.92, + "learning_rate": 3.0046090565950352e-05, + "loss": 2.031, + "step": 13792000 + }, + { + "epoch": 39.92, + "learning_rate": 3.0045366918303074e-05, + "loss": 2.0786, + "step": 13792500 + }, + { + "epoch": 39.93, + "learning_rate": 3.0044643270655797e-05, + "loss": 2.0731, + "step": 13793000 + }, + { + "epoch": 39.93, + "learning_rate": 3.0043921070303815e-05, + "loss": 2.0495, + "step": 13793500 + }, + { + "epoch": 39.93, + "learning_rate": 3.0043197422656538e-05, + "loss": 2.0576, + "step": 13794000 + }, + { + "epoch": 39.93, + "learning_rate": 3.004247522230456e-05, + "loss": 2.0412, + "step": 13794500 + }, + { + "epoch": 39.93, + "learning_rate": 3.0041751574657286e-05, + "loss": 2.0529, + "step": 13795000 + }, + { + "epoch": 39.93, + "learning_rate": 3.0041027927010008e-05, + "loss": 2.0499, + "step": 13795500 + }, + { + "epoch": 39.93, + "learning_rate": 3.004030427936273e-05, + "loss": 2.0686, + "step": 13796000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0039580631715452e-05, + "loss": 2.0635, + "step": 13796500 + }, + { + "epoch": 39.94, + "learning_rate": 3.0038856984068175e-05, + "loss": 2.0713, + "step": 13797000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0038133336420897e-05, + "loss": 2.048, + "step": 13797500 + }, + { + "epoch": 39.94, + "learning_rate": 3.0037409688773622e-05, + "loss": 2.0541, + "step": 13798000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0036687488421638e-05, + "loss": 2.0624, + "step": 13798500 + }, + { + "epoch": 39.94, + "learning_rate": 3.003596384077436e-05, + "loss": 2.0601, + "step": 13799000 + }, + { + "epoch": 39.94, + "learning_rate": 3.0035240193127086e-05, + "loss": 2.0536, + "step": 13799500 + }, + { + "epoch": 39.95, + "learning_rate": 3.00345179927751e-05, + "loss": 2.0662, + "step": 13800000 + }, + { + "epoch": 39.95, + "learning_rate": 3.0033794345127824e-05, + "loss": 2.0678, + "step": 13800500 + }, + { + "epoch": 39.95, + "learning_rate": 3.0033070697480546e-05, + "loss": 2.0624, + "step": 13801000 + }, + { + "epoch": 39.95, + "learning_rate": 3.003234704983327e-05, + "loss": 2.0618, + "step": 13801500 + }, + { + "epoch": 39.95, + "learning_rate": 3.0031623402185997e-05, + "loss": 2.0309, + "step": 13802000 + }, + { + "epoch": 39.95, + "learning_rate": 3.0030899754538723e-05, + "loss": 2.0441, + "step": 13802500 + }, + { + "epoch": 39.95, + "learning_rate": 3.0030176106891445e-05, + "loss": 2.0305, + "step": 13803000 + }, + { + "epoch": 39.96, + "learning_rate": 3.0029452459244167e-05, + "loss": 2.0778, + "step": 13803500 + }, + { + "epoch": 39.96, + "learning_rate": 3.002872881159689e-05, + "loss": 2.0494, + "step": 13804000 + }, + { + "epoch": 39.96, + "learning_rate": 3.0028005163949612e-05, + "loss": 2.025, + "step": 13804500 + }, + { + "epoch": 39.96, + "learning_rate": 3.002728296359763e-05, + "loss": 2.0497, + "step": 13805000 + }, + { + "epoch": 39.96, + "learning_rate": 3.0026559315950353e-05, + "loss": 2.0632, + "step": 13805500 + }, + { + "epoch": 39.96, + "learning_rate": 3.0025837115598372e-05, + "loss": 2.0593, + "step": 13806000 + }, + { + "epoch": 39.96, + "learning_rate": 3.0025113467951094e-05, + "loss": 2.0818, + "step": 13806500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0024389820303816e-05, + "loss": 2.0342, + "step": 13807000 + }, + { + "epoch": 39.97, + "learning_rate": 3.0023667619951835e-05, + "loss": 2.0495, + "step": 13807500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0022943972304557e-05, + "loss": 2.0523, + "step": 13808000 + }, + { + "epoch": 39.97, + "learning_rate": 3.002222032465728e-05, + "loss": 2.0411, + "step": 13808500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0021496677010002e-05, + "loss": 2.0529, + "step": 13809000 + }, + { + "epoch": 39.97, + "learning_rate": 3.002077302936273e-05, + "loss": 2.047, + "step": 13809500 + }, + { + "epoch": 39.97, + "learning_rate": 3.0020049381715453e-05, + "loss": 2.036, + "step": 13810000 + }, + { + "epoch": 39.98, + "learning_rate": 3.0019325734068175e-05, + "loss": 2.0671, + "step": 13810500 + }, + { + "epoch": 39.98, + "learning_rate": 3.00186020864209e-05, + "loss": 2.0604, + "step": 13811000 + }, + { + "epoch": 39.98, + "learning_rate": 3.0017878438773623e-05, + "loss": 2.0671, + "step": 13811500 + }, + { + "epoch": 39.98, + "learning_rate": 3.0017154791126345e-05, + "loss": 2.0569, + "step": 13812000 + }, + { + "epoch": 39.98, + "learning_rate": 3.001643259077436e-05, + "loss": 2.0469, + "step": 13812500 + }, + { + "epoch": 39.98, + "learning_rate": 3.001571039042238e-05, + "loss": 2.0436, + "step": 13813000 + }, + { + "epoch": 39.98, + "learning_rate": 3.0014986742775102e-05, + "loss": 2.0437, + "step": 13813500 + }, + { + "epoch": 39.99, + "learning_rate": 3.0014263095127824e-05, + "loss": 2.0407, + "step": 13814000 + }, + { + "epoch": 39.99, + "learning_rate": 3.001353944748055e-05, + "loss": 2.0631, + "step": 13814500 + }, + { + "epoch": 39.99, + "learning_rate": 3.0012815799833272e-05, + "loss": 2.0607, + "step": 13815000 + }, + { + "epoch": 39.99, + "learning_rate": 3.0012092152185995e-05, + "loss": 2.0498, + "step": 13815500 + }, + { + "epoch": 39.99, + "learning_rate": 3.0011368504538717e-05, + "loss": 2.062, + "step": 13816000 + }, + { + "epoch": 39.99, + "learning_rate": 3.001064485689144e-05, + "loss": 2.0513, + "step": 13816500 + }, + { + "epoch": 39.99, + "learning_rate": 3.0009922656539465e-05, + "loss": 2.0367, + "step": 13817000 + }, + { + "epoch": 40.0, + "learning_rate": 3.0009199008892187e-05, + "loss": 2.0887, + "step": 13817500 + }, + { + "epoch": 40.0, + "learning_rate": 3.000847536124491e-05, + "loss": 2.0401, + "step": 13818000 + }, + { + "epoch": 40.0, + "learning_rate": 3.000775171359763e-05, + "loss": 2.0678, + "step": 13818500 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.6710043969127615, + "eval_accuracy_mlm": 0.6363494969737239, + "eval_accuracy_nsp": 0.8568583776748109, + "eval_loss": 2.165112018585205, + "eval_runtime": 331.1724, + "eval_samples_per_second": 1317.7, + "eval_steps_per_second": 54.905, + "step": 13818880 } ], "max_steps": 34547200, "num_train_epochs": 100, - "total_flos": 1.4199760659998734e+19, + "total_flos": 1.893300081164393e+19, "trial_name": null, "trial_params": null }