diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 40.0, - "global_step": 13818880, + "epoch": 50.0, + "global_step": 17273600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -166268,11 +166268,41581 @@ "eval_samples_per_second": 1317.7, "eval_steps_per_second": 54.905, "step": 13818880 + }, + { + "epoch": 40.0, + "learning_rate": 3.0007028065950354e-05, + "loss": 2.0527, + "step": 13819000 + }, + { + "epoch": 40.0, + "learning_rate": 3.0006304418303076e-05, + "loss": 2.0217, + "step": 13819500 + }, + { + "epoch": 40.0, + "learning_rate": 3.00055807706558e-05, + "loss": 2.0087, + "step": 13820000 + }, + { + "epoch": 40.0, + "learning_rate": 3.0004857123008524e-05, + "loss": 2.0424, + "step": 13820500 + }, + { + "epoch": 40.01, + "learning_rate": 3.0004133475361246e-05, + "loss": 2.0629, + "step": 13821000 + }, + { + "epoch": 40.01, + "learning_rate": 3.0003409827713968e-05, + "loss": 2.0474, + "step": 13821500 + }, + { + "epoch": 40.01, + "learning_rate": 3.000268618006669e-05, + "loss": 2.0426, + "step": 13822000 + }, + { + "epoch": 40.01, + "learning_rate": 3.0001962532419413e-05, + "loss": 2.0398, + "step": 13822500 + }, + { + "epoch": 40.01, + "learning_rate": 3.000123888477214e-05, + "loss": 2.0361, + "step": 13823000 + }, + { + "epoch": 40.01, + "learning_rate": 3.0000516684420154e-05, + "loss": 2.0254, + "step": 13823500 + }, + { + "epoch": 40.01, + "learning_rate": 2.9999793036772883e-05, + "loss": 2.0243, + "step": 13824000 + }, + { + "epoch": 40.02, + "learning_rate": 2.9999069389125605e-05, + "loss": 2.0572, + "step": 13824500 + }, + { + "epoch": 40.02, + "learning_rate": 2.9998345741478327e-05, + "loss": 2.0565, + "step": 13825000 + }, + { + "epoch": 40.02, + "learning_rate": 2.9997623541126346e-05, + "loss": 2.0421, + "step": 13825500 + }, + { + "epoch": 40.02, + "learning_rate": 2.999689989347907e-05, + "loss": 2.0447, + "step": 13826000 + }, + { + "epoch": 40.02, + "learning_rate": 2.999617624583179e-05, + "loss": 2.0275, + "step": 13826500 + }, + { + "epoch": 40.02, + "learning_rate": 2.9995452598184516e-05, + "loss": 2.0414, + "step": 13827000 + }, + { + "epoch": 40.02, + "learning_rate": 2.999472895053724e-05, + "loss": 2.0275, + "step": 13827500 + }, + { + "epoch": 40.03, + "learning_rate": 2.999400530288996e-05, + "loss": 2.0679, + "step": 13828000 + }, + { + "epoch": 40.03, + "learning_rate": 2.9993283102537976e-05, + "loss": 2.0333, + "step": 13828500 + }, + { + "epoch": 40.03, + "learning_rate": 2.9992559454890702e-05, + "loss": 2.0289, + "step": 13829000 + }, + { + "epoch": 40.03, + "learning_rate": 2.9991835807243424e-05, + "loss": 2.0394, + "step": 13829500 + }, + { + "epoch": 40.03, + "learning_rate": 2.9991112159596146e-05, + "loss": 2.0503, + "step": 13830000 + }, + { + "epoch": 40.03, + "learning_rate": 2.999038851194887e-05, + "loss": 2.0426, + "step": 13830500 + }, + { + "epoch": 40.04, + "learning_rate": 2.998966486430159e-05, + "loss": 2.0397, + "step": 13831000 + }, + { + "epoch": 40.04, + "learning_rate": 2.9988942663949617e-05, + "loss": 2.0423, + "step": 13831500 + }, + { + "epoch": 40.04, + "learning_rate": 2.998821901630234e-05, + "loss": 2.0345, + "step": 13832000 + }, + { + "epoch": 40.04, + "learning_rate": 2.998749536865506e-05, + "loss": 2.04, + "step": 13832500 + }, + { + "epoch": 40.04, + "learning_rate": 2.9986771721007783e-05, + "loss": 2.051, + "step": 13833000 + }, + { + "epoch": 40.04, + "learning_rate": 2.9986048073360506e-05, + "loss": 2.0393, + "step": 13833500 + }, + { + "epoch": 40.04, + "learning_rate": 2.9985324425713228e-05, + "loss": 2.0331, + "step": 13834000 + }, + { + "epoch": 40.05, + "learning_rate": 2.9984600778065953e-05, + "loss": 2.0446, + "step": 13834500 + }, + { + "epoch": 40.05, + "learning_rate": 2.9983877130418676e-05, + "loss": 2.0422, + "step": 13835000 + }, + { + "epoch": 40.05, + "learning_rate": 2.9983153482771398e-05, + "loss": 2.0339, + "step": 13835500 + }, + { + "epoch": 40.05, + "learning_rate": 2.998242983512412e-05, + "loss": 2.0355, + "step": 13836000 + }, + { + "epoch": 40.05, + "learning_rate": 2.9981706187476842e-05, + "loss": 2.0386, + "step": 13836500 + }, + { + "epoch": 40.05, + "learning_rate": 2.9980985434420155e-05, + "loss": 2.0141, + "step": 13837000 + }, + { + "epoch": 40.05, + "learning_rate": 2.9980261786772877e-05, + "loss": 2.0345, + "step": 13837500 + }, + { + "epoch": 40.06, + "learning_rate": 2.9979538139125603e-05, + "loss": 2.0358, + "step": 13838000 + }, + { + "epoch": 40.06, + "learning_rate": 2.9978814491478325e-05, + "loss": 2.0317, + "step": 13838500 + }, + { + "epoch": 40.06, + "learning_rate": 2.9978090843831054e-05, + "loss": 2.0342, + "step": 13839000 + }, + { + "epoch": 40.06, + "learning_rate": 2.9977367196183776e-05, + "loss": 2.0342, + "step": 13839500 + }, + { + "epoch": 40.06, + "learning_rate": 2.9976643548536498e-05, + "loss": 2.0355, + "step": 13840000 + }, + { + "epoch": 40.06, + "learning_rate": 2.9975921348184517e-05, + "loss": 2.025, + "step": 13840500 + }, + { + "epoch": 40.06, + "learning_rate": 2.997519770053724e-05, + "loss": 2.0298, + "step": 13841000 + }, + { + "epoch": 40.07, + "learning_rate": 2.997447405288996e-05, + "loss": 2.0214, + "step": 13841500 + }, + { + "epoch": 40.07, + "learning_rate": 2.9973750405242684e-05, + "loss": 2.0446, + "step": 13842000 + }, + { + "epoch": 40.07, + "learning_rate": 2.9973026757595406e-05, + "loss": 2.0383, + "step": 13842500 + }, + { + "epoch": 40.07, + "learning_rate": 2.997230310994813e-05, + "loss": 2.0285, + "step": 13843000 + }, + { + "epoch": 40.07, + "learning_rate": 2.9971579462300854e-05, + "loss": 2.0569, + "step": 13843500 + }, + { + "epoch": 40.07, + "learning_rate": 2.9970855814653576e-05, + "loss": 2.0497, + "step": 13844000 + }, + { + "epoch": 40.07, + "learning_rate": 2.9970133614301592e-05, + "loss": 2.0273, + "step": 13844500 + }, + { + "epoch": 40.08, + "learning_rate": 2.9969409966654317e-05, + "loss": 2.0547, + "step": 13845000 + }, + { + "epoch": 40.08, + "learning_rate": 2.996868631900704e-05, + "loss": 2.0409, + "step": 13845500 + }, + { + "epoch": 40.08, + "learning_rate": 2.9967962671359762e-05, + "loss": 2.0245, + "step": 13846000 + }, + { + "epoch": 40.08, + "learning_rate": 2.996723902371249e-05, + "loss": 2.0499, + "step": 13846500 + }, + { + "epoch": 40.08, + "learning_rate": 2.9966516823360506e-05, + "loss": 2.0109, + "step": 13847000 + }, + { + "epoch": 40.08, + "learning_rate": 2.9965794623008525e-05, + "loss": 2.0434, + "step": 13847500 + }, + { + "epoch": 40.08, + "learning_rate": 2.9965070975361248e-05, + "loss": 2.036, + "step": 13848000 + }, + { + "epoch": 40.09, + "learning_rate": 2.996434732771397e-05, + "loss": 2.0232, + "step": 13848500 + }, + { + "epoch": 40.09, + "learning_rate": 2.9963623680066692e-05, + "loss": 2.0497, + "step": 13849000 + }, + { + "epoch": 40.09, + "learning_rate": 2.9962900032419418e-05, + "loss": 2.041, + "step": 13849500 + }, + { + "epoch": 40.09, + "learning_rate": 2.996217638477214e-05, + "loss": 2.038, + "step": 13850000 + }, + { + "epoch": 40.09, + "learning_rate": 2.9961452737124862e-05, + "loss": 2.0471, + "step": 13850500 + }, + { + "epoch": 40.09, + "learning_rate": 2.9960729089477584e-05, + "loss": 2.0379, + "step": 13851000 + }, + { + "epoch": 40.09, + "learning_rate": 2.9960005441830307e-05, + "loss": 2.0328, + "step": 13851500 + }, + { + "epoch": 40.1, + "learning_rate": 2.9959283241478326e-05, + "loss": 2.0325, + "step": 13852000 + }, + { + "epoch": 40.1, + "learning_rate": 2.9958559593831048e-05, + "loss": 2.0528, + "step": 13852500 + }, + { + "epoch": 40.1, + "learning_rate": 2.995783594618377e-05, + "loss": 2.0208, + "step": 13853000 + }, + { + "epoch": 40.1, + "learning_rate": 2.9957112298536492e-05, + "loss": 2.0328, + "step": 13853500 + }, + { + "epoch": 40.1, + "learning_rate": 2.995638865088922e-05, + "loss": 2.0698, + "step": 13854000 + }, + { + "epoch": 40.1, + "learning_rate": 2.9955665003241943e-05, + "loss": 2.0506, + "step": 13854500 + }, + { + "epoch": 40.1, + "learning_rate": 2.9954942802889962e-05, + "loss": 2.0308, + "step": 13855000 + }, + { + "epoch": 40.11, + "learning_rate": 2.9954219155242685e-05, + "loss": 2.0576, + "step": 13855500 + }, + { + "epoch": 40.11, + "learning_rate": 2.9953495507595407e-05, + "loss": 2.0622, + "step": 13856000 + }, + { + "epoch": 40.11, + "learning_rate": 2.9952771859948133e-05, + "loss": 2.0524, + "step": 13856500 + }, + { + "epoch": 40.11, + "learning_rate": 2.9952048212300855e-05, + "loss": 2.04, + "step": 13857000 + }, + { + "epoch": 40.11, + "learning_rate": 2.9951324564653577e-05, + "loss": 2.0509, + "step": 13857500 + }, + { + "epoch": 40.11, + "learning_rate": 2.99506009170063e-05, + "loss": 2.0165, + "step": 13858000 + }, + { + "epoch": 40.11, + "learning_rate": 2.994987726935902e-05, + "loss": 2.0342, + "step": 13858500 + }, + { + "epoch": 40.12, + "learning_rate": 2.9949153621711744e-05, + "loss": 2.0416, + "step": 13859000 + }, + { + "epoch": 40.12, + "learning_rate": 2.994842997406447e-05, + "loss": 2.069, + "step": 13859500 + }, + { + "epoch": 40.12, + "learning_rate": 2.994770632641719e-05, + "loss": 2.0275, + "step": 13860000 + }, + { + "epoch": 40.12, + "learning_rate": 2.9946982678769914e-05, + "loss": 2.0371, + "step": 13860500 + }, + { + "epoch": 40.12, + "learning_rate": 2.9946259031122643e-05, + "loss": 2.039, + "step": 13861000 + }, + { + "epoch": 40.12, + "learning_rate": 2.994553683077066e-05, + "loss": 2.0111, + "step": 13861500 + }, + { + "epoch": 40.12, + "learning_rate": 2.9944813183123384e-05, + "loss": 2.0319, + "step": 13862000 + }, + { + "epoch": 40.13, + "learning_rate": 2.9944089535476106e-05, + "loss": 2.0485, + "step": 13862500 + }, + { + "epoch": 40.13, + "learning_rate": 2.994336588782883e-05, + "loss": 2.0401, + "step": 13863000 + }, + { + "epoch": 40.13, + "learning_rate": 2.9942643687476844e-05, + "loss": 2.0641, + "step": 13863500 + }, + { + "epoch": 40.13, + "learning_rate": 2.9941921487124863e-05, + "loss": 2.033, + "step": 13864000 + }, + { + "epoch": 40.13, + "learning_rate": 2.9941197839477585e-05, + "loss": 2.033, + "step": 13864500 + }, + { + "epoch": 40.13, + "learning_rate": 2.9940474191830307e-05, + "loss": 2.0291, + "step": 13865000 + }, + { + "epoch": 40.13, + "learning_rate": 2.9939750544183033e-05, + "loss": 2.0603, + "step": 13865500 + }, + { + "epoch": 40.14, + "learning_rate": 2.9939026896535755e-05, + "loss": 2.0244, + "step": 13866000 + }, + { + "epoch": 40.14, + "learning_rate": 2.9938303248888477e-05, + "loss": 2.0186, + "step": 13866500 + }, + { + "epoch": 40.14, + "learning_rate": 2.99375796012412e-05, + "loss": 2.0392, + "step": 13867000 + }, + { + "epoch": 40.14, + "learning_rate": 2.9936855953593922e-05, + "loss": 2.0516, + "step": 13867500 + }, + { + "epoch": 40.14, + "learning_rate": 2.9936132305946644e-05, + "loss": 2.0324, + "step": 13868000 + }, + { + "epoch": 40.14, + "learning_rate": 2.9935410105594663e-05, + "loss": 2.0576, + "step": 13868500 + }, + { + "epoch": 40.15, + "learning_rate": 2.9934686457947392e-05, + "loss": 2.0459, + "step": 13869000 + }, + { + "epoch": 40.15, + "learning_rate": 2.9933962810300114e-05, + "loss": 2.0327, + "step": 13869500 + }, + { + "epoch": 40.15, + "learning_rate": 2.9933239162652837e-05, + "loss": 2.0082, + "step": 13870000 + }, + { + "epoch": 40.15, + "learning_rate": 2.993251551500556e-05, + "loss": 2.0471, + "step": 13870500 + }, + { + "epoch": 40.15, + "learning_rate": 2.9931791867358284e-05, + "loss": 2.0438, + "step": 13871000 + }, + { + "epoch": 40.15, + "learning_rate": 2.9931068219711007e-05, + "loss": 2.0501, + "step": 13871500 + }, + { + "epoch": 40.15, + "learning_rate": 2.993034457206373e-05, + "loss": 2.0472, + "step": 13872000 + }, + { + "epoch": 40.16, + "learning_rate": 2.992962092441645e-05, + "loss": 2.0366, + "step": 13872500 + }, + { + "epoch": 40.16, + "learning_rate": 2.9928897276769173e-05, + "loss": 2.0474, + "step": 13873000 + }, + { + "epoch": 40.16, + "learning_rate": 2.9928173629121896e-05, + "loss": 2.042, + "step": 13873500 + }, + { + "epoch": 40.16, + "learning_rate": 2.9927451428769915e-05, + "loss": 2.043, + "step": 13874000 + }, + { + "epoch": 40.16, + "learning_rate": 2.9926727781122637e-05, + "loss": 2.0434, + "step": 13874500 + }, + { + "epoch": 40.16, + "learning_rate": 2.992600413347536e-05, + "loss": 2.0715, + "step": 13875000 + }, + { + "epoch": 40.16, + "learning_rate": 2.9925280485828085e-05, + "loss": 2.0344, + "step": 13875500 + }, + { + "epoch": 40.17, + "learning_rate": 2.9924558285476107e-05, + "loss": 2.0285, + "step": 13876000 + }, + { + "epoch": 40.17, + "learning_rate": 2.992383463782883e-05, + "loss": 2.0485, + "step": 13876500 + }, + { + "epoch": 40.17, + "learning_rate": 2.992311099018155e-05, + "loss": 2.0417, + "step": 13877000 + }, + { + "epoch": 40.17, + "learning_rate": 2.9922387342534274e-05, + "loss": 2.0342, + "step": 13877500 + }, + { + "epoch": 40.17, + "learning_rate": 2.9921663694887e-05, + "loss": 2.0303, + "step": 13878000 + }, + { + "epoch": 40.17, + "learning_rate": 2.992094004723972e-05, + "loss": 2.0356, + "step": 13878500 + }, + { + "epoch": 40.17, + "learning_rate": 2.9920216399592444e-05, + "loss": 2.0473, + "step": 13879000 + }, + { + "epoch": 40.18, + "learning_rate": 2.991949419924046e-05, + "loss": 2.052, + "step": 13879500 + }, + { + "epoch": 40.18, + "learning_rate": 2.9918770551593185e-05, + "loss": 2.0492, + "step": 13880000 + }, + { + "epoch": 40.18, + "learning_rate": 2.9918046903945907e-05, + "loss": 2.0321, + "step": 13880500 + }, + { + "epoch": 40.18, + "learning_rate": 2.991732325629863e-05, + "loss": 2.0342, + "step": 13881000 + }, + { + "epoch": 40.18, + "learning_rate": 2.991659960865135e-05, + "loss": 2.0518, + "step": 13881500 + }, + { + "epoch": 40.18, + "learning_rate": 2.991587740829937e-05, + "loss": 2.0505, + "step": 13882000 + }, + { + "epoch": 40.18, + "learning_rate": 2.9915153760652093e-05, + "loss": 2.0375, + "step": 13882500 + }, + { + "epoch": 40.19, + "learning_rate": 2.9914431560300112e-05, + "loss": 2.049, + "step": 13883000 + }, + { + "epoch": 40.19, + "learning_rate": 2.9913707912652837e-05, + "loss": 2.047, + "step": 13883500 + }, + { + "epoch": 40.19, + "learning_rate": 2.9912985712300856e-05, + "loss": 2.0709, + "step": 13884000 + }, + { + "epoch": 40.19, + "learning_rate": 2.991226206465358e-05, + "loss": 2.0439, + "step": 13884500 + }, + { + "epoch": 40.19, + "learning_rate": 2.99115384170063e-05, + "loss": 2.0545, + "step": 13885000 + }, + { + "epoch": 40.19, + "learning_rate": 2.991081621665432e-05, + "loss": 2.0536, + "step": 13885500 + }, + { + "epoch": 40.19, + "learning_rate": 2.9910092569007042e-05, + "loss": 2.0365, + "step": 13886000 + }, + { + "epoch": 40.2, + "learning_rate": 2.990937036865506e-05, + "loss": 2.0235, + "step": 13886500 + }, + { + "epoch": 40.2, + "learning_rate": 2.9908646721007783e-05, + "loss": 2.0291, + "step": 13887000 + }, + { + "epoch": 40.2, + "learning_rate": 2.9907923073360505e-05, + "loss": 2.0407, + "step": 13887500 + }, + { + "epoch": 40.2, + "learning_rate": 2.9907199425713228e-05, + "loss": 2.031, + "step": 13888000 + }, + { + "epoch": 40.2, + "learning_rate": 2.990647577806595e-05, + "loss": 2.0425, + "step": 13888500 + }, + { + "epoch": 40.2, + "learning_rate": 2.9905752130418675e-05, + "loss": 2.0378, + "step": 13889000 + }, + { + "epoch": 40.2, + "learning_rate": 2.9905028482771398e-05, + "loss": 2.0221, + "step": 13889500 + }, + { + "epoch": 40.21, + "learning_rate": 2.990430483512412e-05, + "loss": 2.0064, + "step": 13890000 + }, + { + "epoch": 40.21, + "learning_rate": 2.9903581187476842e-05, + "loss": 2.0707, + "step": 13890500 + }, + { + "epoch": 40.21, + "learning_rate": 2.9902857539829564e-05, + "loss": 2.037, + "step": 13891000 + }, + { + "epoch": 40.21, + "learning_rate": 2.9902133892182293e-05, + "loss": 2.054, + "step": 13891500 + }, + { + "epoch": 40.21, + "learning_rate": 2.9901410244535016e-05, + "loss": 2.0303, + "step": 13892000 + }, + { + "epoch": 40.21, + "learning_rate": 2.9900686596887738e-05, + "loss": 2.042, + "step": 13892500 + }, + { + "epoch": 40.21, + "learning_rate": 2.9899962949240464e-05, + "loss": 2.0267, + "step": 13893000 + }, + { + "epoch": 40.22, + "learning_rate": 2.9899239301593186e-05, + "loss": 2.0286, + "step": 13893500 + }, + { + "epoch": 40.22, + "learning_rate": 2.98985171012412e-05, + "loss": 2.025, + "step": 13894000 + }, + { + "epoch": 40.22, + "learning_rate": 2.9897793453593927e-05, + "loss": 2.0414, + "step": 13894500 + }, + { + "epoch": 40.22, + "learning_rate": 2.989706980594665e-05, + "loss": 2.0383, + "step": 13895000 + }, + { + "epoch": 40.22, + "learning_rate": 2.989634615829937e-05, + "loss": 2.0415, + "step": 13895500 + }, + { + "epoch": 40.22, + "learning_rate": 2.9895622510652094e-05, + "loss": 2.023, + "step": 13896000 + }, + { + "epoch": 40.22, + "learning_rate": 2.9894898863004816e-05, + "loss": 2.0276, + "step": 13896500 + }, + { + "epoch": 40.23, + "learning_rate": 2.9894175215357538e-05, + "loss": 2.0097, + "step": 13897000 + }, + { + "epoch": 40.23, + "learning_rate": 2.9893451567710264e-05, + "loss": 2.0544, + "step": 13897500 + }, + { + "epoch": 40.23, + "learning_rate": 2.989272936735828e-05, + "loss": 2.0377, + "step": 13898000 + }, + { + "epoch": 40.23, + "learning_rate": 2.9892005719711008e-05, + "loss": 2.0261, + "step": 13898500 + }, + { + "epoch": 40.23, + "learning_rate": 2.9891283519359027e-05, + "loss": 2.0386, + "step": 13899000 + }, + { + "epoch": 40.23, + "learning_rate": 2.9890561319007043e-05, + "loss": 2.0374, + "step": 13899500 + }, + { + "epoch": 40.23, + "learning_rate": 2.9889837671359765e-05, + "loss": 2.0386, + "step": 13900000 + }, + { + "epoch": 40.24, + "learning_rate": 2.9889114023712487e-05, + "loss": 2.0265, + "step": 13900500 + }, + { + "epoch": 40.24, + "learning_rate": 2.9888390376065213e-05, + "loss": 2.0637, + "step": 13901000 + }, + { + "epoch": 40.24, + "learning_rate": 2.9887666728417935e-05, + "loss": 2.0485, + "step": 13901500 + }, + { + "epoch": 40.24, + "learning_rate": 2.9886943080770657e-05, + "loss": 2.0324, + "step": 13902000 + }, + { + "epoch": 40.24, + "learning_rate": 2.9886220880418676e-05, + "loss": 2.0385, + "step": 13902500 + }, + { + "epoch": 40.24, + "learning_rate": 2.98854972327714e-05, + "loss": 2.0355, + "step": 13903000 + }, + { + "epoch": 40.24, + "learning_rate": 2.988477358512412e-05, + "loss": 2.0687, + "step": 13903500 + }, + { + "epoch": 40.25, + "learning_rate": 2.988405138477214e-05, + "loss": 2.0489, + "step": 13904000 + }, + { + "epoch": 40.25, + "learning_rate": 2.9883327737124862e-05, + "loss": 2.074, + "step": 13904500 + }, + { + "epoch": 40.25, + "learning_rate": 2.9882604089477584e-05, + "loss": 2.0613, + "step": 13905000 + }, + { + "epoch": 40.25, + "learning_rate": 2.9881880441830306e-05, + "loss": 2.0467, + "step": 13905500 + }, + { + "epoch": 40.25, + "learning_rate": 2.988115679418303e-05, + "loss": 2.04, + "step": 13906000 + }, + { + "epoch": 40.25, + "learning_rate": 2.9880433146535758e-05, + "loss": 2.0118, + "step": 13906500 + }, + { + "epoch": 40.26, + "learning_rate": 2.9879710946183777e-05, + "loss": 2.0266, + "step": 13907000 + }, + { + "epoch": 40.26, + "learning_rate": 2.98789872985365e-05, + "loss": 2.0602, + "step": 13907500 + }, + { + "epoch": 40.26, + "learning_rate": 2.987826365088922e-05, + "loss": 2.0247, + "step": 13908000 + }, + { + "epoch": 40.26, + "learning_rate": 2.9877540003241943e-05, + "loss": 2.0423, + "step": 13908500 + }, + { + "epoch": 40.26, + "learning_rate": 2.9876816355594666e-05, + "loss": 2.0727, + "step": 13909000 + }, + { + "epoch": 40.26, + "learning_rate": 2.987609270794739e-05, + "loss": 2.0259, + "step": 13909500 + }, + { + "epoch": 40.26, + "learning_rate": 2.9875369060300113e-05, + "loss": 2.015, + "step": 13910000 + }, + { + "epoch": 40.27, + "learning_rate": 2.9874645412652836e-05, + "loss": 2.0258, + "step": 13910500 + }, + { + "epoch": 40.27, + "learning_rate": 2.9873921765005558e-05, + "loss": 2.0549, + "step": 13911000 + }, + { + "epoch": 40.27, + "learning_rate": 2.987319811735828e-05, + "loss": 2.0636, + "step": 13911500 + }, + { + "epoch": 40.27, + "learning_rate": 2.9872474469711002e-05, + "loss": 2.0249, + "step": 13912000 + }, + { + "epoch": 40.27, + "learning_rate": 2.9871750822063728e-05, + "loss": 2.0515, + "step": 13912500 + }, + { + "epoch": 40.27, + "learning_rate": 2.987102717441645e-05, + "loss": 2.0199, + "step": 13913000 + }, + { + "epoch": 40.27, + "learning_rate": 2.987030352676918e-05, + "loss": 2.0123, + "step": 13913500 + }, + { + "epoch": 40.28, + "learning_rate": 2.98695798791219e-05, + "loss": 2.038, + "step": 13914000 + }, + { + "epoch": 40.28, + "learning_rate": 2.9868856231474624e-05, + "loss": 2.0682, + "step": 13914500 + }, + { + "epoch": 40.28, + "learning_rate": 2.9868134031122643e-05, + "loss": 2.0304, + "step": 13915000 + }, + { + "epoch": 40.28, + "learning_rate": 2.9867410383475365e-05, + "loss": 2.0442, + "step": 13915500 + }, + { + "epoch": 40.28, + "learning_rate": 2.9866686735828087e-05, + "loss": 2.0657, + "step": 13916000 + }, + { + "epoch": 40.28, + "learning_rate": 2.986596308818081e-05, + "loss": 2.0661, + "step": 13916500 + }, + { + "epoch": 40.28, + "learning_rate": 2.986523944053353e-05, + "loss": 2.0723, + "step": 13917000 + }, + { + "epoch": 40.29, + "learning_rate": 2.9864515792886254e-05, + "loss": 2.0416, + "step": 13917500 + }, + { + "epoch": 40.29, + "learning_rate": 2.986379214523898e-05, + "loss": 2.0222, + "step": 13918000 + }, + { + "epoch": 40.29, + "learning_rate": 2.98630684975917e-05, + "loss": 2.0725, + "step": 13918500 + }, + { + "epoch": 40.29, + "learning_rate": 2.9862346297239717e-05, + "loss": 2.0287, + "step": 13919000 + }, + { + "epoch": 40.29, + "learning_rate": 2.9861624096887736e-05, + "loss": 2.0503, + "step": 13919500 + }, + { + "epoch": 40.29, + "learning_rate": 2.986090044924046e-05, + "loss": 2.0433, + "step": 13920000 + }, + { + "epoch": 40.29, + "learning_rate": 2.986017680159318e-05, + "loss": 2.0344, + "step": 13920500 + }, + { + "epoch": 40.3, + "learning_rate": 2.985945315394591e-05, + "loss": 2.0511, + "step": 13921000 + }, + { + "epoch": 40.3, + "learning_rate": 2.9858729506298632e-05, + "loss": 2.0581, + "step": 13921500 + }, + { + "epoch": 40.3, + "learning_rate": 2.985800730594665e-05, + "loss": 2.0444, + "step": 13922000 + }, + { + "epoch": 40.3, + "learning_rate": 2.9857283658299373e-05, + "loss": 2.0541, + "step": 13922500 + }, + { + "epoch": 40.3, + "learning_rate": 2.9856560010652095e-05, + "loss": 2.0319, + "step": 13923000 + }, + { + "epoch": 40.3, + "learning_rate": 2.9855836363004817e-05, + "loss": 2.0564, + "step": 13923500 + }, + { + "epoch": 40.3, + "learning_rate": 2.9855112715357543e-05, + "loss": 2.0537, + "step": 13924000 + }, + { + "epoch": 40.31, + "learning_rate": 2.9854389067710265e-05, + "loss": 2.0292, + "step": 13924500 + }, + { + "epoch": 40.31, + "learning_rate": 2.9853665420062988e-05, + "loss": 2.0693, + "step": 13925000 + }, + { + "epoch": 40.31, + "learning_rate": 2.985294177241571e-05, + "loss": 2.0272, + "step": 13925500 + }, + { + "epoch": 40.31, + "learning_rate": 2.9852218124768432e-05, + "loss": 2.0714, + "step": 13926000 + }, + { + "epoch": 40.31, + "learning_rate": 2.9851494477121154e-05, + "loss": 2.0556, + "step": 13926500 + }, + { + "epoch": 40.31, + "learning_rate": 2.985077082947388e-05, + "loss": 2.0304, + "step": 13927000 + }, + { + "epoch": 40.31, + "learning_rate": 2.9850047181826602e-05, + "loss": 2.027, + "step": 13927500 + }, + { + "epoch": 40.32, + "learning_rate": 2.984932353417933e-05, + "loss": 2.0654, + "step": 13928000 + }, + { + "epoch": 40.32, + "learning_rate": 2.9848601333827347e-05, + "loss": 2.0487, + "step": 13928500 + }, + { + "epoch": 40.32, + "learning_rate": 2.984787768618007e-05, + "loss": 2.0268, + "step": 13929000 + }, + { + "epoch": 40.32, + "learning_rate": 2.9847154038532795e-05, + "loss": 2.0437, + "step": 13929500 + }, + { + "epoch": 40.32, + "learning_rate": 2.9846430390885517e-05, + "loss": 2.042, + "step": 13930000 + }, + { + "epoch": 40.32, + "learning_rate": 2.9845708190533532e-05, + "loss": 2.0542, + "step": 13930500 + }, + { + "epoch": 40.32, + "learning_rate": 2.9844984542886255e-05, + "loss": 2.0526, + "step": 13931000 + }, + { + "epoch": 40.33, + "learning_rate": 2.984426089523898e-05, + "loss": 2.0337, + "step": 13931500 + }, + { + "epoch": 40.33, + "learning_rate": 2.9843537247591702e-05, + "loss": 2.0452, + "step": 13932000 + }, + { + "epoch": 40.33, + "learning_rate": 2.9842813599944425e-05, + "loss": 2.0771, + "step": 13932500 + }, + { + "epoch": 40.33, + "learning_rate": 2.9842089952297147e-05, + "loss": 2.0429, + "step": 13933000 + }, + { + "epoch": 40.33, + "learning_rate": 2.984136630464987e-05, + "loss": 2.0298, + "step": 13933500 + }, + { + "epoch": 40.33, + "learning_rate": 2.9840642657002595e-05, + "loss": 2.021, + "step": 13934000 + }, + { + "epoch": 40.33, + "learning_rate": 2.9839921903945907e-05, + "loss": 2.0244, + "step": 13934500 + }, + { + "epoch": 40.34, + "learning_rate": 2.9839199703593923e-05, + "loss": 2.06, + "step": 13935000 + }, + { + "epoch": 40.34, + "learning_rate": 2.983847750324194e-05, + "loss": 2.0331, + "step": 13935500 + }, + { + "epoch": 40.34, + "learning_rate": 2.9837753855594664e-05, + "loss": 2.0379, + "step": 13936000 + }, + { + "epoch": 40.34, + "learning_rate": 2.9837030207947393e-05, + "loss": 2.0385, + "step": 13936500 + }, + { + "epoch": 40.34, + "learning_rate": 2.9836306560300115e-05, + "loss": 2.0331, + "step": 13937000 + }, + { + "epoch": 40.34, + "learning_rate": 2.9835582912652837e-05, + "loss": 2.0577, + "step": 13937500 + }, + { + "epoch": 40.34, + "learning_rate": 2.983485926500556e-05, + "loss": 2.0307, + "step": 13938000 + }, + { + "epoch": 40.35, + "learning_rate": 2.983413561735828e-05, + "loss": 2.0575, + "step": 13938500 + }, + { + "epoch": 40.35, + "learning_rate": 2.9833411969711007e-05, + "loss": 2.0526, + "step": 13939000 + }, + { + "epoch": 40.35, + "learning_rate": 2.9832689769359023e-05, + "loss": 2.0715, + "step": 13939500 + }, + { + "epoch": 40.35, + "learning_rate": 2.9831966121711745e-05, + "loss": 2.0389, + "step": 13940000 + }, + { + "epoch": 40.35, + "learning_rate": 2.983124247406447e-05, + "loss": 2.0475, + "step": 13940500 + }, + { + "epoch": 40.35, + "learning_rate": 2.9830518826417193e-05, + "loss": 2.0631, + "step": 13941000 + }, + { + "epoch": 40.35, + "learning_rate": 2.9829795178769915e-05, + "loss": 2.0371, + "step": 13941500 + }, + { + "epoch": 40.36, + "learning_rate": 2.9829071531122637e-05, + "loss": 2.0099, + "step": 13942000 + }, + { + "epoch": 40.36, + "learning_rate": 2.982834788347536e-05, + "loss": 2.0584, + "step": 13942500 + }, + { + "epoch": 40.36, + "learning_rate": 2.9827624235828082e-05, + "loss": 2.0585, + "step": 13943000 + }, + { + "epoch": 40.36, + "learning_rate": 2.982690058818081e-05, + "loss": 2.048, + "step": 13943500 + }, + { + "epoch": 40.36, + "learning_rate": 2.9826176940533533e-05, + "loss": 2.0291, + "step": 13944000 + }, + { + "epoch": 40.36, + "learning_rate": 2.9825454740181552e-05, + "loss": 2.0598, + "step": 13944500 + }, + { + "epoch": 40.37, + "learning_rate": 2.9824731092534274e-05, + "loss": 2.0483, + "step": 13945000 + }, + { + "epoch": 40.37, + "learning_rate": 2.9824007444886997e-05, + "loss": 2.0732, + "step": 13945500 + }, + { + "epoch": 40.37, + "learning_rate": 2.9823283797239722e-05, + "loss": 2.038, + "step": 13946000 + }, + { + "epoch": 40.37, + "learning_rate": 2.9822560149592444e-05, + "loss": 2.0328, + "step": 13946500 + }, + { + "epoch": 40.37, + "learning_rate": 2.9821836501945167e-05, + "loss": 2.0457, + "step": 13947000 + }, + { + "epoch": 40.37, + "learning_rate": 2.982111285429789e-05, + "loss": 2.05, + "step": 13947500 + }, + { + "epoch": 40.37, + "learning_rate": 2.982038920665061e-05, + "loss": 2.0555, + "step": 13948000 + }, + { + "epoch": 40.38, + "learning_rate": 2.981966700629863e-05, + "loss": 2.0617, + "step": 13948500 + }, + { + "epoch": 40.38, + "learning_rate": 2.9818946253241942e-05, + "loss": 2.0463, + "step": 13949000 + }, + { + "epoch": 40.38, + "learning_rate": 2.9818222605594665e-05, + "loss": 2.0264, + "step": 13949500 + }, + { + "epoch": 40.38, + "learning_rate": 2.9817498957947387e-05, + "loss": 2.0268, + "step": 13950000 + }, + { + "epoch": 40.38, + "learning_rate": 2.981677531030011e-05, + "loss": 2.0428, + "step": 13950500 + }, + { + "epoch": 40.38, + "learning_rate": 2.9816053109948128e-05, + "loss": 2.0563, + "step": 13951000 + }, + { + "epoch": 40.38, + "learning_rate": 2.9815329462300857e-05, + "loss": 2.0295, + "step": 13951500 + }, + { + "epoch": 40.39, + "learning_rate": 2.981460581465358e-05, + "loss": 2.0654, + "step": 13952000 + }, + { + "epoch": 40.39, + "learning_rate": 2.98138821670063e-05, + "loss": 2.0157, + "step": 13952500 + }, + { + "epoch": 40.39, + "learning_rate": 2.9813158519359024e-05, + "loss": 2.0392, + "step": 13953000 + }, + { + "epoch": 40.39, + "learning_rate": 2.9812434871711746e-05, + "loss": 2.0251, + "step": 13953500 + }, + { + "epoch": 40.39, + "learning_rate": 2.981171122406447e-05, + "loss": 2.0592, + "step": 13954000 + }, + { + "epoch": 40.39, + "learning_rate": 2.9810987576417194e-05, + "loss": 2.0356, + "step": 13954500 + }, + { + "epoch": 40.39, + "learning_rate": 2.9810263928769916e-05, + "loss": 2.0408, + "step": 13955000 + }, + { + "epoch": 40.4, + "learning_rate": 2.9809543175713228e-05, + "loss": 2.0325, + "step": 13955500 + }, + { + "epoch": 40.4, + "learning_rate": 2.980881952806595e-05, + "loss": 2.0526, + "step": 13956000 + }, + { + "epoch": 40.4, + "learning_rate": 2.9808095880418673e-05, + "loss": 2.0308, + "step": 13956500 + }, + { + "epoch": 40.4, + "learning_rate": 2.98073722327714e-05, + "loss": 2.0262, + "step": 13957000 + }, + { + "epoch": 40.4, + "learning_rate": 2.980664858512412e-05, + "loss": 2.0554, + "step": 13957500 + }, + { + "epoch": 40.4, + "learning_rate": 2.9805924937476843e-05, + "loss": 2.067, + "step": 13958000 + }, + { + "epoch": 40.4, + "learning_rate": 2.9805201289829565e-05, + "loss": 2.0664, + "step": 13958500 + }, + { + "epoch": 40.41, + "learning_rate": 2.9804477642182294e-05, + "loss": 2.0358, + "step": 13959000 + }, + { + "epoch": 40.41, + "learning_rate": 2.980375544183031e-05, + "loss": 2.0623, + "step": 13959500 + }, + { + "epoch": 40.41, + "learning_rate": 2.9803031794183035e-05, + "loss": 2.0526, + "step": 13960000 + }, + { + "epoch": 40.41, + "learning_rate": 2.9802308146535757e-05, + "loss": 2.0836, + "step": 13960500 + }, + { + "epoch": 40.41, + "learning_rate": 2.980158449888848e-05, + "loss": 2.0455, + "step": 13961000 + }, + { + "epoch": 40.41, + "learning_rate": 2.9800860851241202e-05, + "loss": 2.0537, + "step": 13961500 + }, + { + "epoch": 40.41, + "learning_rate": 2.980013865088922e-05, + "loss": 2.0446, + "step": 13962000 + }, + { + "epoch": 40.42, + "learning_rate": 2.9799415003241943e-05, + "loss": 2.0412, + "step": 13962500 + }, + { + "epoch": 40.42, + "learning_rate": 2.9798691355594665e-05, + "loss": 2.0579, + "step": 13963000 + }, + { + "epoch": 40.42, + "learning_rate": 2.9797967707947388e-05, + "loss": 2.0298, + "step": 13963500 + }, + { + "epoch": 40.42, + "learning_rate": 2.979724406030011e-05, + "loss": 2.0462, + "step": 13964000 + }, + { + "epoch": 40.42, + "learning_rate": 2.9796520412652835e-05, + "loss": 2.0449, + "step": 13964500 + }, + { + "epoch": 40.42, + "learning_rate": 2.9795796765005558e-05, + "loss": 2.054, + "step": 13965000 + }, + { + "epoch": 40.42, + "learning_rate": 2.979507311735828e-05, + "loss": 2.0595, + "step": 13965500 + }, + { + "epoch": 40.43, + "learning_rate": 2.97943509170063e-05, + "loss": 2.0575, + "step": 13966000 + }, + { + "epoch": 40.43, + "learning_rate": 2.9793627269359024e-05, + "loss": 2.0447, + "step": 13966500 + }, + { + "epoch": 40.43, + "learning_rate": 2.9792905069007043e-05, + "loss": 2.0386, + "step": 13967000 + }, + { + "epoch": 40.43, + "learning_rate": 2.9792181421359766e-05, + "loss": 2.0732, + "step": 13967500 + }, + { + "epoch": 40.43, + "learning_rate": 2.9791457773712488e-05, + "loss": 2.0549, + "step": 13968000 + }, + { + "epoch": 40.43, + "learning_rate": 2.979073412606521e-05, + "loss": 2.0669, + "step": 13968500 + }, + { + "epoch": 40.43, + "learning_rate": 2.9790010478417936e-05, + "loss": 2.0271, + "step": 13969000 + }, + { + "epoch": 40.44, + "learning_rate": 2.9789286830770658e-05, + "loss": 2.0438, + "step": 13969500 + }, + { + "epoch": 40.44, + "learning_rate": 2.978856318312338e-05, + "loss": 2.0318, + "step": 13970000 + }, + { + "epoch": 40.44, + "learning_rate": 2.9787839535476102e-05, + "loss": 2.0196, + "step": 13970500 + }, + { + "epoch": 40.44, + "learning_rate": 2.9787115887828825e-05, + "loss": 2.0363, + "step": 13971000 + }, + { + "epoch": 40.44, + "learning_rate": 2.9786393687476844e-05, + "loss": 2.0441, + "step": 13971500 + }, + { + "epoch": 40.44, + "learning_rate": 2.9785670039829566e-05, + "loss": 2.0416, + "step": 13972000 + }, + { + "epoch": 40.44, + "learning_rate": 2.9784947839477585e-05, + "loss": 2.048, + "step": 13972500 + }, + { + "epoch": 40.45, + "learning_rate": 2.9784224191830307e-05, + "loss": 2.042, + "step": 13973000 + }, + { + "epoch": 40.45, + "learning_rate": 2.978350054418303e-05, + "loss": 2.0584, + "step": 13973500 + }, + { + "epoch": 40.45, + "learning_rate": 2.9782776896535758e-05, + "loss": 2.0529, + "step": 13974000 + }, + { + "epoch": 40.45, + "learning_rate": 2.978205324888848e-05, + "loss": 2.0206, + "step": 13974500 + }, + { + "epoch": 40.45, + "learning_rate": 2.9781329601241203e-05, + "loss": 2.0499, + "step": 13975000 + }, + { + "epoch": 40.45, + "learning_rate": 2.9780605953593925e-05, + "loss": 2.0436, + "step": 13975500 + }, + { + "epoch": 40.45, + "learning_rate": 2.9779885200537237e-05, + "loss": 2.0175, + "step": 13976000 + }, + { + "epoch": 40.46, + "learning_rate": 2.9779161552889963e-05, + "loss": 2.0379, + "step": 13976500 + }, + { + "epoch": 40.46, + "learning_rate": 2.9778437905242685e-05, + "loss": 2.0604, + "step": 13977000 + }, + { + "epoch": 40.46, + "learning_rate": 2.9777714257595407e-05, + "loss": 2.0216, + "step": 13977500 + }, + { + "epoch": 40.46, + "learning_rate": 2.977699060994813e-05, + "loss": 2.0444, + "step": 13978000 + }, + { + "epoch": 40.46, + "learning_rate": 2.9776266962300852e-05, + "loss": 2.0671, + "step": 13978500 + }, + { + "epoch": 40.46, + "learning_rate": 2.9775543314653574e-05, + "loss": 2.0441, + "step": 13979000 + }, + { + "epoch": 40.46, + "learning_rate": 2.97748196670063e-05, + "loss": 2.0365, + "step": 13979500 + }, + { + "epoch": 40.47, + "learning_rate": 2.9774096019359022e-05, + "loss": 2.0489, + "step": 13980000 + }, + { + "epoch": 40.47, + "learning_rate": 2.9773373819007037e-05, + "loss": 2.0215, + "step": 13980500 + }, + { + "epoch": 40.47, + "learning_rate": 2.9772650171359763e-05, + "loss": 2.0469, + "step": 13981000 + }, + { + "epoch": 40.47, + "learning_rate": 2.977192652371249e-05, + "loss": 2.0352, + "step": 13981500 + }, + { + "epoch": 40.47, + "learning_rate": 2.9771202876065214e-05, + "loss": 2.043, + "step": 13982000 + }, + { + "epoch": 40.47, + "learning_rate": 2.9770479228417937e-05, + "loss": 2.0704, + "step": 13982500 + }, + { + "epoch": 40.48, + "learning_rate": 2.9769757028065952e-05, + "loss": 2.0766, + "step": 13983000 + }, + { + "epoch": 40.48, + "learning_rate": 2.9769033380418678e-05, + "loss": 2.0489, + "step": 13983500 + }, + { + "epoch": 40.48, + "learning_rate": 2.97683097327714e-05, + "loss": 2.0573, + "step": 13984000 + }, + { + "epoch": 40.48, + "learning_rate": 2.9767586085124122e-05, + "loss": 2.0459, + "step": 13984500 + }, + { + "epoch": 40.48, + "learning_rate": 2.9766862437476844e-05, + "loss": 2.0254, + "step": 13985000 + }, + { + "epoch": 40.48, + "learning_rate": 2.9766138789829567e-05, + "loss": 2.0549, + "step": 13985500 + }, + { + "epoch": 40.48, + "learning_rate": 2.9765416589477586e-05, + "loss": 2.0611, + "step": 13986000 + }, + { + "epoch": 40.49, + "learning_rate": 2.9764692941830308e-05, + "loss": 2.01, + "step": 13986500 + }, + { + "epoch": 40.49, + "learning_rate": 2.976396929418303e-05, + "loss": 2.0598, + "step": 13987000 + }, + { + "epoch": 40.49, + "learning_rate": 2.9763245646535752e-05, + "loss": 2.0153, + "step": 13987500 + }, + { + "epoch": 40.49, + "learning_rate": 2.9762521998888478e-05, + "loss": 2.0586, + "step": 13988000 + }, + { + "epoch": 40.49, + "learning_rate": 2.9761798351241204e-05, + "loss": 2.0063, + "step": 13988500 + }, + { + "epoch": 40.49, + "learning_rate": 2.976107470359393e-05, + "loss": 2.0323, + "step": 13989000 + }, + { + "epoch": 40.49, + "learning_rate": 2.976035105594665e-05, + "loss": 2.0748, + "step": 13989500 + }, + { + "epoch": 40.5, + "learning_rate": 2.9759627408299374e-05, + "loss": 2.0757, + "step": 13990000 + }, + { + "epoch": 40.5, + "learning_rate": 2.9758903760652096e-05, + "loss": 2.0394, + "step": 13990500 + }, + { + "epoch": 40.5, + "learning_rate": 2.9758183007595408e-05, + "loss": 2.0208, + "step": 13991000 + }, + { + "epoch": 40.5, + "learning_rate": 2.975745935994813e-05, + "loss": 2.0359, + "step": 13991500 + }, + { + "epoch": 40.5, + "learning_rate": 2.975673715959615e-05, + "loss": 2.0459, + "step": 13992000 + }, + { + "epoch": 40.5, + "learning_rate": 2.975601351194887e-05, + "loss": 2.045, + "step": 13992500 + }, + { + "epoch": 40.5, + "learning_rate": 2.9755289864301594e-05, + "loss": 2.0209, + "step": 13993000 + }, + { + "epoch": 40.51, + "learning_rate": 2.9754566216654316e-05, + "loss": 2.0294, + "step": 13993500 + }, + { + "epoch": 40.51, + "learning_rate": 2.9753844016302335e-05, + "loss": 2.0396, + "step": 13994000 + }, + { + "epoch": 40.51, + "learning_rate": 2.9753120368655057e-05, + "loss": 2.0392, + "step": 13994500 + }, + { + "epoch": 40.51, + "learning_rate": 2.975239672100778e-05, + "loss": 2.0594, + "step": 13995000 + }, + { + "epoch": 40.51, + "learning_rate": 2.97516730733605e-05, + "loss": 2.0442, + "step": 13995500 + }, + { + "epoch": 40.51, + "learning_rate": 2.975095087300852e-05, + "loss": 2.052, + "step": 13996000 + }, + { + "epoch": 40.51, + "learning_rate": 2.975022722536125e-05, + "loss": 2.0684, + "step": 13996500 + }, + { + "epoch": 40.52, + "learning_rate": 2.9749503577713972e-05, + "loss": 2.0623, + "step": 13997000 + }, + { + "epoch": 40.52, + "learning_rate": 2.9748779930066694e-05, + "loss": 2.042, + "step": 13997500 + }, + { + "epoch": 40.52, + "learning_rate": 2.9748056282419416e-05, + "loss": 2.0645, + "step": 13998000 + }, + { + "epoch": 40.52, + "learning_rate": 2.9747334082067435e-05, + "loss": 2.0455, + "step": 13998500 + }, + { + "epoch": 40.52, + "learning_rate": 2.9746610434420157e-05, + "loss": 2.0267, + "step": 13999000 + }, + { + "epoch": 40.52, + "learning_rate": 2.974588678677288e-05, + "loss": 2.0496, + "step": 13999500 + }, + { + "epoch": 40.52, + "learning_rate": 2.9745163139125602e-05, + "loss": 2.0741, + "step": 14000000 + }, + { + "epoch": 40.53, + "learning_rate": 2.9744439491478328e-05, + "loss": 2.0363, + "step": 14000500 + }, + { + "epoch": 40.53, + "learning_rate": 2.974371584383105e-05, + "loss": 2.029, + "step": 14001000 + }, + { + "epoch": 40.53, + "learning_rate": 2.9742992196183772e-05, + "loss": 2.0327, + "step": 14001500 + }, + { + "epoch": 40.53, + "learning_rate": 2.9742268548536494e-05, + "loss": 2.0394, + "step": 14002000 + }, + { + "epoch": 40.53, + "learning_rate": 2.9741544900889216e-05, + "loss": 2.0473, + "step": 14002500 + }, + { + "epoch": 40.53, + "learning_rate": 2.9740821253241942e-05, + "loss": 2.0338, + "step": 14003000 + }, + { + "epoch": 40.53, + "learning_rate": 2.9740097605594664e-05, + "loss": 2.0442, + "step": 14003500 + }, + { + "epoch": 40.54, + "learning_rate": 2.9739373957947393e-05, + "loss": 2.0365, + "step": 14004000 + }, + { + "epoch": 40.54, + "learning_rate": 2.9738650310300116e-05, + "loss": 2.0585, + "step": 14004500 + }, + { + "epoch": 40.54, + "learning_rate": 2.9737926662652838e-05, + "loss": 2.0641, + "step": 14005000 + }, + { + "epoch": 40.54, + "learning_rate": 2.973720301500556e-05, + "loss": 2.0603, + "step": 14005500 + }, + { + "epoch": 40.54, + "learning_rate": 2.9736479367358282e-05, + "loss": 2.0569, + "step": 14006000 + }, + { + "epoch": 40.54, + "learning_rate": 2.97357571670063e-05, + "loss": 2.0345, + "step": 14006500 + }, + { + "epoch": 40.54, + "learning_rate": 2.9735033519359023e-05, + "loss": 2.056, + "step": 14007000 + }, + { + "epoch": 40.55, + "learning_rate": 2.9734309871711746e-05, + "loss": 2.0445, + "step": 14007500 + }, + { + "epoch": 40.55, + "learning_rate": 2.9733586224064468e-05, + "loss": 2.0576, + "step": 14008000 + }, + { + "epoch": 40.55, + "learning_rate": 2.9732864023712487e-05, + "loss": 2.0562, + "step": 14008500 + }, + { + "epoch": 40.55, + "learning_rate": 2.973214037606521e-05, + "loss": 2.0564, + "step": 14009000 + }, + { + "epoch": 40.55, + "learning_rate": 2.973141672841793e-05, + "loss": 2.0282, + "step": 14009500 + }, + { + "epoch": 40.55, + "learning_rate": 2.9730693080770654e-05, + "loss": 2.0679, + "step": 14010000 + }, + { + "epoch": 40.55, + "learning_rate": 2.972996943312338e-05, + "loss": 2.0298, + "step": 14010500 + }, + { + "epoch": 40.56, + "learning_rate": 2.9729245785476105e-05, + "loss": 2.0443, + "step": 14011000 + }, + { + "epoch": 40.56, + "learning_rate": 2.972852213782883e-05, + "loss": 2.0548, + "step": 14011500 + }, + { + "epoch": 40.56, + "learning_rate": 2.9727798490181553e-05, + "loss": 2.0426, + "step": 14012000 + }, + { + "epoch": 40.56, + "learning_rate": 2.9727076289829568e-05, + "loss": 2.0422, + "step": 14012500 + }, + { + "epoch": 40.56, + "learning_rate": 2.9726352642182294e-05, + "loss": 2.0387, + "step": 14013000 + }, + { + "epoch": 40.56, + "learning_rate": 2.9725628994535016e-05, + "loss": 2.0321, + "step": 14013500 + }, + { + "epoch": 40.56, + "learning_rate": 2.9724905346887738e-05, + "loss": 2.0528, + "step": 14014000 + }, + { + "epoch": 40.57, + "learning_rate": 2.9724183146535757e-05, + "loss": 2.0263, + "step": 14014500 + }, + { + "epoch": 40.57, + "learning_rate": 2.972345949888848e-05, + "loss": 2.0317, + "step": 14015000 + }, + { + "epoch": 40.57, + "learning_rate": 2.9722735851241202e-05, + "loss": 2.0558, + "step": 14015500 + }, + { + "epoch": 40.57, + "learning_rate": 2.9722012203593924e-05, + "loss": 2.0401, + "step": 14016000 + }, + { + "epoch": 40.57, + "learning_rate": 2.9721288555946646e-05, + "loss": 2.05, + "step": 14016500 + }, + { + "epoch": 40.57, + "learning_rate": 2.972056490829937e-05, + "loss": 2.0573, + "step": 14017000 + }, + { + "epoch": 40.57, + "learning_rate": 2.9719841260652094e-05, + "loss": 2.06, + "step": 14017500 + }, + { + "epoch": 40.58, + "learning_rate": 2.9719117613004816e-05, + "loss": 2.0499, + "step": 14018000 + }, + { + "epoch": 40.58, + "learning_rate": 2.9718393965357545e-05, + "loss": 2.021, + "step": 14018500 + }, + { + "epoch": 40.58, + "learning_rate": 2.9717670317710268e-05, + "loss": 2.0429, + "step": 14019000 + }, + { + "epoch": 40.58, + "learning_rate": 2.971694667006299e-05, + "loss": 2.0546, + "step": 14019500 + }, + { + "epoch": 40.58, + "learning_rate": 2.9716223022415712e-05, + "loss": 2.0468, + "step": 14020000 + }, + { + "epoch": 40.58, + "learning_rate": 2.9715499374768434e-05, + "loss": 2.0425, + "step": 14020500 + }, + { + "epoch": 40.59, + "learning_rate": 2.9714778621711746e-05, + "loss": 2.0451, + "step": 14021000 + }, + { + "epoch": 40.59, + "learning_rate": 2.971405497406447e-05, + "loss": 2.048, + "step": 14021500 + }, + { + "epoch": 40.59, + "learning_rate": 2.9713331326417194e-05, + "loss": 2.0745, + "step": 14022000 + }, + { + "epoch": 40.59, + "learning_rate": 2.9712607678769917e-05, + "loss": 2.0266, + "step": 14022500 + }, + { + "epoch": 40.59, + "learning_rate": 2.9711885478417932e-05, + "loss": 2.0462, + "step": 14023000 + }, + { + "epoch": 40.59, + "learning_rate": 2.971116327806595e-05, + "loss": 2.0622, + "step": 14023500 + }, + { + "epoch": 40.59, + "learning_rate": 2.9710439630418673e-05, + "loss": 2.0327, + "step": 14024000 + }, + { + "epoch": 40.6, + "learning_rate": 2.9709715982771396e-05, + "loss": 2.024, + "step": 14024500 + }, + { + "epoch": 40.6, + "learning_rate": 2.970899233512412e-05, + "loss": 2.0547, + "step": 14025000 + }, + { + "epoch": 40.6, + "learning_rate": 2.9708268687476843e-05, + "loss": 2.0554, + "step": 14025500 + }, + { + "epoch": 40.6, + "learning_rate": 2.9707545039829566e-05, + "loss": 2.0507, + "step": 14026000 + }, + { + "epoch": 40.6, + "learning_rate": 2.9706821392182295e-05, + "loss": 2.063, + "step": 14026500 + }, + { + "epoch": 40.6, + "learning_rate": 2.9706097744535017e-05, + "loss": 2.0482, + "step": 14027000 + }, + { + "epoch": 40.6, + "learning_rate": 2.970537409688774e-05, + "loss": 2.0671, + "step": 14027500 + }, + { + "epoch": 40.61, + "learning_rate": 2.970465044924046e-05, + "loss": 2.0479, + "step": 14028000 + }, + { + "epoch": 40.61, + "learning_rate": 2.9703926801593184e-05, + "loss": 2.0436, + "step": 14028500 + }, + { + "epoch": 40.61, + "learning_rate": 2.970320315394591e-05, + "loss": 2.043, + "step": 14029000 + }, + { + "epoch": 40.61, + "learning_rate": 2.970247950629863e-05, + "loss": 2.0464, + "step": 14029500 + }, + { + "epoch": 40.61, + "learning_rate": 2.9701755858651354e-05, + "loss": 2.0383, + "step": 14030000 + }, + { + "epoch": 40.61, + "learning_rate": 2.9701032211004076e-05, + "loss": 2.0351, + "step": 14030500 + }, + { + "epoch": 40.61, + "learning_rate": 2.9700310010652095e-05, + "loss": 2.0619, + "step": 14031000 + }, + { + "epoch": 40.62, + "learning_rate": 2.9699586363004817e-05, + "loss": 2.0234, + "step": 14031500 + }, + { + "epoch": 40.62, + "learning_rate": 2.969886271535754e-05, + "loss": 2.0278, + "step": 14032000 + }, + { + "epoch": 40.62, + "learning_rate": 2.969813906771026e-05, + "loss": 2.0545, + "step": 14032500 + }, + { + "epoch": 40.62, + "learning_rate": 2.969741686735828e-05, + "loss": 2.0754, + "step": 14033000 + }, + { + "epoch": 40.62, + "learning_rate": 2.969669321971101e-05, + "loss": 2.0523, + "step": 14033500 + }, + { + "epoch": 40.62, + "learning_rate": 2.9695969572063732e-05, + "loss": 2.0125, + "step": 14034000 + }, + { + "epoch": 40.62, + "learning_rate": 2.9695245924416454e-05, + "loss": 2.0728, + "step": 14034500 + }, + { + "epoch": 40.63, + "learning_rate": 2.9694522276769176e-05, + "loss": 2.0304, + "step": 14035000 + }, + { + "epoch": 40.63, + "learning_rate": 2.96937986291219e-05, + "loss": 2.0506, + "step": 14035500 + }, + { + "epoch": 40.63, + "learning_rate": 2.969307498147462e-05, + "loss": 2.0487, + "step": 14036000 + }, + { + "epoch": 40.63, + "learning_rate": 2.969235278112264e-05, + "loss": 2.0337, + "step": 14036500 + }, + { + "epoch": 40.63, + "learning_rate": 2.9691629133475362e-05, + "loss": 2.0223, + "step": 14037000 + }, + { + "epoch": 40.63, + "learning_rate": 2.9690905485828084e-05, + "loss": 2.0578, + "step": 14037500 + }, + { + "epoch": 40.63, + "learning_rate": 2.9690183285476103e-05, + "loss": 2.0284, + "step": 14038000 + }, + { + "epoch": 40.64, + "learning_rate": 2.9689459637828825e-05, + "loss": 2.0679, + "step": 14038500 + }, + { + "epoch": 40.64, + "learning_rate": 2.9688737437476844e-05, + "loss": 2.0469, + "step": 14039000 + }, + { + "epoch": 40.64, + "learning_rate": 2.9688013789829566e-05, + "loss": 2.0193, + "step": 14039500 + }, + { + "epoch": 40.64, + "learning_rate": 2.968729014218229e-05, + "loss": 2.0289, + "step": 14040000 + }, + { + "epoch": 40.64, + "learning_rate": 2.968656649453501e-05, + "loss": 2.0474, + "step": 14040500 + }, + { + "epoch": 40.64, + "learning_rate": 2.968584284688774e-05, + "loss": 2.032, + "step": 14041000 + }, + { + "epoch": 40.64, + "learning_rate": 2.9685119199240462e-05, + "loss": 2.0353, + "step": 14041500 + }, + { + "epoch": 40.65, + "learning_rate": 2.9684395551593184e-05, + "loss": 2.0526, + "step": 14042000 + }, + { + "epoch": 40.65, + "learning_rate": 2.968367190394591e-05, + "loss": 2.0272, + "step": 14042500 + }, + { + "epoch": 40.65, + "learning_rate": 2.9682948256298632e-05, + "loss": 2.035, + "step": 14043000 + }, + { + "epoch": 40.65, + "learning_rate": 2.9682224608651354e-05, + "loss": 2.0506, + "step": 14043500 + }, + { + "epoch": 40.65, + "learning_rate": 2.9681500961004077e-05, + "loss": 2.0517, + "step": 14044000 + }, + { + "epoch": 40.65, + "learning_rate": 2.96807773133568e-05, + "loss": 2.0279, + "step": 14044500 + }, + { + "epoch": 40.65, + "learning_rate": 2.9680053665709525e-05, + "loss": 2.0385, + "step": 14045000 + }, + { + "epoch": 40.66, + "learning_rate": 2.9679330018062247e-05, + "loss": 2.0264, + "step": 14045500 + }, + { + "epoch": 40.66, + "learning_rate": 2.967860637041497e-05, + "loss": 2.0319, + "step": 14046000 + }, + { + "epoch": 40.66, + "learning_rate": 2.967788272276769e-05, + "loss": 2.0586, + "step": 14046500 + }, + { + "epoch": 40.66, + "learning_rate": 2.9677159075120413e-05, + "loss": 2.0285, + "step": 14047000 + }, + { + "epoch": 40.66, + "learning_rate": 2.9676435427473136e-05, + "loss": 2.0387, + "step": 14047500 + }, + { + "epoch": 40.66, + "learning_rate": 2.9675711779825865e-05, + "loss": 2.0254, + "step": 14048000 + }, + { + "epoch": 40.66, + "learning_rate": 2.9674988132178587e-05, + "loss": 2.0423, + "step": 14048500 + }, + { + "epoch": 40.67, + "learning_rate": 2.9674264484531313e-05, + "loss": 2.0463, + "step": 14049000 + }, + { + "epoch": 40.67, + "learning_rate": 2.9673542284179328e-05, + "loss": 2.0424, + "step": 14049500 + }, + { + "epoch": 40.67, + "learning_rate": 2.967281863653205e-05, + "loss": 2.0359, + "step": 14050000 + }, + { + "epoch": 40.67, + "learning_rate": 2.9672094988884776e-05, + "loss": 2.0436, + "step": 14050500 + }, + { + "epoch": 40.67, + "learning_rate": 2.967137278853279e-05, + "loss": 2.0444, + "step": 14051000 + }, + { + "epoch": 40.67, + "learning_rate": 2.967065058818081e-05, + "loss": 2.051, + "step": 14051500 + }, + { + "epoch": 40.67, + "learning_rate": 2.9669926940533533e-05, + "loss": 2.0443, + "step": 14052000 + }, + { + "epoch": 40.68, + "learning_rate": 2.9669203292886255e-05, + "loss": 2.0351, + "step": 14052500 + }, + { + "epoch": 40.68, + "learning_rate": 2.9668479645238977e-05, + "loss": 2.0211, + "step": 14053000 + }, + { + "epoch": 40.68, + "learning_rate": 2.96677559975917e-05, + "loss": 2.0748, + "step": 14053500 + }, + { + "epoch": 40.68, + "learning_rate": 2.966703379723972e-05, + "loss": 2.0538, + "step": 14054000 + }, + { + "epoch": 40.68, + "learning_rate": 2.966631014959244e-05, + "loss": 2.0576, + "step": 14054500 + }, + { + "epoch": 40.68, + "learning_rate": 2.9665586501945163e-05, + "loss": 2.0637, + "step": 14055000 + }, + { + "epoch": 40.68, + "learning_rate": 2.966486285429789e-05, + "loss": 2.0454, + "step": 14055500 + }, + { + "epoch": 40.69, + "learning_rate": 2.966414065394591e-05, + "loss": 2.0467, + "step": 14056000 + }, + { + "epoch": 40.69, + "learning_rate": 2.9663417006298633e-05, + "loss": 2.0467, + "step": 14056500 + }, + { + "epoch": 40.69, + "learning_rate": 2.9662693358651355e-05, + "loss": 2.0642, + "step": 14057000 + }, + { + "epoch": 40.69, + "learning_rate": 2.9661969711004077e-05, + "loss": 2.0279, + "step": 14057500 + }, + { + "epoch": 40.69, + "learning_rate": 2.96612460633568e-05, + "loss": 2.0448, + "step": 14058000 + }, + { + "epoch": 40.69, + "learning_rate": 2.9660522415709525e-05, + "loss": 2.0465, + "step": 14058500 + }, + { + "epoch": 40.7, + "learning_rate": 2.9659798768062248e-05, + "loss": 2.0448, + "step": 14059000 + }, + { + "epoch": 40.7, + "learning_rate": 2.965907512041497e-05, + "loss": 2.0165, + "step": 14059500 + }, + { + "epoch": 40.7, + "learning_rate": 2.9658351472767692e-05, + "loss": 2.0575, + "step": 14060000 + }, + { + "epoch": 40.7, + "learning_rate": 2.9657627825120414e-05, + "loss": 2.0519, + "step": 14060500 + }, + { + "epoch": 40.7, + "learning_rate": 2.965690417747314e-05, + "loss": 2.0714, + "step": 14061000 + }, + { + "epoch": 40.7, + "learning_rate": 2.9656180529825862e-05, + "loss": 2.0379, + "step": 14061500 + }, + { + "epoch": 40.7, + "learning_rate": 2.9655458329473878e-05, + "loss": 2.0492, + "step": 14062000 + }, + { + "epoch": 40.71, + "learning_rate": 2.96547346818266e-05, + "loss": 2.053, + "step": 14062500 + }, + { + "epoch": 40.71, + "learning_rate": 2.965401248147462e-05, + "loss": 2.0404, + "step": 14063000 + }, + { + "epoch": 40.71, + "learning_rate": 2.9653288833827348e-05, + "loss": 2.0571, + "step": 14063500 + }, + { + "epoch": 40.71, + "learning_rate": 2.965256518618007e-05, + "loss": 2.0379, + "step": 14064000 + }, + { + "epoch": 40.71, + "learning_rate": 2.9651841538532792e-05, + "loss": 2.0691, + "step": 14064500 + }, + { + "epoch": 40.71, + "learning_rate": 2.9651117890885515e-05, + "loss": 2.0315, + "step": 14065000 + }, + { + "epoch": 40.71, + "learning_rate": 2.965039424323824e-05, + "loss": 2.0466, + "step": 14065500 + }, + { + "epoch": 40.72, + "learning_rate": 2.9649670595590962e-05, + "loss": 2.0403, + "step": 14066000 + }, + { + "epoch": 40.72, + "learning_rate": 2.9648946947943685e-05, + "loss": 2.036, + "step": 14066500 + }, + { + "epoch": 40.72, + "learning_rate": 2.96482247475917e-05, + "loss": 2.0498, + "step": 14067000 + }, + { + "epoch": 40.72, + "learning_rate": 2.964750254723972e-05, + "loss": 2.0575, + "step": 14067500 + }, + { + "epoch": 40.72, + "learning_rate": 2.964677889959244e-05, + "loss": 2.0489, + "step": 14068000 + }, + { + "epoch": 40.72, + "learning_rate": 2.964605669924046e-05, + "loss": 2.0426, + "step": 14068500 + }, + { + "epoch": 40.72, + "learning_rate": 2.9645333051593183e-05, + "loss": 2.0405, + "step": 14069000 + }, + { + "epoch": 40.73, + "learning_rate": 2.9644609403945905e-05, + "loss": 2.0524, + "step": 14069500 + }, + { + "epoch": 40.73, + "learning_rate": 2.9643885756298627e-05, + "loss": 2.0804, + "step": 14070000 + }, + { + "epoch": 40.73, + "learning_rate": 2.9643163555946646e-05, + "loss": 2.0334, + "step": 14070500 + }, + { + "epoch": 40.73, + "learning_rate": 2.9642439908299375e-05, + "loss": 2.0239, + "step": 14071000 + }, + { + "epoch": 40.73, + "learning_rate": 2.9641716260652097e-05, + "loss": 2.0504, + "step": 14071500 + }, + { + "epoch": 40.73, + "learning_rate": 2.964099261300482e-05, + "loss": 2.062, + "step": 14072000 + }, + { + "epoch": 40.73, + "learning_rate": 2.964026896535754e-05, + "loss": 2.0547, + "step": 14072500 + }, + { + "epoch": 40.74, + "learning_rate": 2.9639545317710264e-05, + "loss": 2.0475, + "step": 14073000 + }, + { + "epoch": 40.74, + "learning_rate": 2.963882167006299e-05, + "loss": 2.0513, + "step": 14073500 + }, + { + "epoch": 40.74, + "learning_rate": 2.9638098022415712e-05, + "loss": 2.0499, + "step": 14074000 + }, + { + "epoch": 40.74, + "learning_rate": 2.9637374374768434e-05, + "loss": 2.0577, + "step": 14074500 + }, + { + "epoch": 40.74, + "learning_rate": 2.9636650727121156e-05, + "loss": 2.045, + "step": 14075000 + }, + { + "epoch": 40.74, + "learning_rate": 2.963592707947388e-05, + "loss": 2.0581, + "step": 14075500 + }, + { + "epoch": 40.74, + "learning_rate": 2.9635203431826604e-05, + "loss": 2.069, + "step": 14076000 + }, + { + "epoch": 40.75, + "learning_rate": 2.963448123147462e-05, + "loss": 2.0699, + "step": 14076500 + }, + { + "epoch": 40.75, + "learning_rate": 2.9633757583827342e-05, + "loss": 2.0711, + "step": 14077000 + }, + { + "epoch": 40.75, + "learning_rate": 2.9633033936180064e-05, + "loss": 2.0521, + "step": 14077500 + }, + { + "epoch": 40.75, + "learning_rate": 2.9632310288532793e-05, + "loss": 2.0655, + "step": 14078000 + }, + { + "epoch": 40.75, + "learning_rate": 2.9631586640885515e-05, + "loss": 2.0796, + "step": 14078500 + }, + { + "epoch": 40.75, + "learning_rate": 2.963086299323824e-05, + "loss": 2.0628, + "step": 14079000 + }, + { + "epoch": 40.75, + "learning_rate": 2.9630139345590963e-05, + "loss": 2.0505, + "step": 14079500 + }, + { + "epoch": 40.76, + "learning_rate": 2.9629418592534275e-05, + "loss": 2.0605, + "step": 14080000 + }, + { + "epoch": 40.76, + "learning_rate": 2.9628694944886998e-05, + "loss": 2.048, + "step": 14080500 + }, + { + "epoch": 40.76, + "learning_rate": 2.962797129723972e-05, + "loss": 2.0432, + "step": 14081000 + }, + { + "epoch": 40.76, + "learning_rate": 2.9627247649592442e-05, + "loss": 2.0549, + "step": 14081500 + }, + { + "epoch": 40.76, + "learning_rate": 2.9626524001945168e-05, + "loss": 2.0621, + "step": 14082000 + }, + { + "epoch": 40.76, + "learning_rate": 2.962580035429789e-05, + "loss": 2.0308, + "step": 14082500 + }, + { + "epoch": 40.76, + "learning_rate": 2.9625076706650612e-05, + "loss": 2.0725, + "step": 14083000 + }, + { + "epoch": 40.77, + "learning_rate": 2.9624353059003334e-05, + "loss": 2.0208, + "step": 14083500 + }, + { + "epoch": 40.77, + "learning_rate": 2.9623629411356057e-05, + "loss": 2.0373, + "step": 14084000 + }, + { + "epoch": 40.77, + "learning_rate": 2.9622907211004076e-05, + "loss": 2.0555, + "step": 14084500 + }, + { + "epoch": 40.77, + "learning_rate": 2.9622183563356798e-05, + "loss": 2.0586, + "step": 14085000 + }, + { + "epoch": 40.77, + "learning_rate": 2.962145991570952e-05, + "loss": 2.0437, + "step": 14085500 + }, + { + "epoch": 40.77, + "learning_rate": 2.962073626806225e-05, + "loss": 2.0363, + "step": 14086000 + }, + { + "epoch": 40.77, + "learning_rate": 2.9620014067710268e-05, + "loss": 2.0438, + "step": 14086500 + }, + { + "epoch": 40.78, + "learning_rate": 2.961929042006299e-05, + "loss": 2.0464, + "step": 14087000 + }, + { + "epoch": 40.78, + "learning_rate": 2.9618566772415713e-05, + "loss": 2.0301, + "step": 14087500 + }, + { + "epoch": 40.78, + "learning_rate": 2.9617843124768435e-05, + "loss": 2.0426, + "step": 14088000 + }, + { + "epoch": 40.78, + "learning_rate": 2.9617119477121157e-05, + "loss": 2.0534, + "step": 14088500 + }, + { + "epoch": 40.78, + "learning_rate": 2.961639582947388e-05, + "loss": 2.0567, + "step": 14089000 + }, + { + "epoch": 40.78, + "learning_rate": 2.9615672181826605e-05, + "loss": 2.0575, + "step": 14089500 + }, + { + "epoch": 40.78, + "learning_rate": 2.9614948534179327e-05, + "loss": 2.0414, + "step": 14090000 + }, + { + "epoch": 40.79, + "learning_rate": 2.961422488653205e-05, + "loss": 2.0587, + "step": 14090500 + }, + { + "epoch": 40.79, + "learning_rate": 2.9613502686180068e-05, + "loss": 2.0404, + "step": 14091000 + }, + { + "epoch": 40.79, + "learning_rate": 2.9612780485828084e-05, + "loss": 2.0387, + "step": 14091500 + }, + { + "epoch": 40.79, + "learning_rate": 2.9612056838180806e-05, + "loss": 2.0542, + "step": 14092000 + }, + { + "epoch": 40.79, + "learning_rate": 2.9611334637828825e-05, + "loss": 2.0223, + "step": 14092500 + }, + { + "epoch": 40.79, + "learning_rate": 2.9610610990181547e-05, + "loss": 2.0649, + "step": 14093000 + }, + { + "epoch": 40.79, + "learning_rate": 2.9609887342534276e-05, + "loss": 2.0597, + "step": 14093500 + }, + { + "epoch": 40.8, + "learning_rate": 2.9609163694887e-05, + "loss": 2.0329, + "step": 14094000 + }, + { + "epoch": 40.8, + "learning_rate": 2.960844004723972e-05, + "loss": 2.0739, + "step": 14094500 + }, + { + "epoch": 40.8, + "learning_rate": 2.9607716399592443e-05, + "loss": 2.0397, + "step": 14095000 + }, + { + "epoch": 40.8, + "learning_rate": 2.960699275194517e-05, + "loss": 2.057, + "step": 14095500 + }, + { + "epoch": 40.8, + "learning_rate": 2.960626910429789e-05, + "loss": 2.0483, + "step": 14096000 + }, + { + "epoch": 40.8, + "learning_rate": 2.9605545456650613e-05, + "loss": 2.0298, + "step": 14096500 + }, + { + "epoch": 40.81, + "learning_rate": 2.9604824703593925e-05, + "loss": 2.0395, + "step": 14097000 + }, + { + "epoch": 40.81, + "learning_rate": 2.9604101055946648e-05, + "loss": 2.0567, + "step": 14097500 + }, + { + "epoch": 40.81, + "learning_rate": 2.960337740829937e-05, + "loss": 2.0565, + "step": 14098000 + }, + { + "epoch": 40.81, + "learning_rate": 2.960265520794739e-05, + "loss": 2.0472, + "step": 14098500 + }, + { + "epoch": 40.81, + "learning_rate": 2.960193156030011e-05, + "loss": 2.0413, + "step": 14099000 + }, + { + "epoch": 40.81, + "learning_rate": 2.9601207912652833e-05, + "loss": 2.0543, + "step": 14099500 + }, + { + "epoch": 40.81, + "learning_rate": 2.9600484265005555e-05, + "loss": 2.0845, + "step": 14100000 + }, + { + "epoch": 40.82, + "learning_rate": 2.959976061735828e-05, + "loss": 2.0457, + "step": 14100500 + }, + { + "epoch": 40.82, + "learning_rate": 2.9599036969711007e-05, + "loss": 2.0392, + "step": 14101000 + }, + { + "epoch": 40.82, + "learning_rate": 2.9598313322063732e-05, + "loss": 2.0181, + "step": 14101500 + }, + { + "epoch": 40.82, + "learning_rate": 2.9597591121711748e-05, + "loss": 2.0732, + "step": 14102000 + }, + { + "epoch": 40.82, + "learning_rate": 2.959686747406447e-05, + "loss": 2.0423, + "step": 14102500 + }, + { + "epoch": 40.82, + "learning_rate": 2.9596143826417196e-05, + "loss": 2.0432, + "step": 14103000 + }, + { + "epoch": 40.82, + "learning_rate": 2.9595420178769918e-05, + "loss": 2.0636, + "step": 14103500 + }, + { + "epoch": 40.83, + "learning_rate": 2.9594697978417934e-05, + "loss": 2.0525, + "step": 14104000 + }, + { + "epoch": 40.83, + "learning_rate": 2.9593974330770656e-05, + "loss": 2.0494, + "step": 14104500 + }, + { + "epoch": 40.83, + "learning_rate": 2.959325068312338e-05, + "loss": 2.0917, + "step": 14105000 + }, + { + "epoch": 40.83, + "learning_rate": 2.9592527035476104e-05, + "loss": 2.0462, + "step": 14105500 + }, + { + "epoch": 40.83, + "learning_rate": 2.9591803387828826e-05, + "loss": 2.0611, + "step": 14106000 + }, + { + "epoch": 40.83, + "learning_rate": 2.9591079740181548e-05, + "loss": 2.0611, + "step": 14106500 + }, + { + "epoch": 40.83, + "learning_rate": 2.9590357539829567e-05, + "loss": 2.0633, + "step": 14107000 + }, + { + "epoch": 40.84, + "learning_rate": 2.9589635339477583e-05, + "loss": 2.0379, + "step": 14107500 + }, + { + "epoch": 40.84, + "learning_rate": 2.9588911691830308e-05, + "loss": 2.0683, + "step": 14108000 + }, + { + "epoch": 40.84, + "learning_rate": 2.958818804418303e-05, + "loss": 2.0338, + "step": 14108500 + }, + { + "epoch": 40.84, + "learning_rate": 2.958746439653576e-05, + "loss": 2.0505, + "step": 14109000 + }, + { + "epoch": 40.84, + "learning_rate": 2.958674074888848e-05, + "loss": 2.0403, + "step": 14109500 + }, + { + "epoch": 40.84, + "learning_rate": 2.9586017101241204e-05, + "loss": 2.068, + "step": 14110000 + }, + { + "epoch": 40.84, + "learning_rate": 2.9585293453593926e-05, + "loss": 2.0503, + "step": 14110500 + }, + { + "epoch": 40.85, + "learning_rate": 2.958456980594665e-05, + "loss": 2.0686, + "step": 14111000 + }, + { + "epoch": 40.85, + "learning_rate": 2.958384615829937e-05, + "loss": 2.0834, + "step": 14111500 + }, + { + "epoch": 40.85, + "learning_rate": 2.9583122510652096e-05, + "loss": 2.051, + "step": 14112000 + }, + { + "epoch": 40.85, + "learning_rate": 2.958239886300482e-05, + "loss": 2.0634, + "step": 14112500 + }, + { + "epoch": 40.85, + "learning_rate": 2.958167521535754e-05, + "loss": 2.0398, + "step": 14113000 + }, + { + "epoch": 40.85, + "learning_rate": 2.9580951567710263e-05, + "loss": 2.038, + "step": 14113500 + }, + { + "epoch": 40.85, + "learning_rate": 2.9580230814653575e-05, + "loss": 2.0825, + "step": 14114000 + }, + { + "epoch": 40.86, + "learning_rate": 2.9579507167006297e-05, + "loss": 2.0483, + "step": 14114500 + }, + { + "epoch": 40.86, + "learning_rate": 2.957878351935902e-05, + "loss": 2.0476, + "step": 14115000 + }, + { + "epoch": 40.86, + "learning_rate": 2.9578059871711745e-05, + "loss": 2.0346, + "step": 14115500 + }, + { + "epoch": 40.86, + "learning_rate": 2.957733622406447e-05, + "loss": 2.0678, + "step": 14116000 + }, + { + "epoch": 40.86, + "learning_rate": 2.9576612576417197e-05, + "loss": 2.057, + "step": 14116500 + }, + { + "epoch": 40.86, + "learning_rate": 2.957588892876992e-05, + "loss": 2.0747, + "step": 14117000 + }, + { + "epoch": 40.86, + "learning_rate": 2.9575166728417934e-05, + "loss": 2.0443, + "step": 14117500 + }, + { + "epoch": 40.87, + "learning_rate": 2.957444308077066e-05, + "loss": 2.0506, + "step": 14118000 + }, + { + "epoch": 40.87, + "learning_rate": 2.9573719433123382e-05, + "loss": 2.0502, + "step": 14118500 + }, + { + "epoch": 40.87, + "learning_rate": 2.9572995785476104e-05, + "loss": 2.0598, + "step": 14119000 + }, + { + "epoch": 40.87, + "learning_rate": 2.9572272137828827e-05, + "loss": 2.0255, + "step": 14119500 + }, + { + "epoch": 40.87, + "learning_rate": 2.957154849018155e-05, + "loss": 2.0674, + "step": 14120000 + }, + { + "epoch": 40.87, + "learning_rate": 2.957082484253427e-05, + "loss": 2.0184, + "step": 14120500 + }, + { + "epoch": 40.87, + "learning_rate": 2.9570101194886997e-05, + "loss": 2.0621, + "step": 14121000 + }, + { + "epoch": 40.88, + "learning_rate": 2.956937754723972e-05, + "loss": 2.0409, + "step": 14121500 + }, + { + "epoch": 40.88, + "learning_rate": 2.9568658241478325e-05, + "loss": 2.0433, + "step": 14122000 + }, + { + "epoch": 40.88, + "learning_rate": 2.9567934593831047e-05, + "loss": 2.0701, + "step": 14122500 + }, + { + "epoch": 40.88, + "learning_rate": 2.9567210946183772e-05, + "loss": 2.0428, + "step": 14123000 + }, + { + "epoch": 40.88, + "learning_rate": 2.9566488745831788e-05, + "loss": 2.0908, + "step": 14123500 + }, + { + "epoch": 40.88, + "learning_rate": 2.9565766545479807e-05, + "loss": 2.0541, + "step": 14124000 + }, + { + "epoch": 40.88, + "learning_rate": 2.9565042897832536e-05, + "loss": 2.0566, + "step": 14124500 + }, + { + "epoch": 40.89, + "learning_rate": 2.9564319250185258e-05, + "loss": 2.0541, + "step": 14125000 + }, + { + "epoch": 40.89, + "learning_rate": 2.956359560253798e-05, + "loss": 2.0548, + "step": 14125500 + }, + { + "epoch": 40.89, + "learning_rate": 2.9562871954890703e-05, + "loss": 2.0532, + "step": 14126000 + }, + { + "epoch": 40.89, + "learning_rate": 2.9562148307243425e-05, + "loss": 2.0386, + "step": 14126500 + }, + { + "epoch": 40.89, + "learning_rate": 2.9561424659596147e-05, + "loss": 2.0751, + "step": 14127000 + }, + { + "epoch": 40.89, + "learning_rate": 2.9560701011948873e-05, + "loss": 2.0571, + "step": 14127500 + }, + { + "epoch": 40.89, + "learning_rate": 2.9559977364301595e-05, + "loss": 2.0598, + "step": 14128000 + }, + { + "epoch": 40.9, + "learning_rate": 2.9559253716654317e-05, + "loss": 2.0355, + "step": 14128500 + }, + { + "epoch": 40.9, + "learning_rate": 2.955853006900704e-05, + "loss": 2.0536, + "step": 14129000 + }, + { + "epoch": 40.9, + "learning_rate": 2.955780642135976e-05, + "loss": 2.0491, + "step": 14129500 + }, + { + "epoch": 40.9, + "learning_rate": 2.9557082773712487e-05, + "loss": 2.0396, + "step": 14130000 + }, + { + "epoch": 40.9, + "learning_rate": 2.955635912606521e-05, + "loss": 2.0537, + "step": 14130500 + }, + { + "epoch": 40.9, + "learning_rate": 2.9555635478417932e-05, + "loss": 2.067, + "step": 14131000 + }, + { + "epoch": 40.9, + "learning_rate": 2.955491183077066e-05, + "loss": 2.0439, + "step": 14131500 + }, + { + "epoch": 40.91, + "learning_rate": 2.9554188183123383e-05, + "loss": 2.0379, + "step": 14132000 + }, + { + "epoch": 40.91, + "learning_rate": 2.9553464535476105e-05, + "loss": 2.0475, + "step": 14132500 + }, + { + "epoch": 40.91, + "learning_rate": 2.9552740887828827e-05, + "loss": 2.0662, + "step": 14133000 + }, + { + "epoch": 40.91, + "learning_rate": 2.955201724018155e-05, + "loss": 2.0478, + "step": 14133500 + }, + { + "epoch": 40.91, + "learning_rate": 2.9551293592534275e-05, + "loss": 2.0467, + "step": 14134000 + }, + { + "epoch": 40.91, + "learning_rate": 2.9550569944886998e-05, + "loss": 2.0524, + "step": 14134500 + }, + { + "epoch": 40.92, + "learning_rate": 2.9549847744535013e-05, + "loss": 2.0786, + "step": 14135000 + }, + { + "epoch": 40.92, + "learning_rate": 2.9549124096887735e-05, + "loss": 2.0659, + "step": 14135500 + }, + { + "epoch": 40.92, + "learning_rate": 2.954840044924046e-05, + "loss": 2.0617, + "step": 14136000 + }, + { + "epoch": 40.92, + "learning_rate": 2.9547676801593183e-05, + "loss": 2.0655, + "step": 14136500 + }, + { + "epoch": 40.92, + "learning_rate": 2.95469546012412e-05, + "loss": 2.05, + "step": 14137000 + }, + { + "epoch": 40.92, + "learning_rate": 2.9546230953593924e-05, + "loss": 2.035, + "step": 14137500 + }, + { + "epoch": 40.92, + "learning_rate": 2.9545507305946647e-05, + "loss": 2.0333, + "step": 14138000 + }, + { + "epoch": 40.93, + "learning_rate": 2.9544783658299376e-05, + "loss": 2.0425, + "step": 14138500 + }, + { + "epoch": 40.93, + "learning_rate": 2.9544060010652098e-05, + "loss": 2.0759, + "step": 14139000 + }, + { + "epoch": 40.93, + "learning_rate": 2.954333636300482e-05, + "loss": 2.0426, + "step": 14139500 + }, + { + "epoch": 40.93, + "learning_rate": 2.9542612715357542e-05, + "loss": 2.0702, + "step": 14140000 + }, + { + "epoch": 40.93, + "learning_rate": 2.9541889067710265e-05, + "loss": 2.0498, + "step": 14140500 + }, + { + "epoch": 40.93, + "learning_rate": 2.9541165420062987e-05, + "loss": 2.0564, + "step": 14141000 + }, + { + "epoch": 40.93, + "learning_rate": 2.9540443219711006e-05, + "loss": 2.0458, + "step": 14141500 + }, + { + "epoch": 40.94, + "learning_rate": 2.9539721019359025e-05, + "loss": 2.0467, + "step": 14142000 + }, + { + "epoch": 40.94, + "learning_rate": 2.9538997371711747e-05, + "loss": 2.0302, + "step": 14142500 + }, + { + "epoch": 40.94, + "learning_rate": 2.9538275171359762e-05, + "loss": 2.0612, + "step": 14143000 + }, + { + "epoch": 40.94, + "learning_rate": 2.9537551523712488e-05, + "loss": 2.024, + "step": 14143500 + }, + { + "epoch": 40.94, + "learning_rate": 2.953682787606521e-05, + "loss": 2.034, + "step": 14144000 + }, + { + "epoch": 40.94, + "learning_rate": 2.9536105675713226e-05, + "loss": 2.0255, + "step": 14144500 + }, + { + "epoch": 40.94, + "learning_rate": 2.953538202806595e-05, + "loss": 2.0718, + "step": 14145000 + }, + { + "epoch": 40.95, + "learning_rate": 2.9534658380418674e-05, + "loss": 2.0298, + "step": 14145500 + }, + { + "epoch": 40.95, + "learning_rate": 2.9533934732771396e-05, + "loss": 2.0536, + "step": 14146000 + }, + { + "epoch": 40.95, + "learning_rate": 2.9533211085124125e-05, + "loss": 2.0415, + "step": 14146500 + }, + { + "epoch": 40.95, + "learning_rate": 2.9532487437476847e-05, + "loss": 2.0468, + "step": 14147000 + }, + { + "epoch": 40.95, + "learning_rate": 2.953176378982957e-05, + "loss": 2.0629, + "step": 14147500 + }, + { + "epoch": 40.95, + "learning_rate": 2.953104014218229e-05, + "loss": 2.043, + "step": 14148000 + }, + { + "epoch": 40.95, + "learning_rate": 2.9530316494535014e-05, + "loss": 2.0571, + "step": 14148500 + }, + { + "epoch": 40.96, + "learning_rate": 2.952959284688774e-05, + "loss": 2.0432, + "step": 14149000 + }, + { + "epoch": 40.96, + "learning_rate": 2.9528869199240462e-05, + "loss": 2.0612, + "step": 14149500 + }, + { + "epoch": 40.96, + "learning_rate": 2.9528146998888477e-05, + "loss": 2.0465, + "step": 14150000 + }, + { + "epoch": 40.96, + "learning_rate": 2.9527424798536496e-05, + "loss": 2.0516, + "step": 14150500 + }, + { + "epoch": 40.96, + "learning_rate": 2.952670115088922e-05, + "loss": 2.0598, + "step": 14151000 + }, + { + "epoch": 40.96, + "learning_rate": 2.952597750324194e-05, + "loss": 2.0683, + "step": 14151500 + }, + { + "epoch": 40.96, + "learning_rate": 2.9525253855594663e-05, + "loss": 2.0518, + "step": 14152000 + }, + { + "epoch": 40.97, + "learning_rate": 2.952453020794739e-05, + "loss": 2.0671, + "step": 14152500 + }, + { + "epoch": 40.97, + "learning_rate": 2.952380656030011e-05, + "loss": 2.0462, + "step": 14153000 + }, + { + "epoch": 40.97, + "learning_rate": 2.952308291265284e-05, + "loss": 2.0364, + "step": 14153500 + }, + { + "epoch": 40.97, + "learning_rate": 2.9522359265005562e-05, + "loss": 2.0587, + "step": 14154000 + }, + { + "epoch": 40.97, + "learning_rate": 2.9521637064653578e-05, + "loss": 2.0386, + "step": 14154500 + }, + { + "epoch": 40.97, + "learning_rate": 2.9520914864301597e-05, + "loss": 2.0444, + "step": 14155000 + }, + { + "epoch": 40.97, + "learning_rate": 2.952019121665432e-05, + "loss": 2.0791, + "step": 14155500 + }, + { + "epoch": 40.98, + "learning_rate": 2.951946756900704e-05, + "loss": 2.0472, + "step": 14156000 + }, + { + "epoch": 40.98, + "learning_rate": 2.9518743921359767e-05, + "loss": 2.0553, + "step": 14156500 + }, + { + "epoch": 40.98, + "learning_rate": 2.951802027371249e-05, + "loss": 2.0551, + "step": 14157000 + }, + { + "epoch": 40.98, + "learning_rate": 2.951729662606521e-05, + "loss": 2.0383, + "step": 14157500 + }, + { + "epoch": 40.98, + "learning_rate": 2.9516574425713227e-05, + "loss": 2.0629, + "step": 14158000 + }, + { + "epoch": 40.98, + "learning_rate": 2.9515850778065952e-05, + "loss": 2.0662, + "step": 14158500 + }, + { + "epoch": 40.98, + "learning_rate": 2.9515127130418674e-05, + "loss": 2.0357, + "step": 14159000 + }, + { + "epoch": 40.99, + "learning_rate": 2.9514403482771397e-05, + "loss": 2.0244, + "step": 14159500 + }, + { + "epoch": 40.99, + "learning_rate": 2.951367983512412e-05, + "loss": 2.0499, + "step": 14160000 + }, + { + "epoch": 40.99, + "learning_rate": 2.951295618747684e-05, + "loss": 2.0675, + "step": 14160500 + }, + { + "epoch": 40.99, + "learning_rate": 2.9512232539829567e-05, + "loss": 2.0503, + "step": 14161000 + }, + { + "epoch": 40.99, + "learning_rate": 2.9511508892182292e-05, + "loss": 2.065, + "step": 14161500 + }, + { + "epoch": 40.99, + "learning_rate": 2.9510788139125605e-05, + "loss": 2.0618, + "step": 14162000 + }, + { + "epoch": 40.99, + "learning_rate": 2.9510065938773624e-05, + "loss": 2.0629, + "step": 14162500 + }, + { + "epoch": 41.0, + "learning_rate": 2.9509342291126346e-05, + "loss": 2.0609, + "step": 14163000 + }, + { + "epoch": 41.0, + "learning_rate": 2.9508618643479068e-05, + "loss": 2.0239, + "step": 14163500 + }, + { + "epoch": 41.0, + "learning_rate": 2.950789499583179e-05, + "loss": 2.0627, + "step": 14164000 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.6716259497639389, + "eval_accuracy_mlm": 0.6370840410644276, + "eval_accuracy_nsp": 0.8568652523224851, + "eval_loss": 2.169929027557373, + "eval_runtime": 331.2679, + "eval_samples_per_second": 1317.32, + "eval_steps_per_second": 54.889, + "step": 14164352 + }, + { + "epoch": 41.0, + "learning_rate": 2.9507171348184516e-05, + "loss": 2.0252, + "step": 14164500 + }, + { + "epoch": 41.0, + "learning_rate": 2.9506447700537238e-05, + "loss": 2.0292, + "step": 14165000 + }, + { + "epoch": 41.0, + "learning_rate": 2.950572405288996e-05, + "loss": 2.0558, + "step": 14165500 + }, + { + "epoch": 41.0, + "learning_rate": 2.9505000405242683e-05, + "loss": 2.0259, + "step": 14166000 + }, + { + "epoch": 41.01, + "learning_rate": 2.9504276757595405e-05, + "loss": 2.0301, + "step": 14166500 + }, + { + "epoch": 41.01, + "learning_rate": 2.950355310994813e-05, + "loss": 2.038, + "step": 14167000 + }, + { + "epoch": 41.01, + "learning_rate": 2.9502829462300853e-05, + "loss": 2.0093, + "step": 14167500 + }, + { + "epoch": 41.01, + "learning_rate": 2.9502105814653575e-05, + "loss": 2.0305, + "step": 14168000 + }, + { + "epoch": 41.01, + "learning_rate": 2.9501382167006297e-05, + "loss": 2.0267, + "step": 14168500 + }, + { + "epoch": 41.01, + "learning_rate": 2.9500658519359026e-05, + "loss": 2.0314, + "step": 14169000 + }, + { + "epoch": 41.01, + "learning_rate": 2.949993487171175e-05, + "loss": 2.0196, + "step": 14169500 + }, + { + "epoch": 41.02, + "learning_rate": 2.949921122406447e-05, + "loss": 2.0248, + "step": 14170000 + }, + { + "epoch": 41.02, + "learning_rate": 2.9498487576417193e-05, + "loss": 2.0372, + "step": 14170500 + }, + { + "epoch": 41.02, + "learning_rate": 2.9497766823360505e-05, + "loss": 2.0416, + "step": 14171000 + }, + { + "epoch": 41.02, + "learning_rate": 2.949704317571323e-05, + "loss": 2.0442, + "step": 14171500 + }, + { + "epoch": 41.02, + "learning_rate": 2.9496319528065953e-05, + "loss": 2.0304, + "step": 14172000 + }, + { + "epoch": 41.02, + "learning_rate": 2.9495595880418675e-05, + "loss": 2.047, + "step": 14172500 + }, + { + "epoch": 41.03, + "learning_rate": 2.949487368006669e-05, + "loss": 2.0282, + "step": 14173000 + }, + { + "epoch": 41.03, + "learning_rate": 2.9494150032419416e-05, + "loss": 2.0355, + "step": 14173500 + }, + { + "epoch": 41.03, + "learning_rate": 2.949342638477214e-05, + "loss": 2.0174, + "step": 14174000 + }, + { + "epoch": 41.03, + "learning_rate": 2.949270273712486e-05, + "loss": 1.9953, + "step": 14174500 + }, + { + "epoch": 41.03, + "learning_rate": 2.9491979089477583e-05, + "loss": 2.0373, + "step": 14175000 + }, + { + "epoch": 41.03, + "learning_rate": 2.9491255441830305e-05, + "loss": 2.0278, + "step": 14175500 + }, + { + "epoch": 41.03, + "learning_rate": 2.949053179418303e-05, + "loss": 2.0333, + "step": 14176000 + }, + { + "epoch": 41.04, + "learning_rate": 2.9489808146535757e-05, + "loss": 2.0023, + "step": 14176500 + }, + { + "epoch": 41.04, + "learning_rate": 2.9489084498888482e-05, + "loss": 1.9903, + "step": 14177000 + }, + { + "epoch": 41.04, + "learning_rate": 2.9488360851241204e-05, + "loss": 2.0444, + "step": 14177500 + }, + { + "epoch": 41.04, + "learning_rate": 2.9487637203593927e-05, + "loss": 2.0268, + "step": 14178000 + }, + { + "epoch": 41.04, + "learning_rate": 2.948691355594665e-05, + "loss": 2.0275, + "step": 14178500 + }, + { + "epoch": 41.04, + "learning_rate": 2.948618990829937e-05, + "loss": 2.0297, + "step": 14179000 + }, + { + "epoch": 41.04, + "learning_rate": 2.9485466260652093e-05, + "loss": 2.0383, + "step": 14179500 + }, + { + "epoch": 41.05, + "learning_rate": 2.948474261300482e-05, + "loss": 2.0256, + "step": 14180000 + }, + { + "epoch": 41.05, + "learning_rate": 2.948401896535754e-05, + "loss": 2.0112, + "step": 14180500 + }, + { + "epoch": 41.05, + "learning_rate": 2.9483295317710264e-05, + "loss": 2.0475, + "step": 14181000 + }, + { + "epoch": 41.05, + "learning_rate": 2.9482573117358282e-05, + "loss": 2.0332, + "step": 14181500 + }, + { + "epoch": 41.05, + "learning_rate": 2.9481849469711005e-05, + "loss": 2.0527, + "step": 14182000 + }, + { + "epoch": 41.05, + "learning_rate": 2.9481125822063727e-05, + "loss": 2.0443, + "step": 14182500 + }, + { + "epoch": 41.05, + "learning_rate": 2.948040217441645e-05, + "loss": 2.0133, + "step": 14183000 + }, + { + "epoch": 41.06, + "learning_rate": 2.9479678526769178e-05, + "loss": 2.0414, + "step": 14183500 + }, + { + "epoch": 41.06, + "learning_rate": 2.94789548791219e-05, + "loss": 2.0291, + "step": 14184000 + }, + { + "epoch": 41.06, + "learning_rate": 2.947823267876992e-05, + "loss": 2.0167, + "step": 14184500 + }, + { + "epoch": 41.06, + "learning_rate": 2.947750903112264e-05, + "loss": 2.0188, + "step": 14185000 + }, + { + "epoch": 41.06, + "learning_rate": 2.9476785383475364e-05, + "loss": 2.0144, + "step": 14185500 + }, + { + "epoch": 41.06, + "learning_rate": 2.9476061735828086e-05, + "loss": 2.0406, + "step": 14186000 + }, + { + "epoch": 41.06, + "learning_rate": 2.9475338088180808e-05, + "loss": 2.0518, + "step": 14186500 + }, + { + "epoch": 41.07, + "learning_rate": 2.9474614440533534e-05, + "loss": 2.0462, + "step": 14187000 + }, + { + "epoch": 41.07, + "learning_rate": 2.947389224018155e-05, + "loss": 2.0521, + "step": 14187500 + }, + { + "epoch": 41.07, + "learning_rate": 2.947316859253427e-05, + "loss": 2.0598, + "step": 14188000 + }, + { + "epoch": 41.07, + "learning_rate": 2.9472444944886994e-05, + "loss": 2.0191, + "step": 14188500 + }, + { + "epoch": 41.07, + "learning_rate": 2.947172129723972e-05, + "loss": 2.0168, + "step": 14189000 + }, + { + "epoch": 41.07, + "learning_rate": 2.9471000544183032e-05, + "loss": 2.0318, + "step": 14189500 + }, + { + "epoch": 41.07, + "learning_rate": 2.9470276896535754e-05, + "loss": 2.0274, + "step": 14190000 + }, + { + "epoch": 41.08, + "learning_rate": 2.9469553248888476e-05, + "loss": 2.0667, + "step": 14190500 + }, + { + "epoch": 41.08, + "learning_rate": 2.9468829601241205e-05, + "loss": 2.0326, + "step": 14191000 + }, + { + "epoch": 41.08, + "learning_rate": 2.9468105953593928e-05, + "loss": 2.0261, + "step": 14191500 + }, + { + "epoch": 41.08, + "learning_rate": 2.946738230594665e-05, + "loss": 2.0441, + "step": 14192000 + }, + { + "epoch": 41.08, + "learning_rate": 2.9466658658299372e-05, + "loss": 2.0387, + "step": 14192500 + }, + { + "epoch": 41.08, + "learning_rate": 2.9465935010652098e-05, + "loss": 2.0509, + "step": 14193000 + }, + { + "epoch": 41.08, + "learning_rate": 2.9465212810300113e-05, + "loss": 2.0244, + "step": 14193500 + }, + { + "epoch": 41.09, + "learning_rate": 2.9464489162652835e-05, + "loss": 2.0403, + "step": 14194000 + }, + { + "epoch": 41.09, + "learning_rate": 2.9463765515005558e-05, + "loss": 2.0152, + "step": 14194500 + }, + { + "epoch": 41.09, + "learning_rate": 2.9463041867358283e-05, + "loss": 2.0724, + "step": 14195000 + }, + { + "epoch": 41.09, + "learning_rate": 2.9462318219711005e-05, + "loss": 2.0321, + "step": 14195500 + }, + { + "epoch": 41.09, + "learning_rate": 2.9461594572063728e-05, + "loss": 2.0344, + "step": 14196000 + }, + { + "epoch": 41.09, + "learning_rate": 2.946087092441645e-05, + "loss": 2.0308, + "step": 14196500 + }, + { + "epoch": 41.09, + "learning_rate": 2.9460147276769172e-05, + "loss": 2.0229, + "step": 14197000 + }, + { + "epoch": 41.1, + "learning_rate": 2.9459423629121898e-05, + "loss": 2.0373, + "step": 14197500 + }, + { + "epoch": 41.1, + "learning_rate": 2.945869998147462e-05, + "loss": 2.038, + "step": 14198000 + }, + { + "epoch": 41.1, + "learning_rate": 2.9457979228417932e-05, + "loss": 2.0205, + "step": 14198500 + }, + { + "epoch": 41.1, + "learning_rate": 2.9457258475361248e-05, + "loss": 2.054, + "step": 14199000 + }, + { + "epoch": 41.1, + "learning_rate": 2.9456534827713974e-05, + "loss": 2.0354, + "step": 14199500 + }, + { + "epoch": 41.1, + "learning_rate": 2.9455811180066696e-05, + "loss": 2.0404, + "step": 14200000 + }, + { + "epoch": 41.1, + "learning_rate": 2.9455087532419418e-05, + "loss": 2.0229, + "step": 14200500 + }, + { + "epoch": 41.11, + "learning_rate": 2.945436388477214e-05, + "loss": 2.0462, + "step": 14201000 + }, + { + "epoch": 41.11, + "learning_rate": 2.9453640237124863e-05, + "loss": 2.0287, + "step": 14201500 + }, + { + "epoch": 41.11, + "learning_rate": 2.9452916589477585e-05, + "loss": 2.0327, + "step": 14202000 + }, + { + "epoch": 41.11, + "learning_rate": 2.945219294183031e-05, + "loss": 2.0494, + "step": 14202500 + }, + { + "epoch": 41.11, + "learning_rate": 2.9451470741478326e-05, + "loss": 2.0361, + "step": 14203000 + }, + { + "epoch": 41.11, + "learning_rate": 2.9450747093831048e-05, + "loss": 2.0342, + "step": 14203500 + }, + { + "epoch": 41.11, + "learning_rate": 2.945002344618377e-05, + "loss": 2.0383, + "step": 14204000 + }, + { + "epoch": 41.12, + "learning_rate": 2.9449299798536496e-05, + "loss": 2.0197, + "step": 14204500 + }, + { + "epoch": 41.12, + "learning_rate": 2.9448576150889218e-05, + "loss": 2.0289, + "step": 14205000 + }, + { + "epoch": 41.12, + "learning_rate": 2.944785250324194e-05, + "loss": 2.0386, + "step": 14205500 + }, + { + "epoch": 41.12, + "learning_rate": 2.9447128855594663e-05, + "loss": 2.0362, + "step": 14206000 + }, + { + "epoch": 41.12, + "learning_rate": 2.9446405207947392e-05, + "loss": 2.0308, + "step": 14206500 + }, + { + "epoch": 41.12, + "learning_rate": 2.9445681560300114e-05, + "loss": 2.0423, + "step": 14207000 + }, + { + "epoch": 41.12, + "learning_rate": 2.9444957912652836e-05, + "loss": 2.0322, + "step": 14207500 + }, + { + "epoch": 41.13, + "learning_rate": 2.9444235712300855e-05, + "loss": 2.0588, + "step": 14208000 + }, + { + "epoch": 41.13, + "learning_rate": 2.9443512064653577e-05, + "loss": 2.0297, + "step": 14208500 + }, + { + "epoch": 41.13, + "learning_rate": 2.94427884170063e-05, + "loss": 2.0157, + "step": 14209000 + }, + { + "epoch": 41.13, + "learning_rate": 2.9442064769359022e-05, + "loss": 2.0294, + "step": 14209500 + }, + { + "epoch": 41.13, + "learning_rate": 2.9441341121711747e-05, + "loss": 2.0437, + "step": 14210000 + }, + { + "epoch": 41.13, + "learning_rate": 2.944061747406447e-05, + "loss": 2.0391, + "step": 14210500 + }, + { + "epoch": 41.14, + "learning_rate": 2.9439893826417192e-05, + "loss": 2.0404, + "step": 14211000 + }, + { + "epoch": 41.14, + "learning_rate": 2.9439170178769914e-05, + "loss": 2.0221, + "step": 14211500 + }, + { + "epoch": 41.14, + "learning_rate": 2.9438446531122636e-05, + "loss": 2.0401, + "step": 14212000 + }, + { + "epoch": 41.14, + "learning_rate": 2.9437724330770655e-05, + "loss": 2.0364, + "step": 14212500 + }, + { + "epoch": 41.14, + "learning_rate": 2.9437000683123378e-05, + "loss": 2.0488, + "step": 14213000 + }, + { + "epoch": 41.14, + "learning_rate": 2.9436277035476107e-05, + "loss": 2.0412, + "step": 14213500 + }, + { + "epoch": 41.14, + "learning_rate": 2.943555338782883e-05, + "loss": 2.0482, + "step": 14214000 + }, + { + "epoch": 41.15, + "learning_rate": 2.9434831187476848e-05, + "loss": 2.0316, + "step": 14214500 + }, + { + "epoch": 41.15, + "learning_rate": 2.943410753982957e-05, + "loss": 1.9984, + "step": 14215000 + }, + { + "epoch": 41.15, + "learning_rate": 2.9433383892182292e-05, + "loss": 2.0581, + "step": 14215500 + }, + { + "epoch": 41.15, + "learning_rate": 2.943266169183031e-05, + "loss": 2.0289, + "step": 14216000 + }, + { + "epoch": 41.15, + "learning_rate": 2.9431938044183033e-05, + "loss": 2.0222, + "step": 14216500 + }, + { + "epoch": 41.15, + "learning_rate": 2.9431214396535756e-05, + "loss": 2.0472, + "step": 14217000 + }, + { + "epoch": 41.15, + "learning_rate": 2.9430490748888478e-05, + "loss": 2.0593, + "step": 14217500 + }, + { + "epoch": 41.16, + "learning_rate": 2.94297671012412e-05, + "loss": 2.0209, + "step": 14218000 + }, + { + "epoch": 41.16, + "learning_rate": 2.9429043453593926e-05, + "loss": 2.038, + "step": 14218500 + }, + { + "epoch": 41.16, + "learning_rate": 2.9428319805946648e-05, + "loss": 2.0427, + "step": 14219000 + }, + { + "epoch": 41.16, + "learning_rate": 2.9427597605594664e-05, + "loss": 2.0403, + "step": 14219500 + }, + { + "epoch": 41.16, + "learning_rate": 2.9426873957947386e-05, + "loss": 2.0303, + "step": 14220000 + }, + { + "epoch": 41.16, + "learning_rate": 2.942615031030011e-05, + "loss": 2.0412, + "step": 14220500 + }, + { + "epoch": 41.16, + "learning_rate": 2.9425426662652837e-05, + "loss": 2.0537, + "step": 14221000 + }, + { + "epoch": 41.17, + "learning_rate": 2.9424703015005563e-05, + "loss": 2.031, + "step": 14221500 + }, + { + "epoch": 41.17, + "learning_rate": 2.9423979367358285e-05, + "loss": 2.0252, + "step": 14222000 + }, + { + "epoch": 41.17, + "learning_rate": 2.9423255719711007e-05, + "loss": 2.0444, + "step": 14222500 + }, + { + "epoch": 41.17, + "learning_rate": 2.942253207206373e-05, + "loss": 2.0232, + "step": 14223000 + }, + { + "epoch": 41.17, + "learning_rate": 2.9421809871711748e-05, + "loss": 2.0544, + "step": 14223500 + }, + { + "epoch": 41.17, + "learning_rate": 2.942108622406447e-05, + "loss": 2.0487, + "step": 14224000 + }, + { + "epoch": 41.17, + "learning_rate": 2.9420362576417193e-05, + "loss": 2.035, + "step": 14224500 + }, + { + "epoch": 41.18, + "learning_rate": 2.9419638928769915e-05, + "loss": 2.0461, + "step": 14225000 + }, + { + "epoch": 41.18, + "learning_rate": 2.9418919623008524e-05, + "loss": 2.0592, + "step": 14225500 + }, + { + "epoch": 41.18, + "learning_rate": 2.9418195975361246e-05, + "loss": 2.0388, + "step": 14226000 + }, + { + "epoch": 41.18, + "learning_rate": 2.941747232771397e-05, + "loss": 2.0365, + "step": 14226500 + }, + { + "epoch": 41.18, + "learning_rate": 2.941674868006669e-05, + "loss": 2.0408, + "step": 14227000 + }, + { + "epoch": 41.18, + "learning_rate": 2.941602647971471e-05, + "loss": 1.998, + "step": 14227500 + }, + { + "epoch": 41.18, + "learning_rate": 2.9415302832067432e-05, + "loss": 2.0579, + "step": 14228000 + }, + { + "epoch": 41.19, + "learning_rate": 2.9414579184420154e-05, + "loss": 2.0452, + "step": 14228500 + }, + { + "epoch": 41.19, + "learning_rate": 2.9413855536772883e-05, + "loss": 2.0192, + "step": 14229000 + }, + { + "epoch": 41.19, + "learning_rate": 2.9413131889125605e-05, + "loss": 2.0282, + "step": 14229500 + }, + { + "epoch": 41.19, + "learning_rate": 2.9412409688773624e-05, + "loss": 2.0577, + "step": 14230000 + }, + { + "epoch": 41.19, + "learning_rate": 2.9411686041126346e-05, + "loss": 2.0425, + "step": 14230500 + }, + { + "epoch": 41.19, + "learning_rate": 2.941096239347907e-05, + "loss": 2.0099, + "step": 14231000 + }, + { + "epoch": 41.19, + "learning_rate": 2.941023874583179e-05, + "loss": 2.0373, + "step": 14231500 + }, + { + "epoch": 41.2, + "learning_rate": 2.9409515098184513e-05, + "loss": 2.0134, + "step": 14232000 + }, + { + "epoch": 41.2, + "learning_rate": 2.940879145053724e-05, + "loss": 2.0508, + "step": 14232500 + }, + { + "epoch": 41.2, + "learning_rate": 2.940806780288996e-05, + "loss": 2.0442, + "step": 14233000 + }, + { + "epoch": 41.2, + "learning_rate": 2.9407344155242683e-05, + "loss": 2.0356, + "step": 14233500 + }, + { + "epoch": 41.2, + "learning_rate": 2.9406620507595405e-05, + "loss": 2.0477, + "step": 14234000 + }, + { + "epoch": 41.2, + "learning_rate": 2.9405896859948128e-05, + "loss": 2.0242, + "step": 14234500 + }, + { + "epoch": 41.2, + "learning_rate": 2.9405173212300853e-05, + "loss": 2.0227, + "step": 14235000 + }, + { + "epoch": 41.21, + "learning_rate": 2.9404449564653576e-05, + "loss": 2.057, + "step": 14235500 + }, + { + "epoch": 41.21, + "learning_rate": 2.9403725917006298e-05, + "loss": 2.0369, + "step": 14236000 + }, + { + "epoch": 41.21, + "learning_rate": 2.940300371665432e-05, + "loss": 2.0459, + "step": 14236500 + }, + { + "epoch": 41.21, + "learning_rate": 2.9402280069007042e-05, + "loss": 2.0356, + "step": 14237000 + }, + { + "epoch": 41.21, + "learning_rate": 2.9401556421359765e-05, + "loss": 2.0512, + "step": 14237500 + }, + { + "epoch": 41.21, + "learning_rate": 2.940083277371249e-05, + "loss": 2.0497, + "step": 14238000 + }, + { + "epoch": 41.21, + "learning_rate": 2.9400109126065212e-05, + "loss": 2.0373, + "step": 14238500 + }, + { + "epoch": 41.22, + "learning_rate": 2.9399385478417935e-05, + "loss": 2.0159, + "step": 14239000 + }, + { + "epoch": 41.22, + "learning_rate": 2.9398661830770657e-05, + "loss": 2.0199, + "step": 14239500 + }, + { + "epoch": 41.22, + "learning_rate": 2.939793818312338e-05, + "loss": 2.0493, + "step": 14240000 + }, + { + "epoch": 41.22, + "learning_rate": 2.93972145354761e-05, + "loss": 2.0372, + "step": 14240500 + }, + { + "epoch": 41.22, + "learning_rate": 2.9396490887828827e-05, + "loss": 2.0532, + "step": 14241000 + }, + { + "epoch": 41.22, + "learning_rate": 2.939577013477214e-05, + "loss": 2.0465, + "step": 14241500 + }, + { + "epoch": 41.22, + "learning_rate": 2.939504648712486e-05, + "loss": 2.0258, + "step": 14242000 + }, + { + "epoch": 41.23, + "learning_rate": 2.9394322839477584e-05, + "loss": 2.0057, + "step": 14242500 + }, + { + "epoch": 41.23, + "learning_rate": 2.9393599191830306e-05, + "loss": 2.0447, + "step": 14243000 + }, + { + "epoch": 41.23, + "learning_rate": 2.9392875544183028e-05, + "loss": 2.0533, + "step": 14243500 + }, + { + "epoch": 41.23, + "learning_rate": 2.9392151896535757e-05, + "loss": 2.006, + "step": 14244000 + }, + { + "epoch": 41.23, + "learning_rate": 2.939142824888848e-05, + "loss": 2.0251, + "step": 14244500 + }, + { + "epoch": 41.23, + "learning_rate": 2.9390704601241205e-05, + "loss": 2.0238, + "step": 14245000 + }, + { + "epoch": 41.23, + "learning_rate": 2.9389980953593927e-05, + "loss": 2.0268, + "step": 14245500 + }, + { + "epoch": 41.24, + "learning_rate": 2.938925730594665e-05, + "loss": 2.0337, + "step": 14246000 + }, + { + "epoch": 41.24, + "learning_rate": 2.9388533658299372e-05, + "loss": 2.0455, + "step": 14246500 + }, + { + "epoch": 41.24, + "learning_rate": 2.9387812905242684e-05, + "loss": 2.0372, + "step": 14247000 + }, + { + "epoch": 41.24, + "learning_rate": 2.9387089257595406e-05, + "loss": 2.0379, + "step": 14247500 + }, + { + "epoch": 41.24, + "learning_rate": 2.938636560994813e-05, + "loss": 2.0414, + "step": 14248000 + }, + { + "epoch": 41.24, + "learning_rate": 2.9385641962300854e-05, + "loss": 2.0365, + "step": 14248500 + }, + { + "epoch": 41.25, + "learning_rate": 2.9384918314653576e-05, + "loss": 2.0531, + "step": 14249000 + }, + { + "epoch": 41.25, + "learning_rate": 2.93841946670063e-05, + "loss": 2.0188, + "step": 14249500 + }, + { + "epoch": 41.25, + "learning_rate": 2.938347101935902e-05, + "loss": 2.0293, + "step": 14250000 + }, + { + "epoch": 41.25, + "learning_rate": 2.9382747371711743e-05, + "loss": 2.0461, + "step": 14250500 + }, + { + "epoch": 41.25, + "learning_rate": 2.9382023724064472e-05, + "loss": 2.0478, + "step": 14251000 + }, + { + "epoch": 41.25, + "learning_rate": 2.938130152371249e-05, + "loss": 2.024, + "step": 14251500 + }, + { + "epoch": 41.25, + "learning_rate": 2.9380577876065213e-05, + "loss": 2.04, + "step": 14252000 + }, + { + "epoch": 41.26, + "learning_rate": 2.9379854228417935e-05, + "loss": 2.0208, + "step": 14252500 + }, + { + "epoch": 41.26, + "learning_rate": 2.9379130580770658e-05, + "loss": 2.0497, + "step": 14253000 + }, + { + "epoch": 41.26, + "learning_rate": 2.937840693312338e-05, + "loss": 2.0463, + "step": 14253500 + }, + { + "epoch": 41.26, + "learning_rate": 2.9377683285476106e-05, + "loss": 2.0468, + "step": 14254000 + }, + { + "epoch": 41.26, + "learning_rate": 2.9376959637828828e-05, + "loss": 2.013, + "step": 14254500 + }, + { + "epoch": 41.26, + "learning_rate": 2.937623599018155e-05, + "loss": 2.0199, + "step": 14255000 + }, + { + "epoch": 41.26, + "learning_rate": 2.9375512342534272e-05, + "loss": 2.037, + "step": 14255500 + }, + { + "epoch": 41.27, + "learning_rate": 2.937479014218229e-05, + "loss": 2.0414, + "step": 14256000 + }, + { + "epoch": 41.27, + "learning_rate": 2.9374066494535013e-05, + "loss": 2.0209, + "step": 14256500 + }, + { + "epoch": 41.27, + "learning_rate": 2.9373342846887736e-05, + "loss": 2.0469, + "step": 14257000 + }, + { + "epoch": 41.27, + "learning_rate": 2.9372619199240458e-05, + "loss": 2.0345, + "step": 14257500 + }, + { + "epoch": 41.27, + "learning_rate": 2.9371896998888477e-05, + "loss": 2.0496, + "step": 14258000 + }, + { + "epoch": 41.27, + "learning_rate": 2.9371173351241206e-05, + "loss": 2.0342, + "step": 14258500 + }, + { + "epoch": 41.27, + "learning_rate": 2.9370449703593928e-05, + "loss": 2.0495, + "step": 14259000 + }, + { + "epoch": 41.28, + "learning_rate": 2.936972605594665e-05, + "loss": 2.0525, + "step": 14259500 + }, + { + "epoch": 41.28, + "learning_rate": 2.936900385559467e-05, + "loss": 2.0451, + "step": 14260000 + }, + { + "epoch": 41.28, + "learning_rate": 2.9368281655242685e-05, + "loss": 2.0404, + "step": 14260500 + }, + { + "epoch": 41.28, + "learning_rate": 2.9367558007595407e-05, + "loss": 2.0594, + "step": 14261000 + }, + { + "epoch": 41.28, + "learning_rate": 2.9366834359948133e-05, + "loss": 2.027, + "step": 14261500 + }, + { + "epoch": 41.28, + "learning_rate": 2.9366110712300855e-05, + "loss": 2.0096, + "step": 14262000 + }, + { + "epoch": 41.28, + "learning_rate": 2.9365387064653577e-05, + "loss": 2.0484, + "step": 14262500 + }, + { + "epoch": 41.29, + "learning_rate": 2.9364664864301593e-05, + "loss": 2.0154, + "step": 14263000 + }, + { + "epoch": 41.29, + "learning_rate": 2.936394121665432e-05, + "loss": 2.0449, + "step": 14263500 + }, + { + "epoch": 41.29, + "learning_rate": 2.936321756900704e-05, + "loss": 2.0524, + "step": 14264000 + }, + { + "epoch": 41.29, + "learning_rate": 2.9362493921359763e-05, + "loss": 2.0547, + "step": 14264500 + }, + { + "epoch": 41.29, + "learning_rate": 2.9361770273712485e-05, + "loss": 2.0217, + "step": 14265000 + }, + { + "epoch": 41.29, + "learning_rate": 2.9361048073360504e-05, + "loss": 2.0209, + "step": 14265500 + }, + { + "epoch": 41.29, + "learning_rate": 2.9360324425713226e-05, + "loss": 2.0519, + "step": 14266000 + }, + { + "epoch": 41.3, + "learning_rate": 2.9359600778065955e-05, + "loss": 2.0369, + "step": 14266500 + }, + { + "epoch": 41.3, + "learning_rate": 2.935887857771397e-05, + "loss": 2.0461, + "step": 14267000 + }, + { + "epoch": 41.3, + "learning_rate": 2.9358154930066696e-05, + "loss": 2.0258, + "step": 14267500 + }, + { + "epoch": 41.3, + "learning_rate": 2.935743128241942e-05, + "loss": 2.0282, + "step": 14268000 + }, + { + "epoch": 41.3, + "learning_rate": 2.935670763477214e-05, + "loss": 2.0376, + "step": 14268500 + }, + { + "epoch": 41.3, + "learning_rate": 2.9355983987124863e-05, + "loss": 2.0437, + "step": 14269000 + }, + { + "epoch": 41.3, + "learning_rate": 2.9355260339477585e-05, + "loss": 2.0486, + "step": 14269500 + }, + { + "epoch": 41.31, + "learning_rate": 2.9354536691830308e-05, + "loss": 2.0384, + "step": 14270000 + }, + { + "epoch": 41.31, + "learning_rate": 2.9353813044183033e-05, + "loss": 2.0348, + "step": 14270500 + }, + { + "epoch": 41.31, + "learning_rate": 2.9353089396535755e-05, + "loss": 2.0374, + "step": 14271000 + }, + { + "epoch": 41.31, + "learning_rate": 2.9352365748888478e-05, + "loss": 2.0285, + "step": 14271500 + }, + { + "epoch": 41.31, + "learning_rate": 2.9351643548536493e-05, + "loss": 2.0374, + "step": 14272000 + }, + { + "epoch": 41.31, + "learning_rate": 2.935091990088922e-05, + "loss": 2.0249, + "step": 14272500 + }, + { + "epoch": 41.31, + "learning_rate": 2.935019625324194e-05, + "loss": 2.0331, + "step": 14273000 + }, + { + "epoch": 41.32, + "learning_rate": 2.9349472605594663e-05, + "loss": 2.0448, + "step": 14273500 + }, + { + "epoch": 41.32, + "learning_rate": 2.9348748957947392e-05, + "loss": 2.026, + "step": 14274000 + }, + { + "epoch": 41.32, + "learning_rate": 2.9348025310300115e-05, + "loss": 2.0271, + "step": 14274500 + }, + { + "epoch": 41.32, + "learning_rate": 2.9347303109948133e-05, + "loss": 2.0428, + "step": 14275000 + }, + { + "epoch": 41.32, + "learning_rate": 2.9346579462300856e-05, + "loss": 2.037, + "step": 14275500 + }, + { + "epoch": 41.32, + "learning_rate": 2.9345855814653578e-05, + "loss": 2.0638, + "step": 14276000 + }, + { + "epoch": 41.32, + "learning_rate": 2.93451321670063e-05, + "loss": 2.0037, + "step": 14276500 + }, + { + "epoch": 41.33, + "learning_rate": 2.9344408519359022e-05, + "loss": 2.0691, + "step": 14277000 + }, + { + "epoch": 41.33, + "learning_rate": 2.9343684871711745e-05, + "loss": 2.0482, + "step": 14277500 + }, + { + "epoch": 41.33, + "learning_rate": 2.934296122406447e-05, + "loss": 2.0496, + "step": 14278000 + }, + { + "epoch": 41.33, + "learning_rate": 2.9342237576417193e-05, + "loss": 2.0805, + "step": 14278500 + }, + { + "epoch": 41.33, + "learning_rate": 2.9341515376065208e-05, + "loss": 2.0443, + "step": 14279000 + }, + { + "epoch": 41.33, + "learning_rate": 2.9340791728417934e-05, + "loss": 2.0266, + "step": 14279500 + }, + { + "epoch": 41.33, + "learning_rate": 2.9340068080770656e-05, + "loss": 2.0351, + "step": 14280000 + }, + { + "epoch": 41.34, + "learning_rate": 2.933934588041867e-05, + "loss": 2.0433, + "step": 14280500 + }, + { + "epoch": 41.34, + "learning_rate": 2.9338622232771397e-05, + "loss": 2.0438, + "step": 14281000 + }, + { + "epoch": 41.34, + "learning_rate": 2.933790003241942e-05, + "loss": 2.0319, + "step": 14281500 + }, + { + "epoch": 41.34, + "learning_rate": 2.933717638477214e-05, + "loss": 2.0304, + "step": 14282000 + }, + { + "epoch": 41.34, + "learning_rate": 2.9336452737124864e-05, + "loss": 2.0494, + "step": 14282500 + }, + { + "epoch": 41.34, + "learning_rate": 2.9335729089477586e-05, + "loss": 2.0238, + "step": 14283000 + }, + { + "epoch": 41.34, + "learning_rate": 2.933500544183031e-05, + "loss": 2.0537, + "step": 14283500 + }, + { + "epoch": 41.35, + "learning_rate": 2.9334281794183034e-05, + "loss": 2.0578, + "step": 14284000 + }, + { + "epoch": 41.35, + "learning_rate": 2.9333558146535756e-05, + "loss": 2.0221, + "step": 14284500 + }, + { + "epoch": 41.35, + "learning_rate": 2.933283449888848e-05, + "loss": 2.0348, + "step": 14285000 + }, + { + "epoch": 41.35, + "learning_rate": 2.93321108512412e-05, + "loss": 2.0488, + "step": 14285500 + }, + { + "epoch": 41.35, + "learning_rate": 2.9331387203593923e-05, + "loss": 2.0376, + "step": 14286000 + }, + { + "epoch": 41.35, + "learning_rate": 2.933066355594665e-05, + "loss": 2.035, + "step": 14286500 + }, + { + "epoch": 41.36, + "learning_rate": 2.932993990829937e-05, + "loss": 2.0295, + "step": 14287000 + }, + { + "epoch": 41.36, + "learning_rate": 2.9329217707947386e-05, + "loss": 2.0351, + "step": 14287500 + }, + { + "epoch": 41.36, + "learning_rate": 2.932849406030011e-05, + "loss": 2.0455, + "step": 14288000 + }, + { + "epoch": 41.36, + "learning_rate": 2.9327770412652838e-05, + "loss": 2.0319, + "step": 14288500 + }, + { + "epoch": 41.36, + "learning_rate": 2.932704676500556e-05, + "loss": 2.0595, + "step": 14289000 + }, + { + "epoch": 41.36, + "learning_rate": 2.932632456465358e-05, + "loss": 2.0605, + "step": 14289500 + }, + { + "epoch": 41.36, + "learning_rate": 2.93256009170063e-05, + "loss": 2.0645, + "step": 14290000 + }, + { + "epoch": 41.37, + "learning_rate": 2.9324877269359023e-05, + "loss": 2.0317, + "step": 14290500 + }, + { + "epoch": 41.37, + "learning_rate": 2.932415362171175e-05, + "loss": 2.0304, + "step": 14291000 + }, + { + "epoch": 41.37, + "learning_rate": 2.932342997406447e-05, + "loss": 2.0435, + "step": 14291500 + }, + { + "epoch": 41.37, + "learning_rate": 2.9322706326417193e-05, + "loss": 2.0313, + "step": 14292000 + }, + { + "epoch": 41.37, + "learning_rate": 2.9321982678769916e-05, + "loss": 2.023, + "step": 14292500 + }, + { + "epoch": 41.37, + "learning_rate": 2.9321259031122638e-05, + "loss": 2.0247, + "step": 14293000 + }, + { + "epoch": 41.37, + "learning_rate": 2.932053538347536e-05, + "loss": 2.0413, + "step": 14293500 + }, + { + "epoch": 41.38, + "learning_rate": 2.9319811735828086e-05, + "loss": 2.0438, + "step": 14294000 + }, + { + "epoch": 41.38, + "learning_rate": 2.9319088088180808e-05, + "loss": 2.0204, + "step": 14294500 + }, + { + "epoch": 41.38, + "learning_rate": 2.931836444053353e-05, + "loss": 2.033, + "step": 14295000 + }, + { + "epoch": 41.38, + "learning_rate": 2.9317640792886252e-05, + "loss": 2.0498, + "step": 14295500 + }, + { + "epoch": 41.38, + "learning_rate": 2.931691714523898e-05, + "loss": 2.0341, + "step": 14296000 + }, + { + "epoch": 41.38, + "learning_rate": 2.9316193497591704e-05, + "loss": 2.0471, + "step": 14296500 + }, + { + "epoch": 41.38, + "learning_rate": 2.9315471297239723e-05, + "loss": 2.0409, + "step": 14297000 + }, + { + "epoch": 41.39, + "learning_rate": 2.9314749096887738e-05, + "loss": 2.0417, + "step": 14297500 + }, + { + "epoch": 41.39, + "learning_rate": 2.9314026896535757e-05, + "loss": 2.0365, + "step": 14298000 + }, + { + "epoch": 41.39, + "learning_rate": 2.931330324888848e-05, + "loss": 2.0633, + "step": 14298500 + }, + { + "epoch": 41.39, + "learning_rate": 2.9312581048536498e-05, + "loss": 2.04, + "step": 14299000 + }, + { + "epoch": 41.39, + "learning_rate": 2.931185740088922e-05, + "loss": 2.0437, + "step": 14299500 + }, + { + "epoch": 41.39, + "learning_rate": 2.9311133753241943e-05, + "loss": 2.0411, + "step": 14300000 + }, + { + "epoch": 41.39, + "learning_rate": 2.9310410105594665e-05, + "loss": 2.0459, + "step": 14300500 + }, + { + "epoch": 41.4, + "learning_rate": 2.9309686457947387e-05, + "loss": 2.0326, + "step": 14301000 + }, + { + "epoch": 41.4, + "learning_rate": 2.9308962810300113e-05, + "loss": 2.0201, + "step": 14301500 + }, + { + "epoch": 41.4, + "learning_rate": 2.9308239162652835e-05, + "loss": 2.0523, + "step": 14302000 + }, + { + "epoch": 41.4, + "learning_rate": 2.9307515515005557e-05, + "loss": 2.0617, + "step": 14302500 + }, + { + "epoch": 41.4, + "learning_rate": 2.930679186735828e-05, + "loss": 2.0404, + "step": 14303000 + }, + { + "epoch": 41.4, + "learning_rate": 2.930606821971101e-05, + "loss": 2.0448, + "step": 14303500 + }, + { + "epoch": 41.4, + "learning_rate": 2.930534457206373e-05, + "loss": 2.0318, + "step": 14304000 + }, + { + "epoch": 41.41, + "learning_rate": 2.9304620924416453e-05, + "loss": 2.0434, + "step": 14304500 + }, + { + "epoch": 41.41, + "learning_rate": 2.9303898724064472e-05, + "loss": 2.0243, + "step": 14305000 + }, + { + "epoch": 41.41, + "learning_rate": 2.9303175076417194e-05, + "loss": 2.0184, + "step": 14305500 + }, + { + "epoch": 41.41, + "learning_rate": 2.9302451428769916e-05, + "loss": 2.0571, + "step": 14306000 + }, + { + "epoch": 41.41, + "learning_rate": 2.930172778112264e-05, + "loss": 2.047, + "step": 14306500 + }, + { + "epoch": 41.41, + "learning_rate": 2.9301004133475364e-05, + "loss": 2.0488, + "step": 14307000 + }, + { + "epoch": 41.41, + "learning_rate": 2.930028193312338e-05, + "loss": 2.0491, + "step": 14307500 + }, + { + "epoch": 41.42, + "learning_rate": 2.9299558285476102e-05, + "loss": 2.0362, + "step": 14308000 + }, + { + "epoch": 41.42, + "learning_rate": 2.9298834637828824e-05, + "loss": 2.0454, + "step": 14308500 + }, + { + "epoch": 41.42, + "learning_rate": 2.929811099018155e-05, + "loss": 2.0211, + "step": 14309000 + }, + { + "epoch": 41.42, + "learning_rate": 2.9297388789829565e-05, + "loss": 2.0288, + "step": 14309500 + }, + { + "epoch": 41.42, + "learning_rate": 2.9296665142182288e-05, + "loss": 2.0461, + "step": 14310000 + }, + { + "epoch": 41.42, + "learning_rate": 2.9295941494535013e-05, + "loss": 2.0501, + "step": 14310500 + }, + { + "epoch": 41.42, + "learning_rate": 2.929521784688774e-05, + "loss": 2.053, + "step": 14311000 + }, + { + "epoch": 41.43, + "learning_rate": 2.9294494199240464e-05, + "loss": 2.0349, + "step": 14311500 + }, + { + "epoch": 41.43, + "learning_rate": 2.9293770551593187e-05, + "loss": 2.0163, + "step": 14312000 + }, + { + "epoch": 41.43, + "learning_rate": 2.929304690394591e-05, + "loss": 2.0398, + "step": 14312500 + }, + { + "epoch": 41.43, + "learning_rate": 2.929232325629863e-05, + "loss": 2.0515, + "step": 14313000 + }, + { + "epoch": 41.43, + "learning_rate": 2.9291602503241943e-05, + "loss": 2.0653, + "step": 14313500 + }, + { + "epoch": 41.43, + "learning_rate": 2.9290878855594666e-05, + "loss": 2.0257, + "step": 14314000 + }, + { + "epoch": 41.43, + "learning_rate": 2.9290155207947388e-05, + "loss": 2.053, + "step": 14314500 + }, + { + "epoch": 41.44, + "learning_rate": 2.9289431560300114e-05, + "loss": 2.0509, + "step": 14315000 + }, + { + "epoch": 41.44, + "learning_rate": 2.9288707912652836e-05, + "loss": 2.0454, + "step": 14315500 + }, + { + "epoch": 41.44, + "learning_rate": 2.9287984265005558e-05, + "loss": 2.0403, + "step": 14316000 + }, + { + "epoch": 41.44, + "learning_rate": 2.928726061735828e-05, + "loss": 2.0402, + "step": 14316500 + }, + { + "epoch": 41.44, + "learning_rate": 2.92865384170063e-05, + "loss": 2.0563, + "step": 14317000 + }, + { + "epoch": 41.44, + "learning_rate": 2.928581476935902e-05, + "loss": 2.047, + "step": 14317500 + }, + { + "epoch": 41.44, + "learning_rate": 2.9285091121711744e-05, + "loss": 2.0438, + "step": 14318000 + }, + { + "epoch": 41.45, + "learning_rate": 2.9284367474064473e-05, + "loss": 2.0282, + "step": 14318500 + }, + { + "epoch": 41.45, + "learning_rate": 2.9283643826417195e-05, + "loss": 2.037, + "step": 14319000 + }, + { + "epoch": 41.45, + "learning_rate": 2.9282921626065214e-05, + "loss": 2.0395, + "step": 14319500 + }, + { + "epoch": 41.45, + "learning_rate": 2.9282197978417936e-05, + "loss": 2.0538, + "step": 14320000 + }, + { + "epoch": 41.45, + "learning_rate": 2.9281474330770658e-05, + "loss": 2.0411, + "step": 14320500 + }, + { + "epoch": 41.45, + "learning_rate": 2.928075068312338e-05, + "loss": 2.0349, + "step": 14321000 + }, + { + "epoch": 41.45, + "learning_rate": 2.92800284827714e-05, + "loss": 2.0534, + "step": 14321500 + }, + { + "epoch": 41.46, + "learning_rate": 2.9279304835124122e-05, + "loss": 2.05, + "step": 14322000 + }, + { + "epoch": 41.46, + "learning_rate": 2.9278581187476844e-05, + "loss": 2.0603, + "step": 14322500 + }, + { + "epoch": 41.46, + "learning_rate": 2.9277857539829566e-05, + "loss": 2.028, + "step": 14323000 + }, + { + "epoch": 41.46, + "learning_rate": 2.9277133892182292e-05, + "loss": 2.0426, + "step": 14323500 + }, + { + "epoch": 41.46, + "learning_rate": 2.9276410244535014e-05, + "loss": 2.0443, + "step": 14324000 + }, + { + "epoch": 41.46, + "learning_rate": 2.9275686596887736e-05, + "loss": 2.0083, + "step": 14324500 + }, + { + "epoch": 41.47, + "learning_rate": 2.927496294924046e-05, + "loss": 2.0339, + "step": 14325000 + }, + { + "epoch": 41.47, + "learning_rate": 2.927423930159318e-05, + "loss": 2.0641, + "step": 14325500 + }, + { + "epoch": 41.47, + "learning_rate": 2.927351565394591e-05, + "loss": 2.0424, + "step": 14326000 + }, + { + "epoch": 41.47, + "learning_rate": 2.9272792006298632e-05, + "loss": 2.0721, + "step": 14326500 + }, + { + "epoch": 41.47, + "learning_rate": 2.9272068358651354e-05, + "loss": 2.0216, + "step": 14327000 + }, + { + "epoch": 41.47, + "learning_rate": 2.927134471100408e-05, + "loss": 2.0538, + "step": 14327500 + }, + { + "epoch": 41.47, + "learning_rate": 2.9270621063356802e-05, + "loss": 2.0325, + "step": 14328000 + }, + { + "epoch": 41.48, + "learning_rate": 2.9269898863004818e-05, + "loss": 2.0353, + "step": 14328500 + }, + { + "epoch": 41.48, + "learning_rate": 2.9269176662652837e-05, + "loss": 2.0513, + "step": 14329000 + }, + { + "epoch": 41.48, + "learning_rate": 2.926845301500556e-05, + "loss": 2.0435, + "step": 14329500 + }, + { + "epoch": 41.48, + "learning_rate": 2.926772936735828e-05, + "loss": 2.0423, + "step": 14330000 + }, + { + "epoch": 41.48, + "learning_rate": 2.9267005719711003e-05, + "loss": 2.0119, + "step": 14330500 + }, + { + "epoch": 41.48, + "learning_rate": 2.9266283519359022e-05, + "loss": 2.0307, + "step": 14331000 + }, + { + "epoch": 41.48, + "learning_rate": 2.9265559871711744e-05, + "loss": 2.0332, + "step": 14331500 + }, + { + "epoch": 41.49, + "learning_rate": 2.9264836224064467e-05, + "loss": 2.0231, + "step": 14332000 + }, + { + "epoch": 41.49, + "learning_rate": 2.9264112576417192e-05, + "loss": 2.0426, + "step": 14332500 + }, + { + "epoch": 41.49, + "learning_rate": 2.9263388928769915e-05, + "loss": 2.0437, + "step": 14333000 + }, + { + "epoch": 41.49, + "learning_rate": 2.9262665281122644e-05, + "loss": 2.0333, + "step": 14333500 + }, + { + "epoch": 41.49, + "learning_rate": 2.9261941633475366e-05, + "loss": 2.016, + "step": 14334000 + }, + { + "epoch": 41.49, + "learning_rate": 2.926121943312338e-05, + "loss": 2.0469, + "step": 14334500 + }, + { + "epoch": 41.49, + "learning_rate": 2.92604972327714e-05, + "loss": 2.0355, + "step": 14335000 + }, + { + "epoch": 41.5, + "learning_rate": 2.9259773585124123e-05, + "loss": 2.0485, + "step": 14335500 + }, + { + "epoch": 41.5, + "learning_rate": 2.925905138477214e-05, + "loss": 2.049, + "step": 14336000 + }, + { + "epoch": 41.5, + "learning_rate": 2.9258327737124864e-05, + "loss": 2.0482, + "step": 14336500 + }, + { + "epoch": 41.5, + "learning_rate": 2.9257604089477586e-05, + "loss": 2.0294, + "step": 14337000 + }, + { + "epoch": 41.5, + "learning_rate": 2.9256880441830308e-05, + "loss": 2.0452, + "step": 14337500 + }, + { + "epoch": 41.5, + "learning_rate": 2.925615679418303e-05, + "loss": 2.0598, + "step": 14338000 + }, + { + "epoch": 41.5, + "learning_rate": 2.9255433146535756e-05, + "loss": 2.0569, + "step": 14338500 + }, + { + "epoch": 41.51, + "learning_rate": 2.9254709498888478e-05, + "loss": 2.0297, + "step": 14339000 + }, + { + "epoch": 41.51, + "learning_rate": 2.92539858512412e-05, + "loss": 2.0164, + "step": 14339500 + }, + { + "epoch": 41.51, + "learning_rate": 2.9253262203593923e-05, + "loss": 2.052, + "step": 14340000 + }, + { + "epoch": 41.51, + "learning_rate": 2.9252538555946645e-05, + "loss": 2.032, + "step": 14340500 + }, + { + "epoch": 41.51, + "learning_rate": 2.9251814908299374e-05, + "loss": 2.0693, + "step": 14341000 + }, + { + "epoch": 41.51, + "learning_rate": 2.9251091260652096e-05, + "loss": 2.0412, + "step": 14341500 + }, + { + "epoch": 41.51, + "learning_rate": 2.925036761300482e-05, + "loss": 2.0666, + "step": 14342000 + }, + { + "epoch": 41.52, + "learning_rate": 2.9249643965357544e-05, + "loss": 2.0396, + "step": 14342500 + }, + { + "epoch": 41.52, + "learning_rate": 2.9248920317710266e-05, + "loss": 2.0485, + "step": 14343000 + }, + { + "epoch": 41.52, + "learning_rate": 2.924819667006299e-05, + "loss": 2.0361, + "step": 14343500 + }, + { + "epoch": 41.52, + "learning_rate": 2.9247474469711007e-05, + "loss": 2.0338, + "step": 14344000 + }, + { + "epoch": 41.52, + "learning_rate": 2.924675082206373e-05, + "loss": 2.0413, + "step": 14344500 + }, + { + "epoch": 41.52, + "learning_rate": 2.9246027174416452e-05, + "loss": 2.0466, + "step": 14345000 + }, + { + "epoch": 41.52, + "learning_rate": 2.9245304974064467e-05, + "loss": 2.0298, + "step": 14345500 + }, + { + "epoch": 41.53, + "learning_rate": 2.9244581326417193e-05, + "loss": 2.0153, + "step": 14346000 + }, + { + "epoch": 41.53, + "learning_rate": 2.9243857678769915e-05, + "loss": 2.0309, + "step": 14346500 + }, + { + "epoch": 41.53, + "learning_rate": 2.9243134031122638e-05, + "loss": 2.0422, + "step": 14347000 + }, + { + "epoch": 41.53, + "learning_rate": 2.924241038347536e-05, + "loss": 2.0332, + "step": 14347500 + }, + { + "epoch": 41.53, + "learning_rate": 2.9241686735828082e-05, + "loss": 2.0348, + "step": 14348000 + }, + { + "epoch": 41.53, + "learning_rate": 2.924096308818081e-05, + "loss": 2.0491, + "step": 14348500 + }, + { + "epoch": 41.53, + "learning_rate": 2.9240239440533533e-05, + "loss": 2.0771, + "step": 14349000 + }, + { + "epoch": 41.54, + "learning_rate": 2.923951579288626e-05, + "loss": 2.0481, + "step": 14349500 + }, + { + "epoch": 41.54, + "learning_rate": 2.923879214523898e-05, + "loss": 2.0323, + "step": 14350000 + }, + { + "epoch": 41.54, + "learning_rate": 2.9238068497591703e-05, + "loss": 2.0176, + "step": 14350500 + }, + { + "epoch": 41.54, + "learning_rate": 2.9237344849944426e-05, + "loss": 2.0472, + "step": 14351000 + }, + { + "epoch": 41.54, + "learning_rate": 2.9236621202297148e-05, + "loss": 2.0433, + "step": 14351500 + }, + { + "epoch": 41.54, + "learning_rate": 2.923589755464987e-05, + "loss": 2.0395, + "step": 14352000 + }, + { + "epoch": 41.54, + "learning_rate": 2.9235173907002596e-05, + "loss": 2.0821, + "step": 14352500 + }, + { + "epoch": 41.55, + "learning_rate": 2.9234453153945908e-05, + "loss": 2.0117, + "step": 14353000 + }, + { + "epoch": 41.55, + "learning_rate": 2.923372950629863e-05, + "loss": 2.0426, + "step": 14353500 + }, + { + "epoch": 41.55, + "learning_rate": 2.9233005858651352e-05, + "loss": 2.0344, + "step": 14354000 + }, + { + "epoch": 41.55, + "learning_rate": 2.9232282211004075e-05, + "loss": 2.0576, + "step": 14354500 + }, + { + "epoch": 41.55, + "learning_rate": 2.9231558563356797e-05, + "loss": 2.045, + "step": 14355000 + }, + { + "epoch": 41.55, + "learning_rate": 2.923083491570952e-05, + "loss": 2.0237, + "step": 14355500 + }, + { + "epoch": 41.55, + "learning_rate": 2.9230111268062248e-05, + "loss": 2.0346, + "step": 14356000 + }, + { + "epoch": 41.56, + "learning_rate": 2.922938762041497e-05, + "loss": 2.0149, + "step": 14356500 + }, + { + "epoch": 41.56, + "learning_rate": 2.9228663972767696e-05, + "loss": 2.0313, + "step": 14357000 + }, + { + "epoch": 41.56, + "learning_rate": 2.922794177241571e-05, + "loss": 2.0446, + "step": 14357500 + }, + { + "epoch": 41.56, + "learning_rate": 2.9227218124768434e-05, + "loss": 2.0393, + "step": 14358000 + }, + { + "epoch": 41.56, + "learning_rate": 2.9226495924416453e-05, + "loss": 2.0295, + "step": 14358500 + }, + { + "epoch": 41.56, + "learning_rate": 2.922577372406447e-05, + "loss": 2.0366, + "step": 14359000 + }, + { + "epoch": 41.56, + "learning_rate": 2.9225050076417194e-05, + "loss": 2.0443, + "step": 14359500 + }, + { + "epoch": 41.57, + "learning_rate": 2.9224326428769916e-05, + "loss": 2.042, + "step": 14360000 + }, + { + "epoch": 41.57, + "learning_rate": 2.922360278112264e-05, + "loss": 2.0538, + "step": 14360500 + }, + { + "epoch": 41.57, + "learning_rate": 2.922287913347536e-05, + "loss": 2.0299, + "step": 14361000 + }, + { + "epoch": 41.57, + "learning_rate": 2.922215693312338e-05, + "loss": 2.0487, + "step": 14361500 + }, + { + "epoch": 41.57, + "learning_rate": 2.9221433285476102e-05, + "loss": 2.0313, + "step": 14362000 + }, + { + "epoch": 41.57, + "learning_rate": 2.9220709637828824e-05, + "loss": 2.0435, + "step": 14362500 + }, + { + "epoch": 41.58, + "learning_rate": 2.9219985990181546e-05, + "loss": 2.023, + "step": 14363000 + }, + { + "epoch": 41.58, + "learning_rate": 2.9219262342534275e-05, + "loss": 2.0393, + "step": 14363500 + }, + { + "epoch": 41.58, + "learning_rate": 2.9218538694886997e-05, + "loss": 2.052, + "step": 14364000 + }, + { + "epoch": 41.58, + "learning_rate": 2.9217815047239723e-05, + "loss": 2.0229, + "step": 14364500 + }, + { + "epoch": 41.58, + "learning_rate": 2.9217091399592445e-05, + "loss": 2.0286, + "step": 14365000 + }, + { + "epoch": 41.58, + "learning_rate": 2.9216367751945168e-05, + "loss": 2.0452, + "step": 14365500 + }, + { + "epoch": 41.58, + "learning_rate": 2.921564410429789e-05, + "loss": 2.0486, + "step": 14366000 + }, + { + "epoch": 41.59, + "learning_rate": 2.921492190394591e-05, + "loss": 2.038, + "step": 14366500 + }, + { + "epoch": 41.59, + "learning_rate": 2.921419825629863e-05, + "loss": 2.0712, + "step": 14367000 + }, + { + "epoch": 41.59, + "learning_rate": 2.9213474608651353e-05, + "loss": 2.0336, + "step": 14367500 + }, + { + "epoch": 41.59, + "learning_rate": 2.9212750961004075e-05, + "loss": 2.0522, + "step": 14368000 + }, + { + "epoch": 41.59, + "learning_rate": 2.9212028760652094e-05, + "loss": 2.034, + "step": 14368500 + }, + { + "epoch": 41.59, + "learning_rate": 2.9211305113004817e-05, + "loss": 2.0417, + "step": 14369000 + }, + { + "epoch": 41.59, + "learning_rate": 2.921058146535754e-05, + "loss": 2.048, + "step": 14369500 + }, + { + "epoch": 41.6, + "learning_rate": 2.920985781771026e-05, + "loss": 2.046, + "step": 14370000 + }, + { + "epoch": 41.6, + "learning_rate": 2.9209134170062987e-05, + "loss": 2.0638, + "step": 14370500 + }, + { + "epoch": 41.6, + "learning_rate": 2.9208410522415712e-05, + "loss": 2.0396, + "step": 14371000 + }, + { + "epoch": 41.6, + "learning_rate": 2.920768832206373e-05, + "loss": 2.0366, + "step": 14371500 + }, + { + "epoch": 41.6, + "learning_rate": 2.9206964674416454e-05, + "loss": 2.0301, + "step": 14372000 + }, + { + "epoch": 41.6, + "learning_rate": 2.9206241026769176e-05, + "loss": 2.0225, + "step": 14372500 + }, + { + "epoch": 41.6, + "learning_rate": 2.9205517379121898e-05, + "loss": 2.049, + "step": 14373000 + }, + { + "epoch": 41.61, + "learning_rate": 2.9204793731474624e-05, + "loss": 2.0448, + "step": 14373500 + }, + { + "epoch": 41.61, + "learning_rate": 2.9204070083827346e-05, + "loss": 2.0232, + "step": 14374000 + }, + { + "epoch": 41.61, + "learning_rate": 2.9203346436180068e-05, + "loss": 2.0291, + "step": 14374500 + }, + { + "epoch": 41.61, + "learning_rate": 2.920262278853279e-05, + "loss": 2.0343, + "step": 14375000 + }, + { + "epoch": 41.61, + "learning_rate": 2.9201899140885513e-05, + "loss": 2.0466, + "step": 14375500 + }, + { + "epoch": 41.61, + "learning_rate": 2.9201175493238235e-05, + "loss": 2.0384, + "step": 14376000 + }, + { + "epoch": 41.61, + "learning_rate": 2.920045184559096e-05, + "loss": 2.0427, + "step": 14376500 + }, + { + "epoch": 41.62, + "learning_rate": 2.9199728197943683e-05, + "loss": 2.0529, + "step": 14377000 + }, + { + "epoch": 41.62, + "learning_rate": 2.9199004550296405e-05, + "loss": 2.0511, + "step": 14377500 + }, + { + "epoch": 41.62, + "learning_rate": 2.9198280902649134e-05, + "loss": 2.0504, + "step": 14378000 + }, + { + "epoch": 41.62, + "learning_rate": 2.9197557255001856e-05, + "loss": 2.03, + "step": 14378500 + }, + { + "epoch": 41.62, + "learning_rate": 2.9196835054649875e-05, + "loss": 2.0433, + "step": 14379000 + }, + { + "epoch": 41.62, + "learning_rate": 2.9196111407002597e-05, + "loss": 2.0251, + "step": 14379500 + }, + { + "epoch": 41.62, + "learning_rate": 2.9195389206650613e-05, + "loss": 2.0443, + "step": 14380000 + }, + { + "epoch": 41.63, + "learning_rate": 2.919466555900334e-05, + "loss": 2.0494, + "step": 14380500 + }, + { + "epoch": 41.63, + "learning_rate": 2.9193943358651354e-05, + "loss": 2.0707, + "step": 14381000 + }, + { + "epoch": 41.63, + "learning_rate": 2.9193221158299373e-05, + "loss": 2.0576, + "step": 14381500 + }, + { + "epoch": 41.63, + "learning_rate": 2.919249895794739e-05, + "loss": 2.041, + "step": 14382000 + }, + { + "epoch": 41.63, + "learning_rate": 2.919177531030011e-05, + "loss": 2.0208, + "step": 14382500 + }, + { + "epoch": 41.63, + "learning_rate": 2.919105310994813e-05, + "loss": 2.0707, + "step": 14383000 + }, + { + "epoch": 41.63, + "learning_rate": 2.9190329462300852e-05, + "loss": 2.0685, + "step": 14383500 + }, + { + "epoch": 41.64, + "learning_rate": 2.9189605814653574e-05, + "loss": 2.0405, + "step": 14384000 + }, + { + "epoch": 41.64, + "learning_rate": 2.91888821670063e-05, + "loss": 2.0455, + "step": 14384500 + }, + { + "epoch": 41.64, + "learning_rate": 2.9188158519359022e-05, + "loss": 2.0608, + "step": 14385000 + }, + { + "epoch": 41.64, + "learning_rate": 2.9187434871711744e-05, + "loss": 2.0474, + "step": 14385500 + }, + { + "epoch": 41.64, + "learning_rate": 2.9186711224064473e-05, + "loss": 2.0477, + "step": 14386000 + }, + { + "epoch": 41.64, + "learning_rate": 2.9185987576417195e-05, + "loss": 2.0497, + "step": 14386500 + }, + { + "epoch": 41.64, + "learning_rate": 2.9185263928769918e-05, + "loss": 2.0444, + "step": 14387000 + }, + { + "epoch": 41.65, + "learning_rate": 2.918454028112264e-05, + "loss": 2.042, + "step": 14387500 + }, + { + "epoch": 41.65, + "learning_rate": 2.9183816633475362e-05, + "loss": 2.0377, + "step": 14388000 + }, + { + "epoch": 41.65, + "learning_rate": 2.9183092985828088e-05, + "loss": 2.0568, + "step": 14388500 + }, + { + "epoch": 41.65, + "learning_rate": 2.918236933818081e-05, + "loss": 2.0508, + "step": 14389000 + }, + { + "epoch": 41.65, + "learning_rate": 2.9181645690533532e-05, + "loss": 2.0382, + "step": 14389500 + }, + { + "epoch": 41.65, + "learning_rate": 2.9180922042886255e-05, + "loss": 2.0491, + "step": 14390000 + }, + { + "epoch": 41.65, + "learning_rate": 2.9180198395238977e-05, + "loss": 2.0302, + "step": 14390500 + }, + { + "epoch": 41.66, + "learning_rate": 2.9179476194886996e-05, + "loss": 2.0605, + "step": 14391000 + }, + { + "epoch": 41.66, + "learning_rate": 2.9178752547239718e-05, + "loss": 2.0509, + "step": 14391500 + }, + { + "epoch": 41.66, + "learning_rate": 2.917802889959244e-05, + "loss": 2.0567, + "step": 14392000 + }, + { + "epoch": 41.66, + "learning_rate": 2.9177305251945162e-05, + "loss": 2.0385, + "step": 14392500 + }, + { + "epoch": 41.66, + "learning_rate": 2.9176581604297888e-05, + "loss": 2.0492, + "step": 14393000 + }, + { + "epoch": 41.66, + "learning_rate": 2.9175857956650614e-05, + "loss": 2.045, + "step": 14393500 + }, + { + "epoch": 41.66, + "learning_rate": 2.917513430900334e-05, + "loss": 2.0298, + "step": 14394000 + }, + { + "epoch": 41.67, + "learning_rate": 2.917441066135606e-05, + "loss": 2.0332, + "step": 14394500 + }, + { + "epoch": 41.67, + "learning_rate": 2.9173687013708784e-05, + "loss": 2.0468, + "step": 14395000 + }, + { + "epoch": 41.67, + "learning_rate": 2.9172963366061506e-05, + "loss": 2.0502, + "step": 14395500 + }, + { + "epoch": 41.67, + "learning_rate": 2.9172239718414228e-05, + "loss": 2.0475, + "step": 14396000 + }, + { + "epoch": 41.67, + "learning_rate": 2.9171516070766954e-05, + "loss": 2.064, + "step": 14396500 + }, + { + "epoch": 41.67, + "learning_rate": 2.917079387041497e-05, + "loss": 2.037, + "step": 14397000 + }, + { + "epoch": 41.67, + "learning_rate": 2.917007022276769e-05, + "loss": 2.0329, + "step": 14397500 + }, + { + "epoch": 41.68, + "learning_rate": 2.9169346575120414e-05, + "loss": 2.0361, + "step": 14398000 + }, + { + "epoch": 41.68, + "learning_rate": 2.9168624374768433e-05, + "loss": 2.0466, + "step": 14398500 + }, + { + "epoch": 41.68, + "learning_rate": 2.9167902174416452e-05, + "loss": 2.043, + "step": 14399000 + }, + { + "epoch": 41.68, + "learning_rate": 2.9167178526769174e-05, + "loss": 2.0241, + "step": 14399500 + }, + { + "epoch": 41.68, + "learning_rate": 2.9166454879121896e-05, + "loss": 2.046, + "step": 14400000 + }, + { + "epoch": 41.68, + "learning_rate": 2.916573123147462e-05, + "loss": 2.0254, + "step": 14400500 + }, + { + "epoch": 41.68, + "learning_rate": 2.9165007583827347e-05, + "loss": 2.0496, + "step": 14401000 + }, + { + "epoch": 41.69, + "learning_rate": 2.9164285383475366e-05, + "loss": 2.0644, + "step": 14401500 + }, + { + "epoch": 41.69, + "learning_rate": 2.916356173582809e-05, + "loss": 2.0286, + "step": 14402000 + }, + { + "epoch": 41.69, + "learning_rate": 2.916283808818081e-05, + "loss": 2.0649, + "step": 14402500 + }, + { + "epoch": 41.69, + "learning_rate": 2.9162114440533533e-05, + "loss": 2.03, + "step": 14403000 + }, + { + "epoch": 41.69, + "learning_rate": 2.9161392240181552e-05, + "loss": 2.0385, + "step": 14403500 + }, + { + "epoch": 41.69, + "learning_rate": 2.9160668592534274e-05, + "loss": 2.0321, + "step": 14404000 + }, + { + "epoch": 41.7, + "learning_rate": 2.9159944944886996e-05, + "loss": 2.0474, + "step": 14404500 + }, + { + "epoch": 41.7, + "learning_rate": 2.915922129723972e-05, + "loss": 2.0311, + "step": 14405000 + }, + { + "epoch": 41.7, + "learning_rate": 2.9158499096887738e-05, + "loss": 2.0371, + "step": 14405500 + }, + { + "epoch": 41.7, + "learning_rate": 2.915777544924046e-05, + "loss": 2.0624, + "step": 14406000 + }, + { + "epoch": 41.7, + "learning_rate": 2.9157051801593182e-05, + "loss": 2.0633, + "step": 14406500 + }, + { + "epoch": 41.7, + "learning_rate": 2.9156328153945904e-05, + "loss": 2.0479, + "step": 14407000 + }, + { + "epoch": 41.7, + "learning_rate": 2.9155604506298627e-05, + "loss": 2.0549, + "step": 14407500 + }, + { + "epoch": 41.71, + "learning_rate": 2.9154880858651352e-05, + "loss": 2.04, + "step": 14408000 + }, + { + "epoch": 41.71, + "learning_rate": 2.9154158658299375e-05, + "loss": 2.0239, + "step": 14408500 + }, + { + "epoch": 41.71, + "learning_rate": 2.9153435010652097e-05, + "loss": 2.0485, + "step": 14409000 + }, + { + "epoch": 41.71, + "learning_rate": 2.915271136300482e-05, + "loss": 2.0478, + "step": 14409500 + }, + { + "epoch": 41.71, + "learning_rate": 2.915198771535754e-05, + "loss": 2.0402, + "step": 14410000 + }, + { + "epoch": 41.71, + "learning_rate": 2.9151264067710267e-05, + "loss": 2.0445, + "step": 14410500 + }, + { + "epoch": 41.71, + "learning_rate": 2.915054042006299e-05, + "loss": 2.0272, + "step": 14411000 + }, + { + "epoch": 41.72, + "learning_rate": 2.914981677241571e-05, + "loss": 2.0544, + "step": 14411500 + }, + { + "epoch": 41.72, + "learning_rate": 2.9149093124768434e-05, + "loss": 2.0309, + "step": 14412000 + }, + { + "epoch": 41.72, + "learning_rate": 2.9148369477121156e-05, + "loss": 2.0496, + "step": 14412500 + }, + { + "epoch": 41.72, + "learning_rate": 2.9147645829473878e-05, + "loss": 2.0293, + "step": 14413000 + }, + { + "epoch": 41.72, + "learning_rate": 2.9146922181826604e-05, + "loss": 2.0508, + "step": 14413500 + }, + { + "epoch": 41.72, + "learning_rate": 2.9146198534179326e-05, + "loss": 2.0166, + "step": 14414000 + }, + { + "epoch": 41.72, + "learning_rate": 2.9145474886532048e-05, + "loss": 2.031, + "step": 14414500 + }, + { + "epoch": 41.73, + "learning_rate": 2.914475123888477e-05, + "loss": 2.0304, + "step": 14415000 + }, + { + "epoch": 41.73, + "learning_rate": 2.91440275912375e-05, + "loss": 2.0527, + "step": 14415500 + }, + { + "epoch": 41.73, + "learning_rate": 2.914330683818081e-05, + "loss": 2.0389, + "step": 14416000 + }, + { + "epoch": 41.73, + "learning_rate": 2.9142583190533534e-05, + "loss": 2.0494, + "step": 14416500 + }, + { + "epoch": 41.73, + "learning_rate": 2.9141859542886256e-05, + "loss": 2.063, + "step": 14417000 + }, + { + "epoch": 41.73, + "learning_rate": 2.9141135895238982e-05, + "loss": 2.042, + "step": 14417500 + }, + { + "epoch": 41.73, + "learning_rate": 2.9140412247591704e-05, + "loss": 2.0597, + "step": 14418000 + }, + { + "epoch": 41.74, + "learning_rate": 2.913969004723972e-05, + "loss": 2.0675, + "step": 14418500 + }, + { + "epoch": 41.74, + "learning_rate": 2.9138966399592442e-05, + "loss": 2.0672, + "step": 14419000 + }, + { + "epoch": 41.74, + "learning_rate": 2.913824419924046e-05, + "loss": 2.054, + "step": 14419500 + }, + { + "epoch": 41.74, + "learning_rate": 2.9137520551593183e-05, + "loss": 2.0417, + "step": 14420000 + }, + { + "epoch": 41.74, + "learning_rate": 2.9136796903945905e-05, + "loss": 2.0501, + "step": 14420500 + }, + { + "epoch": 41.74, + "learning_rate": 2.9136074703593924e-05, + "loss": 2.0391, + "step": 14421000 + }, + { + "epoch": 41.74, + "learning_rate": 2.9135351055946646e-05, + "loss": 2.0595, + "step": 14421500 + }, + { + "epoch": 41.75, + "learning_rate": 2.913462740829937e-05, + "loss": 2.0199, + "step": 14422000 + }, + { + "epoch": 41.75, + "learning_rate": 2.9133903760652094e-05, + "loss": 2.0432, + "step": 14422500 + }, + { + "epoch": 41.75, + "learning_rate": 2.9133180113004816e-05, + "loss": 2.0248, + "step": 14423000 + }, + { + "epoch": 41.75, + "learning_rate": 2.9132456465357545e-05, + "loss": 2.0428, + "step": 14423500 + }, + { + "epoch": 41.75, + "learning_rate": 2.9131732817710268e-05, + "loss": 2.0607, + "step": 14424000 + }, + { + "epoch": 41.75, + "learning_rate": 2.913100917006299e-05, + "loss": 2.0614, + "step": 14424500 + }, + { + "epoch": 41.75, + "learning_rate": 2.9130285522415712e-05, + "loss": 2.0605, + "step": 14425000 + }, + { + "epoch": 41.76, + "learning_rate": 2.9129561874768434e-05, + "loss": 2.0613, + "step": 14425500 + }, + { + "epoch": 41.76, + "learning_rate": 2.9128838227121157e-05, + "loss": 2.0324, + "step": 14426000 + }, + { + "epoch": 41.76, + "learning_rate": 2.9128116026769176e-05, + "loss": 2.0536, + "step": 14426500 + }, + { + "epoch": 41.76, + "learning_rate": 2.9127392379121898e-05, + "loss": 2.035, + "step": 14427000 + }, + { + "epoch": 41.76, + "learning_rate": 2.912666873147462e-05, + "loss": 2.0611, + "step": 14427500 + }, + { + "epoch": 41.76, + "learning_rate": 2.9125945083827346e-05, + "loss": 2.0334, + "step": 14428000 + }, + { + "epoch": 41.76, + "learning_rate": 2.9125221436180068e-05, + "loss": 2.0496, + "step": 14428500 + }, + { + "epoch": 41.77, + "learning_rate": 2.912449778853279e-05, + "loss": 2.0146, + "step": 14429000 + }, + { + "epoch": 41.77, + "learning_rate": 2.9123774140885512e-05, + "loss": 2.039, + "step": 14429500 + }, + { + "epoch": 41.77, + "learning_rate": 2.912305194053353e-05, + "loss": 2.0408, + "step": 14430000 + }, + { + "epoch": 41.77, + "learning_rate": 2.9122328292886254e-05, + "loss": 2.0621, + "step": 14430500 + }, + { + "epoch": 41.77, + "learning_rate": 2.9121604645238983e-05, + "loss": 2.0409, + "step": 14431000 + }, + { + "epoch": 41.77, + "learning_rate": 2.9120880997591705e-05, + "loss": 2.0371, + "step": 14431500 + }, + { + "epoch": 41.77, + "learning_rate": 2.9120157349944427e-05, + "loss": 2.0556, + "step": 14432000 + }, + { + "epoch": 41.78, + "learning_rate": 2.911943370229715e-05, + "loss": 2.044, + "step": 14432500 + }, + { + "epoch": 41.78, + "learning_rate": 2.911871005464987e-05, + "loss": 2.0509, + "step": 14433000 + }, + { + "epoch": 41.78, + "learning_rate": 2.9117986407002597e-05, + "loss": 2.0443, + "step": 14433500 + }, + { + "epoch": 41.78, + "learning_rate": 2.911726275935532e-05, + "loss": 2.0389, + "step": 14434000 + }, + { + "epoch": 41.78, + "learning_rate": 2.911653911170804e-05, + "loss": 2.0561, + "step": 14434500 + }, + { + "epoch": 41.78, + "learning_rate": 2.9115815464060764e-05, + "loss": 2.045, + "step": 14435000 + }, + { + "epoch": 41.78, + "learning_rate": 2.9115091816413486e-05, + "loss": 2.0532, + "step": 14435500 + }, + { + "epoch": 41.79, + "learning_rate": 2.9114368168766208e-05, + "loss": 2.0249, + "step": 14436000 + }, + { + "epoch": 41.79, + "learning_rate": 2.9113644521118934e-05, + "loss": 2.0307, + "step": 14436500 + }, + { + "epoch": 41.79, + "learning_rate": 2.911292232076695e-05, + "loss": 2.057, + "step": 14437000 + }, + { + "epoch": 41.79, + "learning_rate": 2.911220012041497e-05, + "loss": 2.0323, + "step": 14437500 + }, + { + "epoch": 41.79, + "learning_rate": 2.9111477920062984e-05, + "loss": 2.047, + "step": 14438000 + }, + { + "epoch": 41.79, + "learning_rate": 2.9110754272415713e-05, + "loss": 2.0587, + "step": 14438500 + }, + { + "epoch": 41.79, + "learning_rate": 2.9110030624768435e-05, + "loss": 2.0428, + "step": 14439000 + }, + { + "epoch": 41.8, + "learning_rate": 2.9109306977121157e-05, + "loss": 2.044, + "step": 14439500 + }, + { + "epoch": 41.8, + "learning_rate": 2.9108583329473883e-05, + "loss": 2.0539, + "step": 14440000 + }, + { + "epoch": 41.8, + "learning_rate": 2.9107859681826605e-05, + "loss": 2.0343, + "step": 14440500 + }, + { + "epoch": 41.8, + "learning_rate": 2.9107136034179327e-05, + "loss": 2.0326, + "step": 14441000 + }, + { + "epoch": 41.8, + "learning_rate": 2.910641238653205e-05, + "loss": 2.0231, + "step": 14441500 + }, + { + "epoch": 41.8, + "learning_rate": 2.9105688738884772e-05, + "loss": 2.049, + "step": 14442000 + }, + { + "epoch": 41.81, + "learning_rate": 2.9104965091237498e-05, + "loss": 2.0557, + "step": 14442500 + }, + { + "epoch": 41.81, + "learning_rate": 2.910424144359022e-05, + "loss": 2.0543, + "step": 14443000 + }, + { + "epoch": 41.81, + "learning_rate": 2.9103517795942942e-05, + "loss": 2.0575, + "step": 14443500 + }, + { + "epoch": 41.81, + "learning_rate": 2.9102794148295664e-05, + "loss": 1.9995, + "step": 14444000 + }, + { + "epoch": 41.81, + "learning_rate": 2.9102070500648387e-05, + "loss": 2.0222, + "step": 14444500 + }, + { + "epoch": 41.81, + "learning_rate": 2.910134685300111e-05, + "loss": 2.0571, + "step": 14445000 + }, + { + "epoch": 41.81, + "learning_rate": 2.9100624652649134e-05, + "loss": 2.0443, + "step": 14445500 + }, + { + "epoch": 41.82, + "learning_rate": 2.9099901005001857e-05, + "loss": 2.0475, + "step": 14446000 + }, + { + "epoch": 41.82, + "learning_rate": 2.909917735735458e-05, + "loss": 2.054, + "step": 14446500 + }, + { + "epoch": 41.82, + "learning_rate": 2.90984537097073e-05, + "loss": 2.0255, + "step": 14447000 + }, + { + "epoch": 41.82, + "learning_rate": 2.9097732956650613e-05, + "loss": 2.0404, + "step": 14447500 + }, + { + "epoch": 41.82, + "learning_rate": 2.9097009309003336e-05, + "loss": 2.0728, + "step": 14448000 + }, + { + "epoch": 41.82, + "learning_rate": 2.909628566135606e-05, + "loss": 2.0632, + "step": 14448500 + }, + { + "epoch": 41.82, + "learning_rate": 2.9095562013708784e-05, + "loss": 2.0364, + "step": 14449000 + }, + { + "epoch": 41.83, + "learning_rate": 2.9094838366061506e-05, + "loss": 2.047, + "step": 14449500 + }, + { + "epoch": 41.83, + "learning_rate": 2.9094114718414228e-05, + "loss": 2.0376, + "step": 14450000 + }, + { + "epoch": 41.83, + "learning_rate": 2.909339107076695e-05, + "loss": 2.0364, + "step": 14450500 + }, + { + "epoch": 41.83, + "learning_rate": 2.9092667423119672e-05, + "loss": 2.0375, + "step": 14451000 + }, + { + "epoch": 41.83, + "learning_rate": 2.9091943775472398e-05, + "loss": 2.0328, + "step": 14451500 + }, + { + "epoch": 41.83, + "learning_rate": 2.909122012782512e-05, + "loss": 2.0566, + "step": 14452000 + }, + { + "epoch": 41.83, + "learning_rate": 2.9090496480177843e-05, + "loss": 2.0477, + "step": 14452500 + }, + { + "epoch": 41.84, + "learning_rate": 2.908977283253057e-05, + "loss": 2.0344, + "step": 14453000 + }, + { + "epoch": 41.84, + "learning_rate": 2.9089050632178587e-05, + "loss": 2.0672, + "step": 14453500 + }, + { + "epoch": 41.84, + "learning_rate": 2.9088326984531313e-05, + "loss": 2.0296, + "step": 14454000 + }, + { + "epoch": 41.84, + "learning_rate": 2.9087604784179328e-05, + "loss": 2.055, + "step": 14454500 + }, + { + "epoch": 41.84, + "learning_rate": 2.908688113653205e-05, + "loss": 2.0451, + "step": 14455000 + }, + { + "epoch": 41.84, + "learning_rate": 2.9086157488884773e-05, + "loss": 2.0475, + "step": 14455500 + }, + { + "epoch": 41.84, + "learning_rate": 2.908543528853279e-05, + "loss": 2.0197, + "step": 14456000 + }, + { + "epoch": 41.85, + "learning_rate": 2.9084711640885514e-05, + "loss": 2.041, + "step": 14456500 + }, + { + "epoch": 41.85, + "learning_rate": 2.9083987993238236e-05, + "loss": 2.0467, + "step": 14457000 + }, + { + "epoch": 41.85, + "learning_rate": 2.9083264345590962e-05, + "loss": 2.0368, + "step": 14457500 + }, + { + "epoch": 41.85, + "learning_rate": 2.9082540697943684e-05, + "loss": 2.0287, + "step": 14458000 + }, + { + "epoch": 41.85, + "learning_rate": 2.9081817050296406e-05, + "loss": 2.051, + "step": 14458500 + }, + { + "epoch": 41.85, + "learning_rate": 2.908109340264913e-05, + "loss": 2.0712, + "step": 14459000 + }, + { + "epoch": 41.85, + "learning_rate": 2.908036975500185e-05, + "loss": 2.036, + "step": 14459500 + }, + { + "epoch": 41.86, + "learning_rate": 2.9079646107354573e-05, + "loss": 2.0571, + "step": 14460000 + }, + { + "epoch": 41.86, + "learning_rate": 2.9078922459707302e-05, + "loss": 2.0648, + "step": 14460500 + }, + { + "epoch": 41.86, + "learning_rate": 2.907820025935532e-05, + "loss": 2.0495, + "step": 14461000 + }, + { + "epoch": 41.86, + "learning_rate": 2.9077476611708043e-05, + "loss": 2.0468, + "step": 14461500 + }, + { + "epoch": 41.86, + "learning_rate": 2.9076752964060765e-05, + "loss": 2.0356, + "step": 14462000 + }, + { + "epoch": 41.86, + "learning_rate": 2.9076029316413488e-05, + "loss": 2.048, + "step": 14462500 + }, + { + "epoch": 41.86, + "learning_rate": 2.9075305668766213e-05, + "loss": 2.0691, + "step": 14463000 + }, + { + "epoch": 41.87, + "learning_rate": 2.9074582021118935e-05, + "loss": 2.0339, + "step": 14463500 + }, + { + "epoch": 41.87, + "learning_rate": 2.9073858373471658e-05, + "loss": 2.0461, + "step": 14464000 + }, + { + "epoch": 41.87, + "learning_rate": 2.9073136173119677e-05, + "loss": 2.0147, + "step": 14464500 + }, + { + "epoch": 41.87, + "learning_rate": 2.90724125254724e-05, + "loss": 2.0492, + "step": 14465000 + }, + { + "epoch": 41.87, + "learning_rate": 2.907168887782512e-05, + "loss": 2.0388, + "step": 14465500 + }, + { + "epoch": 41.87, + "learning_rate": 2.9070965230177843e-05, + "loss": 2.0586, + "step": 14466000 + }, + { + "epoch": 41.87, + "learning_rate": 2.9070241582530566e-05, + "loss": 2.0627, + "step": 14466500 + }, + { + "epoch": 41.88, + "learning_rate": 2.9069517934883288e-05, + "loss": 2.0375, + "step": 14467000 + }, + { + "epoch": 41.88, + "learning_rate": 2.9068794287236017e-05, + "loss": 2.0392, + "step": 14467500 + }, + { + "epoch": 41.88, + "learning_rate": 2.906807063958874e-05, + "loss": 2.0473, + "step": 14468000 + }, + { + "epoch": 41.88, + "learning_rate": 2.9067346991941465e-05, + "loss": 2.0469, + "step": 14468500 + }, + { + "epoch": 41.88, + "learning_rate": 2.906662479158948e-05, + "loss": 2.0266, + "step": 14469000 + }, + { + "epoch": 41.88, + "learning_rate": 2.90659025912375e-05, + "loss": 2.0563, + "step": 14469500 + }, + { + "epoch": 41.88, + "learning_rate": 2.906517894359022e-05, + "loss": 2.0259, + "step": 14470000 + }, + { + "epoch": 41.89, + "learning_rate": 2.9064455295942944e-05, + "loss": 2.069, + "step": 14470500 + }, + { + "epoch": 41.89, + "learning_rate": 2.9063731648295666e-05, + "loss": 2.0168, + "step": 14471000 + }, + { + "epoch": 41.89, + "learning_rate": 2.9063008000648388e-05, + "loss": 2.0383, + "step": 14471500 + }, + { + "epoch": 41.89, + "learning_rate": 2.9062284353001114e-05, + "loss": 2.0454, + "step": 14472000 + }, + { + "epoch": 41.89, + "learning_rate": 2.9061560705353836e-05, + "loss": 2.0553, + "step": 14472500 + }, + { + "epoch": 41.89, + "learning_rate": 2.9060839952297148e-05, + "loss": 2.0451, + "step": 14473000 + }, + { + "epoch": 41.89, + "learning_rate": 2.906011630464987e-05, + "loss": 2.0569, + "step": 14473500 + }, + { + "epoch": 41.9, + "learning_rate": 2.9059392657002593e-05, + "loss": 2.0374, + "step": 14474000 + }, + { + "epoch": 41.9, + "learning_rate": 2.9058669009355315e-05, + "loss": 2.0778, + "step": 14474500 + }, + { + "epoch": 41.9, + "learning_rate": 2.9057945361708037e-05, + "loss": 2.0174, + "step": 14475000 + }, + { + "epoch": 41.9, + "learning_rate": 2.9057221714060766e-05, + "loss": 2.0471, + "step": 14475500 + }, + { + "epoch": 41.9, + "learning_rate": 2.905649806641349e-05, + "loss": 2.0417, + "step": 14476000 + }, + { + "epoch": 41.9, + "learning_rate": 2.9055774418766214e-05, + "loss": 2.0438, + "step": 14476500 + }, + { + "epoch": 41.9, + "learning_rate": 2.9055050771118936e-05, + "loss": 2.0261, + "step": 14477000 + }, + { + "epoch": 41.91, + "learning_rate": 2.905432712347166e-05, + "loss": 2.0288, + "step": 14477500 + }, + { + "epoch": 41.91, + "learning_rate": 2.905360347582438e-05, + "loss": 2.0476, + "step": 14478000 + }, + { + "epoch": 41.91, + "learning_rate": 2.9052879828177103e-05, + "loss": 2.052, + "step": 14478500 + }, + { + "epoch": 41.91, + "learning_rate": 2.905215618052983e-05, + "loss": 2.0482, + "step": 14479000 + }, + { + "epoch": 41.91, + "learning_rate": 2.905143253288255e-05, + "loss": 2.0501, + "step": 14479500 + }, + { + "epoch": 41.91, + "learning_rate": 2.9050708885235273e-05, + "loss": 2.0558, + "step": 14480000 + }, + { + "epoch": 41.92, + "learning_rate": 2.9049985237587995e-05, + "loss": 2.0617, + "step": 14480500 + }, + { + "epoch": 41.92, + "learning_rate": 2.9049261589940718e-05, + "loss": 2.0456, + "step": 14481000 + }, + { + "epoch": 41.92, + "learning_rate": 2.904853794229344e-05, + "loss": 2.0503, + "step": 14481500 + }, + { + "epoch": 41.92, + "learning_rate": 2.9047814294646165e-05, + "loss": 2.0383, + "step": 14482000 + }, + { + "epoch": 41.92, + "learning_rate": 2.904709064699889e-05, + "loss": 2.0524, + "step": 14482500 + }, + { + "epoch": 41.92, + "learning_rate": 2.9046369893942203e-05, + "loss": 2.0432, + "step": 14483000 + }, + { + "epoch": 41.92, + "learning_rate": 2.9045647693590222e-05, + "loss": 2.043, + "step": 14483500 + }, + { + "epoch": 41.93, + "learning_rate": 2.9044924045942944e-05, + "loss": 2.07, + "step": 14484000 + }, + { + "epoch": 41.93, + "learning_rate": 2.9044200398295667e-05, + "loss": 2.0551, + "step": 14484500 + }, + { + "epoch": 41.93, + "learning_rate": 2.9043476750648392e-05, + "loss": 2.0634, + "step": 14485000 + }, + { + "epoch": 41.93, + "learning_rate": 2.9042753103001115e-05, + "loss": 2.0289, + "step": 14485500 + }, + { + "epoch": 41.93, + "learning_rate": 2.904203090264913e-05, + "loss": 2.0245, + "step": 14486000 + }, + { + "epoch": 41.93, + "learning_rate": 2.9041307255001852e-05, + "loss": 2.0494, + "step": 14486500 + }, + { + "epoch": 41.93, + "learning_rate": 2.9040583607354578e-05, + "loss": 2.0464, + "step": 14487000 + }, + { + "epoch": 41.94, + "learning_rate": 2.90398599597073e-05, + "loss": 2.0466, + "step": 14487500 + }, + { + "epoch": 41.94, + "learning_rate": 2.9039136312060022e-05, + "loss": 2.0492, + "step": 14488000 + }, + { + "epoch": 41.94, + "learning_rate": 2.903841411170804e-05, + "loss": 2.0447, + "step": 14488500 + }, + { + "epoch": 41.94, + "learning_rate": 2.9037690464060764e-05, + "loss": 2.0701, + "step": 14489000 + }, + { + "epoch": 41.94, + "learning_rate": 2.9036966816413486e-05, + "loss": 2.0494, + "step": 14489500 + }, + { + "epoch": 41.94, + "learning_rate": 2.9036243168766208e-05, + "loss": 2.0491, + "step": 14490000 + }, + { + "epoch": 41.94, + "learning_rate": 2.9035519521118937e-05, + "loss": 2.0516, + "step": 14490500 + }, + { + "epoch": 41.95, + "learning_rate": 2.903479587347166e-05, + "loss": 2.0454, + "step": 14491000 + }, + { + "epoch": 41.95, + "learning_rate": 2.903407222582438e-05, + "loss": 2.0609, + "step": 14491500 + }, + { + "epoch": 41.95, + "learning_rate": 2.90333500254724e-05, + "loss": 2.047, + "step": 14492000 + }, + { + "epoch": 41.95, + "learning_rate": 2.9032626377825123e-05, + "loss": 2.0528, + "step": 14492500 + }, + { + "epoch": 41.95, + "learning_rate": 2.9031902730177845e-05, + "loss": 2.0383, + "step": 14493000 + }, + { + "epoch": 41.95, + "learning_rate": 2.9031180529825864e-05, + "loss": 2.0586, + "step": 14493500 + }, + { + "epoch": 41.95, + "learning_rate": 2.9030456882178586e-05, + "loss": 2.0486, + "step": 14494000 + }, + { + "epoch": 41.96, + "learning_rate": 2.902973323453131e-05, + "loss": 2.043, + "step": 14494500 + }, + { + "epoch": 41.96, + "learning_rate": 2.902900958688403e-05, + "loss": 2.0269, + "step": 14495000 + }, + { + "epoch": 41.96, + "learning_rate": 2.902828738653205e-05, + "loss": 2.0268, + "step": 14495500 + }, + { + "epoch": 41.96, + "learning_rate": 2.9027563738884772e-05, + "loss": 2.0649, + "step": 14496000 + }, + { + "epoch": 41.96, + "learning_rate": 2.9026840091237494e-05, + "loss": 2.0446, + "step": 14496500 + }, + { + "epoch": 41.96, + "learning_rate": 2.9026116443590216e-05, + "loss": 2.0596, + "step": 14497000 + }, + { + "epoch": 41.96, + "learning_rate": 2.9025392795942942e-05, + "loss": 2.02, + "step": 14497500 + }, + { + "epoch": 41.97, + "learning_rate": 2.9024669148295667e-05, + "loss": 2.0333, + "step": 14498000 + }, + { + "epoch": 41.97, + "learning_rate": 2.9023945500648393e-05, + "loss": 2.0338, + "step": 14498500 + }, + { + "epoch": 41.97, + "learning_rate": 2.9023221853001115e-05, + "loss": 2.0514, + "step": 14499000 + }, + { + "epoch": 41.97, + "learning_rate": 2.9022498205353838e-05, + "loss": 2.0496, + "step": 14499500 + }, + { + "epoch": 41.97, + "learning_rate": 2.902177455770656e-05, + "loss": 2.046, + "step": 14500000 + }, + { + "epoch": 41.97, + "learning_rate": 2.9021050910059282e-05, + "loss": 2.0668, + "step": 14500500 + }, + { + "epoch": 41.97, + "learning_rate": 2.9020327262412008e-05, + "loss": 2.0152, + "step": 14501000 + }, + { + "epoch": 41.98, + "learning_rate": 2.901960361476473e-05, + "loss": 2.0179, + "step": 14501500 + }, + { + "epoch": 41.98, + "learning_rate": 2.9018881414412745e-05, + "loss": 2.0482, + "step": 14502000 + }, + { + "epoch": 41.98, + "learning_rate": 2.9018157766765468e-05, + "loss": 2.0322, + "step": 14502500 + }, + { + "epoch": 41.98, + "learning_rate": 2.9017434119118193e-05, + "loss": 2.05, + "step": 14503000 + }, + { + "epoch": 41.98, + "learning_rate": 2.901671191876621e-05, + "loss": 2.0248, + "step": 14503500 + }, + { + "epoch": 41.98, + "learning_rate": 2.901598827111893e-05, + "loss": 2.0466, + "step": 14504000 + }, + { + "epoch": 41.98, + "learning_rate": 2.9015264623471657e-05, + "loss": 2.0162, + "step": 14504500 + }, + { + "epoch": 41.99, + "learning_rate": 2.901454097582438e-05, + "loss": 2.0288, + "step": 14505000 + }, + { + "epoch": 41.99, + "learning_rate": 2.9013817328177108e-05, + "loss": 2.0321, + "step": 14505500 + }, + { + "epoch": 41.99, + "learning_rate": 2.901309368052983e-05, + "loss": 2.0285, + "step": 14506000 + }, + { + "epoch": 41.99, + "learning_rate": 2.9012370032882552e-05, + "loss": 2.0527, + "step": 14506500 + }, + { + "epoch": 41.99, + "learning_rate": 2.9011646385235275e-05, + "loss": 2.0253, + "step": 14507000 + }, + { + "epoch": 41.99, + "learning_rate": 2.9010922737587997e-05, + "loss": 2.0485, + "step": 14507500 + }, + { + "epoch": 41.99, + "learning_rate": 2.901019908994072e-05, + "loss": 2.0659, + "step": 14508000 + }, + { + "epoch": 42.0, + "learning_rate": 2.9009475442293445e-05, + "loss": 2.0598, + "step": 14508500 + }, + { + "epoch": 42.0, + "learning_rate": 2.9008751794646167e-05, + "loss": 2.0648, + "step": 14509000 + }, + { + "epoch": 42.0, + "learning_rate": 2.900802814699889e-05, + "loss": 2.0386, + "step": 14509500 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.67138533193208, + "eval_accuracy_mlm": 0.6369597842213577, + "eval_accuracy_nsp": 0.8559967551662977, + "eval_loss": 2.166109323501587, + "eval_runtime": 331.1807, + "eval_samples_per_second": 1317.667, + "eval_steps_per_second": 54.904, + "step": 14509824 + }, + { + "epoch": 42.0, + "learning_rate": 2.90073073939422e-05, + "loss": 2.0616, + "step": 14510000 + }, + { + "epoch": 42.0, + "learning_rate": 2.9006583746294924e-05, + "loss": 2.0531, + "step": 14510500 + }, + { + "epoch": 42.0, + "learning_rate": 2.9005862993238236e-05, + "loss": 2.0052, + "step": 14511000 + }, + { + "epoch": 42.0, + "learning_rate": 2.9005139345590958e-05, + "loss": 2.0104, + "step": 14511500 + }, + { + "epoch": 42.01, + "learning_rate": 2.900441569794368e-05, + "loss": 2.0179, + "step": 14512000 + }, + { + "epoch": 42.01, + "learning_rate": 2.9003692050296406e-05, + "loss": 2.0305, + "step": 14512500 + }, + { + "epoch": 42.01, + "learning_rate": 2.900296840264913e-05, + "loss": 2.0107, + "step": 14513000 + }, + { + "epoch": 42.01, + "learning_rate": 2.9002244755001857e-05, + "loss": 2.0175, + "step": 14513500 + }, + { + "epoch": 42.01, + "learning_rate": 2.900152110735458e-05, + "loss": 2.0106, + "step": 14514000 + }, + { + "epoch": 42.01, + "learning_rate": 2.9000798907002595e-05, + "loss": 2.0327, + "step": 14514500 + }, + { + "epoch": 42.01, + "learning_rate": 2.900007525935532e-05, + "loss": 2.0215, + "step": 14515000 + }, + { + "epoch": 42.02, + "learning_rate": 2.8999351611708043e-05, + "loss": 2.0025, + "step": 14515500 + }, + { + "epoch": 42.02, + "learning_rate": 2.8998627964060765e-05, + "loss": 2.0331, + "step": 14516000 + }, + { + "epoch": 42.02, + "learning_rate": 2.8997904316413487e-05, + "loss": 2.0377, + "step": 14516500 + }, + { + "epoch": 42.02, + "learning_rate": 2.899718066876621e-05, + "loss": 2.016, + "step": 14517000 + }, + { + "epoch": 42.02, + "learning_rate": 2.8996457021118932e-05, + "loss": 2.031, + "step": 14517500 + }, + { + "epoch": 42.02, + "learning_rate": 2.8995733373471657e-05, + "loss": 2.0532, + "step": 14518000 + }, + { + "epoch": 42.03, + "learning_rate": 2.899500972582438e-05, + "loss": 2.0345, + "step": 14518500 + }, + { + "epoch": 42.03, + "learning_rate": 2.8994286078177102e-05, + "loss": 2.0238, + "step": 14519000 + }, + { + "epoch": 42.03, + "learning_rate": 2.899356387782512e-05, + "loss": 2.0127, + "step": 14519500 + }, + { + "epoch": 42.03, + "learning_rate": 2.8992840230177843e-05, + "loss": 2.0367, + "step": 14520000 + }, + { + "epoch": 42.03, + "learning_rate": 2.8992116582530572e-05, + "loss": 2.0445, + "step": 14520500 + }, + { + "epoch": 42.03, + "learning_rate": 2.8991392934883294e-05, + "loss": 2.0297, + "step": 14521000 + }, + { + "epoch": 42.03, + "learning_rate": 2.8990669287236017e-05, + "loss": 2.0226, + "step": 14521500 + }, + { + "epoch": 42.04, + "learning_rate": 2.898994563958874e-05, + "loss": 2.0137, + "step": 14522000 + }, + { + "epoch": 42.04, + "learning_rate": 2.898922199194146e-05, + "loss": 2.0168, + "step": 14522500 + }, + { + "epoch": 42.04, + "learning_rate": 2.898849979158948e-05, + "loss": 2.0072, + "step": 14523000 + }, + { + "epoch": 42.04, + "learning_rate": 2.8987776143942202e-05, + "loss": 2.0148, + "step": 14523500 + }, + { + "epoch": 42.04, + "learning_rate": 2.8987052496294924e-05, + "loss": 2.0187, + "step": 14524000 + }, + { + "epoch": 42.04, + "learning_rate": 2.8986328848647647e-05, + "loss": 2.029, + "step": 14524500 + }, + { + "epoch": 42.04, + "learning_rate": 2.8985605201000372e-05, + "loss": 2.0371, + "step": 14525000 + }, + { + "epoch": 42.05, + "learning_rate": 2.8984881553353095e-05, + "loss": 2.0135, + "step": 14525500 + }, + { + "epoch": 42.05, + "learning_rate": 2.898415935300111e-05, + "loss": 2.0311, + "step": 14526000 + }, + { + "epoch": 42.05, + "learning_rate": 2.8983435705353836e-05, + "loss": 2.0066, + "step": 14526500 + }, + { + "epoch": 42.05, + "learning_rate": 2.8982712057706558e-05, + "loss": 2.0221, + "step": 14527000 + }, + { + "epoch": 42.05, + "learning_rate": 2.8981988410059287e-05, + "loss": 2.0123, + "step": 14527500 + }, + { + "epoch": 42.05, + "learning_rate": 2.898126476241201e-05, + "loss": 2.0428, + "step": 14528000 + }, + { + "epoch": 42.05, + "learning_rate": 2.898054111476473e-05, + "loss": 2.0391, + "step": 14528500 + }, + { + "epoch": 42.06, + "learning_rate": 2.8979817467117454e-05, + "loss": 2.0325, + "step": 14529000 + }, + { + "epoch": 42.06, + "learning_rate": 2.8979093819470176e-05, + "loss": 2.054, + "step": 14529500 + }, + { + "epoch": 42.06, + "learning_rate": 2.8978370171822898e-05, + "loss": 2.0372, + "step": 14530000 + }, + { + "epoch": 42.06, + "learning_rate": 2.8977646524175624e-05, + "loss": 2.0119, + "step": 14530500 + }, + { + "epoch": 42.06, + "learning_rate": 2.8976922876528346e-05, + "loss": 2.0362, + "step": 14531000 + }, + { + "epoch": 42.06, + "learning_rate": 2.8976199228881068e-05, + "loss": 2.0255, + "step": 14531500 + }, + { + "epoch": 42.06, + "learning_rate": 2.8975477028529087e-05, + "loss": 1.9955, + "step": 14532000 + }, + { + "epoch": 42.07, + "learning_rate": 2.897475338088181e-05, + "loss": 2.0328, + "step": 14532500 + }, + { + "epoch": 42.07, + "learning_rate": 2.897402973323453e-05, + "loss": 2.0371, + "step": 14533000 + }, + { + "epoch": 42.07, + "learning_rate": 2.8973306085587254e-05, + "loss": 2.0096, + "step": 14533500 + }, + { + "epoch": 42.07, + "learning_rate": 2.8972582437939976e-05, + "loss": 2.0362, + "step": 14534000 + }, + { + "epoch": 42.07, + "learning_rate": 2.89718587902927e-05, + "loss": 2.0386, + "step": 14534500 + }, + { + "epoch": 42.07, + "learning_rate": 2.8971136589940724e-05, + "loss": 2.0148, + "step": 14535000 + }, + { + "epoch": 42.07, + "learning_rate": 2.897041438958874e-05, + "loss": 2.031, + "step": 14535500 + }, + { + "epoch": 42.08, + "learning_rate": 2.8969690741941462e-05, + "loss": 2.0479, + "step": 14536000 + }, + { + "epoch": 42.08, + "learning_rate": 2.8968967094294187e-05, + "loss": 2.0273, + "step": 14536500 + }, + { + "epoch": 42.08, + "learning_rate": 2.896824344664691e-05, + "loss": 2.0247, + "step": 14537000 + }, + { + "epoch": 42.08, + "learning_rate": 2.8967519798999632e-05, + "loss": 2.0239, + "step": 14537500 + }, + { + "epoch": 42.08, + "learning_rate": 2.8966797598647648e-05, + "loss": 2.0116, + "step": 14538000 + }, + { + "epoch": 42.08, + "learning_rate": 2.8966073951000373e-05, + "loss": 2.02, + "step": 14538500 + }, + { + "epoch": 42.08, + "learning_rate": 2.8965350303353095e-05, + "loss": 2.0286, + "step": 14539000 + }, + { + "epoch": 42.09, + "learning_rate": 2.8964626655705818e-05, + "loss": 2.0144, + "step": 14539500 + }, + { + "epoch": 42.09, + "learning_rate": 2.896390300805854e-05, + "loss": 2.0283, + "step": 14540000 + }, + { + "epoch": 42.09, + "learning_rate": 2.8963179360411262e-05, + "loss": 2.044, + "step": 14540500 + }, + { + "epoch": 42.09, + "learning_rate": 2.8962455712763988e-05, + "loss": 2.0191, + "step": 14541000 + }, + { + "epoch": 42.09, + "learning_rate": 2.896173206511671e-05, + "loss": 2.019, + "step": 14541500 + }, + { + "epoch": 42.09, + "learning_rate": 2.8961009864764725e-05, + "loss": 2.0043, + "step": 14542000 + }, + { + "epoch": 42.09, + "learning_rate": 2.8960286217117454e-05, + "loss": 2.016, + "step": 14542500 + }, + { + "epoch": 42.1, + "learning_rate": 2.8959564016765473e-05, + "loss": 2.0118, + "step": 14543000 + }, + { + "epoch": 42.1, + "learning_rate": 2.8958840369118196e-05, + "loss": 2.0224, + "step": 14543500 + }, + { + "epoch": 42.1, + "learning_rate": 2.8958116721470918e-05, + "loss": 2.0456, + "step": 14544000 + }, + { + "epoch": 42.1, + "learning_rate": 2.895739307382364e-05, + "loss": 2.0371, + "step": 14544500 + }, + { + "epoch": 42.1, + "learning_rate": 2.8956669426176362e-05, + "loss": 1.9996, + "step": 14545000 + }, + { + "epoch": 42.1, + "learning_rate": 2.895594722582438e-05, + "loss": 2.0171, + "step": 14545500 + }, + { + "epoch": 42.1, + "learning_rate": 2.8955223578177104e-05, + "loss": 2.0375, + "step": 14546000 + }, + { + "epoch": 42.11, + "learning_rate": 2.8954499930529826e-05, + "loss": 2.0182, + "step": 14546500 + }, + { + "epoch": 42.11, + "learning_rate": 2.8953777730177845e-05, + "loss": 2.0285, + "step": 14547000 + }, + { + "epoch": 42.11, + "learning_rate": 2.8953054082530567e-05, + "loss": 2.0496, + "step": 14547500 + }, + { + "epoch": 42.11, + "learning_rate": 2.895233043488329e-05, + "loss": 2.0249, + "step": 14548000 + }, + { + "epoch": 42.11, + "learning_rate": 2.895160678723601e-05, + "loss": 2.0212, + "step": 14548500 + }, + { + "epoch": 42.11, + "learning_rate": 2.8950883139588737e-05, + "loss": 2.055, + "step": 14549000 + }, + { + "epoch": 42.11, + "learning_rate": 2.8950160939236753e-05, + "loss": 2.0435, + "step": 14549500 + }, + { + "epoch": 42.12, + "learning_rate": 2.8949437291589475e-05, + "loss": 2.0248, + "step": 14550000 + }, + { + "epoch": 42.12, + "learning_rate": 2.8948713643942204e-05, + "loss": 2.0364, + "step": 14550500 + }, + { + "epoch": 42.12, + "learning_rate": 2.8947989996294926e-05, + "loss": 2.0316, + "step": 14551000 + }, + { + "epoch": 42.12, + "learning_rate": 2.894726634864765e-05, + "loss": 2.0169, + "step": 14551500 + }, + { + "epoch": 42.12, + "learning_rate": 2.8946542701000374e-05, + "loss": 2.0268, + "step": 14552000 + }, + { + "epoch": 42.12, + "learning_rate": 2.8945819053353096e-05, + "loss": 2.0256, + "step": 14552500 + }, + { + "epoch": 42.12, + "learning_rate": 2.894509540570582e-05, + "loss": 2.0215, + "step": 14553000 + }, + { + "epoch": 42.13, + "learning_rate": 2.894437175805854e-05, + "loss": 2.0051, + "step": 14553500 + }, + { + "epoch": 42.13, + "learning_rate": 2.8943648110411263e-05, + "loss": 2.0326, + "step": 14554000 + }, + { + "epoch": 42.13, + "learning_rate": 2.894292446276399e-05, + "loss": 2.0293, + "step": 14554500 + }, + { + "epoch": 42.13, + "learning_rate": 2.894220081511671e-05, + "loss": 2.0201, + "step": 14555000 + }, + { + "epoch": 42.13, + "learning_rate": 2.8941477167469433e-05, + "loss": 2.0175, + "step": 14555500 + }, + { + "epoch": 42.13, + "learning_rate": 2.8940754967117452e-05, + "loss": 2.0304, + "step": 14556000 + }, + { + "epoch": 42.14, + "learning_rate": 2.8940031319470174e-05, + "loss": 2.0125, + "step": 14556500 + }, + { + "epoch": 42.14, + "learning_rate": 2.8939307671822896e-05, + "loss": 2.0398, + "step": 14557000 + }, + { + "epoch": 42.14, + "learning_rate": 2.8938584024175625e-05, + "loss": 2.0403, + "step": 14557500 + }, + { + "epoch": 42.14, + "learning_rate": 2.8937860376528348e-05, + "loss": 2.032, + "step": 14558000 + }, + { + "epoch": 42.14, + "learning_rate": 2.8937138176176367e-05, + "loss": 2.0372, + "step": 14558500 + }, + { + "epoch": 42.14, + "learning_rate": 2.893641452852909e-05, + "loss": 2.0376, + "step": 14559000 + }, + { + "epoch": 42.14, + "learning_rate": 2.893569088088181e-05, + "loss": 2.05, + "step": 14559500 + }, + { + "epoch": 42.15, + "learning_rate": 2.8934968680529827e-05, + "loss": 2.0376, + "step": 14560000 + }, + { + "epoch": 42.15, + "learning_rate": 2.8934245032882552e-05, + "loss": 2.0204, + "step": 14560500 + }, + { + "epoch": 42.15, + "learning_rate": 2.8933521385235274e-05, + "loss": 2.0459, + "step": 14561000 + }, + { + "epoch": 42.15, + "learning_rate": 2.893279918488329e-05, + "loss": 2.0417, + "step": 14561500 + }, + { + "epoch": 42.15, + "learning_rate": 2.8932075537236016e-05, + "loss": 2.0172, + "step": 14562000 + }, + { + "epoch": 42.15, + "learning_rate": 2.8931351889588738e-05, + "loss": 2.0186, + "step": 14562500 + }, + { + "epoch": 42.15, + "learning_rate": 2.893062824194146e-05, + "loss": 2.0129, + "step": 14563000 + }, + { + "epoch": 42.16, + "learning_rate": 2.8929904594294182e-05, + "loss": 2.0572, + "step": 14563500 + }, + { + "epoch": 42.16, + "learning_rate": 2.8929180946646905e-05, + "loss": 2.0532, + "step": 14564000 + }, + { + "epoch": 42.16, + "learning_rate": 2.8928457298999627e-05, + "loss": 2.0247, + "step": 14564500 + }, + { + "epoch": 42.16, + "learning_rate": 2.8927733651352356e-05, + "loss": 2.0218, + "step": 14565000 + }, + { + "epoch": 42.16, + "learning_rate": 2.8927010003705078e-05, + "loss": 2.0175, + "step": 14565500 + }, + { + "epoch": 42.16, + "learning_rate": 2.8926286356057804e-05, + "loss": 2.0425, + "step": 14566000 + }, + { + "epoch": 42.16, + "learning_rate": 2.8925562708410526e-05, + "loss": 2.0121, + "step": 14566500 + }, + { + "epoch": 42.17, + "learning_rate": 2.8924839060763248e-05, + "loss": 2.0439, + "step": 14567000 + }, + { + "epoch": 42.17, + "learning_rate": 2.892411541311597e-05, + "loss": 2.0145, + "step": 14567500 + }, + { + "epoch": 42.17, + "learning_rate": 2.8923391765468693e-05, + "loss": 2.0306, + "step": 14568000 + }, + { + "epoch": 42.17, + "learning_rate": 2.8922668117821418e-05, + "loss": 2.0074, + "step": 14568500 + }, + { + "epoch": 42.17, + "learning_rate": 2.8921945917469434e-05, + "loss": 2.0232, + "step": 14569000 + }, + { + "epoch": 42.17, + "learning_rate": 2.8921222269822156e-05, + "loss": 2.0061, + "step": 14569500 + }, + { + "epoch": 42.17, + "learning_rate": 2.8920498622174878e-05, + "loss": 2.0149, + "step": 14570000 + }, + { + "epoch": 42.18, + "learning_rate": 2.8919776421822897e-05, + "loss": 2.0109, + "step": 14570500 + }, + { + "epoch": 42.18, + "learning_rate": 2.891905277417562e-05, + "loss": 2.0483, + "step": 14571000 + }, + { + "epoch": 42.18, + "learning_rate": 2.891832912652834e-05, + "loss": 2.0157, + "step": 14571500 + }, + { + "epoch": 42.18, + "learning_rate": 2.8917605478881067e-05, + "loss": 2.0475, + "step": 14572000 + }, + { + "epoch": 42.18, + "learning_rate": 2.8916881831233793e-05, + "loss": 2.0168, + "step": 14572500 + }, + { + "epoch": 42.18, + "learning_rate": 2.891615818358652e-05, + "loss": 2.0433, + "step": 14573000 + }, + { + "epoch": 42.18, + "learning_rate": 2.891543453593924e-05, + "loss": 2.0381, + "step": 14573500 + }, + { + "epoch": 42.19, + "learning_rate": 2.8914710888291963e-05, + "loss": 2.0413, + "step": 14574000 + }, + { + "epoch": 42.19, + "learning_rate": 2.8913987240644685e-05, + "loss": 2.0472, + "step": 14574500 + }, + { + "epoch": 42.19, + "learning_rate": 2.8913265040292704e-05, + "loss": 2.0397, + "step": 14575000 + }, + { + "epoch": 42.19, + "learning_rate": 2.8912541392645426e-05, + "loss": 2.0539, + "step": 14575500 + }, + { + "epoch": 42.19, + "learning_rate": 2.8911819192293442e-05, + "loss": 2.0282, + "step": 14576000 + }, + { + "epoch": 42.19, + "learning_rate": 2.891109699194146e-05, + "loss": 2.0371, + "step": 14576500 + }, + { + "epoch": 42.19, + "learning_rate": 2.8910373344294183e-05, + "loss": 2.0107, + "step": 14577000 + }, + { + "epoch": 42.2, + "learning_rate": 2.8909649696646905e-05, + "loss": 2.0172, + "step": 14577500 + }, + { + "epoch": 42.2, + "learning_rate": 2.890892604899963e-05, + "loss": 2.0064, + "step": 14578000 + }, + { + "epoch": 42.2, + "learning_rate": 2.8908202401352353e-05, + "loss": 2.0421, + "step": 14578500 + }, + { + "epoch": 42.2, + "learning_rate": 2.8907478753705075e-05, + "loss": 2.0333, + "step": 14579000 + }, + { + "epoch": 42.2, + "learning_rate": 2.8906755106057798e-05, + "loss": 2.0206, + "step": 14579500 + }, + { + "epoch": 42.2, + "learning_rate": 2.8906031458410527e-05, + "loss": 2.0132, + "step": 14580000 + }, + { + "epoch": 42.2, + "learning_rate": 2.890530781076325e-05, + "loss": 2.0151, + "step": 14580500 + }, + { + "epoch": 42.21, + "learning_rate": 2.890458416311597e-05, + "loss": 2.0154, + "step": 14581000 + }, + { + "epoch": 42.21, + "learning_rate": 2.8903860515468693e-05, + "loss": 2.0375, + "step": 14581500 + }, + { + "epoch": 42.21, + "learning_rate": 2.890313686782142e-05, + "loss": 2.0176, + "step": 14582000 + }, + { + "epoch": 42.21, + "learning_rate": 2.890241322017414e-05, + "loss": 2.0368, + "step": 14582500 + }, + { + "epoch": 42.21, + "learning_rate": 2.8901691019822157e-05, + "loss": 2.0381, + "step": 14583000 + }, + { + "epoch": 42.21, + "learning_rate": 2.8900968819470176e-05, + "loss": 2.0342, + "step": 14583500 + }, + { + "epoch": 42.21, + "learning_rate": 2.8900245171822898e-05, + "loss": 2.0247, + "step": 14584000 + }, + { + "epoch": 42.22, + "learning_rate": 2.8899522971470917e-05, + "loss": 2.0353, + "step": 14584500 + }, + { + "epoch": 42.22, + "learning_rate": 2.889879932382364e-05, + "loss": 2.0347, + "step": 14585000 + }, + { + "epoch": 42.22, + "learning_rate": 2.889807567617636e-05, + "loss": 2.0215, + "step": 14585500 + }, + { + "epoch": 42.22, + "learning_rate": 2.8897352028529084e-05, + "loss": 2.0529, + "step": 14586000 + }, + { + "epoch": 42.22, + "learning_rate": 2.8896628380881806e-05, + "loss": 2.0232, + "step": 14586500 + }, + { + "epoch": 42.22, + "learning_rate": 2.889590473323453e-05, + "loss": 2.0138, + "step": 14587000 + }, + { + "epoch": 42.22, + "learning_rate": 2.8895181085587257e-05, + "loss": 2.0291, + "step": 14587500 + }, + { + "epoch": 42.23, + "learning_rate": 2.8894457437939983e-05, + "loss": 2.0303, + "step": 14588000 + }, + { + "epoch": 42.23, + "learning_rate": 2.8893733790292705e-05, + "loss": 2.0334, + "step": 14588500 + }, + { + "epoch": 42.23, + "learning_rate": 2.8893010142645427e-05, + "loss": 2.0518, + "step": 14589000 + }, + { + "epoch": 42.23, + "learning_rate": 2.889228649499815e-05, + "loss": 2.0404, + "step": 14589500 + }, + { + "epoch": 42.23, + "learning_rate": 2.889156284735087e-05, + "loss": 2.0278, + "step": 14590000 + }, + { + "epoch": 42.23, + "learning_rate": 2.8890839199703594e-05, + "loss": 2.0285, + "step": 14590500 + }, + { + "epoch": 42.23, + "learning_rate": 2.889011555205632e-05, + "loss": 2.0256, + "step": 14591000 + }, + { + "epoch": 42.24, + "learning_rate": 2.8889391904409042e-05, + "loss": 2.0372, + "step": 14591500 + }, + { + "epoch": 42.24, + "learning_rate": 2.8888669704057057e-05, + "loss": 2.0415, + "step": 14592000 + }, + { + "epoch": 42.24, + "learning_rate": 2.8887947503705076e-05, + "loss": 2.0073, + "step": 14592500 + }, + { + "epoch": 42.24, + "learning_rate": 2.88872238560578e-05, + "loss": 2.0372, + "step": 14593000 + }, + { + "epoch": 42.24, + "learning_rate": 2.8886501655705817e-05, + "loss": 2.0291, + "step": 14593500 + }, + { + "epoch": 42.24, + "learning_rate": 2.888577800805854e-05, + "loss": 2.0486, + "step": 14594000 + }, + { + "epoch": 42.25, + "learning_rate": 2.8885054360411262e-05, + "loss": 2.0247, + "step": 14594500 + }, + { + "epoch": 42.25, + "learning_rate": 2.888433071276399e-05, + "loss": 2.0197, + "step": 14595000 + }, + { + "epoch": 42.25, + "learning_rate": 2.8883607065116713e-05, + "loss": 2.019, + "step": 14595500 + }, + { + "epoch": 42.25, + "learning_rate": 2.8882883417469435e-05, + "loss": 2.011, + "step": 14596000 + }, + { + "epoch": 42.25, + "learning_rate": 2.8882159769822158e-05, + "loss": 2.0147, + "step": 14596500 + }, + { + "epoch": 42.25, + "learning_rate": 2.8881436122174883e-05, + "loss": 2.0478, + "step": 14597000 + }, + { + "epoch": 42.25, + "learning_rate": 2.88807139218229e-05, + "loss": 2.0582, + "step": 14597500 + }, + { + "epoch": 42.26, + "learning_rate": 2.887999027417562e-05, + "loss": 2.043, + "step": 14598000 + }, + { + "epoch": 42.26, + "learning_rate": 2.8879266626528347e-05, + "loss": 2.0323, + "step": 14598500 + }, + { + "epoch": 42.26, + "learning_rate": 2.887854297888107e-05, + "loss": 2.0241, + "step": 14599000 + }, + { + "epoch": 42.26, + "learning_rate": 2.887781933123379e-05, + "loss": 2.0218, + "step": 14599500 + }, + { + "epoch": 42.26, + "learning_rate": 2.8877095683586513e-05, + "loss": 2.0479, + "step": 14600000 + }, + { + "epoch": 42.26, + "learning_rate": 2.8876372035939236e-05, + "loss": 2.0562, + "step": 14600500 + }, + { + "epoch": 42.26, + "learning_rate": 2.8875648388291958e-05, + "loss": 2.0333, + "step": 14601000 + }, + { + "epoch": 42.27, + "learning_rate": 2.8874924740644683e-05, + "loss": 2.0431, + "step": 14601500 + }, + { + "epoch": 42.27, + "learning_rate": 2.887420109299741e-05, + "loss": 2.0481, + "step": 14602000 + }, + { + "epoch": 42.27, + "learning_rate": 2.8873477445350135e-05, + "loss": 2.0207, + "step": 14602500 + }, + { + "epoch": 42.27, + "learning_rate": 2.8872753797702857e-05, + "loss": 2.0206, + "step": 14603000 + }, + { + "epoch": 42.27, + "learning_rate": 2.887203015005558e-05, + "loss": 2.0478, + "step": 14603500 + }, + { + "epoch": 42.27, + "learning_rate": 2.88713065024083e-05, + "loss": 2.0345, + "step": 14604000 + }, + { + "epoch": 42.27, + "learning_rate": 2.887058430205632e-05, + "loss": 2.0399, + "step": 14604500 + }, + { + "epoch": 42.28, + "learning_rate": 2.8869860654409043e-05, + "loss": 2.016, + "step": 14605000 + }, + { + "epoch": 42.28, + "learning_rate": 2.8869137006761765e-05, + "loss": 2.0391, + "step": 14605500 + }, + { + "epoch": 42.28, + "learning_rate": 2.8868413359114487e-05, + "loss": 2.0145, + "step": 14606000 + }, + { + "epoch": 42.28, + "learning_rate": 2.886768971146721e-05, + "loss": 2.0103, + "step": 14606500 + }, + { + "epoch": 42.28, + "learning_rate": 2.8866966063819935e-05, + "loss": 2.0311, + "step": 14607000 + }, + { + "epoch": 42.28, + "learning_rate": 2.8866242416172657e-05, + "loss": 2.0282, + "step": 14607500 + }, + { + "epoch": 42.28, + "learning_rate": 2.886551876852538e-05, + "loss": 2.0024, + "step": 14608000 + }, + { + "epoch": 42.29, + "learning_rate": 2.88647951208781e-05, + "loss": 2.0325, + "step": 14608500 + }, + { + "epoch": 42.29, + "learning_rate": 2.886407292052612e-05, + "loss": 2.0257, + "step": 14609000 + }, + { + "epoch": 42.29, + "learning_rate": 2.8863350720174143e-05, + "loss": 2.0526, + "step": 14609500 + }, + { + "epoch": 42.29, + "learning_rate": 2.8862628519822162e-05, + "loss": 2.0654, + "step": 14610000 + }, + { + "epoch": 42.29, + "learning_rate": 2.8861904872174884e-05, + "loss": 2.0396, + "step": 14610500 + }, + { + "epoch": 42.29, + "learning_rate": 2.8861181224527606e-05, + "loss": 2.034, + "step": 14611000 + }, + { + "epoch": 42.29, + "learning_rate": 2.886045757688033e-05, + "loss": 2.0542, + "step": 14611500 + }, + { + "epoch": 42.3, + "learning_rate": 2.885973392923305e-05, + "loss": 2.0368, + "step": 14612000 + }, + { + "epoch": 42.3, + "learning_rate": 2.8859010281585773e-05, + "loss": 2.0409, + "step": 14612500 + }, + { + "epoch": 42.3, + "learning_rate": 2.88582866339385e-05, + "loss": 2.0308, + "step": 14613000 + }, + { + "epoch": 42.3, + "learning_rate": 2.885756298629122e-05, + "loss": 2.0325, + "step": 14613500 + }, + { + "epoch": 42.3, + "learning_rate": 2.8856839338643943e-05, + "loss": 2.015, + "step": 14614000 + }, + { + "epoch": 42.3, + "learning_rate": 2.8856117138291962e-05, + "loss": 2.0039, + "step": 14614500 + }, + { + "epoch": 42.3, + "learning_rate": 2.8855394937939978e-05, + "loss": 2.0391, + "step": 14615000 + }, + { + "epoch": 42.31, + "learning_rate": 2.88546712902927e-05, + "loss": 2.0429, + "step": 14615500 + }, + { + "epoch": 42.31, + "learning_rate": 2.8853947642645422e-05, + "loss": 2.0443, + "step": 14616000 + }, + { + "epoch": 42.31, + "learning_rate": 2.8853223994998148e-05, + "loss": 2.034, + "step": 14616500 + }, + { + "epoch": 42.31, + "learning_rate": 2.8852500347350873e-05, + "loss": 2.0329, + "step": 14617000 + }, + { + "epoch": 42.31, + "learning_rate": 2.8851778146998892e-05, + "loss": 2.0302, + "step": 14617500 + }, + { + "epoch": 42.31, + "learning_rate": 2.8851054499351614e-05, + "loss": 2.0164, + "step": 14618000 + }, + { + "epoch": 42.31, + "learning_rate": 2.8850330851704337e-05, + "loss": 2.0242, + "step": 14618500 + }, + { + "epoch": 42.32, + "learning_rate": 2.8849607204057062e-05, + "loss": 2.036, + "step": 14619000 + }, + { + "epoch": 42.32, + "learning_rate": 2.8848883556409784e-05, + "loss": 2.0345, + "step": 14619500 + }, + { + "epoch": 42.32, + "learning_rate": 2.8848159908762507e-05, + "loss": 2.0367, + "step": 14620000 + }, + { + "epoch": 42.32, + "learning_rate": 2.884743626111523e-05, + "loss": 2.0217, + "step": 14620500 + }, + { + "epoch": 42.32, + "learning_rate": 2.8846714060763248e-05, + "loss": 2.0322, + "step": 14621000 + }, + { + "epoch": 42.32, + "learning_rate": 2.884599041311597e-05, + "loss": 2.0047, + "step": 14621500 + }, + { + "epoch": 42.32, + "learning_rate": 2.8845266765468692e-05, + "loss": 2.0437, + "step": 14622000 + }, + { + "epoch": 42.33, + "learning_rate": 2.8844543117821415e-05, + "loss": 2.0325, + "step": 14622500 + }, + { + "epoch": 42.33, + "learning_rate": 2.8843819470174137e-05, + "loss": 2.0025, + "step": 14623000 + }, + { + "epoch": 42.33, + "learning_rate": 2.8843095822526862e-05, + "loss": 2.0427, + "step": 14623500 + }, + { + "epoch": 42.33, + "learning_rate": 2.8842372174879585e-05, + "loss": 2.0531, + "step": 14624000 + }, + { + "epoch": 42.33, + "learning_rate": 2.8841648527232314e-05, + "loss": 2.0363, + "step": 14624500 + }, + { + "epoch": 42.33, + "learning_rate": 2.8840924879585036e-05, + "loss": 2.0558, + "step": 14625000 + }, + { + "epoch": 42.33, + "learning_rate": 2.884020267923305e-05, + "loss": 2.0515, + "step": 14625500 + }, + { + "epoch": 42.34, + "learning_rate": 2.8839479031585777e-05, + "loss": 2.0355, + "step": 14626000 + }, + { + "epoch": 42.34, + "learning_rate": 2.88387553839385e-05, + "loss": 2.0275, + "step": 14626500 + }, + { + "epoch": 42.34, + "learning_rate": 2.883803173629122e-05, + "loss": 2.0335, + "step": 14627000 + }, + { + "epoch": 42.34, + "learning_rate": 2.8837309535939237e-05, + "loss": 2.0268, + "step": 14627500 + }, + { + "epoch": 42.34, + "learning_rate": 2.8836585888291963e-05, + "loss": 2.0286, + "step": 14628000 + }, + { + "epoch": 42.34, + "learning_rate": 2.8835862240644685e-05, + "loss": 2.0371, + "step": 14628500 + }, + { + "epoch": 42.34, + "learning_rate": 2.8835138592997407e-05, + "loss": 2.0327, + "step": 14629000 + }, + { + "epoch": 42.35, + "learning_rate": 2.883441494535013e-05, + "loss": 2.0415, + "step": 14629500 + }, + { + "epoch": 42.35, + "learning_rate": 2.883369129770285e-05, + "loss": 2.0355, + "step": 14630000 + }, + { + "epoch": 42.35, + "learning_rate": 2.8832967650055577e-05, + "loss": 2.0367, + "step": 14630500 + }, + { + "epoch": 42.35, + "learning_rate": 2.88322440024083e-05, + "loss": 2.0233, + "step": 14631000 + }, + { + "epoch": 42.35, + "learning_rate": 2.8831520354761022e-05, + "loss": 2.0414, + "step": 14631500 + }, + { + "epoch": 42.35, + "learning_rate": 2.883079670711375e-05, + "loss": 2.0284, + "step": 14632000 + }, + { + "epoch": 42.36, + "learning_rate": 2.8830073059466473e-05, + "loss": 2.0409, + "step": 14632500 + }, + { + "epoch": 42.36, + "learning_rate": 2.8829349411819195e-05, + "loss": 2.0436, + "step": 14633000 + }, + { + "epoch": 42.36, + "learning_rate": 2.8828625764171917e-05, + "loss": 2.035, + "step": 14633500 + }, + { + "epoch": 42.36, + "learning_rate": 2.882790211652464e-05, + "loss": 2.0391, + "step": 14634000 + }, + { + "epoch": 42.36, + "learning_rate": 2.8827181363467952e-05, + "loss": 2.0495, + "step": 14634500 + }, + { + "epoch": 42.36, + "learning_rate": 2.8826457715820678e-05, + "loss": 2.0459, + "step": 14635000 + }, + { + "epoch": 42.36, + "learning_rate": 2.88257340681734e-05, + "loss": 2.0351, + "step": 14635500 + }, + { + "epoch": 42.37, + "learning_rate": 2.8825010420526122e-05, + "loss": 2.0164, + "step": 14636000 + }, + { + "epoch": 42.37, + "learning_rate": 2.8824286772878844e-05, + "loss": 2.0443, + "step": 14636500 + }, + { + "epoch": 42.37, + "learning_rate": 2.8823564572526863e-05, + "loss": 2.0393, + "step": 14637000 + }, + { + "epoch": 42.37, + "learning_rate": 2.882284237217488e-05, + "loss": 2.0353, + "step": 14637500 + }, + { + "epoch": 42.37, + "learning_rate": 2.88221187245276e-05, + "loss": 2.0414, + "step": 14638000 + }, + { + "epoch": 42.37, + "learning_rate": 2.8821395076880327e-05, + "loss": 2.0463, + "step": 14638500 + }, + { + "epoch": 42.37, + "learning_rate": 2.882067142923305e-05, + "loss": 2.0378, + "step": 14639000 + }, + { + "epoch": 42.38, + "learning_rate": 2.8819947781585778e-05, + "loss": 2.055, + "step": 14639500 + }, + { + "epoch": 42.38, + "learning_rate": 2.88192241339385e-05, + "loss": 2.0415, + "step": 14640000 + }, + { + "epoch": 42.38, + "learning_rate": 2.8818500486291222e-05, + "loss": 2.0418, + "step": 14640500 + }, + { + "epoch": 42.38, + "learning_rate": 2.8817776838643945e-05, + "loss": 2.0335, + "step": 14641000 + }, + { + "epoch": 42.38, + "learning_rate": 2.8817053190996667e-05, + "loss": 2.0252, + "step": 14641500 + }, + { + "epoch": 42.38, + "learning_rate": 2.881632954334939e-05, + "loss": 2.0314, + "step": 14642000 + }, + { + "epoch": 42.38, + "learning_rate": 2.8815607342997408e-05, + "loss": 2.0345, + "step": 14642500 + }, + { + "epoch": 42.39, + "learning_rate": 2.881488369535013e-05, + "loss": 2.0313, + "step": 14643000 + }, + { + "epoch": 42.39, + "learning_rate": 2.881416149499815e-05, + "loss": 2.0212, + "step": 14643500 + }, + { + "epoch": 42.39, + "learning_rate": 2.8813439294646165e-05, + "loss": 2.0339, + "step": 14644000 + }, + { + "epoch": 42.39, + "learning_rate": 2.881271564699889e-05, + "loss": 2.0343, + "step": 14644500 + }, + { + "epoch": 42.39, + "learning_rate": 2.8811991999351613e-05, + "loss": 2.0495, + "step": 14645000 + }, + { + "epoch": 42.39, + "learning_rate": 2.8811268351704335e-05, + "loss": 2.0556, + "step": 14645500 + }, + { + "epoch": 42.39, + "learning_rate": 2.8810544704057057e-05, + "loss": 2.048, + "step": 14646000 + }, + { + "epoch": 42.4, + "learning_rate": 2.880982105640978e-05, + "loss": 2.0214, + "step": 14646500 + }, + { + "epoch": 42.4, + "learning_rate": 2.880909740876251e-05, + "loss": 2.0202, + "step": 14647000 + }, + { + "epoch": 42.4, + "learning_rate": 2.880837376111523e-05, + "loss": 2.0386, + "step": 14647500 + }, + { + "epoch": 42.4, + "learning_rate": 2.8807650113467953e-05, + "loss": 2.0342, + "step": 14648000 + }, + { + "epoch": 42.4, + "learning_rate": 2.880692646582068e-05, + "loss": 2.0299, + "step": 14648500 + }, + { + "epoch": 42.4, + "learning_rate": 2.88062028181734e-05, + "loss": 2.0389, + "step": 14649000 + }, + { + "epoch": 42.4, + "learning_rate": 2.8805479170526123e-05, + "loss": 2.04, + "step": 14649500 + }, + { + "epoch": 42.41, + "learning_rate": 2.8804755522878845e-05, + "loss": 2.0533, + "step": 14650000 + }, + { + "epoch": 42.41, + "learning_rate": 2.8804034769822157e-05, + "loss": 2.0359, + "step": 14650500 + }, + { + "epoch": 42.41, + "learning_rate": 2.880331112217488e-05, + "loss": 2.027, + "step": 14651000 + }, + { + "epoch": 42.41, + "learning_rate": 2.8802587474527605e-05, + "loss": 2.0216, + "step": 14651500 + }, + { + "epoch": 42.41, + "learning_rate": 2.8801863826880327e-05, + "loss": 2.0488, + "step": 14652000 + }, + { + "epoch": 42.41, + "learning_rate": 2.880114017923305e-05, + "loss": 2.025, + "step": 14652500 + }, + { + "epoch": 42.41, + "learning_rate": 2.8800416531585772e-05, + "loss": 2.0462, + "step": 14653000 + }, + { + "epoch": 42.42, + "learning_rate": 2.8799692883938494e-05, + "loss": 2.0393, + "step": 14653500 + }, + { + "epoch": 42.42, + "learning_rate": 2.8798969236291216e-05, + "loss": 2.0244, + "step": 14654000 + }, + { + "epoch": 42.42, + "learning_rate": 2.8798247035939242e-05, + "loss": 2.031, + "step": 14654500 + }, + { + "epoch": 42.42, + "learning_rate": 2.8797523388291964e-05, + "loss": 2.0585, + "step": 14655000 + }, + { + "epoch": 42.42, + "learning_rate": 2.8796799740644687e-05, + "loss": 2.0137, + "step": 14655500 + }, + { + "epoch": 42.42, + "learning_rate": 2.879607609299741e-05, + "loss": 2.0618, + "step": 14656000 + }, + { + "epoch": 42.42, + "learning_rate": 2.879535244535013e-05, + "loss": 2.0288, + "step": 14656500 + }, + { + "epoch": 42.43, + "learning_rate": 2.8794628797702857e-05, + "loss": 2.0301, + "step": 14657000 + }, + { + "epoch": 42.43, + "learning_rate": 2.879390515005558e-05, + "loss": 2.0464, + "step": 14657500 + }, + { + "epoch": 42.43, + "learning_rate": 2.8793182949703594e-05, + "loss": 2.0286, + "step": 14658000 + }, + { + "epoch": 42.43, + "learning_rate": 2.8792459302056317e-05, + "loss": 2.0266, + "step": 14658500 + }, + { + "epoch": 42.43, + "learning_rate": 2.8791735654409042e-05, + "loss": 2.0288, + "step": 14659000 + }, + { + "epoch": 42.43, + "learning_rate": 2.8791013454057058e-05, + "loss": 2.0289, + "step": 14659500 + }, + { + "epoch": 42.43, + "learning_rate": 2.8790291253705077e-05, + "loss": 2.0432, + "step": 14660000 + }, + { + "epoch": 42.44, + "learning_rate": 2.87895676060578e-05, + "loss": 2.0491, + "step": 14660500 + }, + { + "epoch": 42.44, + "learning_rate": 2.878884395841052e-05, + "loss": 2.029, + "step": 14661000 + }, + { + "epoch": 42.44, + "learning_rate": 2.8788120310763244e-05, + "loss": 2.0458, + "step": 14661500 + }, + { + "epoch": 42.44, + "learning_rate": 2.878739666311597e-05, + "loss": 2.0419, + "step": 14662000 + }, + { + "epoch": 42.44, + "learning_rate": 2.8786673015468695e-05, + "loss": 2.0454, + "step": 14662500 + }, + { + "epoch": 42.44, + "learning_rate": 2.878594936782142e-05, + "loss": 2.0443, + "step": 14663000 + }, + { + "epoch": 42.44, + "learning_rate": 2.8785225720174143e-05, + "loss": 2.033, + "step": 14663500 + }, + { + "epoch": 42.45, + "learning_rate": 2.8784502072526865e-05, + "loss": 2.0488, + "step": 14664000 + }, + { + "epoch": 42.45, + "learning_rate": 2.878377987217488e-05, + "loss": 2.0515, + "step": 14664500 + }, + { + "epoch": 42.45, + "learning_rate": 2.8783056224527606e-05, + "loss": 2.028, + "step": 14665000 + }, + { + "epoch": 42.45, + "learning_rate": 2.8782332576880328e-05, + "loss": 2.0513, + "step": 14665500 + }, + { + "epoch": 42.45, + "learning_rate": 2.878160892923305e-05, + "loss": 2.0313, + "step": 14666000 + }, + { + "epoch": 42.45, + "learning_rate": 2.8780885281585773e-05, + "loss": 2.044, + "step": 14666500 + }, + { + "epoch": 42.45, + "learning_rate": 2.8780161633938495e-05, + "loss": 2.0136, + "step": 14667000 + }, + { + "epoch": 42.46, + "learning_rate": 2.877943798629122e-05, + "loss": 2.0236, + "step": 14667500 + }, + { + "epoch": 42.46, + "learning_rate": 2.8778714338643943e-05, + "loss": 2.0343, + "step": 14668000 + }, + { + "epoch": 42.46, + "learning_rate": 2.8777990690996665e-05, + "loss": 2.0179, + "step": 14668500 + }, + { + "epoch": 42.46, + "learning_rate": 2.877726849064468e-05, + "loss": 2.0196, + "step": 14669000 + }, + { + "epoch": 42.46, + "learning_rate": 2.877654484299741e-05, + "loss": 2.0302, + "step": 14669500 + }, + { + "epoch": 42.46, + "learning_rate": 2.8775821195350132e-05, + "loss": 2.0326, + "step": 14670000 + }, + { + "epoch": 42.47, + "learning_rate": 2.8775097547702857e-05, + "loss": 2.0464, + "step": 14670500 + }, + { + "epoch": 42.47, + "learning_rate": 2.8774375347350873e-05, + "loss": 2.0277, + "step": 14671000 + }, + { + "epoch": 42.47, + "learning_rate": 2.8773651699703595e-05, + "loss": 2.0477, + "step": 14671500 + }, + { + "epoch": 42.47, + "learning_rate": 2.877292805205632e-05, + "loss": 2.0489, + "step": 14672000 + }, + { + "epoch": 42.47, + "learning_rate": 2.8772204404409043e-05, + "loss": 2.044, + "step": 14672500 + }, + { + "epoch": 42.47, + "learning_rate": 2.8771480756761765e-05, + "loss": 2.0357, + "step": 14673000 + }, + { + "epoch": 42.47, + "learning_rate": 2.8770757109114488e-05, + "loss": 2.0248, + "step": 14673500 + }, + { + "epoch": 42.48, + "learning_rate": 2.877003346146721e-05, + "loss": 2.0478, + "step": 14674000 + }, + { + "epoch": 42.48, + "learning_rate": 2.8769309813819932e-05, + "loss": 2.0463, + "step": 14674500 + }, + { + "epoch": 42.48, + "learning_rate": 2.8768586166172658e-05, + "loss": 2.0189, + "step": 14675000 + }, + { + "epoch": 42.48, + "learning_rate": 2.8767863965820673e-05, + "loss": 2.0427, + "step": 14675500 + }, + { + "epoch": 42.48, + "learning_rate": 2.8767140318173395e-05, + "loss": 2.0283, + "step": 14676000 + }, + { + "epoch": 42.48, + "learning_rate": 2.876641667052612e-05, + "loss": 2.0448, + "step": 14676500 + }, + { + "epoch": 42.48, + "learning_rate": 2.8765693022878847e-05, + "loss": 2.0218, + "step": 14677000 + }, + { + "epoch": 42.49, + "learning_rate": 2.8764969375231572e-05, + "loss": 2.0406, + "step": 14677500 + }, + { + "epoch": 42.49, + "learning_rate": 2.8764248622174885e-05, + "loss": 2.0623, + "step": 14678000 + }, + { + "epoch": 42.49, + "learning_rate": 2.8763524974527607e-05, + "loss": 2.0276, + "step": 14678500 + }, + { + "epoch": 42.49, + "learning_rate": 2.876280132688033e-05, + "loss": 2.0386, + "step": 14679000 + }, + { + "epoch": 42.49, + "learning_rate": 2.876207767923305e-05, + "loss": 2.0367, + "step": 14679500 + }, + { + "epoch": 42.49, + "learning_rate": 2.8761354031585774e-05, + "loss": 2.0411, + "step": 14680000 + }, + { + "epoch": 42.49, + "learning_rate": 2.8760630383938496e-05, + "loss": 2.0414, + "step": 14680500 + }, + { + "epoch": 42.5, + "learning_rate": 2.875990673629122e-05, + "loss": 2.0271, + "step": 14681000 + }, + { + "epoch": 42.5, + "learning_rate": 2.8759183088643944e-05, + "loss": 2.0272, + "step": 14681500 + }, + { + "epoch": 42.5, + "learning_rate": 2.8758459440996666e-05, + "loss": 2.0495, + "step": 14682000 + }, + { + "epoch": 42.5, + "learning_rate": 2.8757735793349388e-05, + "loss": 2.0531, + "step": 14682500 + }, + { + "epoch": 42.5, + "learning_rate": 2.8757013592997407e-05, + "loss": 2.0354, + "step": 14683000 + }, + { + "epoch": 42.5, + "learning_rate": 2.8756291392645423e-05, + "loss": 2.0573, + "step": 14683500 + }, + { + "epoch": 42.5, + "learning_rate": 2.8755567744998145e-05, + "loss": 2.0308, + "step": 14684000 + }, + { + "epoch": 42.51, + "learning_rate": 2.8754845544646164e-05, + "loss": 2.0451, + "step": 14684500 + }, + { + "epoch": 42.51, + "learning_rate": 2.8754121896998893e-05, + "loss": 2.0096, + "step": 14685000 + }, + { + "epoch": 42.51, + "learning_rate": 2.8753398249351615e-05, + "loss": 2.0246, + "step": 14685500 + }, + { + "epoch": 42.51, + "learning_rate": 2.8752674601704337e-05, + "loss": 2.031, + "step": 14686000 + }, + { + "epoch": 42.51, + "learning_rate": 2.875195095405706e-05, + "loss": 2.0396, + "step": 14686500 + }, + { + "epoch": 42.51, + "learning_rate": 2.8751227306409785e-05, + "loss": 2.0351, + "step": 14687000 + }, + { + "epoch": 42.51, + "learning_rate": 2.8750503658762507e-05, + "loss": 2.0189, + "step": 14687500 + }, + { + "epoch": 42.52, + "learning_rate": 2.874978001111523e-05, + "loss": 2.0447, + "step": 14688000 + }, + { + "epoch": 42.52, + "learning_rate": 2.8749056363467952e-05, + "loss": 2.0286, + "step": 14688500 + }, + { + "epoch": 42.52, + "learning_rate": 2.8748332715820674e-05, + "loss": 2.0601, + "step": 14689000 + }, + { + "epoch": 42.52, + "learning_rate": 2.8747609068173396e-05, + "loss": 2.0403, + "step": 14689500 + }, + { + "epoch": 42.52, + "learning_rate": 2.8746885420526122e-05, + "loss": 2.0476, + "step": 14690000 + }, + { + "epoch": 42.52, + "learning_rate": 2.8746163220174137e-05, + "loss": 2.0105, + "step": 14690500 + }, + { + "epoch": 42.52, + "learning_rate": 2.874543957252686e-05, + "loss": 2.0411, + "step": 14691000 + }, + { + "epoch": 42.53, + "learning_rate": 2.8744715924879585e-05, + "loss": 2.0457, + "step": 14691500 + }, + { + "epoch": 42.53, + "learning_rate": 2.874399227723231e-05, + "loss": 2.0389, + "step": 14692000 + }, + { + "epoch": 42.53, + "learning_rate": 2.8743268629585037e-05, + "loss": 2.0476, + "step": 14692500 + }, + { + "epoch": 42.53, + "learning_rate": 2.8742546429233052e-05, + "loss": 2.0201, + "step": 14693000 + }, + { + "epoch": 42.53, + "learning_rate": 2.8741822781585774e-05, + "loss": 2.0304, + "step": 14693500 + }, + { + "epoch": 42.53, + "learning_rate": 2.87410991339385e-05, + "loss": 2.0682, + "step": 14694000 + }, + { + "epoch": 42.53, + "learning_rate": 2.8740375486291222e-05, + "loss": 2.0234, + "step": 14694500 + }, + { + "epoch": 42.54, + "learning_rate": 2.8739651838643944e-05, + "loss": 2.0245, + "step": 14695000 + }, + { + "epoch": 42.54, + "learning_rate": 2.873892963829196e-05, + "loss": 2.0185, + "step": 14695500 + }, + { + "epoch": 42.54, + "learning_rate": 2.8738205990644686e-05, + "loss": 2.0563, + "step": 14696000 + }, + { + "epoch": 42.54, + "learning_rate": 2.8737482342997408e-05, + "loss": 2.0714, + "step": 14696500 + }, + { + "epoch": 42.54, + "learning_rate": 2.873675869535013e-05, + "loss": 2.0623, + "step": 14697000 + }, + { + "epoch": 42.54, + "learning_rate": 2.8736035047702852e-05, + "loss": 2.0446, + "step": 14697500 + }, + { + "epoch": 42.54, + "learning_rate": 2.8735311400055575e-05, + "loss": 2.0317, + "step": 14698000 + }, + { + "epoch": 42.55, + "learning_rate": 2.8734589199703593e-05, + "loss": 2.0283, + "step": 14698500 + }, + { + "epoch": 42.55, + "learning_rate": 2.8733866999351612e-05, + "loss": 2.0212, + "step": 14699000 + }, + { + "epoch": 42.55, + "learning_rate": 2.8733143351704335e-05, + "loss": 2.0186, + "step": 14699500 + }, + { + "epoch": 42.55, + "learning_rate": 2.8732419704057064e-05, + "loss": 2.0581, + "step": 14700000 + }, + { + "epoch": 42.55, + "learning_rate": 2.8731696056409786e-05, + "loss": 2.0295, + "step": 14700500 + }, + { + "epoch": 42.55, + "learning_rate": 2.8730972408762508e-05, + "loss": 2.0377, + "step": 14701000 + }, + { + "epoch": 42.55, + "learning_rate": 2.873024876111523e-05, + "loss": 2.027, + "step": 14701500 + }, + { + "epoch": 42.56, + "learning_rate": 2.8729525113467953e-05, + "loss": 2.0458, + "step": 14702000 + }, + { + "epoch": 42.56, + "learning_rate": 2.8728801465820675e-05, + "loss": 2.0227, + "step": 14702500 + }, + { + "epoch": 42.56, + "learning_rate": 2.87280778181734e-05, + "loss": 2.0217, + "step": 14703000 + }, + { + "epoch": 42.56, + "learning_rate": 2.8727354170526123e-05, + "loss": 2.0183, + "step": 14703500 + }, + { + "epoch": 42.56, + "learning_rate": 2.8726630522878845e-05, + "loss": 2.0339, + "step": 14704000 + }, + { + "epoch": 42.56, + "learning_rate": 2.8725906875231567e-05, + "loss": 2.0355, + "step": 14704500 + }, + { + "epoch": 42.56, + "learning_rate": 2.8725184674879586e-05, + "loss": 2.0395, + "step": 14705000 + }, + { + "epoch": 42.57, + "learning_rate": 2.8724461027232308e-05, + "loss": 2.044, + "step": 14705500 + }, + { + "epoch": 42.57, + "learning_rate": 2.872373737958503e-05, + "loss": 2.0308, + "step": 14706000 + }, + { + "epoch": 42.57, + "learning_rate": 2.8723013731937753e-05, + "loss": 2.0492, + "step": 14706500 + }, + { + "epoch": 42.57, + "learning_rate": 2.8722291531585775e-05, + "loss": 2.037, + "step": 14707000 + }, + { + "epoch": 42.57, + "learning_rate": 2.87215678839385e-05, + "loss": 2.0308, + "step": 14707500 + }, + { + "epoch": 42.57, + "learning_rate": 2.8720844236291223e-05, + "loss": 2.0623, + "step": 14708000 + }, + { + "epoch": 42.58, + "learning_rate": 2.8720120588643945e-05, + "loss": 2.0578, + "step": 14708500 + }, + { + "epoch": 42.58, + "learning_rate": 2.8719396940996667e-05, + "loss": 2.0394, + "step": 14709000 + }, + { + "epoch": 42.58, + "learning_rate": 2.871867329334939e-05, + "loss": 2.0309, + "step": 14709500 + }, + { + "epoch": 42.58, + "learning_rate": 2.8717949645702112e-05, + "loss": 2.0533, + "step": 14710000 + }, + { + "epoch": 42.58, + "learning_rate": 2.8717225998054838e-05, + "loss": 2.0589, + "step": 14710500 + }, + { + "epoch": 42.58, + "learning_rate": 2.8716506692293443e-05, + "loss": 2.0296, + "step": 14711000 + }, + { + "epoch": 42.58, + "learning_rate": 2.8715783044646165e-05, + "loss": 2.0117, + "step": 14711500 + }, + { + "epoch": 42.59, + "learning_rate": 2.8715059396998888e-05, + "loss": 2.0301, + "step": 14712000 + }, + { + "epoch": 42.59, + "learning_rate": 2.8714335749351613e-05, + "loss": 2.0237, + "step": 14712500 + }, + { + "epoch": 42.59, + "learning_rate": 2.8713612101704335e-05, + "loss": 2.0251, + "step": 14713000 + }, + { + "epoch": 42.59, + "learning_rate": 2.871288990135235e-05, + "loss": 2.0421, + "step": 14713500 + }, + { + "epoch": 42.59, + "learning_rate": 2.8712166253705077e-05, + "loss": 2.0351, + "step": 14714000 + }, + { + "epoch": 42.59, + "learning_rate": 2.87114426060578e-05, + "loss": 2.0385, + "step": 14714500 + }, + { + "epoch": 42.59, + "learning_rate": 2.8710718958410528e-05, + "loss": 2.0338, + "step": 14715000 + }, + { + "epoch": 42.6, + "learning_rate": 2.870999531076325e-05, + "loss": 2.0358, + "step": 14715500 + }, + { + "epoch": 42.6, + "learning_rate": 2.8709271663115972e-05, + "loss": 2.0437, + "step": 14716000 + }, + { + "epoch": 42.6, + "learning_rate": 2.8708548015468695e-05, + "loss": 2.0272, + "step": 14716500 + }, + { + "epoch": 42.6, + "learning_rate": 2.8707824367821417e-05, + "loss": 2.0232, + "step": 14717000 + }, + { + "epoch": 42.6, + "learning_rate": 2.870710072017414e-05, + "loss": 2.0487, + "step": 14717500 + }, + { + "epoch": 42.6, + "learning_rate": 2.8706377072526865e-05, + "loss": 2.0618, + "step": 14718000 + }, + { + "epoch": 42.6, + "learning_rate": 2.8705653424879587e-05, + "loss": 2.0352, + "step": 14718500 + }, + { + "epoch": 42.61, + "learning_rate": 2.8704931224527602e-05, + "loss": 2.0438, + "step": 14719000 + }, + { + "epoch": 42.61, + "learning_rate": 2.8704207576880328e-05, + "loss": 2.0094, + "step": 14719500 + }, + { + "epoch": 42.61, + "learning_rate": 2.870348392923305e-05, + "loss": 2.0237, + "step": 14720000 + }, + { + "epoch": 42.61, + "learning_rate": 2.8702760281585773e-05, + "loss": 2.0432, + "step": 14720500 + }, + { + "epoch": 42.61, + "learning_rate": 2.8702036633938495e-05, + "loss": 2.0322, + "step": 14721000 + }, + { + "epoch": 42.61, + "learning_rate": 2.8701312986291217e-05, + "loss": 2.0471, + "step": 14721500 + }, + { + "epoch": 42.61, + "learning_rate": 2.870059078593924e-05, + "loss": 2.0273, + "step": 14722000 + }, + { + "epoch": 42.62, + "learning_rate": 2.8699867138291965e-05, + "loss": 2.0034, + "step": 14722500 + }, + { + "epoch": 42.62, + "learning_rate": 2.869914493793998e-05, + "loss": 2.0394, + "step": 14723000 + }, + { + "epoch": 42.62, + "learning_rate": 2.8698421290292703e-05, + "loss": 2.0204, + "step": 14723500 + }, + { + "epoch": 42.62, + "learning_rate": 2.869769764264543e-05, + "loss": 2.0293, + "step": 14724000 + }, + { + "epoch": 42.62, + "learning_rate": 2.8696975442293444e-05, + "loss": 2.0396, + "step": 14724500 + }, + { + "epoch": 42.62, + "learning_rate": 2.8696251794646166e-05, + "loss": 2.0298, + "step": 14725000 + }, + { + "epoch": 42.62, + "learning_rate": 2.8695528146998892e-05, + "loss": 2.0333, + "step": 14725500 + }, + { + "epoch": 42.63, + "learning_rate": 2.8694807393942204e-05, + "loss": 2.0326, + "step": 14726000 + }, + { + "epoch": 42.63, + "learning_rate": 2.8694083746294926e-05, + "loss": 2.0328, + "step": 14726500 + }, + { + "epoch": 42.63, + "learning_rate": 2.869336009864765e-05, + "loss": 2.0247, + "step": 14727000 + }, + { + "epoch": 42.63, + "learning_rate": 2.869263645100037e-05, + "loss": 2.0399, + "step": 14727500 + }, + { + "epoch": 42.63, + "learning_rate": 2.8691912803353093e-05, + "loss": 2.0059, + "step": 14728000 + }, + { + "epoch": 42.63, + "learning_rate": 2.8691189155705815e-05, + "loss": 2.0222, + "step": 14728500 + }, + { + "epoch": 42.63, + "learning_rate": 2.869046550805854e-05, + "loss": 2.0298, + "step": 14729000 + }, + { + "epoch": 42.64, + "learning_rate": 2.8689741860411263e-05, + "loss": 2.0265, + "step": 14729500 + }, + { + "epoch": 42.64, + "learning_rate": 2.8689018212763992e-05, + "loss": 2.047, + "step": 14730000 + }, + { + "epoch": 42.64, + "learning_rate": 2.8688294565116714e-05, + "loss": 2.0352, + "step": 14730500 + }, + { + "epoch": 42.64, + "learning_rate": 2.8687570917469437e-05, + "loss": 2.0344, + "step": 14731000 + }, + { + "epoch": 42.64, + "learning_rate": 2.868684726982216e-05, + "loss": 2.0417, + "step": 14731500 + }, + { + "epoch": 42.64, + "learning_rate": 2.868612362217488e-05, + "loss": 2.0161, + "step": 14732000 + }, + { + "epoch": 42.64, + "learning_rate": 2.8685399974527603e-05, + "loss": 2.0566, + "step": 14732500 + }, + { + "epoch": 42.65, + "learning_rate": 2.868467632688033e-05, + "loss": 2.0454, + "step": 14733000 + }, + { + "epoch": 42.65, + "learning_rate": 2.868395267923305e-05, + "loss": 2.0402, + "step": 14733500 + }, + { + "epoch": 42.65, + "learning_rate": 2.8683229031585773e-05, + "loss": 2.0364, + "step": 14734000 + }, + { + "epoch": 42.65, + "learning_rate": 2.8682505383938496e-05, + "loss": 2.0379, + "step": 14734500 + }, + { + "epoch": 42.65, + "learning_rate": 2.8681781736291218e-05, + "loss": 2.0342, + "step": 14735000 + }, + { + "epoch": 42.65, + "learning_rate": 2.8681058088643943e-05, + "loss": 2.0393, + "step": 14735500 + }, + { + "epoch": 42.65, + "learning_rate": 2.868033588829196e-05, + "loss": 2.0343, + "step": 14736000 + }, + { + "epoch": 42.66, + "learning_rate": 2.867961224064468e-05, + "loss": 2.0532, + "step": 14736500 + }, + { + "epoch": 42.66, + "learning_rate": 2.867888859299741e-05, + "loss": 2.0343, + "step": 14737000 + }, + { + "epoch": 42.66, + "learning_rate": 2.8678164945350132e-05, + "loss": 2.0334, + "step": 14737500 + }, + { + "epoch": 42.66, + "learning_rate": 2.8677441297702855e-05, + "loss": 2.0439, + "step": 14738000 + }, + { + "epoch": 42.66, + "learning_rate": 2.867671765005558e-05, + "loss": 2.0133, + "step": 14738500 + }, + { + "epoch": 42.66, + "learning_rate": 2.8675994002408303e-05, + "loss": 2.0312, + "step": 14739000 + }, + { + "epoch": 42.66, + "learning_rate": 2.8675271802056318e-05, + "loss": 2.0118, + "step": 14739500 + }, + { + "epoch": 42.67, + "learning_rate": 2.8674548154409044e-05, + "loss": 2.0325, + "step": 14740000 + }, + { + "epoch": 42.67, + "learning_rate": 2.8673824506761766e-05, + "loss": 2.0285, + "step": 14740500 + }, + { + "epoch": 42.67, + "learning_rate": 2.8673100859114488e-05, + "loss": 2.0624, + "step": 14741000 + }, + { + "epoch": 42.67, + "learning_rate": 2.867237721146721e-05, + "loss": 2.0547, + "step": 14741500 + }, + { + "epoch": 42.67, + "learning_rate": 2.8671653563819933e-05, + "loss": 2.0437, + "step": 14742000 + }, + { + "epoch": 42.67, + "learning_rate": 2.8670929916172655e-05, + "loss": 2.0325, + "step": 14742500 + }, + { + "epoch": 42.67, + "learning_rate": 2.867020626852538e-05, + "loss": 2.0387, + "step": 14743000 + }, + { + "epoch": 42.68, + "learning_rate": 2.8669482620878103e-05, + "loss": 2.0219, + "step": 14743500 + }, + { + "epoch": 42.68, + "learning_rate": 2.8668758973230832e-05, + "loss": 2.0147, + "step": 14744000 + }, + { + "epoch": 42.68, + "learning_rate": 2.8668035325583554e-05, + "loss": 2.0429, + "step": 14744500 + }, + { + "epoch": 42.68, + "learning_rate": 2.8667311677936276e-05, + "loss": 2.013, + "step": 14745000 + }, + { + "epoch": 42.68, + "learning_rate": 2.8666589477584295e-05, + "loss": 2.0398, + "step": 14745500 + }, + { + "epoch": 42.68, + "learning_rate": 2.8665865829937017e-05, + "loss": 2.0473, + "step": 14746000 + }, + { + "epoch": 42.69, + "learning_rate": 2.866514218228974e-05, + "loss": 2.0283, + "step": 14746500 + }, + { + "epoch": 42.69, + "learning_rate": 2.8664418534642462e-05, + "loss": 2.0295, + "step": 14747000 + }, + { + "epoch": 42.69, + "learning_rate": 2.8663694886995184e-05, + "loss": 2.0503, + "step": 14747500 + }, + { + "epoch": 42.69, + "learning_rate": 2.8662972686643203e-05, + "loss": 2.0238, + "step": 14748000 + }, + { + "epoch": 42.69, + "learning_rate": 2.8662249038995925e-05, + "loss": 2.0445, + "step": 14748500 + }, + { + "epoch": 42.69, + "learning_rate": 2.8661525391348647e-05, + "loss": 2.0396, + "step": 14749000 + }, + { + "epoch": 42.69, + "learning_rate": 2.8660803190996666e-05, + "loss": 2.0341, + "step": 14749500 + }, + { + "epoch": 42.7, + "learning_rate": 2.866007954334939e-05, + "loss": 2.0215, + "step": 14750000 + }, + { + "epoch": 42.7, + "learning_rate": 2.865935589570211e-05, + "loss": 2.0485, + "step": 14750500 + }, + { + "epoch": 42.7, + "learning_rate": 2.8658632248054833e-05, + "loss": 2.0246, + "step": 14751000 + }, + { + "epoch": 42.7, + "learning_rate": 2.8657910047702852e-05, + "loss": 2.0378, + "step": 14751500 + }, + { + "epoch": 42.7, + "learning_rate": 2.865718640005558e-05, + "loss": 2.0523, + "step": 14752000 + }, + { + "epoch": 42.7, + "learning_rate": 2.8656462752408303e-05, + "loss": 2.04, + "step": 14752500 + }, + { + "epoch": 42.7, + "learning_rate": 2.8655739104761026e-05, + "loss": 2.0473, + "step": 14753000 + }, + { + "epoch": 42.71, + "learning_rate": 2.8655015457113748e-05, + "loss": 2.062, + "step": 14753500 + }, + { + "epoch": 42.71, + "learning_rate": 2.865429180946647e-05, + "loss": 2.0307, + "step": 14754000 + }, + { + "epoch": 42.71, + "learning_rate": 2.865356960911449e-05, + "loss": 2.0329, + "step": 14754500 + }, + { + "epoch": 42.71, + "learning_rate": 2.865284596146721e-05, + "loss": 2.0439, + "step": 14755000 + }, + { + "epoch": 42.71, + "learning_rate": 2.8652122313819933e-05, + "loss": 2.0568, + "step": 14755500 + }, + { + "epoch": 42.71, + "learning_rate": 2.865139866617266e-05, + "loss": 2.0531, + "step": 14756000 + }, + { + "epoch": 42.71, + "learning_rate": 2.8650676465820675e-05, + "loss": 2.0499, + "step": 14756500 + }, + { + "epoch": 42.72, + "learning_rate": 2.8649954265468694e-05, + "loss": 2.034, + "step": 14757000 + }, + { + "epoch": 42.72, + "learning_rate": 2.8649230617821416e-05, + "loss": 2.0372, + "step": 14757500 + }, + { + "epoch": 42.72, + "learning_rate": 2.8648506970174138e-05, + "loss": 2.0424, + "step": 14758000 + }, + { + "epoch": 42.72, + "learning_rate": 2.864778332252686e-05, + "loss": 2.0537, + "step": 14758500 + }, + { + "epoch": 42.72, + "learning_rate": 2.8647059674879582e-05, + "loss": 2.0529, + "step": 14759000 + }, + { + "epoch": 42.72, + "learning_rate": 2.864633602723231e-05, + "loss": 2.0538, + "step": 14759500 + }, + { + "epoch": 42.72, + "learning_rate": 2.8645612379585034e-05, + "loss": 2.0154, + "step": 14760000 + }, + { + "epoch": 42.73, + "learning_rate": 2.864488873193776e-05, + "loss": 2.027, + "step": 14760500 + }, + { + "epoch": 42.73, + "learning_rate": 2.864416508429048e-05, + "loss": 2.0483, + "step": 14761000 + }, + { + "epoch": 42.73, + "learning_rate": 2.8643441436643204e-05, + "loss": 2.0278, + "step": 14761500 + }, + { + "epoch": 42.73, + "learning_rate": 2.8642717788995926e-05, + "loss": 2.0291, + "step": 14762000 + }, + { + "epoch": 42.73, + "learning_rate": 2.8641994141348648e-05, + "loss": 2.0253, + "step": 14762500 + }, + { + "epoch": 42.73, + "learning_rate": 2.864127049370137e-05, + "loss": 2.0359, + "step": 14763000 + }, + { + "epoch": 42.73, + "learning_rate": 2.8640546846054096e-05, + "loss": 2.0286, + "step": 14763500 + }, + { + "epoch": 42.74, + "learning_rate": 2.863982319840682e-05, + "loss": 2.0496, + "step": 14764000 + }, + { + "epoch": 42.74, + "learning_rate": 2.863909955075954e-05, + "loss": 2.0372, + "step": 14764500 + }, + { + "epoch": 42.74, + "learning_rate": 2.8638375903112263e-05, + "loss": 2.0578, + "step": 14765000 + }, + { + "epoch": 42.74, + "learning_rate": 2.8637653702760282e-05, + "loss": 2.0509, + "step": 14765500 + }, + { + "epoch": 42.74, + "learning_rate": 2.8636930055113004e-05, + "loss": 2.0279, + "step": 14766000 + }, + { + "epoch": 42.74, + "learning_rate": 2.8636206407465733e-05, + "loss": 2.0211, + "step": 14766500 + }, + { + "epoch": 42.74, + "learning_rate": 2.8635482759818455e-05, + "loss": 2.0431, + "step": 14767000 + }, + { + "epoch": 42.75, + "learning_rate": 2.8634759112171177e-05, + "loss": 2.0261, + "step": 14767500 + }, + { + "epoch": 42.75, + "learning_rate": 2.86340354645239e-05, + "loss": 2.0408, + "step": 14768000 + }, + { + "epoch": 42.75, + "learning_rate": 2.863331326417192e-05, + "loss": 2.0227, + "step": 14768500 + }, + { + "epoch": 42.75, + "learning_rate": 2.863258961652464e-05, + "loss": 2.0395, + "step": 14769000 + }, + { + "epoch": 42.75, + "learning_rate": 2.8631865968877363e-05, + "loss": 2.0295, + "step": 14769500 + }, + { + "epoch": 42.75, + "learning_rate": 2.8631142321230085e-05, + "loss": 2.0598, + "step": 14770000 + }, + { + "epoch": 42.75, + "learning_rate": 2.863041867358281e-05, + "loss": 2.0476, + "step": 14770500 + }, + { + "epoch": 42.76, + "learning_rate": 2.8629695025935533e-05, + "loss": 2.035, + "step": 14771000 + }, + { + "epoch": 42.76, + "learning_rate": 2.8628971378288255e-05, + "loss": 2.0286, + "step": 14771500 + }, + { + "epoch": 42.76, + "learning_rate": 2.8628247730640978e-05, + "loss": 2.0395, + "step": 14772000 + }, + { + "epoch": 42.76, + "learning_rate": 2.8627525530288997e-05, + "loss": 2.008, + "step": 14772500 + }, + { + "epoch": 42.76, + "learning_rate": 2.8626803329937012e-05, + "loss": 2.0721, + "step": 14773000 + }, + { + "epoch": 42.76, + "learning_rate": 2.862608112958503e-05, + "loss": 2.0472, + "step": 14773500 + }, + { + "epoch": 42.76, + "learning_rate": 2.8625357481937753e-05, + "loss": 2.0181, + "step": 14774000 + }, + { + "epoch": 42.77, + "learning_rate": 2.8624633834290482e-05, + "loss": 2.018, + "step": 14774500 + }, + { + "epoch": 42.77, + "learning_rate": 2.8623910186643205e-05, + "loss": 2.0046, + "step": 14775000 + }, + { + "epoch": 42.77, + "learning_rate": 2.8623186538995927e-05, + "loss": 2.051, + "step": 14775500 + }, + { + "epoch": 42.77, + "learning_rate": 2.862246289134865e-05, + "loss": 2.0264, + "step": 14776000 + }, + { + "epoch": 42.77, + "learning_rate": 2.8621739243701375e-05, + "loss": 2.0404, + "step": 14776500 + }, + { + "epoch": 42.77, + "learning_rate": 2.862101704334939e-05, + "loss": 2.0325, + "step": 14777000 + }, + { + "epoch": 42.77, + "learning_rate": 2.8620293395702112e-05, + "loss": 2.032, + "step": 14777500 + }, + { + "epoch": 42.78, + "learning_rate": 2.8619569748054835e-05, + "loss": 2.0436, + "step": 14778000 + }, + { + "epoch": 42.78, + "learning_rate": 2.861884610040756e-05, + "loss": 2.0264, + "step": 14778500 + }, + { + "epoch": 42.78, + "learning_rate": 2.8618122452760283e-05, + "loss": 2.0338, + "step": 14779000 + }, + { + "epoch": 42.78, + "learning_rate": 2.8617398805113005e-05, + "loss": 2.0256, + "step": 14779500 + }, + { + "epoch": 42.78, + "learning_rate": 2.8616675157465727e-05, + "loss": 2.0288, + "step": 14780000 + }, + { + "epoch": 42.78, + "learning_rate": 2.861595150981845e-05, + "loss": 2.0235, + "step": 14780500 + }, + { + "epoch": 42.78, + "learning_rate": 2.8615229309466468e-05, + "loss": 2.0277, + "step": 14781000 + }, + { + "epoch": 42.79, + "learning_rate": 2.861450566181919e-05, + "loss": 2.0378, + "step": 14781500 + }, + { + "epoch": 42.79, + "learning_rate": 2.861378201417192e-05, + "loss": 2.0344, + "step": 14782000 + }, + { + "epoch": 42.79, + "learning_rate": 2.861305981381994e-05, + "loss": 2.0348, + "step": 14782500 + }, + { + "epoch": 42.79, + "learning_rate": 2.861233616617266e-05, + "loss": 2.0642, + "step": 14783000 + }, + { + "epoch": 42.79, + "learning_rate": 2.8611612518525383e-05, + "loss": 2.031, + "step": 14783500 + }, + { + "epoch": 42.79, + "learning_rate": 2.8610888870878105e-05, + "loss": 2.0345, + "step": 14784000 + }, + { + "epoch": 42.8, + "learning_rate": 2.8610165223230827e-05, + "loss": 2.0334, + "step": 14784500 + }, + { + "epoch": 42.8, + "learning_rate": 2.8609443022878846e-05, + "loss": 2.0439, + "step": 14785000 + }, + { + "epoch": 42.8, + "learning_rate": 2.860871937523157e-05, + "loss": 2.0406, + "step": 14785500 + }, + { + "epoch": 42.8, + "learning_rate": 2.860799572758429e-05, + "loss": 2.0188, + "step": 14786000 + }, + { + "epoch": 42.8, + "learning_rate": 2.8607272079937013e-05, + "loss": 2.0352, + "step": 14786500 + }, + { + "epoch": 42.8, + "learning_rate": 2.860654843228974e-05, + "loss": 2.0322, + "step": 14787000 + }, + { + "epoch": 42.8, + "learning_rate": 2.860582478464246e-05, + "loss": 2.0461, + "step": 14787500 + }, + { + "epoch": 42.81, + "learning_rate": 2.8605102584290476e-05, + "loss": 2.0528, + "step": 14788000 + }, + { + "epoch": 42.81, + "learning_rate": 2.86043789366432e-05, + "loss": 2.0286, + "step": 14788500 + }, + { + "epoch": 42.81, + "learning_rate": 2.8603655288995924e-05, + "loss": 2.0538, + "step": 14789000 + }, + { + "epoch": 42.81, + "learning_rate": 2.860293164134865e-05, + "loss": 2.0155, + "step": 14789500 + }, + { + "epoch": 42.81, + "learning_rate": 2.8602207993701375e-05, + "loss": 2.0313, + "step": 14790000 + }, + { + "epoch": 42.81, + "learning_rate": 2.8601484346054098e-05, + "loss": 2.0611, + "step": 14790500 + }, + { + "epoch": 42.81, + "learning_rate": 2.860076069840682e-05, + "loss": 2.0324, + "step": 14791000 + }, + { + "epoch": 42.82, + "learning_rate": 2.8600037050759542e-05, + "loss": 2.0359, + "step": 14791500 + }, + { + "epoch": 42.82, + "learning_rate": 2.8599313403112264e-05, + "loss": 2.035, + "step": 14792000 + }, + { + "epoch": 42.82, + "learning_rate": 2.8598591202760283e-05, + "loss": 2.029, + "step": 14792500 + }, + { + "epoch": 42.82, + "learning_rate": 2.8597867555113006e-05, + "loss": 2.0588, + "step": 14793000 + }, + { + "epoch": 42.82, + "learning_rate": 2.8597143907465728e-05, + "loss": 2.0417, + "step": 14793500 + }, + { + "epoch": 42.82, + "learning_rate": 2.859642025981845e-05, + "loss": 2.0302, + "step": 14794000 + }, + { + "epoch": 42.82, + "learning_rate": 2.859569805946647e-05, + "loss": 2.0162, + "step": 14794500 + }, + { + "epoch": 42.83, + "learning_rate": 2.859497441181919e-05, + "loss": 2.0265, + "step": 14795000 + }, + { + "epoch": 42.83, + "learning_rate": 2.8594250764171913e-05, + "loss": 2.018, + "step": 14795500 + }, + { + "epoch": 42.83, + "learning_rate": 2.859352711652464e-05, + "loss": 2.0289, + "step": 14796000 + }, + { + "epoch": 42.83, + "learning_rate": 2.8592803468877365e-05, + "loss": 2.0401, + "step": 14796500 + }, + { + "epoch": 42.83, + "learning_rate": 2.8592081268525384e-05, + "loss": 2.0481, + "step": 14797000 + }, + { + "epoch": 42.83, + "learning_rate": 2.8591357620878106e-05, + "loss": 2.0327, + "step": 14797500 + }, + { + "epoch": 42.83, + "learning_rate": 2.8590633973230828e-05, + "loss": 2.0431, + "step": 14798000 + }, + { + "epoch": 42.84, + "learning_rate": 2.8589910325583554e-05, + "loss": 2.041, + "step": 14798500 + }, + { + "epoch": 42.84, + "learning_rate": 2.858918812523157e-05, + "loss": 2.0356, + "step": 14799000 + }, + { + "epoch": 42.84, + "learning_rate": 2.8588465924879588e-05, + "loss": 2.054, + "step": 14799500 + }, + { + "epoch": 42.84, + "learning_rate": 2.858774227723231e-05, + "loss": 2.045, + "step": 14800000 + }, + { + "epoch": 42.84, + "learning_rate": 2.8587018629585033e-05, + "loss": 2.0497, + "step": 14800500 + }, + { + "epoch": 42.84, + "learning_rate": 2.8586294981937755e-05, + "loss": 2.0328, + "step": 14801000 + }, + { + "epoch": 42.84, + "learning_rate": 2.8585571334290477e-05, + "loss": 2.043, + "step": 14801500 + }, + { + "epoch": 42.85, + "learning_rate": 2.8584847686643203e-05, + "loss": 2.0367, + "step": 14802000 + }, + { + "epoch": 42.85, + "learning_rate": 2.8584124038995925e-05, + "loss": 2.0283, + "step": 14802500 + }, + { + "epoch": 42.85, + "learning_rate": 2.8583400391348647e-05, + "loss": 2.0355, + "step": 14803000 + }, + { + "epoch": 42.85, + "learning_rate": 2.858267674370137e-05, + "loss": 2.0412, + "step": 14803500 + }, + { + "epoch": 42.85, + "learning_rate": 2.85819530960541e-05, + "loss": 2.0415, + "step": 14804000 + }, + { + "epoch": 42.85, + "learning_rate": 2.858122944840682e-05, + "loss": 2.0253, + "step": 14804500 + }, + { + "epoch": 42.85, + "learning_rate": 2.8580505800759543e-05, + "loss": 2.0263, + "step": 14805000 + }, + { + "epoch": 42.86, + "learning_rate": 2.8579783600407562e-05, + "loss": 2.0416, + "step": 14805500 + }, + { + "epoch": 42.86, + "learning_rate": 2.8579059952760284e-05, + "loss": 2.0587, + "step": 14806000 + }, + { + "epoch": 42.86, + "learning_rate": 2.8578336305113006e-05, + "loss": 2.0294, + "step": 14806500 + }, + { + "epoch": 42.86, + "learning_rate": 2.857761265746573e-05, + "loss": 2.0317, + "step": 14807000 + }, + { + "epoch": 42.86, + "learning_rate": 2.8576889009818454e-05, + "loss": 2.0413, + "step": 14807500 + }, + { + "epoch": 42.86, + "learning_rate": 2.8576165362171176e-05, + "loss": 2.046, + "step": 14808000 + }, + { + "epoch": 42.86, + "learning_rate": 2.85754417145239e-05, + "loss": 2.0395, + "step": 14808500 + }, + { + "epoch": 42.87, + "learning_rate": 2.857471806687662e-05, + "loss": 2.0305, + "step": 14809000 + }, + { + "epoch": 42.87, + "learning_rate": 2.8573994419229343e-05, + "loss": 2.0306, + "step": 14809500 + }, + { + "epoch": 42.87, + "learning_rate": 2.8573272218877362e-05, + "loss": 2.0431, + "step": 14810000 + }, + { + "epoch": 42.87, + "learning_rate": 2.8572550018525378e-05, + "loss": 2.0377, + "step": 14810500 + }, + { + "epoch": 42.87, + "learning_rate": 2.8571826370878103e-05, + "loss": 2.0484, + "step": 14811000 + }, + { + "epoch": 42.87, + "learning_rate": 2.857110272323083e-05, + "loss": 2.0508, + "step": 14811500 + }, + { + "epoch": 42.87, + "learning_rate": 2.8570379075583555e-05, + "loss": 2.0346, + "step": 14812000 + }, + { + "epoch": 42.88, + "learning_rate": 2.8569655427936277e-05, + "loss": 2.0552, + "step": 14812500 + }, + { + "epoch": 42.88, + "learning_rate": 2.8568931780289e-05, + "loss": 2.0223, + "step": 14813000 + }, + { + "epoch": 42.88, + "learning_rate": 2.856820813264172e-05, + "loss": 2.0244, + "step": 14813500 + }, + { + "epoch": 42.88, + "learning_rate": 2.856748593228974e-05, + "loss": 2.021, + "step": 14814000 + }, + { + "epoch": 42.88, + "learning_rate": 2.8566763731937756e-05, + "loss": 2.0628, + "step": 14814500 + }, + { + "epoch": 42.88, + "learning_rate": 2.8566040084290478e-05, + "loss": 2.0513, + "step": 14815000 + }, + { + "epoch": 42.88, + "learning_rate": 2.8565316436643204e-05, + "loss": 2.0458, + "step": 14815500 + }, + { + "epoch": 42.89, + "learning_rate": 2.8564592788995926e-05, + "loss": 2.0509, + "step": 14816000 + }, + { + "epoch": 42.89, + "learning_rate": 2.856387058864394e-05, + "loss": 2.0436, + "step": 14816500 + }, + { + "epoch": 42.89, + "learning_rate": 2.8563146940996667e-05, + "loss": 2.0256, + "step": 14817000 + }, + { + "epoch": 42.89, + "learning_rate": 2.856242329334939e-05, + "loss": 2.0493, + "step": 14817500 + }, + { + "epoch": 42.89, + "learning_rate": 2.856169964570211e-05, + "loss": 2.0357, + "step": 14818000 + }, + { + "epoch": 42.89, + "learning_rate": 2.856097744535013e-05, + "loss": 2.0506, + "step": 14818500 + }, + { + "epoch": 42.89, + "learning_rate": 2.8560255244998146e-05, + "loss": 2.0339, + "step": 14819000 + }, + { + "epoch": 42.9, + "learning_rate": 2.8559531597350875e-05, + "loss": 2.0675, + "step": 14819500 + }, + { + "epoch": 42.9, + "learning_rate": 2.8558807949703597e-05, + "loss": 2.0389, + "step": 14820000 + }, + { + "epoch": 42.9, + "learning_rate": 2.855808430205632e-05, + "loss": 2.053, + "step": 14820500 + }, + { + "epoch": 42.9, + "learning_rate": 2.855736065440904e-05, + "loss": 2.0194, + "step": 14821000 + }, + { + "epoch": 42.9, + "learning_rate": 2.8556637006761767e-05, + "loss": 2.053, + "step": 14821500 + }, + { + "epoch": 42.9, + "learning_rate": 2.855591335911449e-05, + "loss": 2.0289, + "step": 14822000 + }, + { + "epoch": 42.91, + "learning_rate": 2.8555189711467212e-05, + "loss": 2.027, + "step": 14822500 + }, + { + "epoch": 42.91, + "learning_rate": 2.8554466063819934e-05, + "loss": 2.0382, + "step": 14823000 + }, + { + "epoch": 42.91, + "learning_rate": 2.8553742416172656e-05, + "loss": 2.0103, + "step": 14823500 + }, + { + "epoch": 42.91, + "learning_rate": 2.8553020215820675e-05, + "loss": 2.0305, + "step": 14824000 + }, + { + "epoch": 42.91, + "learning_rate": 2.8552296568173397e-05, + "loss": 2.0363, + "step": 14824500 + }, + { + "epoch": 42.91, + "learning_rate": 2.855157292052612e-05, + "loss": 2.0475, + "step": 14825000 + }, + { + "epoch": 42.91, + "learning_rate": 2.8550849272878842e-05, + "loss": 2.0249, + "step": 14825500 + }, + { + "epoch": 42.92, + "learning_rate": 2.8550125625231568e-05, + "loss": 2.039, + "step": 14826000 + }, + { + "epoch": 42.92, + "learning_rate": 2.854940197758429e-05, + "loss": 2.0425, + "step": 14826500 + }, + { + "epoch": 42.92, + "learning_rate": 2.854867832993702e-05, + "loss": 2.055, + "step": 14827000 + }, + { + "epoch": 42.92, + "learning_rate": 2.854795468228974e-05, + "loss": 2.0296, + "step": 14827500 + }, + { + "epoch": 42.92, + "learning_rate": 2.8547231034642463e-05, + "loss": 2.0321, + "step": 14828000 + }, + { + "epoch": 42.92, + "learning_rate": 2.8546507386995185e-05, + "loss": 2.0342, + "step": 14828500 + }, + { + "epoch": 42.92, + "learning_rate": 2.8545783739347908e-05, + "loss": 2.0303, + "step": 14829000 + }, + { + "epoch": 42.93, + "learning_rate": 2.8545060091700633e-05, + "loss": 2.0395, + "step": 14829500 + }, + { + "epoch": 42.93, + "learning_rate": 2.854433789134865e-05, + "loss": 2.04, + "step": 14830000 + }, + { + "epoch": 42.93, + "learning_rate": 2.8543615690996668e-05, + "loss": 2.0258, + "step": 14830500 + }, + { + "epoch": 42.93, + "learning_rate": 2.854289204334939e-05, + "loss": 2.0443, + "step": 14831000 + }, + { + "epoch": 42.93, + "learning_rate": 2.8542168395702112e-05, + "loss": 2.0127, + "step": 14831500 + }, + { + "epoch": 42.93, + "learning_rate": 2.8541444748054835e-05, + "loss": 2.0293, + "step": 14832000 + }, + { + "epoch": 42.93, + "learning_rate": 2.8540721100407557e-05, + "loss": 2.0316, + "step": 14832500 + }, + { + "epoch": 42.94, + "learning_rate": 2.8539997452760282e-05, + "loss": 2.0376, + "step": 14833000 + }, + { + "epoch": 42.94, + "learning_rate": 2.8539273805113005e-05, + "loss": 2.0284, + "step": 14833500 + }, + { + "epoch": 42.94, + "learning_rate": 2.8538550157465734e-05, + "loss": 2.0308, + "step": 14834000 + }, + { + "epoch": 42.94, + "learning_rate": 2.8537826509818456e-05, + "loss": 2.0398, + "step": 14834500 + }, + { + "epoch": 42.94, + "learning_rate": 2.853710430946647e-05, + "loss": 2.0466, + "step": 14835000 + }, + { + "epoch": 42.94, + "learning_rate": 2.8536380661819197e-05, + "loss": 2.0646, + "step": 14835500 + }, + { + "epoch": 42.94, + "learning_rate": 2.853565701417192e-05, + "loss": 2.0643, + "step": 14836000 + }, + { + "epoch": 42.95, + "learning_rate": 2.853493336652464e-05, + "loss": 2.034, + "step": 14836500 + }, + { + "epoch": 42.95, + "learning_rate": 2.8534209718877364e-05, + "loss": 2.0379, + "step": 14837000 + }, + { + "epoch": 42.95, + "learning_rate": 2.8533486071230086e-05, + "loss": 2.0302, + "step": 14837500 + }, + { + "epoch": 42.95, + "learning_rate": 2.8532762423582808e-05, + "loss": 2.0351, + "step": 14838000 + }, + { + "epoch": 42.95, + "learning_rate": 2.8532038775935534e-05, + "loss": 2.0524, + "step": 14838500 + }, + { + "epoch": 42.95, + "learning_rate": 2.853131657558355e-05, + "loss": 2.065, + "step": 14839000 + }, + { + "epoch": 42.95, + "learning_rate": 2.853059292793627e-05, + "loss": 2.0477, + "step": 14839500 + }, + { + "epoch": 42.96, + "learning_rate": 2.852987072758429e-05, + "loss": 2.0367, + "step": 14840000 + }, + { + "epoch": 42.96, + "learning_rate": 2.852914852723231e-05, + "loss": 2.0352, + "step": 14840500 + }, + { + "epoch": 42.96, + "learning_rate": 2.8528424879585032e-05, + "loss": 2.0255, + "step": 14841000 + }, + { + "epoch": 42.96, + "learning_rate": 2.8527701231937754e-05, + "loss": 2.022, + "step": 14841500 + }, + { + "epoch": 42.96, + "learning_rate": 2.8526977584290483e-05, + "loss": 2.0591, + "step": 14842000 + }, + { + "epoch": 42.96, + "learning_rate": 2.8526253936643205e-05, + "loss": 2.023, + "step": 14842500 + }, + { + "epoch": 42.96, + "learning_rate": 2.8525530288995927e-05, + "loss": 2.0592, + "step": 14843000 + }, + { + "epoch": 42.97, + "learning_rate": 2.852480664134865e-05, + "loss": 2.0111, + "step": 14843500 + }, + { + "epoch": 42.97, + "learning_rate": 2.8524082993701372e-05, + "loss": 2.0723, + "step": 14844000 + }, + { + "epoch": 42.97, + "learning_rate": 2.8523359346054098e-05, + "loss": 2.0519, + "step": 14844500 + }, + { + "epoch": 42.97, + "learning_rate": 2.852263569840682e-05, + "loss": 2.0577, + "step": 14845000 + }, + { + "epoch": 42.97, + "learning_rate": 2.8521913498054835e-05, + "loss": 2.0255, + "step": 14845500 + }, + { + "epoch": 42.97, + "learning_rate": 2.8521189850407558e-05, + "loss": 2.0372, + "step": 14846000 + }, + { + "epoch": 42.97, + "learning_rate": 2.8520466202760283e-05, + "loss": 2.0559, + "step": 14846500 + }, + { + "epoch": 42.98, + "learning_rate": 2.8519742555113005e-05, + "loss": 2.0478, + "step": 14847000 + }, + { + "epoch": 42.98, + "learning_rate": 2.851902035476102e-05, + "loss": 2.0353, + "step": 14847500 + }, + { + "epoch": 42.98, + "learning_rate": 2.8518296707113747e-05, + "loss": 2.0451, + "step": 14848000 + }, + { + "epoch": 42.98, + "learning_rate": 2.851757305946647e-05, + "loss": 2.0482, + "step": 14848500 + }, + { + "epoch": 42.98, + "learning_rate": 2.8516850859114484e-05, + "loss": 2.0535, + "step": 14849000 + }, + { + "epoch": 42.98, + "learning_rate": 2.851612865876251e-05, + "loss": 2.0497, + "step": 14849500 + }, + { + "epoch": 42.98, + "learning_rate": 2.8515405011115232e-05, + "loss": 2.0619, + "step": 14850000 + }, + { + "epoch": 42.99, + "learning_rate": 2.8514681363467955e-05, + "loss": 2.0135, + "step": 14850500 + }, + { + "epoch": 42.99, + "learning_rate": 2.8513957715820677e-05, + "loss": 2.018, + "step": 14851000 + }, + { + "epoch": 42.99, + "learning_rate": 2.85132340681734e-05, + "loss": 2.0373, + "step": 14851500 + }, + { + "epoch": 42.99, + "learning_rate": 2.8512511867821418e-05, + "loss": 2.0609, + "step": 14852000 + }, + { + "epoch": 42.99, + "learning_rate": 2.851178822017414e-05, + "loss": 2.0517, + "step": 14852500 + }, + { + "epoch": 42.99, + "learning_rate": 2.8511064572526862e-05, + "loss": 2.026, + "step": 14853000 + }, + { + "epoch": 42.99, + "learning_rate": 2.8510340924879585e-05, + "loss": 2.0487, + "step": 14853500 + }, + { + "epoch": 43.0, + "learning_rate": 2.850961727723231e-05, + "loss": 2.023, + "step": 14854000 + }, + { + "epoch": 43.0, + "learning_rate": 2.8508893629585033e-05, + "loss": 2.0351, + "step": 14854500 + }, + { + "epoch": 43.0, + "learning_rate": 2.8508169981937755e-05, + "loss": 2.0147, + "step": 14855000 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.6710644659307677, + "eval_accuracy_mlm": 0.6362523003108089, + "eval_accuracy_nsp": 0.8578139537015395, + "eval_loss": 2.1682963371276855, + "eval_runtime": 331.1057, + "eval_samples_per_second": 1317.966, + "eval_steps_per_second": 54.916, + "step": 14855296 + }, + { + "epoch": 43.0, + "learning_rate": 2.8507446334290477e-05, + "loss": 2.0307, + "step": 14855500 + }, + { + "epoch": 43.0, + "learning_rate": 2.85067226866432e-05, + "loss": 2.0209, + "step": 14856000 + }, + { + "epoch": 43.0, + "learning_rate": 2.850599903899592e-05, + "loss": 1.9957, + "step": 14856500 + }, + { + "epoch": 43.0, + "learning_rate": 2.850527539134865e-05, + "loss": 2.016, + "step": 14857000 + }, + { + "epoch": 43.01, + "learning_rate": 2.8504551743701373e-05, + "loss": 2.006, + "step": 14857500 + }, + { + "epoch": 43.01, + "learning_rate": 2.850382954334939e-05, + "loss": 2.0374, + "step": 14858000 + }, + { + "epoch": 43.01, + "learning_rate": 2.8503105895702114e-05, + "loss": 2.0148, + "step": 14858500 + }, + { + "epoch": 43.01, + "learning_rate": 2.8502382248054836e-05, + "loss": 2.0184, + "step": 14859000 + }, + { + "epoch": 43.01, + "learning_rate": 2.8501658600407562e-05, + "loss": 2.0393, + "step": 14859500 + }, + { + "epoch": 43.01, + "learning_rate": 2.8500934952760284e-05, + "loss": 2.0317, + "step": 14860000 + }, + { + "epoch": 43.02, + "learning_rate": 2.8500211305113006e-05, + "loss": 2.0428, + "step": 14860500 + }, + { + "epoch": 43.02, + "learning_rate": 2.849948765746573e-05, + "loss": 2.0015, + "step": 14861000 + }, + { + "epoch": 43.02, + "learning_rate": 2.849876400981845e-05, + "loss": 2.0086, + "step": 14861500 + }, + { + "epoch": 43.02, + "learning_rate": 2.8498040362171173e-05, + "loss": 2.0092, + "step": 14862000 + }, + { + "epoch": 43.02, + "learning_rate": 2.8497318161819192e-05, + "loss": 2.011, + "step": 14862500 + }, + { + "epoch": 43.02, + "learning_rate": 2.8496594514171914e-05, + "loss": 2.0157, + "step": 14863000 + }, + { + "epoch": 43.02, + "learning_rate": 2.8495870866524636e-05, + "loss": 2.0127, + "step": 14863500 + }, + { + "epoch": 43.03, + "learning_rate": 2.8495148666172655e-05, + "loss": 2.0225, + "step": 14864000 + }, + { + "epoch": 43.03, + "learning_rate": 2.8494425018525384e-05, + "loss": 2.0169, + "step": 14864500 + }, + { + "epoch": 43.03, + "learning_rate": 2.8493701370878106e-05, + "loss": 2.0258, + "step": 14865000 + }, + { + "epoch": 43.03, + "learning_rate": 2.849297772323083e-05, + "loss": 2.0225, + "step": 14865500 + }, + { + "epoch": 43.03, + "learning_rate": 2.849225407558355e-05, + "loss": 2.0173, + "step": 14866000 + }, + { + "epoch": 43.03, + "learning_rate": 2.8491530427936277e-05, + "loss": 2.0302, + "step": 14866500 + }, + { + "epoch": 43.03, + "learning_rate": 2.8490806780289e-05, + "loss": 2.0347, + "step": 14867000 + }, + { + "epoch": 43.04, + "learning_rate": 2.849008313264172e-05, + "loss": 2.0324, + "step": 14867500 + }, + { + "epoch": 43.04, + "learning_rate": 2.8489360932289737e-05, + "loss": 1.9943, + "step": 14868000 + }, + { + "epoch": 43.04, + "learning_rate": 2.8488637284642462e-05, + "loss": 2.0056, + "step": 14868500 + }, + { + "epoch": 43.04, + "learning_rate": 2.8487913636995184e-05, + "loss": 2.0512, + "step": 14869000 + }, + { + "epoch": 43.04, + "learning_rate": 2.8487189989347907e-05, + "loss": 2.02, + "step": 14869500 + }, + { + "epoch": 43.04, + "learning_rate": 2.848646634170063e-05, + "loss": 2.0295, + "step": 14870000 + }, + { + "epoch": 43.04, + "learning_rate": 2.848574269405335e-05, + "loss": 2.012, + "step": 14870500 + }, + { + "epoch": 43.05, + "learning_rate": 2.848502049370137e-05, + "loss": 2.0118, + "step": 14871000 + }, + { + "epoch": 43.05, + "learning_rate": 2.848429829334939e-05, + "loss": 2.0138, + "step": 14871500 + }, + { + "epoch": 43.05, + "learning_rate": 2.8483574645702115e-05, + "loss": 2.0165, + "step": 14872000 + }, + { + "epoch": 43.05, + "learning_rate": 2.8482850998054837e-05, + "loss": 2.0458, + "step": 14872500 + }, + { + "epoch": 43.05, + "learning_rate": 2.8482127350407563e-05, + "loss": 2.0071, + "step": 14873000 + }, + { + "epoch": 43.05, + "learning_rate": 2.8481403702760285e-05, + "loss": 2.0164, + "step": 14873500 + }, + { + "epoch": 43.05, + "learning_rate": 2.8480680055113007e-05, + "loss": 2.0145, + "step": 14874000 + }, + { + "epoch": 43.06, + "learning_rate": 2.847995640746573e-05, + "loss": 2.0147, + "step": 14874500 + }, + { + "epoch": 43.06, + "learning_rate": 2.8479234207113748e-05, + "loss": 2.0382, + "step": 14875000 + }, + { + "epoch": 43.06, + "learning_rate": 2.847851055946647e-05, + "loss": 2.0137, + "step": 14875500 + }, + { + "epoch": 43.06, + "learning_rate": 2.8477786911819193e-05, + "loss": 2.0113, + "step": 14876000 + }, + { + "epoch": 43.06, + "learning_rate": 2.8477063264171915e-05, + "loss": 2.014, + "step": 14876500 + }, + { + "epoch": 43.06, + "learning_rate": 2.8476339616524637e-05, + "loss": 2.0193, + "step": 14877000 + }, + { + "epoch": 43.06, + "learning_rate": 2.8475617416172656e-05, + "loss": 2.034, + "step": 14877500 + }, + { + "epoch": 43.07, + "learning_rate": 2.8474893768525378e-05, + "loss": 2.0088, + "step": 14878000 + }, + { + "epoch": 43.07, + "learning_rate": 2.84741701208781e-05, + "loss": 2.0212, + "step": 14878500 + }, + { + "epoch": 43.07, + "learning_rate": 2.847344647323083e-05, + "loss": 2.0289, + "step": 14879000 + }, + { + "epoch": 43.07, + "learning_rate": 2.847272427287885e-05, + "loss": 2.0146, + "step": 14879500 + }, + { + "epoch": 43.07, + "learning_rate": 2.8472002072526864e-05, + "loss": 2.0272, + "step": 14880000 + }, + { + "epoch": 43.07, + "learning_rate": 2.847127842487959e-05, + "loss": 2.0264, + "step": 14880500 + }, + { + "epoch": 43.07, + "learning_rate": 2.8470554777232312e-05, + "loss": 2.0123, + "step": 14881000 + }, + { + "epoch": 43.08, + "learning_rate": 2.8469831129585034e-05, + "loss": 2.0062, + "step": 14881500 + }, + { + "epoch": 43.08, + "learning_rate": 2.8469107481937756e-05, + "loss": 2.013, + "step": 14882000 + }, + { + "epoch": 43.08, + "learning_rate": 2.8468385281585775e-05, + "loss": 2.0219, + "step": 14882500 + }, + { + "epoch": 43.08, + "learning_rate": 2.8467661633938498e-05, + "loss": 2.0216, + "step": 14883000 + }, + { + "epoch": 43.08, + "learning_rate": 2.846693798629122e-05, + "loss": 2.0076, + "step": 14883500 + }, + { + "epoch": 43.08, + "learning_rate": 2.8466214338643942e-05, + "loss": 1.9989, + "step": 14884000 + }, + { + "epoch": 43.08, + "learning_rate": 2.846549213829196e-05, + "loss": 2.0148, + "step": 14884500 + }, + { + "epoch": 43.09, + "learning_rate": 2.8464771385235273e-05, + "loss": 2.0065, + "step": 14885000 + }, + { + "epoch": 43.09, + "learning_rate": 2.8464047737587995e-05, + "loss": 2.0281, + "step": 14885500 + }, + { + "epoch": 43.09, + "learning_rate": 2.8463324089940718e-05, + "loss": 2.0402, + "step": 14886000 + }, + { + "epoch": 43.09, + "learning_rate": 2.846260044229344e-05, + "loss": 2.0392, + "step": 14886500 + }, + { + "epoch": 43.09, + "learning_rate": 2.8461876794646166e-05, + "loss": 2.037, + "step": 14887000 + }, + { + "epoch": 43.09, + "learning_rate": 2.846115314699889e-05, + "loss": 2.0052, + "step": 14887500 + }, + { + "epoch": 43.09, + "learning_rate": 2.8460429499351617e-05, + "loss": 2.0263, + "step": 14888000 + }, + { + "epoch": 43.1, + "learning_rate": 2.845970585170434e-05, + "loss": 2.0344, + "step": 14888500 + }, + { + "epoch": 43.1, + "learning_rate": 2.845898220405706e-05, + "loss": 2.0221, + "step": 14889000 + }, + { + "epoch": 43.1, + "learning_rate": 2.8458258556409783e-05, + "loss": 2.0314, + "step": 14889500 + }, + { + "epoch": 43.1, + "learning_rate": 2.8457534908762506e-05, + "loss": 2.0191, + "step": 14890000 + }, + { + "epoch": 43.1, + "learning_rate": 2.8456811261115228e-05, + "loss": 2.0214, + "step": 14890500 + }, + { + "epoch": 43.1, + "learning_rate": 2.8456087613467954e-05, + "loss": 2.0253, + "step": 14891000 + }, + { + "epoch": 43.1, + "learning_rate": 2.8455363965820676e-05, + "loss": 2.0355, + "step": 14891500 + }, + { + "epoch": 43.11, + "learning_rate": 2.8454640318173398e-05, + "loss": 2.0139, + "step": 14892000 + }, + { + "epoch": 43.11, + "learning_rate": 2.8453918117821417e-05, + "loss": 2.0323, + "step": 14892500 + }, + { + "epoch": 43.11, + "learning_rate": 2.845319447017414e-05, + "loss": 2.0263, + "step": 14893000 + }, + { + "epoch": 43.11, + "learning_rate": 2.845247082252686e-05, + "loss": 2.0279, + "step": 14893500 + }, + { + "epoch": 43.11, + "learning_rate": 2.8451747174879584e-05, + "loss": 2.015, + "step": 14894000 + }, + { + "epoch": 43.11, + "learning_rate": 2.8451023527232313e-05, + "loss": 2.0321, + "step": 14894500 + }, + { + "epoch": 43.11, + "learning_rate": 2.8450299879585035e-05, + "loss": 2.0075, + "step": 14895000 + }, + { + "epoch": 43.12, + "learning_rate": 2.8449576231937757e-05, + "loss": 2.0199, + "step": 14895500 + }, + { + "epoch": 43.12, + "learning_rate": 2.844885258429048e-05, + "loss": 2.0372, + "step": 14896000 + }, + { + "epoch": 43.12, + "learning_rate": 2.8448128936643205e-05, + "loss": 1.9983, + "step": 14896500 + }, + { + "epoch": 43.12, + "learning_rate": 2.8447405288995927e-05, + "loss": 2.0201, + "step": 14897000 + }, + { + "epoch": 43.12, + "learning_rate": 2.844668164134865e-05, + "loss": 2.0251, + "step": 14897500 + }, + { + "epoch": 43.12, + "learning_rate": 2.844595799370137e-05, + "loss": 2.0249, + "step": 14898000 + }, + { + "epoch": 43.13, + "learning_rate": 2.8445234346054094e-05, + "loss": 2.0146, + "step": 14898500 + }, + { + "epoch": 43.13, + "learning_rate": 2.8444513592997406e-05, + "loss": 2.0266, + "step": 14899000 + }, + { + "epoch": 43.13, + "learning_rate": 2.844378994535013e-05, + "loss": 2.043, + "step": 14899500 + }, + { + "epoch": 43.13, + "learning_rate": 2.8443067744998147e-05, + "loss": 2.0273, + "step": 14900000 + }, + { + "epoch": 43.13, + "learning_rate": 2.844234409735087e-05, + "loss": 2.0508, + "step": 14900500 + }, + { + "epoch": 43.13, + "learning_rate": 2.8441620449703592e-05, + "loss": 2.0411, + "step": 14901000 + }, + { + "epoch": 43.13, + "learning_rate": 2.8440896802056317e-05, + "loss": 2.0321, + "step": 14901500 + }, + { + "epoch": 43.14, + "learning_rate": 2.8440173154409043e-05, + "loss": 2.0107, + "step": 14902000 + }, + { + "epoch": 43.14, + "learning_rate": 2.843944950676177e-05, + "loss": 2.0338, + "step": 14902500 + }, + { + "epoch": 43.14, + "learning_rate": 2.8438727306409784e-05, + "loss": 2.0342, + "step": 14903000 + }, + { + "epoch": 43.14, + "learning_rate": 2.8438003658762506e-05, + "loss": 1.9996, + "step": 14903500 + }, + { + "epoch": 43.14, + "learning_rate": 2.8437280011115232e-05, + "loss": 2.0191, + "step": 14904000 + }, + { + "epoch": 43.14, + "learning_rate": 2.8436556363467954e-05, + "loss": 2.0065, + "step": 14904500 + }, + { + "epoch": 43.14, + "learning_rate": 2.843583416311597e-05, + "loss": 2.0259, + "step": 14905000 + }, + { + "epoch": 43.15, + "learning_rate": 2.843511196276399e-05, + "loss": 2.039, + "step": 14905500 + }, + { + "epoch": 43.15, + "learning_rate": 2.843438831511671e-05, + "loss": 2.0137, + "step": 14906000 + }, + { + "epoch": 43.15, + "learning_rate": 2.843366611476473e-05, + "loss": 2.0159, + "step": 14906500 + }, + { + "epoch": 43.15, + "learning_rate": 2.8432942467117452e-05, + "loss": 2.0372, + "step": 14907000 + }, + { + "epoch": 43.15, + "learning_rate": 2.8432218819470174e-05, + "loss": 2.0331, + "step": 14907500 + }, + { + "epoch": 43.15, + "learning_rate": 2.8431495171822897e-05, + "loss": 2.0214, + "step": 14908000 + }, + { + "epoch": 43.15, + "learning_rate": 2.843077152417562e-05, + "loss": 2.0344, + "step": 14908500 + }, + { + "epoch": 43.16, + "learning_rate": 2.8430047876528345e-05, + "loss": 2.0102, + "step": 14909000 + }, + { + "epoch": 43.16, + "learning_rate": 2.8429324228881067e-05, + "loss": 2.0324, + "step": 14909500 + }, + { + "epoch": 43.16, + "learning_rate": 2.8428600581233792e-05, + "loss": 2.0216, + "step": 14910000 + }, + { + "epoch": 43.16, + "learning_rate": 2.8427876933586518e-05, + "loss": 2.0348, + "step": 14910500 + }, + { + "epoch": 43.16, + "learning_rate": 2.842715328593924e-05, + "loss": 2.0115, + "step": 14911000 + }, + { + "epoch": 43.16, + "learning_rate": 2.8426429638291963e-05, + "loss": 2.0229, + "step": 14911500 + }, + { + "epoch": 43.16, + "learning_rate": 2.8425705990644685e-05, + "loss": 2.018, + "step": 14912000 + }, + { + "epoch": 43.17, + "learning_rate": 2.8424982342997407e-05, + "loss": 2.0154, + "step": 14912500 + }, + { + "epoch": 43.17, + "learning_rate": 2.8424258695350133e-05, + "loss": 2.0404, + "step": 14913000 + }, + { + "epoch": 43.17, + "learning_rate": 2.8423535047702855e-05, + "loss": 2.0305, + "step": 14913500 + }, + { + "epoch": 43.17, + "learning_rate": 2.842281284735087e-05, + "loss": 2.027, + "step": 14914000 + }, + { + "epoch": 43.17, + "learning_rate": 2.8422089199703593e-05, + "loss": 2.0372, + "step": 14914500 + }, + { + "epoch": 43.17, + "learning_rate": 2.842136699935161e-05, + "loss": 2.0119, + "step": 14915000 + }, + { + "epoch": 43.17, + "learning_rate": 2.8420643351704334e-05, + "loss": 2.0044, + "step": 14915500 + }, + { + "epoch": 43.18, + "learning_rate": 2.8419919704057056e-05, + "loss": 2.003, + "step": 14916000 + }, + { + "epoch": 43.18, + "learning_rate": 2.841919605640978e-05, + "loss": 2.0246, + "step": 14916500 + }, + { + "epoch": 43.18, + "learning_rate": 2.8418472408762507e-05, + "loss": 2.0254, + "step": 14917000 + }, + { + "epoch": 43.18, + "learning_rate": 2.8417748761115233e-05, + "loss": 2.0299, + "step": 14917500 + }, + { + "epoch": 43.18, + "learning_rate": 2.8417025113467955e-05, + "loss": 2.0323, + "step": 14918000 + }, + { + "epoch": 43.18, + "learning_rate": 2.841630291311597e-05, + "loss": 2.0376, + "step": 14918500 + }, + { + "epoch": 43.18, + "learning_rate": 2.8415579265468696e-05, + "loss": 2.0335, + "step": 14919000 + }, + { + "epoch": 43.19, + "learning_rate": 2.841485561782142e-05, + "loss": 2.0162, + "step": 14919500 + }, + { + "epoch": 43.19, + "learning_rate": 2.841413197017414e-05, + "loss": 2.0419, + "step": 14920000 + }, + { + "epoch": 43.19, + "learning_rate": 2.8413408322526863e-05, + "loss": 2.0342, + "step": 14920500 + }, + { + "epoch": 43.19, + "learning_rate": 2.8412684674879585e-05, + "loss": 2.027, + "step": 14921000 + }, + { + "epoch": 43.19, + "learning_rate": 2.8411961027232307e-05, + "loss": 2.0122, + "step": 14921500 + }, + { + "epoch": 43.19, + "learning_rate": 2.8411237379585033e-05, + "loss": 2.0283, + "step": 14922000 + }, + { + "epoch": 43.19, + "learning_rate": 2.8410513731937755e-05, + "loss": 2.0356, + "step": 14922500 + }, + { + "epoch": 43.2, + "learning_rate": 2.8409790084290478e-05, + "loss": 2.0297, + "step": 14923000 + }, + { + "epoch": 43.2, + "learning_rate": 2.8409067883938497e-05, + "loss": 2.043, + "step": 14923500 + }, + { + "epoch": 43.2, + "learning_rate": 2.840834423629122e-05, + "loss": 2.0283, + "step": 14924000 + }, + { + "epoch": 43.2, + "learning_rate": 2.8407620588643948e-05, + "loss": 2.0028, + "step": 14924500 + }, + { + "epoch": 43.2, + "learning_rate": 2.840689694099667e-05, + "loss": 2.0442, + "step": 14925000 + }, + { + "epoch": 43.2, + "learning_rate": 2.8406173293349392e-05, + "loss": 2.0567, + "step": 14925500 + }, + { + "epoch": 43.2, + "learning_rate": 2.8405451092997408e-05, + "loss": 2.0207, + "step": 14926000 + }, + { + "epoch": 43.21, + "learning_rate": 2.8404727445350133e-05, + "loss": 2.0199, + "step": 14926500 + }, + { + "epoch": 43.21, + "learning_rate": 2.8404006692293446e-05, + "loss": 2.0339, + "step": 14927000 + }, + { + "epoch": 43.21, + "learning_rate": 2.8403283044646168e-05, + "loss": 2.0325, + "step": 14927500 + }, + { + "epoch": 43.21, + "learning_rate": 2.840255939699889e-05, + "loss": 2.0167, + "step": 14928000 + }, + { + "epoch": 43.21, + "learning_rate": 2.8401835749351612e-05, + "loss": 2.0508, + "step": 14928500 + }, + { + "epoch": 43.21, + "learning_rate": 2.8401112101704335e-05, + "loss": 2.0213, + "step": 14929000 + }, + { + "epoch": 43.21, + "learning_rate": 2.840038845405706e-05, + "loss": 2.0097, + "step": 14929500 + }, + { + "epoch": 43.22, + "learning_rate": 2.8399664806409782e-05, + "loss": 2.0307, + "step": 14930000 + }, + { + "epoch": 43.22, + "learning_rate": 2.8398941158762505e-05, + "loss": 2.0142, + "step": 14930500 + }, + { + "epoch": 43.22, + "learning_rate": 2.8398217511115227e-05, + "loss": 2.0263, + "step": 14931000 + }, + { + "epoch": 43.22, + "learning_rate": 2.839749386346795e-05, + "loss": 2.0288, + "step": 14931500 + }, + { + "epoch": 43.22, + "learning_rate": 2.8396770215820678e-05, + "loss": 2.028, + "step": 14932000 + }, + { + "epoch": 43.22, + "learning_rate": 2.83960465681734e-05, + "loss": 2.0179, + "step": 14932500 + }, + { + "epoch": 43.22, + "learning_rate": 2.8395322920526123e-05, + "loss": 2.0553, + "step": 14933000 + }, + { + "epoch": 43.23, + "learning_rate": 2.8394599272878848e-05, + "loss": 2.0168, + "step": 14933500 + }, + { + "epoch": 43.23, + "learning_rate": 2.8393877072526864e-05, + "loss": 2.0229, + "step": 14934000 + }, + { + "epoch": 43.23, + "learning_rate": 2.8393153424879586e-05, + "loss": 2.013, + "step": 14934500 + }, + { + "epoch": 43.23, + "learning_rate": 2.839242977723231e-05, + "loss": 2.015, + "step": 14935000 + }, + { + "epoch": 43.23, + "learning_rate": 2.8391706129585034e-05, + "loss": 2.0255, + "step": 14935500 + }, + { + "epoch": 43.23, + "learning_rate": 2.839098392923305e-05, + "loss": 2.0342, + "step": 14936000 + }, + { + "epoch": 43.24, + "learning_rate": 2.839026028158577e-05, + "loss": 2.03, + "step": 14936500 + }, + { + "epoch": 43.24, + "learning_rate": 2.8389536633938497e-05, + "loss": 1.9976, + "step": 14937000 + }, + { + "epoch": 43.24, + "learning_rate": 2.838881298629122e-05, + "loss": 2.0311, + "step": 14937500 + }, + { + "epoch": 43.24, + "learning_rate": 2.8388089338643942e-05, + "loss": 2.0553, + "step": 14938000 + }, + { + "epoch": 43.24, + "learning_rate": 2.8387365690996664e-05, + "loss": 2.0093, + "step": 14938500 + }, + { + "epoch": 43.24, + "learning_rate": 2.8386642043349386e-05, + "loss": 2.0278, + "step": 14939000 + }, + { + "epoch": 43.24, + "learning_rate": 2.8385919842997412e-05, + "loss": 2.0185, + "step": 14939500 + }, + { + "epoch": 43.25, + "learning_rate": 2.8385196195350134e-05, + "loss": 2.0079, + "step": 14940000 + }, + { + "epoch": 43.25, + "learning_rate": 2.8384472547702856e-05, + "loss": 2.0378, + "step": 14940500 + }, + { + "epoch": 43.25, + "learning_rate": 2.8383750347350875e-05, + "loss": 2.0261, + "step": 14941000 + }, + { + "epoch": 43.25, + "learning_rate": 2.8383026699703598e-05, + "loss": 2.044, + "step": 14941500 + }, + { + "epoch": 43.25, + "learning_rate": 2.838230305205632e-05, + "loss": 2.0383, + "step": 14942000 + }, + { + "epoch": 43.25, + "learning_rate": 2.8381579404409042e-05, + "loss": 2.0318, + "step": 14942500 + }, + { + "epoch": 43.25, + "learning_rate": 2.8380855756761764e-05, + "loss": 2.0335, + "step": 14943000 + }, + { + "epoch": 43.26, + "learning_rate": 2.8380132109114487e-05, + "loss": 2.0266, + "step": 14943500 + }, + { + "epoch": 43.26, + "learning_rate": 2.8379408461467212e-05, + "loss": 2.04, + "step": 14944000 + }, + { + "epoch": 43.26, + "learning_rate": 2.8378684813819934e-05, + "loss": 2.0296, + "step": 14944500 + }, + { + "epoch": 43.26, + "learning_rate": 2.8377961166172657e-05, + "loss": 2.0401, + "step": 14945000 + }, + { + "epoch": 43.26, + "learning_rate": 2.837723751852538e-05, + "loss": 2.0266, + "step": 14945500 + }, + { + "epoch": 43.26, + "learning_rate": 2.83765138708781e-05, + "loss": 2.0192, + "step": 14946000 + }, + { + "epoch": 43.26, + "learning_rate": 2.837579022323083e-05, + "loss": 2.0373, + "step": 14946500 + }, + { + "epoch": 43.27, + "learning_rate": 2.8375066575583552e-05, + "loss": 2.0167, + "step": 14947000 + }, + { + "epoch": 43.27, + "learning_rate": 2.8374342927936275e-05, + "loss": 1.9926, + "step": 14947500 + }, + { + "epoch": 43.27, + "learning_rate": 2.8373619280289e-05, + "loss": 2.0195, + "step": 14948000 + }, + { + "epoch": 43.27, + "learning_rate": 2.8372895632641722e-05, + "loss": 2.0332, + "step": 14948500 + }, + { + "epoch": 43.27, + "learning_rate": 2.8372171984994445e-05, + "loss": 2.013, + "step": 14949000 + }, + { + "epoch": 43.27, + "learning_rate": 2.8371449784642464e-05, + "loss": 2.0185, + "step": 14949500 + }, + { + "epoch": 43.27, + "learning_rate": 2.837072758429048e-05, + "loss": 2.0307, + "step": 14950000 + }, + { + "epoch": 43.28, + "learning_rate": 2.83700039366432e-05, + "loss": 2.0297, + "step": 14950500 + }, + { + "epoch": 43.28, + "learning_rate": 2.8369280288995924e-05, + "loss": 2.0336, + "step": 14951000 + }, + { + "epoch": 43.28, + "learning_rate": 2.836855664134865e-05, + "loss": 2.03, + "step": 14951500 + }, + { + "epoch": 43.28, + "learning_rate": 2.836783299370137e-05, + "loss": 2.0196, + "step": 14952000 + }, + { + "epoch": 43.28, + "learning_rate": 2.8367110793349387e-05, + "loss": 2.0138, + "step": 14952500 + }, + { + "epoch": 43.28, + "learning_rate": 2.8366387145702113e-05, + "loss": 2.015, + "step": 14953000 + }, + { + "epoch": 43.28, + "learning_rate": 2.8365663498054835e-05, + "loss": 2.0273, + "step": 14953500 + }, + { + "epoch": 43.29, + "learning_rate": 2.8364939850407557e-05, + "loss": 2.0188, + "step": 14954000 + }, + { + "epoch": 43.29, + "learning_rate": 2.8364216202760286e-05, + "loss": 2.0304, + "step": 14954500 + }, + { + "epoch": 43.29, + "learning_rate": 2.83634940024083e-05, + "loss": 2.0231, + "step": 14955000 + }, + { + "epoch": 43.29, + "learning_rate": 2.8362770354761027e-05, + "loss": 2.0216, + "step": 14955500 + }, + { + "epoch": 43.29, + "learning_rate": 2.8362048154409043e-05, + "loss": 2.0151, + "step": 14956000 + }, + { + "epoch": 43.29, + "learning_rate": 2.8361324506761765e-05, + "loss": 2.0515, + "step": 14956500 + }, + { + "epoch": 43.29, + "learning_rate": 2.8360600859114487e-05, + "loss": 2.0114, + "step": 14957000 + }, + { + "epoch": 43.3, + "learning_rate": 2.8359877211467213e-05, + "loss": 2.0415, + "step": 14957500 + }, + { + "epoch": 43.3, + "learning_rate": 2.8359153563819935e-05, + "loss": 2.0304, + "step": 14958000 + }, + { + "epoch": 43.3, + "learning_rate": 2.8358429916172657e-05, + "loss": 2.0338, + "step": 14958500 + }, + { + "epoch": 43.3, + "learning_rate": 2.8357707715820676e-05, + "loss": 2.0177, + "step": 14959000 + }, + { + "epoch": 43.3, + "learning_rate": 2.83569840681734e-05, + "loss": 2.0489, + "step": 14959500 + }, + { + "epoch": 43.3, + "learning_rate": 2.835626042052612e-05, + "loss": 2.029, + "step": 14960000 + }, + { + "epoch": 43.3, + "learning_rate": 2.8355536772878843e-05, + "loss": 2.0315, + "step": 14960500 + }, + { + "epoch": 43.31, + "learning_rate": 2.8354813125231565e-05, + "loss": 2.0452, + "step": 14961000 + }, + { + "epoch": 43.31, + "learning_rate": 2.8354089477584288e-05, + "loss": 2.0276, + "step": 14961500 + }, + { + "epoch": 43.31, + "learning_rate": 2.8353365829937017e-05, + "loss": 2.0429, + "step": 14962000 + }, + { + "epoch": 43.31, + "learning_rate": 2.835264218228974e-05, + "loss": 2.0471, + "step": 14962500 + }, + { + "epoch": 43.31, + "learning_rate": 2.8351918534642464e-05, + "loss": 2.0265, + "step": 14963000 + }, + { + "epoch": 43.31, + "learning_rate": 2.8351194886995187e-05, + "loss": 2.043, + "step": 14963500 + }, + { + "epoch": 43.31, + "learning_rate": 2.8350472686643202e-05, + "loss": 2.0203, + "step": 14964000 + }, + { + "epoch": 43.32, + "learning_rate": 2.8349749038995928e-05, + "loss": 2.0141, + "step": 14964500 + }, + { + "epoch": 43.32, + "learning_rate": 2.8349026838643943e-05, + "loss": 2.0428, + "step": 14965000 + }, + { + "epoch": 43.32, + "learning_rate": 2.8348303190996666e-05, + "loss": 2.0516, + "step": 14965500 + }, + { + "epoch": 43.32, + "learning_rate": 2.834757954334939e-05, + "loss": 1.998, + "step": 14966000 + }, + { + "epoch": 43.32, + "learning_rate": 2.8346855895702113e-05, + "loss": 2.0207, + "step": 14966500 + }, + { + "epoch": 43.32, + "learning_rate": 2.8346132248054836e-05, + "loss": 2.026, + "step": 14967000 + }, + { + "epoch": 43.32, + "learning_rate": 2.8345408600407558e-05, + "loss": 2.0376, + "step": 14967500 + }, + { + "epoch": 43.33, + "learning_rate": 2.834468495276028e-05, + "loss": 2.0188, + "step": 14968000 + }, + { + "epoch": 43.33, + "learning_rate": 2.8343961305113002e-05, + "loss": 2.0453, + "step": 14968500 + }, + { + "epoch": 43.33, + "learning_rate": 2.834323765746573e-05, + "loss": 2.0241, + "step": 14969000 + }, + { + "epoch": 43.33, + "learning_rate": 2.8342516904409044e-05, + "loss": 2.0369, + "step": 14969500 + }, + { + "epoch": 43.33, + "learning_rate": 2.8341793256761766e-05, + "loss": 2.0389, + "step": 14970000 + }, + { + "epoch": 43.33, + "learning_rate": 2.834106960911449e-05, + "loss": 2.0179, + "step": 14970500 + }, + { + "epoch": 43.33, + "learning_rate": 2.8340345961467214e-05, + "loss": 2.0545, + "step": 14971000 + }, + { + "epoch": 43.34, + "learning_rate": 2.8339622313819936e-05, + "loss": 2.0432, + "step": 14971500 + }, + { + "epoch": 43.34, + "learning_rate": 2.8338898666172658e-05, + "loss": 2.0278, + "step": 14972000 + }, + { + "epoch": 43.34, + "learning_rate": 2.833817501852538e-05, + "loss": 2.0335, + "step": 14972500 + }, + { + "epoch": 43.34, + "learning_rate": 2.8337451370878103e-05, + "loss": 2.0357, + "step": 14973000 + }, + { + "epoch": 43.34, + "learning_rate": 2.833672917052612e-05, + "loss": 2.0292, + "step": 14973500 + }, + { + "epoch": 43.34, + "learning_rate": 2.833600697017414e-05, + "loss": 2.0166, + "step": 14974000 + }, + { + "epoch": 43.35, + "learning_rate": 2.8335283322526863e-05, + "loss": 2.019, + "step": 14974500 + }, + { + "epoch": 43.35, + "learning_rate": 2.833456112217488e-05, + "loss": 2.0353, + "step": 14975000 + }, + { + "epoch": 43.35, + "learning_rate": 2.8333837474527604e-05, + "loss": 2.0309, + "step": 14975500 + }, + { + "epoch": 43.35, + "learning_rate": 2.8333113826880326e-05, + "loss": 2.0358, + "step": 14976000 + }, + { + "epoch": 43.35, + "learning_rate": 2.833239017923305e-05, + "loss": 2.0329, + "step": 14976500 + }, + { + "epoch": 43.35, + "learning_rate": 2.8331666531585777e-05, + "loss": 2.0374, + "step": 14977000 + }, + { + "epoch": 43.35, + "learning_rate": 2.83309428839385e-05, + "loss": 2.0121, + "step": 14977500 + }, + { + "epoch": 43.36, + "learning_rate": 2.8330219236291222e-05, + "loss": 2.0294, + "step": 14978000 + }, + { + "epoch": 43.36, + "learning_rate": 2.8329495588643944e-05, + "loss": 2.0138, + "step": 14978500 + }, + { + "epoch": 43.36, + "learning_rate": 2.8328771940996666e-05, + "loss": 2.0253, + "step": 14979000 + }, + { + "epoch": 43.36, + "learning_rate": 2.8328048293349392e-05, + "loss": 2.0091, + "step": 14979500 + }, + { + "epoch": 43.36, + "learning_rate": 2.8327324645702114e-05, + "loss": 2.0151, + "step": 14980000 + }, + { + "epoch": 43.36, + "learning_rate": 2.832660244535013e-05, + "loss": 2.0451, + "step": 14980500 + }, + { + "epoch": 43.36, + "learning_rate": 2.8325878797702855e-05, + "loss": 2.0238, + "step": 14981000 + }, + { + "epoch": 43.37, + "learning_rate": 2.8325155150055578e-05, + "loss": 2.0587, + "step": 14981500 + }, + { + "epoch": 43.37, + "learning_rate": 2.83244315024083e-05, + "loss": 2.0362, + "step": 14982000 + }, + { + "epoch": 43.37, + "learning_rate": 2.8323707854761022e-05, + "loss": 2.0388, + "step": 14982500 + }, + { + "epoch": 43.37, + "learning_rate": 2.8322984207113744e-05, + "loss": 2.029, + "step": 14983000 + }, + { + "epoch": 43.37, + "learning_rate": 2.8322260559466467e-05, + "loss": 2.0281, + "step": 14983500 + }, + { + "epoch": 43.37, + "learning_rate": 2.8321536911819192e-05, + "loss": 2.0476, + "step": 14984000 + }, + { + "epoch": 43.37, + "learning_rate": 2.8320814711467215e-05, + "loss": 2.0211, + "step": 14984500 + }, + { + "epoch": 43.38, + "learning_rate": 2.8320091063819937e-05, + "loss": 2.0336, + "step": 14985000 + }, + { + "epoch": 43.38, + "learning_rate": 2.831936741617266e-05, + "loss": 1.9926, + "step": 14985500 + }, + { + "epoch": 43.38, + "learning_rate": 2.831864376852538e-05, + "loss": 2.0468, + "step": 14986000 + }, + { + "epoch": 43.38, + "learning_rate": 2.8317920120878107e-05, + "loss": 2.0081, + "step": 14986500 + }, + { + "epoch": 43.38, + "learning_rate": 2.831719647323083e-05, + "loss": 2.0501, + "step": 14987000 + }, + { + "epoch": 43.38, + "learning_rate": 2.831647282558355e-05, + "loss": 2.0215, + "step": 14987500 + }, + { + "epoch": 43.38, + "learning_rate": 2.8315749177936274e-05, + "loss": 2.0425, + "step": 14988000 + }, + { + "epoch": 43.39, + "learning_rate": 2.8315025530288996e-05, + "loss": 2.0181, + "step": 14988500 + }, + { + "epoch": 43.39, + "learning_rate": 2.8314301882641718e-05, + "loss": 2.0245, + "step": 14989000 + }, + { + "epoch": 43.39, + "learning_rate": 2.8313578234994444e-05, + "loss": 2.0511, + "step": 14989500 + }, + { + "epoch": 43.39, + "learning_rate": 2.831285603464246e-05, + "loss": 2.0325, + "step": 14990000 + }, + { + "epoch": 43.39, + "learning_rate": 2.831213238699518e-05, + "loss": 2.0364, + "step": 14990500 + }, + { + "epoch": 43.39, + "learning_rate": 2.8311408739347907e-05, + "loss": 2.013, + "step": 14991000 + }, + { + "epoch": 43.39, + "learning_rate": 2.8310685091700633e-05, + "loss": 2.0237, + "step": 14991500 + }, + { + "epoch": 43.4, + "learning_rate": 2.830996289134865e-05, + "loss": 2.0257, + "step": 14992000 + }, + { + "epoch": 43.4, + "learning_rate": 2.8309239243701374e-05, + "loss": 2.0229, + "step": 14992500 + }, + { + "epoch": 43.4, + "learning_rate": 2.8308515596054096e-05, + "loss": 2.0335, + "step": 14993000 + }, + { + "epoch": 43.4, + "learning_rate": 2.830779194840682e-05, + "loss": 2.0097, + "step": 14993500 + }, + { + "epoch": 43.4, + "learning_rate": 2.8307068300759544e-05, + "loss": 2.0122, + "step": 14994000 + }, + { + "epoch": 43.4, + "learning_rate": 2.8306344653112266e-05, + "loss": 2.0308, + "step": 14994500 + }, + { + "epoch": 43.4, + "learning_rate": 2.830562100546499e-05, + "loss": 2.0191, + "step": 14995000 + }, + { + "epoch": 43.41, + "learning_rate": 2.830489735781771e-05, + "loss": 2.0177, + "step": 14995500 + }, + { + "epoch": 43.41, + "learning_rate": 2.8304173710170433e-05, + "loss": 2.0276, + "step": 14996000 + }, + { + "epoch": 43.41, + "learning_rate": 2.830345006252316e-05, + "loss": 2.05, + "step": 14996500 + }, + { + "epoch": 43.41, + "learning_rate": 2.830272641487588e-05, + "loss": 2.0345, + "step": 14997000 + }, + { + "epoch": 43.41, + "learning_rate": 2.8302002767228603e-05, + "loss": 2.0426, + "step": 14997500 + }, + { + "epoch": 43.41, + "learning_rate": 2.8301279119581325e-05, + "loss": 2.0081, + "step": 14998000 + }, + { + "epoch": 43.41, + "learning_rate": 2.8300555471934054e-05, + "loss": 2.0217, + "step": 14998500 + }, + { + "epoch": 43.42, + "learning_rate": 2.829983327158207e-05, + "loss": 2.0242, + "step": 14999000 + }, + { + "epoch": 43.42, + "learning_rate": 2.8299109623934795e-05, + "loss": 2.0004, + "step": 14999500 + }, + { + "epoch": 43.42, + "learning_rate": 2.8298385976287518e-05, + "loss": 2.0542, + "step": 15000000 + }, + { + "epoch": 43.42, + "learning_rate": 2.829766232864024e-05, + "loss": 2.0309, + "step": 15000500 + }, + { + "epoch": 43.42, + "learning_rate": 2.8296938680992962e-05, + "loss": 2.0235, + "step": 15001000 + }, + { + "epoch": 43.42, + "learning_rate": 2.8296215033345684e-05, + "loss": 2.0369, + "step": 15001500 + }, + { + "epoch": 43.42, + "learning_rate": 2.829549138569841e-05, + "loss": 2.0267, + "step": 15002000 + }, + { + "epoch": 43.43, + "learning_rate": 2.8294767738051132e-05, + "loss": 2.0253, + "step": 15002500 + }, + { + "epoch": 43.43, + "learning_rate": 2.8294046984994444e-05, + "loss": 2.024, + "step": 15003000 + }, + { + "epoch": 43.43, + "learning_rate": 2.8293323337347167e-05, + "loss": 2.006, + "step": 15003500 + }, + { + "epoch": 43.43, + "learning_rate": 2.8292601136995182e-05, + "loss": 2.0438, + "step": 15004000 + }, + { + "epoch": 43.43, + "learning_rate": 2.8291877489347908e-05, + "loss": 2.0485, + "step": 15004500 + }, + { + "epoch": 43.43, + "learning_rate": 2.829115384170063e-05, + "loss": 2.0128, + "step": 15005000 + }, + { + "epoch": 43.43, + "learning_rate": 2.8290430194053352e-05, + "loss": 2.0366, + "step": 15005500 + }, + { + "epoch": 43.44, + "learning_rate": 2.8289706546406075e-05, + "loss": 2.0276, + "step": 15006000 + }, + { + "epoch": 43.44, + "learning_rate": 2.8288982898758804e-05, + "loss": 2.0379, + "step": 15006500 + }, + { + "epoch": 43.44, + "learning_rate": 2.8288259251111526e-05, + "loss": 2.0282, + "step": 15007000 + }, + { + "epoch": 43.44, + "learning_rate": 2.8287535603464248e-05, + "loss": 2.0414, + "step": 15007500 + }, + { + "epoch": 43.44, + "learning_rate": 2.8286813403112267e-05, + "loss": 2.0239, + "step": 15008000 + }, + { + "epoch": 43.44, + "learning_rate": 2.828608975546499e-05, + "loss": 2.0484, + "step": 15008500 + }, + { + "epoch": 43.44, + "learning_rate": 2.828536610781771e-05, + "loss": 2.0034, + "step": 15009000 + }, + { + "epoch": 43.45, + "learning_rate": 2.8284642460170434e-05, + "loss": 2.041, + "step": 15009500 + }, + { + "epoch": 43.45, + "learning_rate": 2.828391881252316e-05, + "loss": 2.0261, + "step": 15010000 + }, + { + "epoch": 43.45, + "learning_rate": 2.828319516487588e-05, + "loss": 2.0496, + "step": 15010500 + }, + { + "epoch": 43.45, + "learning_rate": 2.8282471517228604e-05, + "loss": 2.0061, + "step": 15011000 + }, + { + "epoch": 43.45, + "learning_rate": 2.8281747869581326e-05, + "loss": 2.0369, + "step": 15011500 + }, + { + "epoch": 43.45, + "learning_rate": 2.8281025669229345e-05, + "loss": 2.039, + "step": 15012000 + }, + { + "epoch": 43.46, + "learning_rate": 2.8280302021582067e-05, + "loss": 2.0491, + "step": 15012500 + }, + { + "epoch": 43.46, + "learning_rate": 2.827957837393479e-05, + "loss": 2.0543, + "step": 15013000 + }, + { + "epoch": 43.46, + "learning_rate": 2.827885472628751e-05, + "loss": 2.0441, + "step": 15013500 + }, + { + "epoch": 43.46, + "learning_rate": 2.827813107864024e-05, + "loss": 2.0385, + "step": 15014000 + }, + { + "epoch": 43.46, + "learning_rate": 2.827740887828826e-05, + "loss": 2.033, + "step": 15014500 + }, + { + "epoch": 43.46, + "learning_rate": 2.8276686677936275e-05, + "loss": 2.0203, + "step": 15015000 + }, + { + "epoch": 43.46, + "learning_rate": 2.8275963030288997e-05, + "loss": 2.0468, + "step": 15015500 + }, + { + "epoch": 43.47, + "learning_rate": 2.8275239382641723e-05, + "loss": 2.0277, + "step": 15016000 + }, + { + "epoch": 43.47, + "learning_rate": 2.8274515734994445e-05, + "loss": 2.0225, + "step": 15016500 + }, + { + "epoch": 43.47, + "learning_rate": 2.8273792087347167e-05, + "loss": 2.0267, + "step": 15017000 + }, + { + "epoch": 43.47, + "learning_rate": 2.827306843969989e-05, + "loss": 2.0276, + "step": 15017500 + }, + { + "epoch": 43.47, + "learning_rate": 2.8272344792052612e-05, + "loss": 1.9969, + "step": 15018000 + }, + { + "epoch": 43.47, + "learning_rate": 2.827162259170063e-05, + "loss": 2.0192, + "step": 15018500 + }, + { + "epoch": 43.47, + "learning_rate": 2.8270898944053353e-05, + "loss": 2.0524, + "step": 15019000 + }, + { + "epoch": 43.48, + "learning_rate": 2.8270175296406075e-05, + "loss": 2.0219, + "step": 15019500 + }, + { + "epoch": 43.48, + "learning_rate": 2.8269451648758798e-05, + "loss": 2.0262, + "step": 15020000 + }, + { + "epoch": 43.48, + "learning_rate": 2.8268729448406817e-05, + "loss": 2.0181, + "step": 15020500 + }, + { + "epoch": 43.48, + "learning_rate": 2.826800580075954e-05, + "loss": 2.0494, + "step": 15021000 + }, + { + "epoch": 43.48, + "learning_rate": 2.8267282153112268e-05, + "loss": 2.0354, + "step": 15021500 + }, + { + "epoch": 43.48, + "learning_rate": 2.826655850546499e-05, + "loss": 2.0429, + "step": 15022000 + }, + { + "epoch": 43.48, + "learning_rate": 2.826583630511301e-05, + "loss": 2.0401, + "step": 15022500 + }, + { + "epoch": 43.49, + "learning_rate": 2.826511265746573e-05, + "loss": 2.0528, + "step": 15023000 + }, + { + "epoch": 43.49, + "learning_rate": 2.8264389009818453e-05, + "loss": 2.0227, + "step": 15023500 + }, + { + "epoch": 43.49, + "learning_rate": 2.8263665362171176e-05, + "loss": 2.008, + "step": 15024000 + }, + { + "epoch": 43.49, + "learning_rate": 2.8262941714523898e-05, + "loss": 2.0312, + "step": 15024500 + }, + { + "epoch": 43.49, + "learning_rate": 2.8262219514171917e-05, + "loss": 2.0423, + "step": 15025000 + }, + { + "epoch": 43.49, + "learning_rate": 2.826149586652464e-05, + "loss": 2.0193, + "step": 15025500 + }, + { + "epoch": 43.49, + "learning_rate": 2.826077221887736e-05, + "loss": 2.0396, + "step": 15026000 + }, + { + "epoch": 43.5, + "learning_rate": 2.8260048571230087e-05, + "loss": 2.0217, + "step": 15026500 + }, + { + "epoch": 43.5, + "learning_rate": 2.825932492358281e-05, + "loss": 2.0319, + "step": 15027000 + }, + { + "epoch": 43.5, + "learning_rate": 2.825860127593553e-05, + "loss": 2.0378, + "step": 15027500 + }, + { + "epoch": 43.5, + "learning_rate": 2.8257877628288254e-05, + "loss": 2.0307, + "step": 15028000 + }, + { + "epoch": 43.5, + "learning_rate": 2.8257153980640976e-05, + "loss": 2.0375, + "step": 15028500 + }, + { + "epoch": 43.5, + "learning_rate": 2.8256430332993705e-05, + "loss": 2.036, + "step": 15029000 + }, + { + "epoch": 43.5, + "learning_rate": 2.8255708132641724e-05, + "loss": 2.0496, + "step": 15029500 + }, + { + "epoch": 43.51, + "learning_rate": 2.8254984484994446e-05, + "loss": 2.0254, + "step": 15030000 + }, + { + "epoch": 43.51, + "learning_rate": 2.8254260837347168e-05, + "loss": 2.0424, + "step": 15030500 + }, + { + "epoch": 43.51, + "learning_rate": 2.825353718969989e-05, + "loss": 2.0329, + "step": 15031000 + }, + { + "epoch": 43.51, + "learning_rate": 2.8252813542052613e-05, + "loss": 2.0062, + "step": 15031500 + }, + { + "epoch": 43.51, + "learning_rate": 2.825208989440534e-05, + "loss": 2.0122, + "step": 15032000 + }, + { + "epoch": 43.51, + "learning_rate": 2.825136624675806e-05, + "loss": 2.0331, + "step": 15032500 + }, + { + "epoch": 43.51, + "learning_rate": 2.8250642599110783e-05, + "loss": 2.0542, + "step": 15033000 + }, + { + "epoch": 43.52, + "learning_rate": 2.8249918951463505e-05, + "loss": 2.0294, + "step": 15033500 + }, + { + "epoch": 43.52, + "learning_rate": 2.8249195303816227e-05, + "loss": 2.0435, + "step": 15034000 + }, + { + "epoch": 43.52, + "learning_rate": 2.8248473103464246e-05, + "loss": 2.0227, + "step": 15034500 + }, + { + "epoch": 43.52, + "learning_rate": 2.824774945581697e-05, + "loss": 2.0251, + "step": 15035000 + }, + { + "epoch": 43.52, + "learning_rate": 2.8247027255464987e-05, + "loss": 2.0489, + "step": 15035500 + }, + { + "epoch": 43.52, + "learning_rate": 2.824630360781771e-05, + "loss": 2.0261, + "step": 15036000 + }, + { + "epoch": 43.52, + "learning_rate": 2.824557996017044e-05, + "loss": 2.0326, + "step": 15036500 + }, + { + "epoch": 43.53, + "learning_rate": 2.824485631252316e-05, + "loss": 2.0403, + "step": 15037000 + }, + { + "epoch": 43.53, + "learning_rate": 2.8244135559466473e-05, + "loss": 2.0003, + "step": 15037500 + }, + { + "epoch": 43.53, + "learning_rate": 2.8243411911819195e-05, + "loss": 2.0232, + "step": 15038000 + }, + { + "epoch": 43.53, + "learning_rate": 2.8242688264171918e-05, + "loss": 2.038, + "step": 15038500 + }, + { + "epoch": 43.53, + "learning_rate": 2.824196461652464e-05, + "loss": 2.0088, + "step": 15039000 + }, + { + "epoch": 43.53, + "learning_rate": 2.8241240968877365e-05, + "loss": 2.0455, + "step": 15039500 + }, + { + "epoch": 43.53, + "learning_rate": 2.8240517321230088e-05, + "loss": 2.0294, + "step": 15040000 + }, + { + "epoch": 43.54, + "learning_rate": 2.823979367358281e-05, + "loss": 2.0483, + "step": 15040500 + }, + { + "epoch": 43.54, + "learning_rate": 2.8239070025935532e-05, + "loss": 2.0074, + "step": 15041000 + }, + { + "epoch": 43.54, + "learning_rate": 2.823834782558355e-05, + "loss": 2.0408, + "step": 15041500 + }, + { + "epoch": 43.54, + "learning_rate": 2.8237624177936273e-05, + "loss": 2.047, + "step": 15042000 + }, + { + "epoch": 43.54, + "learning_rate": 2.8236900530288996e-05, + "loss": 2.0383, + "step": 15042500 + }, + { + "epoch": 43.54, + "learning_rate": 2.8236176882641718e-05, + "loss": 2.0419, + "step": 15043000 + }, + { + "epoch": 43.54, + "learning_rate": 2.823545323499444e-05, + "loss": 2.0434, + "step": 15043500 + }, + { + "epoch": 43.55, + "learning_rate": 2.823472958734717e-05, + "loss": 2.0169, + "step": 15044000 + }, + { + "epoch": 43.55, + "learning_rate": 2.823400593969989e-05, + "loss": 2.0357, + "step": 15044500 + }, + { + "epoch": 43.55, + "learning_rate": 2.8233282292052614e-05, + "loss": 2.0164, + "step": 15045000 + }, + { + "epoch": 43.55, + "learning_rate": 2.823255864440534e-05, + "loss": 2.0462, + "step": 15045500 + }, + { + "epoch": 43.55, + "learning_rate": 2.823183499675806e-05, + "loss": 2.0379, + "step": 15046000 + }, + { + "epoch": 43.55, + "learning_rate": 2.8231112796406077e-05, + "loss": 2.0354, + "step": 15046500 + }, + { + "epoch": 43.55, + "learning_rate": 2.8230389148758803e-05, + "loss": 2.0352, + "step": 15047000 + }, + { + "epoch": 43.56, + "learning_rate": 2.8229666948406818e-05, + "loss": 2.0224, + "step": 15047500 + }, + { + "epoch": 43.56, + "learning_rate": 2.822894330075954e-05, + "loss": 2.0276, + "step": 15048000 + }, + { + "epoch": 43.56, + "learning_rate": 2.822822110040756e-05, + "loss": 2.0223, + "step": 15048500 + }, + { + "epoch": 43.56, + "learning_rate": 2.822749745276028e-05, + "loss": 2.0201, + "step": 15049000 + }, + { + "epoch": 43.56, + "learning_rate": 2.8226773805113004e-05, + "loss": 2.0327, + "step": 15049500 + }, + { + "epoch": 43.56, + "learning_rate": 2.8226050157465726e-05, + "loss": 2.0433, + "step": 15050000 + }, + { + "epoch": 43.57, + "learning_rate": 2.822532650981845e-05, + "loss": 2.0407, + "step": 15050500 + }, + { + "epoch": 43.57, + "learning_rate": 2.8224602862171174e-05, + "loss": 2.0401, + "step": 15051000 + }, + { + "epoch": 43.57, + "learning_rate": 2.8223879214523903e-05, + "loss": 2.0475, + "step": 15051500 + }, + { + "epoch": 43.57, + "learning_rate": 2.822315701417192e-05, + "loss": 2.0167, + "step": 15052000 + }, + { + "epoch": 43.57, + "learning_rate": 2.822243336652464e-05, + "loss": 2.0189, + "step": 15052500 + }, + { + "epoch": 43.57, + "learning_rate": 2.8221709718877366e-05, + "loss": 2.0269, + "step": 15053000 + }, + { + "epoch": 43.57, + "learning_rate": 2.822098607123009e-05, + "loss": 2.0438, + "step": 15053500 + }, + { + "epoch": 43.58, + "learning_rate": 2.8220263870878104e-05, + "loss": 2.0271, + "step": 15054000 + }, + { + "epoch": 43.58, + "learning_rate": 2.821954022323083e-05, + "loss": 2.018, + "step": 15054500 + }, + { + "epoch": 43.58, + "learning_rate": 2.8218816575583552e-05, + "loss": 2.044, + "step": 15055000 + }, + { + "epoch": 43.58, + "learning_rate": 2.8218092927936274e-05, + "loss": 2.0521, + "step": 15055500 + }, + { + "epoch": 43.58, + "learning_rate": 2.8217369280288996e-05, + "loss": 2.0363, + "step": 15056000 + }, + { + "epoch": 43.58, + "learning_rate": 2.821664563264172e-05, + "loss": 2.0399, + "step": 15056500 + }, + { + "epoch": 43.58, + "learning_rate": 2.821592198499444e-05, + "loss": 2.0435, + "step": 15057000 + }, + { + "epoch": 43.59, + "learning_rate": 2.821519978464246e-05, + "loss": 2.0359, + "step": 15057500 + }, + { + "epoch": 43.59, + "learning_rate": 2.8214476136995182e-05, + "loss": 2.0209, + "step": 15058000 + }, + { + "epoch": 43.59, + "learning_rate": 2.8213752489347904e-05, + "loss": 2.0252, + "step": 15058500 + }, + { + "epoch": 43.59, + "learning_rate": 2.8213028841700633e-05, + "loss": 2.0409, + "step": 15059000 + }, + { + "epoch": 43.59, + "learning_rate": 2.8212305194053356e-05, + "loss": 2.0084, + "step": 15059500 + }, + { + "epoch": 43.59, + "learning_rate": 2.821158154640608e-05, + "loss": 2.0585, + "step": 15060000 + }, + { + "epoch": 43.59, + "learning_rate": 2.8210857898758803e-05, + "loss": 2.0224, + "step": 15060500 + }, + { + "epoch": 43.6, + "learning_rate": 2.8210134251111526e-05, + "loss": 2.0256, + "step": 15061000 + }, + { + "epoch": 43.6, + "learning_rate": 2.8209410603464248e-05, + "loss": 2.0399, + "step": 15061500 + }, + { + "epoch": 43.6, + "learning_rate": 2.820868695581697e-05, + "loss": 2.0261, + "step": 15062000 + }, + { + "epoch": 43.6, + "learning_rate": 2.820796475546499e-05, + "loss": 2.0453, + "step": 15062500 + }, + { + "epoch": 43.6, + "learning_rate": 2.8207242555113005e-05, + "loss": 2.0382, + "step": 15063000 + }, + { + "epoch": 43.6, + "learning_rate": 2.820651890746573e-05, + "loss": 2.0518, + "step": 15063500 + }, + { + "epoch": 43.6, + "learning_rate": 2.8205795259818452e-05, + "loss": 2.026, + "step": 15064000 + }, + { + "epoch": 43.61, + "learning_rate": 2.8205071612171175e-05, + "loss": 1.9932, + "step": 15064500 + }, + { + "epoch": 43.61, + "learning_rate": 2.8204347964523897e-05, + "loss": 2.033, + "step": 15065000 + }, + { + "epoch": 43.61, + "learning_rate": 2.8203625764171916e-05, + "loss": 2.024, + "step": 15065500 + }, + { + "epoch": 43.61, + "learning_rate": 2.8202902116524638e-05, + "loss": 2.0513, + "step": 15066000 + }, + { + "epoch": 43.61, + "learning_rate": 2.8202178468877367e-05, + "loss": 2.0246, + "step": 15066500 + }, + { + "epoch": 43.61, + "learning_rate": 2.820145482123009e-05, + "loss": 2.0356, + "step": 15067000 + }, + { + "epoch": 43.61, + "learning_rate": 2.820073117358281e-05, + "loss": 2.0437, + "step": 15067500 + }, + { + "epoch": 43.62, + "learning_rate": 2.8200007525935534e-05, + "loss": 2.0235, + "step": 15068000 + }, + { + "epoch": 43.62, + "learning_rate": 2.8199283878288256e-05, + "loss": 2.0539, + "step": 15068500 + }, + { + "epoch": 43.62, + "learning_rate": 2.819856023064098e-05, + "loss": 2.0495, + "step": 15069000 + }, + { + "epoch": 43.62, + "learning_rate": 2.8197836582993704e-05, + "loss": 2.027, + "step": 15069500 + }, + { + "epoch": 43.62, + "learning_rate": 2.8197112935346426e-05, + "loss": 2.0282, + "step": 15070000 + }, + { + "epoch": 43.62, + "learning_rate": 2.8196390734994445e-05, + "loss": 2.0344, + "step": 15070500 + }, + { + "epoch": 43.62, + "learning_rate": 2.819566853464246e-05, + "loss": 2.025, + "step": 15071000 + }, + { + "epoch": 43.63, + "learning_rate": 2.8194944886995183e-05, + "loss": 2.0292, + "step": 15071500 + }, + { + "epoch": 43.63, + "learning_rate": 2.8194221239347905e-05, + "loss": 2.0355, + "step": 15072000 + }, + { + "epoch": 43.63, + "learning_rate": 2.819349759170063e-05, + "loss": 2.0628, + "step": 15072500 + }, + { + "epoch": 43.63, + "learning_rate": 2.8192773944053353e-05, + "loss": 2.0503, + "step": 15073000 + }, + { + "epoch": 43.63, + "learning_rate": 2.819205174370137e-05, + "loss": 2.0328, + "step": 15073500 + }, + { + "epoch": 43.63, + "learning_rate": 2.8191328096054097e-05, + "loss": 2.0537, + "step": 15074000 + }, + { + "epoch": 43.63, + "learning_rate": 2.819060444840682e-05, + "loss": 2.0285, + "step": 15074500 + }, + { + "epoch": 43.64, + "learning_rate": 2.818988224805484e-05, + "loss": 2.0202, + "step": 15075000 + }, + { + "epoch": 43.64, + "learning_rate": 2.818915860040756e-05, + "loss": 2.0138, + "step": 15075500 + }, + { + "epoch": 43.64, + "learning_rate": 2.8188434952760283e-05, + "loss": 2.0269, + "step": 15076000 + }, + { + "epoch": 43.64, + "learning_rate": 2.8187711305113005e-05, + "loss": 2.0538, + "step": 15076500 + }, + { + "epoch": 43.64, + "learning_rate": 2.818698765746573e-05, + "loss": 2.0247, + "step": 15077000 + }, + { + "epoch": 43.64, + "learning_rate": 2.8186264009818453e-05, + "loss": 2.0187, + "step": 15077500 + }, + { + "epoch": 43.64, + "learning_rate": 2.8185540362171175e-05, + "loss": 2.0431, + "step": 15078000 + }, + { + "epoch": 43.65, + "learning_rate": 2.8184818161819194e-05, + "loss": 2.0263, + "step": 15078500 + }, + { + "epoch": 43.65, + "learning_rate": 2.8184094514171917e-05, + "loss": 2.0304, + "step": 15079000 + }, + { + "epoch": 43.65, + "learning_rate": 2.818337086652464e-05, + "loss": 2.0089, + "step": 15079500 + }, + { + "epoch": 43.65, + "learning_rate": 2.818264721887736e-05, + "loss": 2.0579, + "step": 15080000 + }, + { + "epoch": 43.65, + "learning_rate": 2.8181923571230083e-05, + "loss": 2.0367, + "step": 15080500 + }, + { + "epoch": 43.65, + "learning_rate": 2.8181201370878102e-05, + "loss": 2.0391, + "step": 15081000 + }, + { + "epoch": 43.65, + "learning_rate": 2.818047772323083e-05, + "loss": 2.0426, + "step": 15081500 + }, + { + "epoch": 43.66, + "learning_rate": 2.8179754075583554e-05, + "loss": 2.029, + "step": 15082000 + }, + { + "epoch": 43.66, + "learning_rate": 2.8179030427936276e-05, + "loss": 2.03, + "step": 15082500 + }, + { + "epoch": 43.66, + "learning_rate": 2.8178306780288998e-05, + "loss": 2.0299, + "step": 15083000 + }, + { + "epoch": 43.66, + "learning_rate": 2.8177584579937017e-05, + "loss": 2.0359, + "step": 15083500 + }, + { + "epoch": 43.66, + "learning_rate": 2.8176862379585032e-05, + "loss": 2.0356, + "step": 15084000 + }, + { + "epoch": 43.66, + "learning_rate": 2.8176138731937758e-05, + "loss": 2.0175, + "step": 15084500 + }, + { + "epoch": 43.66, + "learning_rate": 2.817541508429048e-05, + "loss": 2.0323, + "step": 15085000 + }, + { + "epoch": 43.67, + "learning_rate": 2.8174692883938496e-05, + "loss": 2.0351, + "step": 15085500 + }, + { + "epoch": 43.67, + "learning_rate": 2.817396923629122e-05, + "loss": 2.021, + "step": 15086000 + }, + { + "epoch": 43.67, + "learning_rate": 2.8173245588643944e-05, + "loss": 2.0638, + "step": 15086500 + }, + { + "epoch": 43.67, + "learning_rate": 2.8172521940996666e-05, + "loss": 2.0319, + "step": 15087000 + }, + { + "epoch": 43.67, + "learning_rate": 2.8171798293349388e-05, + "loss": 2.0264, + "step": 15087500 + }, + { + "epoch": 43.67, + "learning_rate": 2.817107464570211e-05, + "loss": 2.0438, + "step": 15088000 + }, + { + "epoch": 43.68, + "learning_rate": 2.8170350998054833e-05, + "loss": 2.0523, + "step": 15088500 + }, + { + "epoch": 43.68, + "learning_rate": 2.816962735040756e-05, + "loss": 2.0165, + "step": 15089000 + }, + { + "epoch": 43.68, + "learning_rate": 2.8168903702760284e-05, + "loss": 2.0181, + "step": 15089500 + }, + { + "epoch": 43.68, + "learning_rate": 2.816818005511301e-05, + "loss": 2.0273, + "step": 15090000 + }, + { + "epoch": 43.68, + "learning_rate": 2.8167456407465732e-05, + "loss": 2.0118, + "step": 15090500 + }, + { + "epoch": 43.68, + "learning_rate": 2.8166732759818454e-05, + "loss": 2.0334, + "step": 15091000 + }, + { + "epoch": 43.68, + "learning_rate": 2.8166009112171176e-05, + "loss": 2.047, + "step": 15091500 + }, + { + "epoch": 43.69, + "learning_rate": 2.81652854645239e-05, + "loss": 2.0423, + "step": 15092000 + }, + { + "epoch": 43.69, + "learning_rate": 2.8164563264171917e-05, + "loss": 2.0326, + "step": 15092500 + }, + { + "epoch": 43.69, + "learning_rate": 2.816383961652464e-05, + "loss": 2.0407, + "step": 15093000 + }, + { + "epoch": 43.69, + "learning_rate": 2.8163115968877362e-05, + "loss": 2.0319, + "step": 15093500 + }, + { + "epoch": 43.69, + "learning_rate": 2.8162392321230084e-05, + "loss": 2.0084, + "step": 15094000 + }, + { + "epoch": 43.69, + "learning_rate": 2.816166867358281e-05, + "loss": 2.025, + "step": 15094500 + }, + { + "epoch": 43.69, + "learning_rate": 2.8160945025935532e-05, + "loss": 2.028, + "step": 15095000 + }, + { + "epoch": 43.7, + "learning_rate": 2.8160221378288254e-05, + "loss": 2.0224, + "step": 15095500 + }, + { + "epoch": 43.7, + "learning_rate": 2.8159497730640976e-05, + "loss": 2.0219, + "step": 15096000 + }, + { + "epoch": 43.7, + "learning_rate": 2.8158774082993705e-05, + "loss": 2.0216, + "step": 15096500 + }, + { + "epoch": 43.7, + "learning_rate": 2.8158050435346428e-05, + "loss": 2.0243, + "step": 15097000 + }, + { + "epoch": 43.7, + "learning_rate": 2.815732678769915e-05, + "loss": 2.0342, + "step": 15097500 + }, + { + "epoch": 43.7, + "learning_rate": 2.8156603140051872e-05, + "loss": 2.053, + "step": 15098000 + }, + { + "epoch": 43.7, + "learning_rate": 2.8155879492404598e-05, + "loss": 2.0282, + "step": 15098500 + }, + { + "epoch": 43.71, + "learning_rate": 2.815515873934791e-05, + "loss": 2.0433, + "step": 15099000 + }, + { + "epoch": 43.71, + "learning_rate": 2.8154435091700632e-05, + "loss": 2.0381, + "step": 15099500 + }, + { + "epoch": 43.71, + "learning_rate": 2.8153711444053355e-05, + "loss": 2.0397, + "step": 15100000 + }, + { + "epoch": 43.71, + "learning_rate": 2.8152987796406077e-05, + "loss": 2.033, + "step": 15100500 + }, + { + "epoch": 43.71, + "learning_rate": 2.81522641487588e-05, + "loss": 2.0363, + "step": 15101000 + }, + { + "epoch": 43.71, + "learning_rate": 2.8151540501111525e-05, + "loss": 2.0633, + "step": 15101500 + }, + { + "epoch": 43.71, + "learning_rate": 2.8150816853464247e-05, + "loss": 2.0244, + "step": 15102000 + }, + { + "epoch": 43.72, + "learning_rate": 2.815009320581697e-05, + "loss": 2.0343, + "step": 15102500 + }, + { + "epoch": 43.72, + "learning_rate": 2.814936955816969e-05, + "loss": 2.0198, + "step": 15103000 + }, + { + "epoch": 43.72, + "learning_rate": 2.8148645910522414e-05, + "loss": 2.0551, + "step": 15103500 + }, + { + "epoch": 43.72, + "learning_rate": 2.8147922262875143e-05, + "loss": 2.0374, + "step": 15104000 + }, + { + "epoch": 43.72, + "learning_rate": 2.8147198615227865e-05, + "loss": 2.0142, + "step": 15104500 + }, + { + "epoch": 43.72, + "learning_rate": 2.8146474967580587e-05, + "loss": 2.0017, + "step": 15105000 + }, + { + "epoch": 43.72, + "learning_rate": 2.8145751319933313e-05, + "loss": 2.0345, + "step": 15105500 + }, + { + "epoch": 43.73, + "learning_rate": 2.8145030566876625e-05, + "loss": 2.0095, + "step": 15106000 + }, + { + "epoch": 43.73, + "learning_rate": 2.814430836652464e-05, + "loss": 2.0361, + "step": 15106500 + }, + { + "epoch": 43.73, + "learning_rate": 2.8143584718877363e-05, + "loss": 2.0414, + "step": 15107000 + }, + { + "epoch": 43.73, + "learning_rate": 2.814286107123009e-05, + "loss": 2.0457, + "step": 15107500 + }, + { + "epoch": 43.73, + "learning_rate": 2.814213742358281e-05, + "loss": 2.0439, + "step": 15108000 + }, + { + "epoch": 43.73, + "learning_rate": 2.8141413775935533e-05, + "loss": 2.0197, + "step": 15108500 + }, + { + "epoch": 43.73, + "learning_rate": 2.8140690128288255e-05, + "loss": 2.0572, + "step": 15109000 + }, + { + "epoch": 43.74, + "learning_rate": 2.8139966480640977e-05, + "loss": 2.047, + "step": 15109500 + }, + { + "epoch": 43.74, + "learning_rate": 2.81392428329937e-05, + "loss": 2.0403, + "step": 15110000 + }, + { + "epoch": 43.74, + "learning_rate": 2.8138519185346425e-05, + "loss": 2.0316, + "step": 15110500 + }, + { + "epoch": 43.74, + "learning_rate": 2.813779698499444e-05, + "loss": 2.0391, + "step": 15111000 + }, + { + "epoch": 43.74, + "learning_rate": 2.813707333734717e-05, + "loss": 2.0498, + "step": 15111500 + }, + { + "epoch": 43.74, + "learning_rate": 2.8136349689699892e-05, + "loss": 2.0171, + "step": 15112000 + }, + { + "epoch": 43.74, + "learning_rate": 2.8135626042052614e-05, + "loss": 2.0297, + "step": 15112500 + }, + { + "epoch": 43.75, + "learning_rate": 2.8134902394405336e-05, + "loss": 2.0336, + "step": 15113000 + }, + { + "epoch": 43.75, + "learning_rate": 2.8134180194053355e-05, + "loss": 2.0219, + "step": 15113500 + }, + { + "epoch": 43.75, + "learning_rate": 2.8133456546406078e-05, + "loss": 2.0175, + "step": 15114000 + }, + { + "epoch": 43.75, + "learning_rate": 2.81327328987588e-05, + "loss": 2.0199, + "step": 15114500 + }, + { + "epoch": 43.75, + "learning_rate": 2.8132009251111525e-05, + "loss": 2.0674, + "step": 15115000 + }, + { + "epoch": 43.75, + "learning_rate": 2.813128705075954e-05, + "loss": 2.0267, + "step": 15115500 + }, + { + "epoch": 43.75, + "learning_rate": 2.8130563403112263e-05, + "loss": 2.0573, + "step": 15116000 + }, + { + "epoch": 43.76, + "learning_rate": 2.812983975546499e-05, + "loss": 2.0374, + "step": 15116500 + }, + { + "epoch": 43.76, + "learning_rate": 2.8129117555113004e-05, + "loss": 2.0292, + "step": 15117000 + }, + { + "epoch": 43.76, + "learning_rate": 2.8128395354761023e-05, + "loss": 2.0451, + "step": 15117500 + }, + { + "epoch": 43.76, + "learning_rate": 2.8127671707113746e-05, + "loss": 2.032, + "step": 15118000 + }, + { + "epoch": 43.76, + "learning_rate": 2.8126948059466468e-05, + "loss": 2.0396, + "step": 15118500 + }, + { + "epoch": 43.76, + "learning_rate": 2.812622441181919e-05, + "loss": 2.0288, + "step": 15119000 + }, + { + "epoch": 43.76, + "learning_rate": 2.812550076417192e-05, + "loss": 2.0416, + "step": 15119500 + }, + { + "epoch": 43.77, + "learning_rate": 2.812477711652464e-05, + "loss": 2.0376, + "step": 15120000 + }, + { + "epoch": 43.77, + "learning_rate": 2.8124053468877363e-05, + "loss": 2.0349, + "step": 15120500 + }, + { + "epoch": 43.77, + "learning_rate": 2.8123331268525382e-05, + "loss": 2.0422, + "step": 15121000 + }, + { + "epoch": 43.77, + "learning_rate": 2.8122607620878105e-05, + "loss": 2.0406, + "step": 15121500 + }, + { + "epoch": 43.77, + "learning_rate": 2.8121883973230827e-05, + "loss": 2.0152, + "step": 15122000 + }, + { + "epoch": 43.77, + "learning_rate": 2.8121160325583553e-05, + "loss": 2.0261, + "step": 15122500 + }, + { + "epoch": 43.77, + "learning_rate": 2.8120436677936275e-05, + "loss": 2.0409, + "step": 15123000 + }, + { + "epoch": 43.78, + "learning_rate": 2.8119713030288997e-05, + "loss": 2.0277, + "step": 15123500 + }, + { + "epoch": 43.78, + "learning_rate": 2.811898938264172e-05, + "loss": 2.0189, + "step": 15124000 + }, + { + "epoch": 43.78, + "learning_rate": 2.811826573499444e-05, + "loss": 2.0423, + "step": 15124500 + }, + { + "epoch": 43.78, + "learning_rate": 2.8117542087347164e-05, + "loss": 2.0456, + "step": 15125000 + }, + { + "epoch": 43.78, + "learning_rate": 2.811681843969989e-05, + "loss": 2.0281, + "step": 15125500 + }, + { + "epoch": 43.78, + "learning_rate": 2.811609479205261e-05, + "loss": 2.0565, + "step": 15126000 + }, + { + "epoch": 43.79, + "learning_rate": 2.811537114440534e-05, + "loss": 2.0452, + "step": 15126500 + }, + { + "epoch": 43.79, + "learning_rate": 2.8114647496758063e-05, + "loss": 2.0615, + "step": 15127000 + }, + { + "epoch": 43.79, + "learning_rate": 2.811392529640608e-05, + "loss": 2.0412, + "step": 15127500 + }, + { + "epoch": 43.79, + "learning_rate": 2.8113203096054097e-05, + "loss": 2.0276, + "step": 15128000 + }, + { + "epoch": 43.79, + "learning_rate": 2.811247944840682e-05, + "loss": 2.0316, + "step": 15128500 + }, + { + "epoch": 43.79, + "learning_rate": 2.8111755800759542e-05, + "loss": 2.0318, + "step": 15129000 + }, + { + "epoch": 43.79, + "learning_rate": 2.8111032153112264e-05, + "loss": 2.0193, + "step": 15129500 + }, + { + "epoch": 43.8, + "learning_rate": 2.8110309952760283e-05, + "loss": 2.0202, + "step": 15130000 + }, + { + "epoch": 43.8, + "learning_rate": 2.8109586305113005e-05, + "loss": 2.0339, + "step": 15130500 + }, + { + "epoch": 43.8, + "learning_rate": 2.8108862657465727e-05, + "loss": 2.0292, + "step": 15131000 + }, + { + "epoch": 43.8, + "learning_rate": 2.8108139009818453e-05, + "loss": 2.0344, + "step": 15131500 + }, + { + "epoch": 43.8, + "learning_rate": 2.8107415362171175e-05, + "loss": 2.0295, + "step": 15132000 + }, + { + "epoch": 43.8, + "learning_rate": 2.8106691714523897e-05, + "loss": 2.0423, + "step": 15132500 + }, + { + "epoch": 43.8, + "learning_rate": 2.8105969514171916e-05, + "loss": 2.0419, + "step": 15133000 + }, + { + "epoch": 43.81, + "learning_rate": 2.810524586652464e-05, + "loss": 2.027, + "step": 15133500 + }, + { + "epoch": 43.81, + "learning_rate": 2.8104522218877368e-05, + "loss": 2.0171, + "step": 15134000 + }, + { + "epoch": 43.81, + "learning_rate": 2.810379857123009e-05, + "loss": 2.0303, + "step": 15134500 + }, + { + "epoch": 43.81, + "learning_rate": 2.8103074923582812e-05, + "loss": 2.0271, + "step": 15135000 + }, + { + "epoch": 43.81, + "learning_rate": 2.8102351275935534e-05, + "loss": 2.0598, + "step": 15135500 + }, + { + "epoch": 43.81, + "learning_rate": 2.8101627628288257e-05, + "loss": 2.0433, + "step": 15136000 + }, + { + "epoch": 43.81, + "learning_rate": 2.810090398064098e-05, + "loss": 2.0301, + "step": 15136500 + }, + { + "epoch": 43.82, + "learning_rate": 2.8100180332993704e-05, + "loss": 2.0237, + "step": 15137000 + }, + { + "epoch": 43.82, + "learning_rate": 2.8099456685346427e-05, + "loss": 2.0314, + "step": 15137500 + }, + { + "epoch": 43.82, + "learning_rate": 2.809873303769915e-05, + "loss": 2.0209, + "step": 15138000 + }, + { + "epoch": 43.82, + "learning_rate": 2.809800939005187e-05, + "loss": 2.0353, + "step": 15138500 + }, + { + "epoch": 43.82, + "learning_rate": 2.8097285742404593e-05, + "loss": 2.0583, + "step": 15139000 + }, + { + "epoch": 43.82, + "learning_rate": 2.8096562094757316e-05, + "loss": 2.0239, + "step": 15139500 + }, + { + "epoch": 43.82, + "learning_rate": 2.8095839894405335e-05, + "loss": 2.0382, + "step": 15140000 + }, + { + "epoch": 43.83, + "learning_rate": 2.8095116246758057e-05, + "loss": 2.0346, + "step": 15140500 + }, + { + "epoch": 43.83, + "learning_rate": 2.809439259911078e-05, + "loss": 2.042, + "step": 15141000 + }, + { + "epoch": 43.83, + "learning_rate": 2.8093668951463508e-05, + "loss": 2.042, + "step": 15141500 + }, + { + "epoch": 43.83, + "learning_rate": 2.809294530381623e-05, + "loss": 2.0402, + "step": 15142000 + }, + { + "epoch": 43.83, + "learning_rate": 2.8092221656168956e-05, + "loss": 2.0253, + "step": 15142500 + }, + { + "epoch": 43.83, + "learning_rate": 2.809149945581697e-05, + "loss": 2.0506, + "step": 15143000 + }, + { + "epoch": 43.83, + "learning_rate": 2.8090775808169694e-05, + "loss": 2.0312, + "step": 15143500 + }, + { + "epoch": 43.84, + "learning_rate": 2.8090052160522416e-05, + "loss": 2.0245, + "step": 15144000 + }, + { + "epoch": 43.84, + "learning_rate": 2.808932851287514e-05, + "loss": 2.0177, + "step": 15144500 + }, + { + "epoch": 43.84, + "learning_rate": 2.8088604865227864e-05, + "loss": 2.035, + "step": 15145000 + }, + { + "epoch": 43.84, + "learning_rate": 2.8087881217580586e-05, + "loss": 2.0341, + "step": 15145500 + }, + { + "epoch": 43.84, + "learning_rate": 2.8087159017228605e-05, + "loss": 2.0433, + "step": 15146000 + }, + { + "epoch": 43.84, + "learning_rate": 2.8086435369581327e-05, + "loss": 2.0471, + "step": 15146500 + }, + { + "epoch": 43.84, + "learning_rate": 2.8085713169229343e-05, + "loss": 2.0364, + "step": 15147000 + }, + { + "epoch": 43.85, + "learning_rate": 2.808498952158207e-05, + "loss": 2.0443, + "step": 15147500 + }, + { + "epoch": 43.85, + "learning_rate": 2.808426587393479e-05, + "loss": 2.005, + "step": 15148000 + }, + { + "epoch": 43.85, + "learning_rate": 2.8083542226287513e-05, + "loss": 2.0191, + "step": 15148500 + }, + { + "epoch": 43.85, + "learning_rate": 2.8082818578640242e-05, + "loss": 2.0452, + "step": 15149000 + }, + { + "epoch": 43.85, + "learning_rate": 2.8082096378288257e-05, + "loss": 2.0174, + "step": 15149500 + }, + { + "epoch": 43.85, + "learning_rate": 2.8081374177936276e-05, + "loss": 2.025, + "step": 15150000 + }, + { + "epoch": 43.85, + "learning_rate": 2.8080650530289e-05, + "loss": 2.0373, + "step": 15150500 + }, + { + "epoch": 43.86, + "learning_rate": 2.807992688264172e-05, + "loss": 2.0291, + "step": 15151000 + }, + { + "epoch": 43.86, + "learning_rate": 2.8079203234994443e-05, + "loss": 2.0261, + "step": 15151500 + }, + { + "epoch": 43.86, + "learning_rate": 2.807847958734717e-05, + "loss": 2.0458, + "step": 15152000 + }, + { + "epoch": 43.86, + "learning_rate": 2.807775593969989e-05, + "loss": 2.0115, + "step": 15152500 + }, + { + "epoch": 43.86, + "learning_rate": 2.8077032292052613e-05, + "loss": 2.0399, + "step": 15153000 + }, + { + "epoch": 43.86, + "learning_rate": 2.8076308644405335e-05, + "loss": 2.0631, + "step": 15153500 + }, + { + "epoch": 43.86, + "learning_rate": 2.8075584996758058e-05, + "loss": 2.0195, + "step": 15154000 + }, + { + "epoch": 43.87, + "learning_rate": 2.807486134911078e-05, + "loss": 2.0189, + "step": 15154500 + }, + { + "epoch": 43.87, + "learning_rate": 2.8074137701463505e-05, + "loss": 2.0117, + "step": 15155000 + }, + { + "epoch": 43.87, + "learning_rate": 2.8073414053816228e-05, + "loss": 2.0555, + "step": 15155500 + }, + { + "epoch": 43.87, + "learning_rate": 2.8072691853464243e-05, + "loss": 2.0427, + "step": 15156000 + }, + { + "epoch": 43.87, + "learning_rate": 2.8071968205816972e-05, + "loss": 2.0264, + "step": 15156500 + }, + { + "epoch": 43.87, + "learning_rate": 2.8071244558169694e-05, + "loss": 2.0139, + "step": 15157000 + }, + { + "epoch": 43.87, + "learning_rate": 2.8070522357817713e-05, + "loss": 2.0325, + "step": 15157500 + }, + { + "epoch": 43.88, + "learning_rate": 2.8069800157465732e-05, + "loss": 2.0187, + "step": 15158000 + }, + { + "epoch": 43.88, + "learning_rate": 2.8069076509818455e-05, + "loss": 2.0081, + "step": 15158500 + }, + { + "epoch": 43.88, + "learning_rate": 2.8068352862171177e-05, + "loss": 2.0222, + "step": 15159000 + }, + { + "epoch": 43.88, + "learning_rate": 2.80676292145239e-05, + "loss": 2.0217, + "step": 15159500 + }, + { + "epoch": 43.88, + "learning_rate": 2.806690556687662e-05, + "loss": 2.0284, + "step": 15160000 + }, + { + "epoch": 43.88, + "learning_rate": 2.8066181919229344e-05, + "loss": 2.0332, + "step": 15160500 + }, + { + "epoch": 43.88, + "learning_rate": 2.806545827158207e-05, + "loss": 2.0356, + "step": 15161000 + }, + { + "epoch": 43.89, + "learning_rate": 2.806473462393479e-05, + "loss": 2.0326, + "step": 15161500 + }, + { + "epoch": 43.89, + "learning_rate": 2.8064010976287514e-05, + "loss": 2.0578, + "step": 15162000 + }, + { + "epoch": 43.89, + "learning_rate": 2.8063287328640236e-05, + "loss": 2.032, + "step": 15162500 + }, + { + "epoch": 43.89, + "learning_rate": 2.8062563680992958e-05, + "loss": 2.0408, + "step": 15163000 + }, + { + "epoch": 43.89, + "learning_rate": 2.8061840033345687e-05, + "loss": 2.0631, + "step": 15163500 + }, + { + "epoch": 43.89, + "learning_rate": 2.806111638569841e-05, + "loss": 2.0167, + "step": 15164000 + }, + { + "epoch": 43.9, + "learning_rate": 2.8060392738051135e-05, + "loss": 2.0508, + "step": 15164500 + }, + { + "epoch": 43.9, + "learning_rate": 2.8059669090403857e-05, + "loss": 2.023, + "step": 15165000 + }, + { + "epoch": 43.9, + "learning_rate": 2.805894544275658e-05, + "loss": 2.0441, + "step": 15165500 + }, + { + "epoch": 43.9, + "learning_rate": 2.80582217951093e-05, + "loss": 2.0011, + "step": 15166000 + }, + { + "epoch": 43.9, + "learning_rate": 2.805749959475732e-05, + "loss": 2.0103, + "step": 15166500 + }, + { + "epoch": 43.9, + "learning_rate": 2.8056777394405336e-05, + "loss": 2.0437, + "step": 15167000 + }, + { + "epoch": 43.9, + "learning_rate": 2.8056055194053355e-05, + "loss": 2.0204, + "step": 15167500 + }, + { + "epoch": 43.91, + "learning_rate": 2.8055331546406077e-05, + "loss": 2.0041, + "step": 15168000 + }, + { + "epoch": 43.91, + "learning_rate": 2.80546078987588e-05, + "loss": 2.0411, + "step": 15168500 + }, + { + "epoch": 43.91, + "learning_rate": 2.8053884251111522e-05, + "loss": 2.0392, + "step": 15169000 + }, + { + "epoch": 43.91, + "learning_rate": 2.8053160603464247e-05, + "loss": 2.0306, + "step": 15169500 + }, + { + "epoch": 43.91, + "learning_rate": 2.805243695581697e-05, + "loss": 2.0198, + "step": 15170000 + }, + { + "epoch": 43.91, + "learning_rate": 2.8051714755464985e-05, + "loss": 2.0352, + "step": 15170500 + }, + { + "epoch": 43.91, + "learning_rate": 2.8050991107817707e-05, + "loss": 2.0524, + "step": 15171000 + }, + { + "epoch": 43.92, + "learning_rate": 2.8050267460170436e-05, + "loss": 2.039, + "step": 15171500 + }, + { + "epoch": 43.92, + "learning_rate": 2.804954381252316e-05, + "loss": 2.0261, + "step": 15172000 + }, + { + "epoch": 43.92, + "learning_rate": 2.8048820164875884e-05, + "loss": 2.0425, + "step": 15172500 + }, + { + "epoch": 43.92, + "learning_rate": 2.8048096517228607e-05, + "loss": 2.0188, + "step": 15173000 + }, + { + "epoch": 43.92, + "learning_rate": 2.804737286958133e-05, + "loss": 2.0114, + "step": 15173500 + }, + { + "epoch": 43.92, + "learning_rate": 2.804664922193405e-05, + "loss": 2.0284, + "step": 15174000 + }, + { + "epoch": 43.92, + "learning_rate": 2.8045925574286773e-05, + "loss": 2.0212, + "step": 15174500 + }, + { + "epoch": 43.93, + "learning_rate": 2.80452019266395e-05, + "loss": 2.0319, + "step": 15175000 + }, + { + "epoch": 43.93, + "learning_rate": 2.804447827899222e-05, + "loss": 2.0428, + "step": 15175500 + }, + { + "epoch": 43.93, + "learning_rate": 2.8043754631344943e-05, + "loss": 2.023, + "step": 15176000 + }, + { + "epoch": 43.93, + "learning_rate": 2.804303243099296e-05, + "loss": 2.0372, + "step": 15176500 + }, + { + "epoch": 43.93, + "learning_rate": 2.8042308783345685e-05, + "loss": 2.0247, + "step": 15177000 + }, + { + "epoch": 43.93, + "learning_rate": 2.8041585135698407e-05, + "loss": 2.0239, + "step": 15177500 + }, + { + "epoch": 43.93, + "learning_rate": 2.804086148805113e-05, + "loss": 2.0422, + "step": 15178000 + }, + { + "epoch": 43.94, + "learning_rate": 2.8040137840403858e-05, + "loss": 2.02, + "step": 15178500 + }, + { + "epoch": 43.94, + "learning_rate": 2.803941419275658e-05, + "loss": 2.0406, + "step": 15179000 + }, + { + "epoch": 43.94, + "learning_rate": 2.8038690545109302e-05, + "loss": 2.0377, + "step": 15179500 + }, + { + "epoch": 43.94, + "learning_rate": 2.8037966897462025e-05, + "loss": 2.0315, + "step": 15180000 + }, + { + "epoch": 43.94, + "learning_rate": 2.8037244697110044e-05, + "loss": 2.0404, + "step": 15180500 + }, + { + "epoch": 43.94, + "learning_rate": 2.8036521049462766e-05, + "loss": 2.0224, + "step": 15181000 + }, + { + "epoch": 43.94, + "learning_rate": 2.8035797401815488e-05, + "loss": 2.0298, + "step": 15181500 + }, + { + "epoch": 43.95, + "learning_rate": 2.803507375416821e-05, + "loss": 2.0401, + "step": 15182000 + }, + { + "epoch": 43.95, + "learning_rate": 2.8034350106520936e-05, + "loss": 2.0314, + "step": 15182500 + }, + { + "epoch": 43.95, + "learning_rate": 2.803362790616895e-05, + "loss": 2.0432, + "step": 15183000 + }, + { + "epoch": 43.95, + "learning_rate": 2.8032904258521674e-05, + "loss": 2.0257, + "step": 15183500 + }, + { + "epoch": 43.95, + "learning_rate": 2.80321806108744e-05, + "loss": 2.0317, + "step": 15184000 + }, + { + "epoch": 43.95, + "learning_rate": 2.803145696322712e-05, + "loss": 2.045, + "step": 15184500 + }, + { + "epoch": 43.95, + "learning_rate": 2.8030734762875137e-05, + "loss": 2.0361, + "step": 15185000 + }, + { + "epoch": 43.96, + "learning_rate": 2.8030012562523156e-05, + "loss": 2.055, + "step": 15185500 + }, + { + "epoch": 43.96, + "learning_rate": 2.802928891487588e-05, + "loss": 2.0447, + "step": 15186000 + }, + { + "epoch": 43.96, + "learning_rate": 2.8028565267228607e-05, + "loss": 2.0436, + "step": 15186500 + }, + { + "epoch": 43.96, + "learning_rate": 2.802784161958133e-05, + "loss": 2.0449, + "step": 15187000 + }, + { + "epoch": 43.96, + "learning_rate": 2.8027117971934052e-05, + "loss": 2.0231, + "step": 15187500 + }, + { + "epoch": 43.96, + "learning_rate": 2.8026394324286774e-05, + "loss": 2.0372, + "step": 15188000 + }, + { + "epoch": 43.96, + "learning_rate": 2.80256706766395e-05, + "loss": 2.0146, + "step": 15188500 + }, + { + "epoch": 43.97, + "learning_rate": 2.8024947028992222e-05, + "loss": 2.0484, + "step": 15189000 + }, + { + "epoch": 43.97, + "learning_rate": 2.8024223381344944e-05, + "loss": 2.0244, + "step": 15189500 + }, + { + "epoch": 43.97, + "learning_rate": 2.8023499733697666e-05, + "loss": 2.0208, + "step": 15190000 + }, + { + "epoch": 43.97, + "learning_rate": 2.8022777533345685e-05, + "loss": 2.0356, + "step": 15190500 + }, + { + "epoch": 43.97, + "learning_rate": 2.80220553329937e-05, + "loss": 2.0564, + "step": 15191000 + }, + { + "epoch": 43.97, + "learning_rate": 2.8021331685346423e-05, + "loss": 2.0381, + "step": 15191500 + }, + { + "epoch": 43.97, + "learning_rate": 2.802060803769915e-05, + "loss": 2.0386, + "step": 15192000 + }, + { + "epoch": 43.98, + "learning_rate": 2.801988439005187e-05, + "loss": 2.0258, + "step": 15192500 + }, + { + "epoch": 43.98, + "learning_rate": 2.8019160742404593e-05, + "loss": 2.0454, + "step": 15193000 + }, + { + "epoch": 43.98, + "learning_rate": 2.8018437094757322e-05, + "loss": 2.0328, + "step": 15193500 + }, + { + "epoch": 43.98, + "learning_rate": 2.8017713447110044e-05, + "loss": 2.0444, + "step": 15194000 + }, + { + "epoch": 43.98, + "learning_rate": 2.8016991246758063e-05, + "loss": 2.0149, + "step": 15194500 + }, + { + "epoch": 43.98, + "learning_rate": 2.8016267599110786e-05, + "loss": 2.0361, + "step": 15195000 + }, + { + "epoch": 43.98, + "learning_rate": 2.8015543951463508e-05, + "loss": 2.0131, + "step": 15195500 + }, + { + "epoch": 43.99, + "learning_rate": 2.8014821751111527e-05, + "loss": 2.0619, + "step": 15196000 + }, + { + "epoch": 43.99, + "learning_rate": 2.8014099550759542e-05, + "loss": 2.0229, + "step": 15196500 + }, + { + "epoch": 43.99, + "learning_rate": 2.8013375903112265e-05, + "loss": 2.0374, + "step": 15197000 + }, + { + "epoch": 43.99, + "learning_rate": 2.8012652255464987e-05, + "loss": 2.0544, + "step": 15197500 + }, + { + "epoch": 43.99, + "learning_rate": 2.8011930055113006e-05, + "loss": 2.0401, + "step": 15198000 + }, + { + "epoch": 43.99, + "learning_rate": 2.8011206407465728e-05, + "loss": 2.0413, + "step": 15198500 + }, + { + "epoch": 43.99, + "learning_rate": 2.801048275981845e-05, + "loss": 2.0313, + "step": 15199000 + }, + { + "epoch": 44.0, + "learning_rate": 2.8009759112171176e-05, + "loss": 2.0427, + "step": 15199500 + }, + { + "epoch": 44.0, + "learning_rate": 2.8009035464523898e-05, + "loss": 2.0291, + "step": 15200000 + }, + { + "epoch": 44.0, + "learning_rate": 2.800831181687662e-05, + "loss": 2.0537, + "step": 15200500 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.6721790831878672, + "eval_accuracy_mlm": 0.6378787277283956, + "eval_accuracy_nsp": 0.856310697410091, + "eval_loss": 2.162269353866577, + "eval_runtime": 330.7589, + "eval_samples_per_second": 1319.348, + "eval_steps_per_second": 54.974, + "step": 15200768 + }, + { + "epoch": 44.0, + "learning_rate": 2.8007588169229343e-05, + "loss": 2.0462, + "step": 15201000 + }, + { + "epoch": 44.0, + "learning_rate": 2.800686452158207e-05, + "loss": 1.9841, + "step": 15201500 + }, + { + "epoch": 44.0, + "learning_rate": 2.8006140873934794e-05, + "loss": 2.0125, + "step": 15202000 + }, + { + "epoch": 44.01, + "learning_rate": 2.8005417226287516e-05, + "loss": 2.021, + "step": 15202500 + }, + { + "epoch": 44.01, + "learning_rate": 2.8004693578640238e-05, + "loss": 2.021, + "step": 15203000 + }, + { + "epoch": 44.01, + "learning_rate": 2.8003969930992964e-05, + "loss": 1.9987, + "step": 15203500 + }, + { + "epoch": 44.01, + "learning_rate": 2.8003246283345686e-05, + "loss": 2.012, + "step": 15204000 + }, + { + "epoch": 44.01, + "learning_rate": 2.800252263569841e-05, + "loss": 2.021, + "step": 15204500 + }, + { + "epoch": 44.01, + "learning_rate": 2.800179898805113e-05, + "loss": 2.0301, + "step": 15205000 + }, + { + "epoch": 44.01, + "learning_rate": 2.8001075340403853e-05, + "loss": 2.0177, + "step": 15205500 + }, + { + "epoch": 44.02, + "learning_rate": 2.800035169275658e-05, + "loss": 2.0283, + "step": 15206000 + }, + { + "epoch": 44.02, + "learning_rate": 2.79996280451093e-05, + "loss": 2.0221, + "step": 15206500 + }, + { + "epoch": 44.02, + "learning_rate": 2.7998904397462023e-05, + "loss": 2.0286, + "step": 15207000 + }, + { + "epoch": 44.02, + "learning_rate": 2.7998180749814745e-05, + "loss": 2.0311, + "step": 15207500 + }, + { + "epoch": 44.02, + "learning_rate": 2.7997458549462764e-05, + "loss": 2.0269, + "step": 15208000 + }, + { + "epoch": 44.02, + "learning_rate": 2.799673634911078e-05, + "loss": 2.0087, + "step": 15208500 + }, + { + "epoch": 44.02, + "learning_rate": 2.799601270146351e-05, + "loss": 2.0074, + "step": 15209000 + }, + { + "epoch": 44.03, + "learning_rate": 2.799528905381623e-05, + "loss": 2.0182, + "step": 15209500 + }, + { + "epoch": 44.03, + "learning_rate": 2.7994565406168953e-05, + "loss": 2.0089, + "step": 15210000 + }, + { + "epoch": 44.03, + "learning_rate": 2.799384175852168e-05, + "loss": 2.0228, + "step": 15210500 + }, + { + "epoch": 44.03, + "learning_rate": 2.79931181108744e-05, + "loss": 2.0175, + "step": 15211000 + }, + { + "epoch": 44.03, + "learning_rate": 2.7992395910522417e-05, + "loss": 2.0327, + "step": 15211500 + }, + { + "epoch": 44.03, + "learning_rate": 2.799167226287514e-05, + "loss": 2.038, + "step": 15212000 + }, + { + "epoch": 44.03, + "learning_rate": 2.7990948615227864e-05, + "loss": 2.0184, + "step": 15212500 + }, + { + "epoch": 44.04, + "learning_rate": 2.799022641487588e-05, + "loss": 2.0191, + "step": 15213000 + }, + { + "epoch": 44.04, + "learning_rate": 2.7989502767228602e-05, + "loss": 2.0406, + "step": 15213500 + }, + { + "epoch": 44.04, + "learning_rate": 2.7988779119581328e-05, + "loss": 2.0026, + "step": 15214000 + }, + { + "epoch": 44.04, + "learning_rate": 2.798805547193405e-05, + "loss": 2.0131, + "step": 15214500 + }, + { + "epoch": 44.04, + "learning_rate": 2.7987331824286772e-05, + "loss": 1.9986, + "step": 15215000 + }, + { + "epoch": 44.04, + "learning_rate": 2.7986608176639494e-05, + "loss": 2.0458, + "step": 15215500 + }, + { + "epoch": 44.04, + "learning_rate": 2.7985884528992223e-05, + "loss": 2.0487, + "step": 15216000 + }, + { + "epoch": 44.05, + "learning_rate": 2.7985160881344946e-05, + "loss": 2.0105, + "step": 15216500 + }, + { + "epoch": 44.05, + "learning_rate": 2.7984437233697668e-05, + "loss": 2.0341, + "step": 15217000 + }, + { + "epoch": 44.05, + "learning_rate": 2.7983715033345687e-05, + "loss": 2.0077, + "step": 15217500 + }, + { + "epoch": 44.05, + "learning_rate": 2.798299138569841e-05, + "loss": 2.013, + "step": 15218000 + }, + { + "epoch": 44.05, + "learning_rate": 2.798226773805113e-05, + "loss": 2.0405, + "step": 15218500 + }, + { + "epoch": 44.05, + "learning_rate": 2.7981544090403854e-05, + "loss": 2.0105, + "step": 15219000 + }, + { + "epoch": 44.05, + "learning_rate": 2.798082044275658e-05, + "loss": 2.0219, + "step": 15219500 + }, + { + "epoch": 44.06, + "learning_rate": 2.79800967951093e-05, + "loss": 2.0175, + "step": 15220000 + }, + { + "epoch": 44.06, + "learning_rate": 2.7979373147462024e-05, + "loss": 2.0051, + "step": 15220500 + }, + { + "epoch": 44.06, + "learning_rate": 2.7978650947110043e-05, + "loss": 2.0242, + "step": 15221000 + }, + { + "epoch": 44.06, + "learning_rate": 2.7977927299462765e-05, + "loss": 2.0493, + "step": 15221500 + }, + { + "epoch": 44.06, + "learning_rate": 2.7977203651815487e-05, + "loss": 2.0055, + "step": 15222000 + }, + { + "epoch": 44.06, + "learning_rate": 2.797648000416821e-05, + "loss": 2.0175, + "step": 15222500 + }, + { + "epoch": 44.06, + "learning_rate": 2.797575635652093e-05, + "loss": 2.0179, + "step": 15223000 + }, + { + "epoch": 44.07, + "learning_rate": 2.797503270887366e-05, + "loss": 2.0057, + "step": 15223500 + }, + { + "epoch": 44.07, + "learning_rate": 2.7974309061226383e-05, + "loss": 2.0358, + "step": 15224000 + }, + { + "epoch": 44.07, + "learning_rate": 2.7973585413579105e-05, + "loss": 2.0064, + "step": 15224500 + }, + { + "epoch": 44.07, + "learning_rate": 2.797286176593183e-05, + "loss": 1.9895, + "step": 15225000 + }, + { + "epoch": 44.07, + "learning_rate": 2.7972138118284553e-05, + "loss": 2.0263, + "step": 15225500 + }, + { + "epoch": 44.07, + "learning_rate": 2.7971414470637275e-05, + "loss": 2.0012, + "step": 15226000 + }, + { + "epoch": 44.07, + "learning_rate": 2.7970690822989997e-05, + "loss": 2.0143, + "step": 15226500 + }, + { + "epoch": 44.08, + "learning_rate": 2.796996717534272e-05, + "loss": 1.9842, + "step": 15227000 + }, + { + "epoch": 44.08, + "learning_rate": 2.7969243527695442e-05, + "loss": 1.9813, + "step": 15227500 + }, + { + "epoch": 44.08, + "learning_rate": 2.796852132734346e-05, + "loss": 2.0244, + "step": 15228000 + }, + { + "epoch": 44.08, + "learning_rate": 2.7967797679696183e-05, + "loss": 2.0318, + "step": 15228500 + }, + { + "epoch": 44.08, + "learning_rate": 2.7967075479344202e-05, + "loss": 2.0235, + "step": 15229000 + }, + { + "epoch": 44.08, + "learning_rate": 2.7966351831696924e-05, + "loss": 2.0087, + "step": 15229500 + }, + { + "epoch": 44.08, + "learning_rate": 2.7965628184049646e-05, + "loss": 2.0067, + "step": 15230000 + }, + { + "epoch": 44.09, + "learning_rate": 2.796490453640237e-05, + "loss": 2.0404, + "step": 15230500 + }, + { + "epoch": 44.09, + "learning_rate": 2.7964182336050394e-05, + "loss": 2.0222, + "step": 15231000 + }, + { + "epoch": 44.09, + "learning_rate": 2.7963458688403117e-05, + "loss": 2.033, + "step": 15231500 + }, + { + "epoch": 44.09, + "learning_rate": 2.796273504075584e-05, + "loss": 2.0447, + "step": 15232000 + }, + { + "epoch": 44.09, + "learning_rate": 2.796201139310856e-05, + "loss": 2.0161, + "step": 15232500 + }, + { + "epoch": 44.09, + "learning_rate": 2.7961287745461283e-05, + "loss": 2.0099, + "step": 15233000 + }, + { + "epoch": 44.09, + "learning_rate": 2.7960564097814006e-05, + "loss": 2.0051, + "step": 15233500 + }, + { + "epoch": 44.1, + "learning_rate": 2.795984045016673e-05, + "loss": 2.0276, + "step": 15234000 + }, + { + "epoch": 44.1, + "learning_rate": 2.7959116802519453e-05, + "loss": 1.9939, + "step": 15234500 + }, + { + "epoch": 44.1, + "learning_rate": 2.7958393154872176e-05, + "loss": 2.0317, + "step": 15235000 + }, + { + "epoch": 44.1, + "learning_rate": 2.7957670954520195e-05, + "loss": 2.0083, + "step": 15235500 + }, + { + "epoch": 44.1, + "learning_rate": 2.7956947306872917e-05, + "loss": 1.9789, + "step": 15236000 + }, + { + "epoch": 44.1, + "learning_rate": 2.7956225106520932e-05, + "loss": 2.0415, + "step": 15236500 + }, + { + "epoch": 44.1, + "learning_rate": 2.7955501458873658e-05, + "loss": 2.0125, + "step": 15237000 + }, + { + "epoch": 44.11, + "learning_rate": 2.795477781122638e-05, + "loss": 2.0434, + "step": 15237500 + }, + { + "epoch": 44.11, + "learning_rate": 2.7954054163579102e-05, + "loss": 2.0453, + "step": 15238000 + }, + { + "epoch": 44.11, + "learning_rate": 2.795333051593183e-05, + "loss": 2.0381, + "step": 15238500 + }, + { + "epoch": 44.11, + "learning_rate": 2.7952606868284554e-05, + "loss": 2.0252, + "step": 15239000 + }, + { + "epoch": 44.11, + "learning_rate": 2.7951883220637276e-05, + "loss": 2.0146, + "step": 15239500 + }, + { + "epoch": 44.11, + "learning_rate": 2.7951159572989998e-05, + "loss": 2.0108, + "step": 15240000 + }, + { + "epoch": 44.12, + "learning_rate": 2.795043592534272e-05, + "loss": 2.0463, + "step": 15240500 + }, + { + "epoch": 44.12, + "learning_rate": 2.7949712277695446e-05, + "loss": 2.0133, + "step": 15241000 + }, + { + "epoch": 44.12, + "learning_rate": 2.794899007734346e-05, + "loss": 2.0249, + "step": 15241500 + }, + { + "epoch": 44.12, + "learning_rate": 2.7948266429696184e-05, + "loss": 1.9912, + "step": 15242000 + }, + { + "epoch": 44.12, + "learning_rate": 2.7947544229344203e-05, + "loss": 2.0128, + "step": 15242500 + }, + { + "epoch": 44.12, + "learning_rate": 2.7946820581696925e-05, + "loss": 2.0264, + "step": 15243000 + }, + { + "epoch": 44.12, + "learning_rate": 2.7946096934049647e-05, + "loss": 2.0107, + "step": 15243500 + }, + { + "epoch": 44.13, + "learning_rate": 2.794537328640237e-05, + "loss": 2.0288, + "step": 15244000 + }, + { + "epoch": 44.13, + "learning_rate": 2.7944652533345682e-05, + "loss": 2.0201, + "step": 15244500 + }, + { + "epoch": 44.13, + "learning_rate": 2.7943928885698407e-05, + "loss": 2.0198, + "step": 15245000 + }, + { + "epoch": 44.13, + "learning_rate": 2.794320523805113e-05, + "loss": 2.0175, + "step": 15245500 + }, + { + "epoch": 44.13, + "learning_rate": 2.794248159040386e-05, + "loss": 2.0258, + "step": 15246000 + }, + { + "epoch": 44.13, + "learning_rate": 2.7941759390051874e-05, + "loss": 2.0183, + "step": 15246500 + }, + { + "epoch": 44.13, + "learning_rate": 2.7941035742404596e-05, + "loss": 2.0374, + "step": 15247000 + }, + { + "epoch": 44.14, + "learning_rate": 2.7940312094757322e-05, + "loss": 2.0271, + "step": 15247500 + }, + { + "epoch": 44.14, + "learning_rate": 2.7939588447110044e-05, + "loss": 2.0027, + "step": 15248000 + }, + { + "epoch": 44.14, + "learning_rate": 2.7938864799462766e-05, + "loss": 2.0275, + "step": 15248500 + }, + { + "epoch": 44.14, + "learning_rate": 2.793814115181549e-05, + "loss": 2.0126, + "step": 15249000 + }, + { + "epoch": 44.14, + "learning_rate": 2.793741750416821e-05, + "loss": 2.0178, + "step": 15249500 + }, + { + "epoch": 44.14, + "learning_rate": 2.793669530381623e-05, + "loss": 2.0265, + "step": 15250000 + }, + { + "epoch": 44.14, + "learning_rate": 2.7935971656168952e-05, + "loss": 2.0051, + "step": 15250500 + }, + { + "epoch": 44.15, + "learning_rate": 2.7935248008521674e-05, + "loss": 2.0153, + "step": 15251000 + }, + { + "epoch": 44.15, + "learning_rate": 2.7934524360874397e-05, + "loss": 2.0367, + "step": 15251500 + }, + { + "epoch": 44.15, + "learning_rate": 2.7933800713227122e-05, + "loss": 2.0009, + "step": 15252000 + }, + { + "epoch": 44.15, + "learning_rate": 2.7933077065579844e-05, + "loss": 2.0183, + "step": 15252500 + }, + { + "epoch": 44.15, + "learning_rate": 2.7932353417932567e-05, + "loss": 2.0243, + "step": 15253000 + }, + { + "epoch": 44.15, + "learning_rate": 2.7931629770285296e-05, + "loss": 2.0292, + "step": 15253500 + }, + { + "epoch": 44.15, + "learning_rate": 2.7930906122638018e-05, + "loss": 2.0268, + "step": 15254000 + }, + { + "epoch": 44.16, + "learning_rate": 2.793018247499074e-05, + "loss": 2.0233, + "step": 15254500 + }, + { + "epoch": 44.16, + "learning_rate": 2.7929458827343462e-05, + "loss": 1.9949, + "step": 15255000 + }, + { + "epoch": 44.16, + "learning_rate": 2.7928735179696185e-05, + "loss": 2.0169, + "step": 15255500 + }, + { + "epoch": 44.16, + "learning_rate": 2.792801153204891e-05, + "loss": 2.0106, + "step": 15256000 + }, + { + "epoch": 44.16, + "learning_rate": 2.7927287884401632e-05, + "loss": 2.016, + "step": 15256500 + }, + { + "epoch": 44.16, + "learning_rate": 2.7926564236754355e-05, + "loss": 2.0158, + "step": 15257000 + }, + { + "epoch": 44.16, + "learning_rate": 2.7925840589107077e-05, + "loss": 2.0099, + "step": 15257500 + }, + { + "epoch": 44.17, + "learning_rate": 2.7925118388755096e-05, + "loss": 2.0234, + "step": 15258000 + }, + { + "epoch": 44.17, + "learning_rate": 2.7924394741107818e-05, + "loss": 2.0335, + "step": 15258500 + }, + { + "epoch": 44.17, + "learning_rate": 2.792367109346054e-05, + "loss": 2.0166, + "step": 15259000 + }, + { + "epoch": 44.17, + "learning_rate": 2.7922947445813263e-05, + "loss": 2.033, + "step": 15259500 + }, + { + "epoch": 44.17, + "learning_rate": 2.7922223798165985e-05, + "loss": 2.0284, + "step": 15260000 + }, + { + "epoch": 44.17, + "learning_rate": 2.7921500150518714e-05, + "loss": 2.0372, + "step": 15260500 + }, + { + "epoch": 44.17, + "learning_rate": 2.7920776502871436e-05, + "loss": 2.0268, + "step": 15261000 + }, + { + "epoch": 44.18, + "learning_rate": 2.792005285522416e-05, + "loss": 2.0131, + "step": 15261500 + }, + { + "epoch": 44.18, + "learning_rate": 2.7919329207576884e-05, + "loss": 2.03, + "step": 15262000 + }, + { + "epoch": 44.18, + "learning_rate": 2.7918605559929606e-05, + "loss": 2.0144, + "step": 15262500 + }, + { + "epoch": 44.18, + "learning_rate": 2.7917883359577625e-05, + "loss": 2.0196, + "step": 15263000 + }, + { + "epoch": 44.18, + "learning_rate": 2.7917159711930347e-05, + "loss": 2.017, + "step": 15263500 + }, + { + "epoch": 44.18, + "learning_rate": 2.791643606428307e-05, + "loss": 2.0145, + "step": 15264000 + }, + { + "epoch": 44.18, + "learning_rate": 2.7915712416635792e-05, + "loss": 2.0081, + "step": 15264500 + }, + { + "epoch": 44.19, + "learning_rate": 2.7914988768988514e-05, + "loss": 2.025, + "step": 15265000 + }, + { + "epoch": 44.19, + "learning_rate": 2.7914265121341236e-05, + "loss": 2.0249, + "step": 15265500 + }, + { + "epoch": 44.19, + "learning_rate": 2.7913541473693962e-05, + "loss": 2.0134, + "step": 15266000 + }, + { + "epoch": 44.19, + "learning_rate": 2.7912819273341977e-05, + "loss": 2.0154, + "step": 15266500 + }, + { + "epoch": 44.19, + "learning_rate": 2.79120956256947e-05, + "loss": 2.0316, + "step": 15267000 + }, + { + "epoch": 44.19, + "learning_rate": 2.791137342534272e-05, + "loss": 2.0092, + "step": 15267500 + }, + { + "epoch": 44.19, + "learning_rate": 2.7910649777695448e-05, + "loss": 2.04, + "step": 15268000 + }, + { + "epoch": 44.2, + "learning_rate": 2.790992613004817e-05, + "loss": 2.0049, + "step": 15268500 + }, + { + "epoch": 44.2, + "learning_rate": 2.7909202482400892e-05, + "loss": 2.0114, + "step": 15269000 + }, + { + "epoch": 44.2, + "learning_rate": 2.7908478834753614e-05, + "loss": 2.0479, + "step": 15269500 + }, + { + "epoch": 44.2, + "learning_rate": 2.7907755187106337e-05, + "loss": 2.0251, + "step": 15270000 + }, + { + "epoch": 44.2, + "learning_rate": 2.7907031539459062e-05, + "loss": 2.0042, + "step": 15270500 + }, + { + "epoch": 44.2, + "learning_rate": 2.7906307891811784e-05, + "loss": 2.0176, + "step": 15271000 + }, + { + "epoch": 44.2, + "learning_rate": 2.7905584244164507e-05, + "loss": 2.0235, + "step": 15271500 + }, + { + "epoch": 44.21, + "learning_rate": 2.790486059651723e-05, + "loss": 2.0126, + "step": 15272000 + }, + { + "epoch": 44.21, + "learning_rate": 2.7904138396165248e-05, + "loss": 2.0186, + "step": 15272500 + }, + { + "epoch": 44.21, + "learning_rate": 2.790341474851797e-05, + "loss": 2.0316, + "step": 15273000 + }, + { + "epoch": 44.21, + "learning_rate": 2.7902691100870692e-05, + "loss": 2.0198, + "step": 15273500 + }, + { + "epoch": 44.21, + "learning_rate": 2.7901967453223415e-05, + "loss": 2.0195, + "step": 15274000 + }, + { + "epoch": 44.21, + "learning_rate": 2.7901245252871433e-05, + "loss": 2.0159, + "step": 15274500 + }, + { + "epoch": 44.21, + "learning_rate": 2.7900521605224156e-05, + "loss": 2.0059, + "step": 15275000 + }, + { + "epoch": 44.22, + "learning_rate": 2.7899797957576885e-05, + "loss": 2.0086, + "step": 15275500 + }, + { + "epoch": 44.22, + "learning_rate": 2.7899074309929607e-05, + "loss": 2.0225, + "step": 15276000 + }, + { + "epoch": 44.22, + "learning_rate": 2.7898352109577626e-05, + "loss": 2.0312, + "step": 15276500 + }, + { + "epoch": 44.22, + "learning_rate": 2.7897628461930348e-05, + "loss": 2.0394, + "step": 15277000 + }, + { + "epoch": 44.22, + "learning_rate": 2.7896906261578364e-05, + "loss": 2.0265, + "step": 15277500 + }, + { + "epoch": 44.22, + "learning_rate": 2.789618261393109e-05, + "loss": 2.0142, + "step": 15278000 + }, + { + "epoch": 44.23, + "learning_rate": 2.789545896628381e-05, + "loss": 2.0287, + "step": 15278500 + }, + { + "epoch": 44.23, + "learning_rate": 2.7894735318636534e-05, + "loss": 2.0255, + "step": 15279000 + }, + { + "epoch": 44.23, + "learning_rate": 2.789401311828455e-05, + "loss": 2.0195, + "step": 15279500 + }, + { + "epoch": 44.23, + "learning_rate": 2.7893289470637275e-05, + "loss": 2.027, + "step": 15280000 + }, + { + "epoch": 44.23, + "learning_rate": 2.7892565822989997e-05, + "loss": 1.9916, + "step": 15280500 + }, + { + "epoch": 44.23, + "learning_rate": 2.789184217534272e-05, + "loss": 2.064, + "step": 15281000 + }, + { + "epoch": 44.23, + "learning_rate": 2.789111852769544e-05, + "loss": 2.0183, + "step": 15281500 + }, + { + "epoch": 44.24, + "learning_rate": 2.7890394880048164e-05, + "loss": 2.0155, + "step": 15282000 + }, + { + "epoch": 44.24, + "learning_rate": 2.7889672679696183e-05, + "loss": 2.04, + "step": 15282500 + }, + { + "epoch": 44.24, + "learning_rate": 2.7888949032048912e-05, + "loss": 2.0014, + "step": 15283000 + }, + { + "epoch": 44.24, + "learning_rate": 2.7888225384401634e-05, + "loss": 2.0259, + "step": 15283500 + }, + { + "epoch": 44.24, + "learning_rate": 2.7887501736754356e-05, + "loss": 2.0313, + "step": 15284000 + }, + { + "epoch": 44.24, + "learning_rate": 2.788677808910708e-05, + "loss": 2.0194, + "step": 15284500 + }, + { + "epoch": 44.24, + "learning_rate": 2.78860544414598e-05, + "loss": 2.0364, + "step": 15285000 + }, + { + "epoch": 44.25, + "learning_rate": 2.788533224110782e-05, + "loss": 2.0165, + "step": 15285500 + }, + { + "epoch": 44.25, + "learning_rate": 2.7884608593460542e-05, + "loss": 2.032, + "step": 15286000 + }, + { + "epoch": 44.25, + "learning_rate": 2.7883884945813264e-05, + "loss": 2.026, + "step": 15286500 + }, + { + "epoch": 44.25, + "learning_rate": 2.788316129816599e-05, + "loss": 2.0183, + "step": 15287000 + }, + { + "epoch": 44.25, + "learning_rate": 2.7882439097814005e-05, + "loss": 2.024, + "step": 15287500 + }, + { + "epoch": 44.25, + "learning_rate": 2.7881715450166728e-05, + "loss": 2.027, + "step": 15288000 + }, + { + "epoch": 44.25, + "learning_rate": 2.7880991802519453e-05, + "loss": 2.017, + "step": 15288500 + }, + { + "epoch": 44.26, + "learning_rate": 2.7880268154872175e-05, + "loss": 2.0175, + "step": 15289000 + }, + { + "epoch": 44.26, + "learning_rate": 2.7879544507224898e-05, + "loss": 2.0359, + "step": 15289500 + }, + { + "epoch": 44.26, + "learning_rate": 2.7878822306872913e-05, + "loss": 2.0056, + "step": 15290000 + }, + { + "epoch": 44.26, + "learning_rate": 2.7878100106520932e-05, + "loss": 2.0117, + "step": 15290500 + }, + { + "epoch": 44.26, + "learning_rate": 2.787737645887366e-05, + "loss": 2.0052, + "step": 15291000 + }, + { + "epoch": 44.26, + "learning_rate": 2.7876654258521677e-05, + "loss": 2.0074, + "step": 15291500 + }, + { + "epoch": 44.26, + "learning_rate": 2.7875930610874402e-05, + "loss": 2.0325, + "step": 15292000 + }, + { + "epoch": 44.27, + "learning_rate": 2.7875206963227125e-05, + "loss": 2.0248, + "step": 15292500 + }, + { + "epoch": 44.27, + "learning_rate": 2.7874483315579847e-05, + "loss": 2.0315, + "step": 15293000 + }, + { + "epoch": 44.27, + "learning_rate": 2.787375966793257e-05, + "loss": 2.0347, + "step": 15293500 + }, + { + "epoch": 44.27, + "learning_rate": 2.787303602028529e-05, + "loss": 2.0309, + "step": 15294000 + }, + { + "epoch": 44.27, + "learning_rate": 2.7872312372638017e-05, + "loss": 2.0448, + "step": 15294500 + }, + { + "epoch": 44.27, + "learning_rate": 2.787158872499074e-05, + "loss": 2.0297, + "step": 15295000 + }, + { + "epoch": 44.27, + "learning_rate": 2.787086507734346e-05, + "loss": 2.0033, + "step": 15295500 + }, + { + "epoch": 44.28, + "learning_rate": 2.7870141429696184e-05, + "loss": 2.037, + "step": 15296000 + }, + { + "epoch": 44.28, + "learning_rate": 2.7869419229344203e-05, + "loss": 2.0066, + "step": 15296500 + }, + { + "epoch": 44.28, + "learning_rate": 2.7868695581696925e-05, + "loss": 2.041, + "step": 15297000 + }, + { + "epoch": 44.28, + "learning_rate": 2.7867971934049647e-05, + "loss": 2.028, + "step": 15297500 + }, + { + "epoch": 44.28, + "learning_rate": 2.786724828640237e-05, + "loss": 2.043, + "step": 15298000 + }, + { + "epoch": 44.28, + "learning_rate": 2.7866524638755098e-05, + "loss": 2.042, + "step": 15298500 + }, + { + "epoch": 44.28, + "learning_rate": 2.7865802438403117e-05, + "loss": 2.004, + "step": 15299000 + }, + { + "epoch": 44.29, + "learning_rate": 2.786507879075584e-05, + "loss": 2.0251, + "step": 15299500 + }, + { + "epoch": 44.29, + "learning_rate": 2.786435514310856e-05, + "loss": 2.0135, + "step": 15300000 + }, + { + "epoch": 44.29, + "learning_rate": 2.7863631495461284e-05, + "loss": 1.998, + "step": 15300500 + }, + { + "epoch": 44.29, + "learning_rate": 2.7862907847814006e-05, + "loss": 2.0288, + "step": 15301000 + }, + { + "epoch": 44.29, + "learning_rate": 2.786218420016673e-05, + "loss": 2.0382, + "step": 15301500 + }, + { + "epoch": 44.29, + "learning_rate": 2.7861460552519454e-05, + "loss": 2.0279, + "step": 15302000 + }, + { + "epoch": 44.29, + "learning_rate": 2.7860736904872176e-05, + "loss": 2.0104, + "step": 15302500 + }, + { + "epoch": 44.3, + "learning_rate": 2.78600132572249e-05, + "loss": 2.0155, + "step": 15303000 + }, + { + "epoch": 44.3, + "learning_rate": 2.785928960957762e-05, + "loss": 2.029, + "step": 15303500 + }, + { + "epoch": 44.3, + "learning_rate": 2.7858565961930343e-05, + "loss": 2.0046, + "step": 15304000 + }, + { + "epoch": 44.3, + "learning_rate": 2.785784231428307e-05, + "loss": 2.0643, + "step": 15304500 + }, + { + "epoch": 44.3, + "learning_rate": 2.7857120113931084e-05, + "loss": 2.0255, + "step": 15305000 + }, + { + "epoch": 44.3, + "learning_rate": 2.7856396466283813e-05, + "loss": 2.0147, + "step": 15305500 + }, + { + "epoch": 44.3, + "learning_rate": 2.7855672818636535e-05, + "loss": 2.0143, + "step": 15306000 + }, + { + "epoch": 44.31, + "learning_rate": 2.7854949170989258e-05, + "loss": 2.0128, + "step": 15306500 + }, + { + "epoch": 44.31, + "learning_rate": 2.785422552334198e-05, + "loss": 1.999, + "step": 15307000 + }, + { + "epoch": 44.31, + "learning_rate": 2.7853501875694705e-05, + "loss": 2.0249, + "step": 15307500 + }, + { + "epoch": 44.31, + "learning_rate": 2.7852778228047428e-05, + "loss": 2.0329, + "step": 15308000 + }, + { + "epoch": 44.31, + "learning_rate": 2.7852056027695443e-05, + "loss": 2.0117, + "step": 15308500 + }, + { + "epoch": 44.31, + "learning_rate": 2.785133238004817e-05, + "loss": 2.0302, + "step": 15309000 + }, + { + "epoch": 44.31, + "learning_rate": 2.785060873240089e-05, + "loss": 2.0107, + "step": 15309500 + }, + { + "epoch": 44.32, + "learning_rate": 2.7849885084753613e-05, + "loss": 2.0314, + "step": 15310000 + }, + { + "epoch": 44.32, + "learning_rate": 2.7849161437106336e-05, + "loss": 2.0131, + "step": 15310500 + }, + { + "epoch": 44.32, + "learning_rate": 2.7848437789459058e-05, + "loss": 2.0086, + "step": 15311000 + }, + { + "epoch": 44.32, + "learning_rate": 2.7847715589107077e-05, + "loss": 2.0241, + "step": 15311500 + }, + { + "epoch": 44.32, + "learning_rate": 2.78469919414598e-05, + "loss": 2.0221, + "step": 15312000 + }, + { + "epoch": 44.32, + "learning_rate": 2.784626829381252e-05, + "loss": 2.032, + "step": 15312500 + }, + { + "epoch": 44.32, + "learning_rate": 2.784554464616525e-05, + "loss": 2.0186, + "step": 15313000 + }, + { + "epoch": 44.33, + "learning_rate": 2.7844820998517972e-05, + "loss": 2.0299, + "step": 15313500 + }, + { + "epoch": 44.33, + "learning_rate": 2.7844097350870695e-05, + "loss": 2.0375, + "step": 15314000 + }, + { + "epoch": 44.33, + "learning_rate": 2.784337370322342e-05, + "loss": 2.0207, + "step": 15314500 + }, + { + "epoch": 44.33, + "learning_rate": 2.7842650055576143e-05, + "loss": 2.0224, + "step": 15315000 + }, + { + "epoch": 44.33, + "learning_rate": 2.7841927855224158e-05, + "loss": 2.0187, + "step": 15315500 + }, + { + "epoch": 44.33, + "learning_rate": 2.7841205654872177e-05, + "loss": 2.0115, + "step": 15316000 + }, + { + "epoch": 44.33, + "learning_rate": 2.78404820072249e-05, + "loss": 2.0243, + "step": 15316500 + }, + { + "epoch": 44.34, + "learning_rate": 2.783975835957762e-05, + "loss": 2.0285, + "step": 15317000 + }, + { + "epoch": 44.34, + "learning_rate": 2.7839034711930344e-05, + "loss": 2.0122, + "step": 15317500 + }, + { + "epoch": 44.34, + "learning_rate": 2.783831106428307e-05, + "loss": 2.0449, + "step": 15318000 + }, + { + "epoch": 44.34, + "learning_rate": 2.783758741663579e-05, + "loss": 2.0147, + "step": 15318500 + }, + { + "epoch": 44.34, + "learning_rate": 2.7836863768988514e-05, + "loss": 2.0313, + "step": 15319000 + }, + { + "epoch": 44.34, + "learning_rate": 2.7836140121341236e-05, + "loss": 2.0144, + "step": 15319500 + }, + { + "epoch": 44.35, + "learning_rate": 2.7835417920989255e-05, + "loss": 2.0423, + "step": 15320000 + }, + { + "epoch": 44.35, + "learning_rate": 2.7834694273341984e-05, + "loss": 2.0124, + "step": 15320500 + }, + { + "epoch": 44.35, + "learning_rate": 2.783397207299e-05, + "loss": 2.0298, + "step": 15321000 + }, + { + "epoch": 44.35, + "learning_rate": 2.7833248425342722e-05, + "loss": 2.035, + "step": 15321500 + }, + { + "epoch": 44.35, + "learning_rate": 2.7832524777695444e-05, + "loss": 2.0091, + "step": 15322000 + }, + { + "epoch": 44.35, + "learning_rate": 2.7831802577343463e-05, + "loss": 2.0194, + "step": 15322500 + }, + { + "epoch": 44.35, + "learning_rate": 2.7831080376991482e-05, + "loss": 2.0239, + "step": 15323000 + }, + { + "epoch": 44.36, + "learning_rate": 2.7830356729344204e-05, + "loss": 2.0222, + "step": 15323500 + }, + { + "epoch": 44.36, + "learning_rate": 2.7829633081696926e-05, + "loss": 2.0538, + "step": 15324000 + }, + { + "epoch": 44.36, + "learning_rate": 2.782890943404965e-05, + "loss": 2.0065, + "step": 15324500 + }, + { + "epoch": 44.36, + "learning_rate": 2.782818578640237e-05, + "loss": 2.0098, + "step": 15325000 + }, + { + "epoch": 44.36, + "learning_rate": 2.782746358605039e-05, + "loss": 2.0524, + "step": 15325500 + }, + { + "epoch": 44.36, + "learning_rate": 2.7826739938403112e-05, + "loss": 2.0207, + "step": 15326000 + }, + { + "epoch": 44.36, + "learning_rate": 2.7826016290755834e-05, + "loss": 1.9872, + "step": 15326500 + }, + { + "epoch": 44.37, + "learning_rate": 2.7825292643108556e-05, + "loss": 2.0234, + "step": 15327000 + }, + { + "epoch": 44.37, + "learning_rate": 2.7824568995461282e-05, + "loss": 1.9998, + "step": 15327500 + }, + { + "epoch": 44.37, + "learning_rate": 2.7823846795109298e-05, + "loss": 2.0049, + "step": 15328000 + }, + { + "epoch": 44.37, + "learning_rate": 2.7823123147462027e-05, + "loss": 2.0349, + "step": 15328500 + }, + { + "epoch": 44.37, + "learning_rate": 2.782239949981475e-05, + "loss": 2.0068, + "step": 15329000 + }, + { + "epoch": 44.37, + "learning_rate": 2.782167585216747e-05, + "loss": 2.0199, + "step": 15329500 + }, + { + "epoch": 44.37, + "learning_rate": 2.7820952204520197e-05, + "loss": 2.0386, + "step": 15330000 + }, + { + "epoch": 44.38, + "learning_rate": 2.782022855687292e-05, + "loss": 2.0121, + "step": 15330500 + }, + { + "epoch": 44.38, + "learning_rate": 2.781950490922564e-05, + "loss": 2.0246, + "step": 15331000 + }, + { + "epoch": 44.38, + "learning_rate": 2.7818781261578363e-05, + "loss": 2.0267, + "step": 15331500 + }, + { + "epoch": 44.38, + "learning_rate": 2.7818057613931086e-05, + "loss": 2.0044, + "step": 15332000 + }, + { + "epoch": 44.38, + "learning_rate": 2.7817335413579105e-05, + "loss": 2.0221, + "step": 15332500 + }, + { + "epoch": 44.38, + "learning_rate": 2.7816611765931827e-05, + "loss": 2.0098, + "step": 15333000 + }, + { + "epoch": 44.38, + "learning_rate": 2.781588811828455e-05, + "loss": 2.0247, + "step": 15333500 + }, + { + "epoch": 44.39, + "learning_rate": 2.781516447063727e-05, + "loss": 2.0177, + "step": 15334000 + }, + { + "epoch": 44.39, + "learning_rate": 2.7814440822989997e-05, + "loss": 2.0078, + "step": 15334500 + }, + { + "epoch": 44.39, + "learning_rate": 2.781371717534272e-05, + "loss": 2.0085, + "step": 15335000 + }, + { + "epoch": 44.39, + "learning_rate": 2.7812993527695448e-05, + "loss": 2.0265, + "step": 15335500 + }, + { + "epoch": 44.39, + "learning_rate": 2.781226988004817e-05, + "loss": 2.0375, + "step": 15336000 + }, + { + "epoch": 44.39, + "learning_rate": 2.7811546232400893e-05, + "loss": 2.025, + "step": 15336500 + }, + { + "epoch": 44.39, + "learning_rate": 2.7810822584753615e-05, + "loss": 2.0382, + "step": 15337000 + }, + { + "epoch": 44.4, + "learning_rate": 2.7810098937106337e-05, + "loss": 2.0318, + "step": 15337500 + }, + { + "epoch": 44.4, + "learning_rate": 2.780937528945906e-05, + "loss": 2.0034, + "step": 15338000 + }, + { + "epoch": 44.4, + "learning_rate": 2.7808651641811785e-05, + "loss": 1.9948, + "step": 15338500 + }, + { + "epoch": 44.4, + "learning_rate": 2.7807927994164507e-05, + "loss": 2.0067, + "step": 15339000 + }, + { + "epoch": 44.4, + "learning_rate": 2.780720434651723e-05, + "loss": 2.0151, + "step": 15339500 + }, + { + "epoch": 44.4, + "learning_rate": 2.780648069886995e-05, + "loss": 2.0058, + "step": 15340000 + }, + { + "epoch": 44.4, + "learning_rate": 2.7805757051222674e-05, + "loss": 1.9883, + "step": 15340500 + }, + { + "epoch": 44.41, + "learning_rate": 2.78050334035754e-05, + "loss": 2.04, + "step": 15341000 + }, + { + "epoch": 44.41, + "learning_rate": 2.7804309755928122e-05, + "loss": 2.0062, + "step": 15341500 + }, + { + "epoch": 44.41, + "learning_rate": 2.7803587555576137e-05, + "loss": 2.0356, + "step": 15342000 + }, + { + "epoch": 44.41, + "learning_rate": 2.7802863907928866e-05, + "loss": 2.0251, + "step": 15342500 + }, + { + "epoch": 44.41, + "learning_rate": 2.780214026028159e-05, + "loss": 2.0356, + "step": 15343000 + }, + { + "epoch": 44.41, + "learning_rate": 2.780141661263431e-05, + "loss": 2.0173, + "step": 15343500 + }, + { + "epoch": 44.41, + "learning_rate": 2.7800692964987036e-05, + "loss": 2.0264, + "step": 15344000 + }, + { + "epoch": 44.42, + "learning_rate": 2.779996931733976e-05, + "loss": 2.037, + "step": 15344500 + }, + { + "epoch": 44.42, + "learning_rate": 2.779924566969248e-05, + "loss": 2.0251, + "step": 15345000 + }, + { + "epoch": 44.42, + "learning_rate": 2.77985234693405e-05, + "loss": 2.0296, + "step": 15345500 + }, + { + "epoch": 44.42, + "learning_rate": 2.7797802716283812e-05, + "loss": 2.0287, + "step": 15346000 + }, + { + "epoch": 44.42, + "learning_rate": 2.7797079068636534e-05, + "loss": 2.0435, + "step": 15346500 + }, + { + "epoch": 44.42, + "learning_rate": 2.7796355420989257e-05, + "loss": 2.0207, + "step": 15347000 + }, + { + "epoch": 44.42, + "learning_rate": 2.779563177334198e-05, + "loss": 2.0247, + "step": 15347500 + }, + { + "epoch": 44.43, + "learning_rate": 2.77949081256947e-05, + "loss": 2.0232, + "step": 15348000 + }, + { + "epoch": 44.43, + "learning_rate": 2.7794184478047423e-05, + "loss": 2.0163, + "step": 15348500 + }, + { + "epoch": 44.43, + "learning_rate": 2.779346083040015e-05, + "loss": 2.0282, + "step": 15349000 + }, + { + "epoch": 44.43, + "learning_rate": 2.779273718275287e-05, + "loss": 2.0148, + "step": 15349500 + }, + { + "epoch": 44.43, + "learning_rate": 2.7792013535105593e-05, + "loss": 2.0152, + "step": 15350000 + }, + { + "epoch": 44.43, + "learning_rate": 2.7791289887458322e-05, + "loss": 2.0364, + "step": 15350500 + }, + { + "epoch": 44.43, + "learning_rate": 2.7790566239811045e-05, + "loss": 2.0257, + "step": 15351000 + }, + { + "epoch": 44.44, + "learning_rate": 2.7789842592163767e-05, + "loss": 2.0317, + "step": 15351500 + }, + { + "epoch": 44.44, + "learning_rate": 2.778911894451649e-05, + "loss": 2.0397, + "step": 15352000 + }, + { + "epoch": 44.44, + "learning_rate": 2.7788396744164508e-05, + "loss": 2.0236, + "step": 15352500 + }, + { + "epoch": 44.44, + "learning_rate": 2.778767309651723e-05, + "loss": 2.022, + "step": 15353000 + }, + { + "epoch": 44.44, + "learning_rate": 2.7786949448869952e-05, + "loss": 2.0098, + "step": 15353500 + }, + { + "epoch": 44.44, + "learning_rate": 2.7786225801222675e-05, + "loss": 2.0197, + "step": 15354000 + }, + { + "epoch": 44.44, + "learning_rate": 2.7785503600870694e-05, + "loss": 2.0173, + "step": 15354500 + }, + { + "epoch": 44.45, + "learning_rate": 2.7784781400518713e-05, + "loss": 2.0226, + "step": 15355000 + }, + { + "epoch": 44.45, + "learning_rate": 2.7784057752871435e-05, + "loss": 2.0129, + "step": 15355500 + }, + { + "epoch": 44.45, + "learning_rate": 2.7783334105224157e-05, + "loss": 2.0396, + "step": 15356000 + }, + { + "epoch": 44.45, + "learning_rate": 2.778261045757688e-05, + "loss": 2.0172, + "step": 15356500 + }, + { + "epoch": 44.45, + "learning_rate": 2.77818868099296e-05, + "loss": 2.0165, + "step": 15357000 + }, + { + "epoch": 44.45, + "learning_rate": 2.7781163162282324e-05, + "loss": 2.0269, + "step": 15357500 + }, + { + "epoch": 44.46, + "learning_rate": 2.778044096193035e-05, + "loss": 2.0306, + "step": 15358000 + }, + { + "epoch": 44.46, + "learning_rate": 2.7779717314283072e-05, + "loss": 2.0379, + "step": 15358500 + }, + { + "epoch": 44.46, + "learning_rate": 2.7778993666635794e-05, + "loss": 2.0098, + "step": 15359000 + }, + { + "epoch": 44.46, + "learning_rate": 2.7778270018988516e-05, + "loss": 2.0068, + "step": 15359500 + }, + { + "epoch": 44.46, + "learning_rate": 2.777754637134124e-05, + "loss": 2.034, + "step": 15360000 + }, + { + "epoch": 44.46, + "learning_rate": 2.7776822723693964e-05, + "loss": 2.0434, + "step": 15360500 + }, + { + "epoch": 44.46, + "learning_rate": 2.7776099076046686e-05, + "loss": 2.0208, + "step": 15361000 + }, + { + "epoch": 44.47, + "learning_rate": 2.777537542839941e-05, + "loss": 2.0284, + "step": 15361500 + }, + { + "epoch": 44.47, + "learning_rate": 2.777465178075213e-05, + "loss": 2.0276, + "step": 15362000 + }, + { + "epoch": 44.47, + "learning_rate": 2.777392958040015e-05, + "loss": 2.0211, + "step": 15362500 + }, + { + "epoch": 44.47, + "learning_rate": 2.7773205932752872e-05, + "loss": 2.0469, + "step": 15363000 + }, + { + "epoch": 44.47, + "learning_rate": 2.7772482285105594e-05, + "loss": 2.0327, + "step": 15363500 + }, + { + "epoch": 44.47, + "learning_rate": 2.7771760084753613e-05, + "loss": 2.0076, + "step": 15364000 + }, + { + "epoch": 44.47, + "learning_rate": 2.777103788440163e-05, + "loss": 2.0418, + "step": 15364500 + }, + { + "epoch": 44.48, + "learning_rate": 2.777031423675435e-05, + "loss": 2.0387, + "step": 15365000 + }, + { + "epoch": 44.48, + "learning_rate": 2.776959058910708e-05, + "loss": 2.0352, + "step": 15365500 + }, + { + "epoch": 44.48, + "learning_rate": 2.7768866941459802e-05, + "loss": 2.0342, + "step": 15366000 + }, + { + "epoch": 44.48, + "learning_rate": 2.7768143293812528e-05, + "loss": 2.0271, + "step": 15366500 + }, + { + "epoch": 44.48, + "learning_rate": 2.776741964616525e-05, + "loss": 2.0325, + "step": 15367000 + }, + { + "epoch": 44.48, + "learning_rate": 2.7766695998517972e-05, + "loss": 2.0051, + "step": 15367500 + }, + { + "epoch": 44.48, + "learning_rate": 2.7765972350870694e-05, + "loss": 2.0193, + "step": 15368000 + }, + { + "epoch": 44.49, + "learning_rate": 2.7765248703223417e-05, + "loss": 2.0475, + "step": 15368500 + }, + { + "epoch": 44.49, + "learning_rate": 2.776452505557614e-05, + "loss": 2.0041, + "step": 15369000 + }, + { + "epoch": 44.49, + "learning_rate": 2.7763802855224158e-05, + "loss": 2.0245, + "step": 15369500 + }, + { + "epoch": 44.49, + "learning_rate": 2.776307920757688e-05, + "loss": 2.0492, + "step": 15370000 + }, + { + "epoch": 44.49, + "learning_rate": 2.7762355559929602e-05, + "loss": 2.0266, + "step": 15370500 + }, + { + "epoch": 44.49, + "learning_rate": 2.7761631912282328e-05, + "loss": 2.0276, + "step": 15371000 + }, + { + "epoch": 44.49, + "learning_rate": 2.776090826463505e-05, + "loss": 2.0181, + "step": 15371500 + }, + { + "epoch": 44.5, + "learning_rate": 2.7760186064283066e-05, + "loss": 2.0176, + "step": 15372000 + }, + { + "epoch": 44.5, + "learning_rate": 2.775946241663579e-05, + "loss": 2.0343, + "step": 15372500 + }, + { + "epoch": 44.5, + "learning_rate": 2.7758738768988517e-05, + "loss": 2.0079, + "step": 15373000 + }, + { + "epoch": 44.5, + "learning_rate": 2.7758015121341243e-05, + "loss": 2.0467, + "step": 15373500 + }, + { + "epoch": 44.5, + "learning_rate": 2.7757291473693965e-05, + "loss": 2.0438, + "step": 15374000 + }, + { + "epoch": 44.5, + "learning_rate": 2.7756567826046687e-05, + "loss": 2.0245, + "step": 15374500 + }, + { + "epoch": 44.5, + "learning_rate": 2.775584417839941e-05, + "loss": 2.0256, + "step": 15375000 + }, + { + "epoch": 44.51, + "learning_rate": 2.7755121978047428e-05, + "loss": 2.0467, + "step": 15375500 + }, + { + "epoch": 44.51, + "learning_rate": 2.775439833040015e-05, + "loss": 2.0109, + "step": 15376000 + }, + { + "epoch": 44.51, + "learning_rate": 2.7753674682752873e-05, + "loss": 2.0127, + "step": 15376500 + }, + { + "epoch": 44.51, + "learning_rate": 2.7752951035105595e-05, + "loss": 2.0171, + "step": 15377000 + }, + { + "epoch": 44.51, + "learning_rate": 2.7752227387458317e-05, + "loss": 2.0383, + "step": 15377500 + }, + { + "epoch": 44.51, + "learning_rate": 2.7751503739811043e-05, + "loss": 2.0601, + "step": 15378000 + }, + { + "epoch": 44.51, + "learning_rate": 2.7750780092163765e-05, + "loss": 2.0324, + "step": 15378500 + }, + { + "epoch": 44.52, + "learning_rate": 2.7750056444516487e-05, + "loss": 1.9837, + "step": 15379000 + }, + { + "epoch": 44.52, + "learning_rate": 2.774933279686921e-05, + "loss": 2.0369, + "step": 15379500 + }, + { + "epoch": 44.52, + "learning_rate": 2.774861059651723e-05, + "loss": 2.016, + "step": 15380000 + }, + { + "epoch": 44.52, + "learning_rate": 2.774788839616525e-05, + "loss": 2.0227, + "step": 15380500 + }, + { + "epoch": 44.52, + "learning_rate": 2.7747164748517973e-05, + "loss": 1.993, + "step": 15381000 + }, + { + "epoch": 44.52, + "learning_rate": 2.7746441100870695e-05, + "loss": 2.0341, + "step": 15381500 + }, + { + "epoch": 44.52, + "learning_rate": 2.7745717453223417e-05, + "loss": 2.0057, + "step": 15382000 + }, + { + "epoch": 44.53, + "learning_rate": 2.7744993805576143e-05, + "loss": 2.0314, + "step": 15382500 + }, + { + "epoch": 44.53, + "learning_rate": 2.7744270157928865e-05, + "loss": 2.0241, + "step": 15383000 + }, + { + "epoch": 44.53, + "learning_rate": 2.7743546510281588e-05, + "loss": 2.0218, + "step": 15383500 + }, + { + "epoch": 44.53, + "learning_rate": 2.774282286263431e-05, + "loss": 2.0287, + "step": 15384000 + }, + { + "epoch": 44.53, + "learning_rate": 2.7742099214987032e-05, + "loss": 2.0239, + "step": 15384500 + }, + { + "epoch": 44.53, + "learning_rate": 2.774137701463505e-05, + "loss": 2.0351, + "step": 15385000 + }, + { + "epoch": 44.53, + "learning_rate": 2.7740656261578363e-05, + "loss": 2.0405, + "step": 15385500 + }, + { + "epoch": 44.54, + "learning_rate": 2.7739932613931085e-05, + "loss": 2.0294, + "step": 15386000 + }, + { + "epoch": 44.54, + "learning_rate": 2.7739210413579104e-05, + "loss": 2.0316, + "step": 15386500 + }, + { + "epoch": 44.54, + "learning_rate": 2.7738486765931827e-05, + "loss": 2.0271, + "step": 15387000 + }, + { + "epoch": 44.54, + "learning_rate": 2.773776311828455e-05, + "loss": 2.0034, + "step": 15387500 + }, + { + "epoch": 44.54, + "learning_rate": 2.7737039470637278e-05, + "loss": 2.0186, + "step": 15388000 + }, + { + "epoch": 44.54, + "learning_rate": 2.773631582299e-05, + "loss": 2.025, + "step": 15388500 + }, + { + "epoch": 44.54, + "learning_rate": 2.7735592175342722e-05, + "loss": 2.0371, + "step": 15389000 + }, + { + "epoch": 44.55, + "learning_rate": 2.7734868527695445e-05, + "loss": 2.0261, + "step": 15389500 + }, + { + "epoch": 44.55, + "learning_rate": 2.7734144880048167e-05, + "loss": 2.0086, + "step": 15390000 + }, + { + "epoch": 44.55, + "learning_rate": 2.7733421232400892e-05, + "loss": 2.0366, + "step": 15390500 + }, + { + "epoch": 44.55, + "learning_rate": 2.7732697584753615e-05, + "loss": 2.0196, + "step": 15391000 + }, + { + "epoch": 44.55, + "learning_rate": 2.7731973937106337e-05, + "loss": 2.0405, + "step": 15391500 + }, + { + "epoch": 44.55, + "learning_rate": 2.773125028945906e-05, + "loss": 2.0009, + "step": 15392000 + }, + { + "epoch": 44.55, + "learning_rate": 2.773052664181178e-05, + "loss": 2.0245, + "step": 15392500 + }, + { + "epoch": 44.56, + "learning_rate": 2.7729802994164507e-05, + "loss": 2.0314, + "step": 15393000 + }, + { + "epoch": 44.56, + "learning_rate": 2.772907934651723e-05, + "loss": 2.0416, + "step": 15393500 + }, + { + "epoch": 44.56, + "learning_rate": 2.772835569886995e-05, + "loss": 2.0238, + "step": 15394000 + }, + { + "epoch": 44.56, + "learning_rate": 2.7727632051222674e-05, + "loss": 2.0244, + "step": 15394500 + }, + { + "epoch": 44.56, + "learning_rate": 2.7726909850870693e-05, + "loss": 2.0237, + "step": 15395000 + }, + { + "epoch": 44.56, + "learning_rate": 2.7726187650518715e-05, + "loss": 2.0221, + "step": 15395500 + }, + { + "epoch": 44.57, + "learning_rate": 2.7725464002871437e-05, + "loss": 2.0026, + "step": 15396000 + }, + { + "epoch": 44.57, + "learning_rate": 2.772474035522416e-05, + "loss": 2.0574, + "step": 15396500 + }, + { + "epoch": 44.57, + "learning_rate": 2.7724016707576882e-05, + "loss": 2.0411, + "step": 15397000 + }, + { + "epoch": 44.57, + "learning_rate": 2.7723293059929607e-05, + "loss": 2.0346, + "step": 15397500 + }, + { + "epoch": 44.57, + "learning_rate": 2.772256941228233e-05, + "loss": 2.0221, + "step": 15398000 + }, + { + "epoch": 44.57, + "learning_rate": 2.7721845764635052e-05, + "loss": 2.0388, + "step": 15398500 + }, + { + "epoch": 44.57, + "learning_rate": 2.772112356428307e-05, + "loss": 2.042, + "step": 15399000 + }, + { + "epoch": 44.58, + "learning_rate": 2.7720399916635793e-05, + "loss": 2.0408, + "step": 15399500 + }, + { + "epoch": 44.58, + "learning_rate": 2.7719676268988515e-05, + "loss": 2.0275, + "step": 15400000 + }, + { + "epoch": 44.58, + "learning_rate": 2.7718952621341237e-05, + "loss": 2.0247, + "step": 15400500 + }, + { + "epoch": 44.58, + "learning_rate": 2.771822897369396e-05, + "loss": 2.0378, + "step": 15401000 + }, + { + "epoch": 44.58, + "learning_rate": 2.7717505326046682e-05, + "loss": 2.0399, + "step": 15401500 + }, + { + "epoch": 44.58, + "learning_rate": 2.7716781678399408e-05, + "loss": 2.0303, + "step": 15402000 + }, + { + "epoch": 44.58, + "learning_rate": 2.7716058030752133e-05, + "loss": 2.0014, + "step": 15402500 + }, + { + "epoch": 44.59, + "learning_rate": 2.771533438310486e-05, + "loss": 2.0562, + "step": 15403000 + }, + { + "epoch": 44.59, + "learning_rate": 2.771461073545758e-05, + "loss": 2.0335, + "step": 15403500 + }, + { + "epoch": 44.59, + "learning_rate": 2.7713887087810303e-05, + "loss": 2.0232, + "step": 15404000 + }, + { + "epoch": 44.59, + "learning_rate": 2.7713164887458322e-05, + "loss": 2.0321, + "step": 15404500 + }, + { + "epoch": 44.59, + "learning_rate": 2.7712442687106338e-05, + "loss": 2.0064, + "step": 15405000 + }, + { + "epoch": 44.59, + "learning_rate": 2.771171903945906e-05, + "loss": 2.007, + "step": 15405500 + }, + { + "epoch": 44.59, + "learning_rate": 2.7710995391811782e-05, + "loss": 2.0206, + "step": 15406000 + }, + { + "epoch": 44.6, + "learning_rate": 2.7710271744164508e-05, + "loss": 2.0148, + "step": 15406500 + }, + { + "epoch": 44.6, + "learning_rate": 2.770954809651723e-05, + "loss": 2.0216, + "step": 15407000 + }, + { + "epoch": 44.6, + "learning_rate": 2.7708824448869952e-05, + "loss": 2.0292, + "step": 15407500 + }, + { + "epoch": 44.6, + "learning_rate": 2.7708100801222675e-05, + "loss": 2.0118, + "step": 15408000 + }, + { + "epoch": 44.6, + "learning_rate": 2.7707377153575397e-05, + "loss": 2.0229, + "step": 15408500 + }, + { + "epoch": 44.6, + "learning_rate": 2.770665640051871e-05, + "loss": 2.0192, + "step": 15409000 + }, + { + "epoch": 44.6, + "learning_rate": 2.7705932752871435e-05, + "loss": 2.0167, + "step": 15409500 + }, + { + "epoch": 44.61, + "learning_rate": 2.7705209105224157e-05, + "loss": 2.0329, + "step": 15410000 + }, + { + "epoch": 44.61, + "learning_rate": 2.7704485457576882e-05, + "loss": 2.0161, + "step": 15410500 + }, + { + "epoch": 44.61, + "learning_rate": 2.7703761809929608e-05, + "loss": 2.0203, + "step": 15411000 + }, + { + "epoch": 44.61, + "learning_rate": 2.7703039609577624e-05, + "loss": 2.0215, + "step": 15411500 + }, + { + "epoch": 44.61, + "learning_rate": 2.7702315961930346e-05, + "loss": 2.0136, + "step": 15412000 + }, + { + "epoch": 44.61, + "learning_rate": 2.770159231428307e-05, + "loss": 2.0249, + "step": 15412500 + }, + { + "epoch": 44.61, + "learning_rate": 2.7700868666635794e-05, + "loss": 2.0392, + "step": 15413000 + }, + { + "epoch": 44.62, + "learning_rate": 2.7700145018988516e-05, + "loss": 2.0239, + "step": 15413500 + }, + { + "epoch": 44.62, + "learning_rate": 2.7699422818636535e-05, + "loss": 2.0215, + "step": 15414000 + }, + { + "epoch": 44.62, + "learning_rate": 2.7698699170989257e-05, + "loss": 2.0367, + "step": 15414500 + }, + { + "epoch": 44.62, + "learning_rate": 2.769797552334198e-05, + "loss": 2.0367, + "step": 15415000 + }, + { + "epoch": 44.62, + "learning_rate": 2.76972518756947e-05, + "loss": 2.0218, + "step": 15415500 + }, + { + "epoch": 44.62, + "learning_rate": 2.7696528228047424e-05, + "loss": 1.998, + "step": 15416000 + }, + { + "epoch": 44.62, + "learning_rate": 2.7695804580400146e-05, + "loss": 2.0639, + "step": 15416500 + }, + { + "epoch": 44.63, + "learning_rate": 2.7695080932752872e-05, + "loss": 2.0026, + "step": 15417000 + }, + { + "epoch": 44.63, + "learning_rate": 2.7694358732400887e-05, + "loss": 2.043, + "step": 15417500 + }, + { + "epoch": 44.63, + "learning_rate": 2.7693635084753616e-05, + "loss": 2.0346, + "step": 15418000 + }, + { + "epoch": 44.63, + "learning_rate": 2.769291143710634e-05, + "loss": 2.0413, + "step": 15418500 + }, + { + "epoch": 44.63, + "learning_rate": 2.769218778945906e-05, + "loss": 2.0251, + "step": 15419000 + }, + { + "epoch": 44.63, + "learning_rate": 2.7691464141811786e-05, + "loss": 2.0379, + "step": 15419500 + }, + { + "epoch": 44.63, + "learning_rate": 2.769074049416451e-05, + "loss": 2.013, + "step": 15420000 + }, + { + "epoch": 44.64, + "learning_rate": 2.769001684651723e-05, + "loss": 2.0257, + "step": 15420500 + }, + { + "epoch": 44.64, + "learning_rate": 2.7689294646165246e-05, + "loss": 2.0228, + "step": 15421000 + }, + { + "epoch": 44.64, + "learning_rate": 2.7688570998517972e-05, + "loss": 2.0316, + "step": 15421500 + }, + { + "epoch": 44.64, + "learning_rate": 2.7687847350870694e-05, + "loss": 2.03, + "step": 15422000 + }, + { + "epoch": 44.64, + "learning_rate": 2.7687123703223416e-05, + "loss": 2.0258, + "step": 15422500 + }, + { + "epoch": 44.64, + "learning_rate": 2.768640005557614e-05, + "loss": 2.0258, + "step": 15423000 + }, + { + "epoch": 44.64, + "learning_rate": 2.768567640792886e-05, + "loss": 2.022, + "step": 15423500 + }, + { + "epoch": 44.65, + "learning_rate": 2.7684952760281587e-05, + "loss": 2.0118, + "step": 15424000 + }, + { + "epoch": 44.65, + "learning_rate": 2.7684230559929602e-05, + "loss": 2.0348, + "step": 15424500 + }, + { + "epoch": 44.65, + "learning_rate": 2.7683506912282324e-05, + "loss": 2.0062, + "step": 15425000 + }, + { + "epoch": 44.65, + "learning_rate": 2.7682783264635053e-05, + "loss": 2.0211, + "step": 15425500 + }, + { + "epoch": 44.65, + "learning_rate": 2.7682059616987776e-05, + "loss": 2.0404, + "step": 15426000 + }, + { + "epoch": 44.65, + "learning_rate": 2.7681335969340498e-05, + "loss": 2.0398, + "step": 15426500 + }, + { + "epoch": 44.65, + "learning_rate": 2.7680612321693223e-05, + "loss": 2.0363, + "step": 15427000 + }, + { + "epoch": 44.66, + "learning_rate": 2.7679888674045946e-05, + "loss": 2.0263, + "step": 15427500 + }, + { + "epoch": 44.66, + "learning_rate": 2.7679165026398668e-05, + "loss": 2.0629, + "step": 15428000 + }, + { + "epoch": 44.66, + "learning_rate": 2.767844137875139e-05, + "loss": 2.035, + "step": 15428500 + }, + { + "epoch": 44.66, + "learning_rate": 2.7677717731104112e-05, + "loss": 2.0238, + "step": 15429000 + }, + { + "epoch": 44.66, + "learning_rate": 2.767699553075213e-05, + "loss": 2.0454, + "step": 15429500 + }, + { + "epoch": 44.66, + "learning_rate": 2.7676271883104854e-05, + "loss": 2.0443, + "step": 15430000 + }, + { + "epoch": 44.66, + "learning_rate": 2.7675548235457576e-05, + "loss": 2.0205, + "step": 15430500 + }, + { + "epoch": 44.67, + "learning_rate": 2.7674824587810298e-05, + "loss": 2.0248, + "step": 15431000 + }, + { + "epoch": 44.67, + "learning_rate": 2.7674100940163024e-05, + "loss": 2.0287, + "step": 15431500 + }, + { + "epoch": 44.67, + "learning_rate": 2.767337873981104e-05, + "loss": 2.0165, + "step": 15432000 + }, + { + "epoch": 44.67, + "learning_rate": 2.7672655092163768e-05, + "loss": 2.0239, + "step": 15432500 + }, + { + "epoch": 44.67, + "learning_rate": 2.767193144451649e-05, + "loss": 2.0353, + "step": 15433000 + }, + { + "epoch": 44.67, + "learning_rate": 2.7671207796869213e-05, + "loss": 2.044, + "step": 15433500 + }, + { + "epoch": 44.68, + "learning_rate": 2.767048414922194e-05, + "loss": 2.0304, + "step": 15434000 + }, + { + "epoch": 44.68, + "learning_rate": 2.766976050157466e-05, + "loss": 2.0372, + "step": 15434500 + }, + { + "epoch": 44.68, + "learning_rate": 2.7669036853927383e-05, + "loss": 2.0035, + "step": 15435000 + }, + { + "epoch": 44.68, + "learning_rate": 2.7668316100870695e-05, + "loss": 2.018, + "step": 15435500 + }, + { + "epoch": 44.68, + "learning_rate": 2.7667592453223417e-05, + "loss": 2.0405, + "step": 15436000 + }, + { + "epoch": 44.68, + "learning_rate": 2.766686880557614e-05, + "loss": 2.018, + "step": 15436500 + }, + { + "epoch": 44.68, + "learning_rate": 2.7666145157928862e-05, + "loss": 2.0275, + "step": 15437000 + }, + { + "epoch": 44.69, + "learning_rate": 2.7665421510281587e-05, + "loss": 2.0375, + "step": 15437500 + }, + { + "epoch": 44.69, + "learning_rate": 2.766469786263431e-05, + "loss": 2.0209, + "step": 15438000 + }, + { + "epoch": 44.69, + "learning_rate": 2.7663977109577622e-05, + "loss": 2.0195, + "step": 15438500 + }, + { + "epoch": 44.69, + "learning_rate": 2.7663253461930344e-05, + "loss": 2.0324, + "step": 15439000 + }, + { + "epoch": 44.69, + "learning_rate": 2.7662529814283066e-05, + "loss": 2.0263, + "step": 15439500 + }, + { + "epoch": 44.69, + "learning_rate": 2.766180616663579e-05, + "loss": 2.0231, + "step": 15440000 + }, + { + "epoch": 44.69, + "learning_rate": 2.7661082518988518e-05, + "loss": 2.0347, + "step": 15440500 + }, + { + "epoch": 44.7, + "learning_rate": 2.766035887134124e-05, + "loss": 2.0447, + "step": 15441000 + }, + { + "epoch": 44.7, + "learning_rate": 2.7659635223693965e-05, + "loss": 2.0205, + "step": 15441500 + }, + { + "epoch": 44.7, + "learning_rate": 2.7658911576046688e-05, + "loss": 2.0461, + "step": 15442000 + }, + { + "epoch": 44.7, + "learning_rate": 2.765818792839941e-05, + "loss": 2.0199, + "step": 15442500 + }, + { + "epoch": 44.7, + "learning_rate": 2.7657464280752132e-05, + "loss": 2.0215, + "step": 15443000 + }, + { + "epoch": 44.7, + "learning_rate": 2.7656740633104854e-05, + "loss": 2.0443, + "step": 15443500 + }, + { + "epoch": 44.7, + "learning_rate": 2.7656016985457577e-05, + "loss": 2.0124, + "step": 15444000 + }, + { + "epoch": 44.71, + "learning_rate": 2.7655294785105596e-05, + "loss": 2.0371, + "step": 15444500 + }, + { + "epoch": 44.71, + "learning_rate": 2.7654572584753614e-05, + "loss": 2.0312, + "step": 15445000 + }, + { + "epoch": 44.71, + "learning_rate": 2.7653848937106337e-05, + "loss": 2.0482, + "step": 15445500 + }, + { + "epoch": 44.71, + "learning_rate": 2.765312528945906e-05, + "loss": 2.0246, + "step": 15446000 + }, + { + "epoch": 44.71, + "learning_rate": 2.765240164181178e-05, + "loss": 2.0189, + "step": 15446500 + }, + { + "epoch": 44.71, + "learning_rate": 2.7651677994164503e-05, + "loss": 2.0163, + "step": 15447000 + }, + { + "epoch": 44.71, + "learning_rate": 2.7650954346517226e-05, + "loss": 2.0187, + "step": 15447500 + }, + { + "epoch": 44.72, + "learning_rate": 2.7650230698869955e-05, + "loss": 2.0338, + "step": 15448000 + }, + { + "epoch": 44.72, + "learning_rate": 2.7649508498517974e-05, + "loss": 2.0202, + "step": 15448500 + }, + { + "epoch": 44.72, + "learning_rate": 2.7648784850870696e-05, + "loss": 2.0456, + "step": 15449000 + }, + { + "epoch": 44.72, + "learning_rate": 2.7648061203223418e-05, + "loss": 2.0529, + "step": 15449500 + }, + { + "epoch": 44.72, + "learning_rate": 2.7647339002871437e-05, + "loss": 2.0123, + "step": 15450000 + }, + { + "epoch": 44.72, + "learning_rate": 2.764661535522416e-05, + "loss": 2.0485, + "step": 15450500 + }, + { + "epoch": 44.72, + "learning_rate": 2.764589170757688e-05, + "loss": 2.0424, + "step": 15451000 + }, + { + "epoch": 44.73, + "learning_rate": 2.7645168059929604e-05, + "loss": 2.0491, + "step": 15451500 + }, + { + "epoch": 44.73, + "learning_rate": 2.7644444412282326e-05, + "loss": 2.0359, + "step": 15452000 + }, + { + "epoch": 44.73, + "learning_rate": 2.764372076463505e-05, + "loss": 2.0311, + "step": 15452500 + }, + { + "epoch": 44.73, + "learning_rate": 2.7642997116987774e-05, + "loss": 2.0259, + "step": 15453000 + }, + { + "epoch": 44.73, + "learning_rate": 2.7642273469340496e-05, + "loss": 2.0095, + "step": 15453500 + }, + { + "epoch": 44.73, + "learning_rate": 2.7641549821693218e-05, + "loss": 2.035, + "step": 15454000 + }, + { + "epoch": 44.73, + "learning_rate": 2.764082617404594e-05, + "loss": 2.0398, + "step": 15454500 + }, + { + "epoch": 44.74, + "learning_rate": 2.764010397369396e-05, + "loss": 2.0426, + "step": 15455000 + }, + { + "epoch": 44.74, + "learning_rate": 2.763938032604669e-05, + "loss": 2.0299, + "step": 15455500 + }, + { + "epoch": 44.74, + "learning_rate": 2.7638658125694704e-05, + "loss": 2.0198, + "step": 15456000 + }, + { + "epoch": 44.74, + "learning_rate": 2.7637935925342723e-05, + "loss": 2.0222, + "step": 15456500 + }, + { + "epoch": 44.74, + "learning_rate": 2.7637212277695445e-05, + "loss": 2.0297, + "step": 15457000 + }, + { + "epoch": 44.74, + "learning_rate": 2.7636488630048167e-05, + "loss": 2.0313, + "step": 15457500 + }, + { + "epoch": 44.74, + "learning_rate": 2.763576498240089e-05, + "loss": 2.0074, + "step": 15458000 + }, + { + "epoch": 44.75, + "learning_rate": 2.7635041334753615e-05, + "loss": 2.0038, + "step": 15458500 + }, + { + "epoch": 44.75, + "learning_rate": 2.7634317687106338e-05, + "loss": 2.0247, + "step": 15459000 + }, + { + "epoch": 44.75, + "learning_rate": 2.763359403945906e-05, + "loss": 2.0414, + "step": 15459500 + }, + { + "epoch": 44.75, + "learning_rate": 2.7632870391811782e-05, + "loss": 2.0553, + "step": 15460000 + }, + { + "epoch": 44.75, + "learning_rate": 2.7632146744164504e-05, + "loss": 2.0353, + "step": 15460500 + }, + { + "epoch": 44.75, + "learning_rate": 2.763142309651723e-05, + "loss": 2.0196, + "step": 15461000 + }, + { + "epoch": 44.75, + "learning_rate": 2.7630699448869952e-05, + "loss": 2.0052, + "step": 15461500 + }, + { + "epoch": 44.76, + "learning_rate": 2.7629975801222674e-05, + "loss": 2.0196, + "step": 15462000 + }, + { + "epoch": 44.76, + "learning_rate": 2.7629252153575403e-05, + "loss": 2.019, + "step": 15462500 + }, + { + "epoch": 44.76, + "learning_rate": 2.7628528505928126e-05, + "loss": 2.0101, + "step": 15463000 + }, + { + "epoch": 44.76, + "learning_rate": 2.762780630557614e-05, + "loss": 2.0068, + "step": 15463500 + }, + { + "epoch": 44.76, + "learning_rate": 2.7627082657928867e-05, + "loss": 2.0058, + "step": 15464000 + }, + { + "epoch": 44.76, + "learning_rate": 2.762635901028159e-05, + "loss": 2.0491, + "step": 15464500 + }, + { + "epoch": 44.76, + "learning_rate": 2.7625636809929605e-05, + "loss": 2.0103, + "step": 15465000 + }, + { + "epoch": 44.77, + "learning_rate": 2.762491316228233e-05, + "loss": 2.0383, + "step": 15465500 + }, + { + "epoch": 44.77, + "learning_rate": 2.7624189514635052e-05, + "loss": 2.0198, + "step": 15466000 + }, + { + "epoch": 44.77, + "learning_rate": 2.7623465866987775e-05, + "loss": 2.0101, + "step": 15466500 + }, + { + "epoch": 44.77, + "learning_rate": 2.7622742219340497e-05, + "loss": 2.0484, + "step": 15467000 + }, + { + "epoch": 44.77, + "learning_rate": 2.762201857169322e-05, + "loss": 2.0137, + "step": 15467500 + }, + { + "epoch": 44.77, + "learning_rate": 2.762129492404594e-05, + "loss": 2.0236, + "step": 15468000 + }, + { + "epoch": 44.77, + "learning_rate": 2.7620571276398667e-05, + "loss": 2.0018, + "step": 15468500 + }, + { + "epoch": 44.78, + "learning_rate": 2.7619849076046682e-05, + "loss": 2.0407, + "step": 15469000 + }, + { + "epoch": 44.78, + "learning_rate": 2.7619125428399405e-05, + "loss": 2.0385, + "step": 15469500 + }, + { + "epoch": 44.78, + "learning_rate": 2.7618401780752134e-05, + "loss": 2.0361, + "step": 15470000 + }, + { + "epoch": 44.78, + "learning_rate": 2.7617678133104856e-05, + "loss": 2.0309, + "step": 15470500 + }, + { + "epoch": 44.78, + "learning_rate": 2.761695448545758e-05, + "loss": 2.0372, + "step": 15471000 + }, + { + "epoch": 44.78, + "learning_rate": 2.7616230837810304e-05, + "loss": 2.0315, + "step": 15471500 + }, + { + "epoch": 44.79, + "learning_rate": 2.7615507190163026e-05, + "loss": 2.0329, + "step": 15472000 + }, + { + "epoch": 44.79, + "learning_rate": 2.7614784989811045e-05, + "loss": 2.0215, + "step": 15472500 + }, + { + "epoch": 44.79, + "learning_rate": 2.7614061342163767e-05, + "loss": 2.0274, + "step": 15473000 + }, + { + "epoch": 44.79, + "learning_rate": 2.761333769451649e-05, + "loss": 2.0461, + "step": 15473500 + }, + { + "epoch": 44.79, + "learning_rate": 2.7612614046869212e-05, + "loss": 2.0222, + "step": 15474000 + }, + { + "epoch": 44.79, + "learning_rate": 2.7611890399221934e-05, + "loss": 2.0471, + "step": 15474500 + }, + { + "epoch": 44.79, + "learning_rate": 2.7611166751574656e-05, + "loss": 2.0223, + "step": 15475000 + }, + { + "epoch": 44.8, + "learning_rate": 2.7610443103927382e-05, + "loss": 2.0331, + "step": 15475500 + }, + { + "epoch": 44.8, + "learning_rate": 2.7609719456280104e-05, + "loss": 2.0324, + "step": 15476000 + }, + { + "epoch": 44.8, + "learning_rate": 2.7608995808632826e-05, + "loss": 2.055, + "step": 15476500 + }, + { + "epoch": 44.8, + "learning_rate": 2.760827216098555e-05, + "loss": 2.0263, + "step": 15477000 + }, + { + "epoch": 44.8, + "learning_rate": 2.7607548513338277e-05, + "loss": 2.0295, + "step": 15477500 + }, + { + "epoch": 44.8, + "learning_rate": 2.7606824865691e-05, + "loss": 2.0535, + "step": 15478000 + }, + { + "epoch": 44.8, + "learning_rate": 2.7606101218043722e-05, + "loss": 2.0511, + "step": 15478500 + }, + { + "epoch": 44.81, + "learning_rate": 2.7605377570396444e-05, + "loss": 2.0141, + "step": 15479000 + }, + { + "epoch": 44.81, + "learning_rate": 2.7604655370044463e-05, + "loss": 2.0528, + "step": 15479500 + }, + { + "epoch": 44.81, + "learning_rate": 2.7603931722397185e-05, + "loss": 2.0352, + "step": 15480000 + }, + { + "epoch": 44.81, + "learning_rate": 2.7603208074749908e-05, + "loss": 2.056, + "step": 15480500 + }, + { + "epoch": 44.81, + "learning_rate": 2.7602484427102633e-05, + "loss": 1.9979, + "step": 15481000 + }, + { + "epoch": 44.81, + "learning_rate": 2.7601760779455355e-05, + "loss": 2.031, + "step": 15481500 + }, + { + "epoch": 44.81, + "learning_rate": 2.7601037131808078e-05, + "loss": 2.049, + "step": 15482000 + }, + { + "epoch": 44.82, + "learning_rate": 2.76003134841608e-05, + "loss": 2.0241, + "step": 15482500 + }, + { + "epoch": 44.82, + "learning_rate": 2.759959128380882e-05, + "loss": 2.0455, + "step": 15483000 + }, + { + "epoch": 44.82, + "learning_rate": 2.7598869083456834e-05, + "loss": 2.0156, + "step": 15483500 + }, + { + "epoch": 44.82, + "learning_rate": 2.7598145435809557e-05, + "loss": 2.0403, + "step": 15484000 + }, + { + "epoch": 44.82, + "learning_rate": 2.7597421788162282e-05, + "loss": 2.0354, + "step": 15484500 + }, + { + "epoch": 44.82, + "learning_rate": 2.7596698140515008e-05, + "loss": 2.0274, + "step": 15485000 + }, + { + "epoch": 44.82, + "learning_rate": 2.7595974492867734e-05, + "loss": 2.0358, + "step": 15485500 + }, + { + "epoch": 44.83, + "learning_rate": 2.759525229251575e-05, + "loss": 2.0495, + "step": 15486000 + }, + { + "epoch": 44.83, + "learning_rate": 2.759452864486847e-05, + "loss": 2.025, + "step": 15486500 + }, + { + "epoch": 44.83, + "learning_rate": 2.7593804997221197e-05, + "loss": 2.0457, + "step": 15487000 + }, + { + "epoch": 44.83, + "learning_rate": 2.759308134957392e-05, + "loss": 2.0156, + "step": 15487500 + }, + { + "epoch": 44.83, + "learning_rate": 2.759235770192664e-05, + "loss": 2.0305, + "step": 15488000 + }, + { + "epoch": 44.83, + "learning_rate": 2.7591635501574657e-05, + "loss": 2.0482, + "step": 15488500 + }, + { + "epoch": 44.83, + "learning_rate": 2.7590913301222676e-05, + "loss": 2.0281, + "step": 15489000 + }, + { + "epoch": 44.84, + "learning_rate": 2.7590189653575398e-05, + "loss": 2.0263, + "step": 15489500 + }, + { + "epoch": 44.84, + "learning_rate": 2.758946600592812e-05, + "loss": 2.0127, + "step": 15490000 + }, + { + "epoch": 44.84, + "learning_rate": 2.7588742358280846e-05, + "loss": 2.0218, + "step": 15490500 + }, + { + "epoch": 44.84, + "learning_rate": 2.758802015792886e-05, + "loss": 2.0342, + "step": 15491000 + }, + { + "epoch": 44.84, + "learning_rate": 2.758729795757688e-05, + "loss": 2.0455, + "step": 15491500 + }, + { + "epoch": 44.84, + "learning_rate": 2.7586574309929603e-05, + "loss": 2.0225, + "step": 15492000 + }, + { + "epoch": 44.84, + "learning_rate": 2.7585850662282325e-05, + "loss": 2.0415, + "step": 15492500 + }, + { + "epoch": 44.85, + "learning_rate": 2.7585127014635054e-05, + "loss": 2.0251, + "step": 15493000 + }, + { + "epoch": 44.85, + "learning_rate": 2.7584403366987776e-05, + "loss": 2.0209, + "step": 15493500 + }, + { + "epoch": 44.85, + "learning_rate": 2.75836797193405e-05, + "loss": 2.0288, + "step": 15494000 + }, + { + "epoch": 44.85, + "learning_rate": 2.758295607169322e-05, + "loss": 2.0172, + "step": 15494500 + }, + { + "epoch": 44.85, + "learning_rate": 2.7582232424045946e-05, + "loss": 2.0465, + "step": 15495000 + }, + { + "epoch": 44.85, + "learning_rate": 2.758150877639867e-05, + "loss": 2.0263, + "step": 15495500 + }, + { + "epoch": 44.85, + "learning_rate": 2.758078512875139e-05, + "loss": 2.0159, + "step": 15496000 + }, + { + "epoch": 44.86, + "learning_rate": 2.7580061481104113e-05, + "loss": 2.0166, + "step": 15496500 + }, + { + "epoch": 44.86, + "learning_rate": 2.7579337833456835e-05, + "loss": 2.0321, + "step": 15497000 + }, + { + "epoch": 44.86, + "learning_rate": 2.757861418580956e-05, + "loss": 2.0175, + "step": 15497500 + }, + { + "epoch": 44.86, + "learning_rate": 2.7577890538162283e-05, + "loss": 2.0429, + "step": 15498000 + }, + { + "epoch": 44.86, + "learning_rate": 2.75771683378103e-05, + "loss": 2.0521, + "step": 15498500 + }, + { + "epoch": 44.86, + "learning_rate": 2.757644469016302e-05, + "loss": 2.0464, + "step": 15499000 + }, + { + "epoch": 44.86, + "learning_rate": 2.7575721042515746e-05, + "loss": 2.0361, + "step": 15499500 + }, + { + "epoch": 44.87, + "learning_rate": 2.7574997394868472e-05, + "loss": 2.0311, + "step": 15500000 + }, + { + "epoch": 44.87, + "learning_rate": 2.7574273747221198e-05, + "loss": 2.0271, + "step": 15500500 + }, + { + "epoch": 44.87, + "learning_rate": 2.757355009957392e-05, + "loss": 2.0149, + "step": 15501000 + }, + { + "epoch": 44.87, + "learning_rate": 2.7572827899221936e-05, + "loss": 2.0109, + "step": 15501500 + }, + { + "epoch": 44.87, + "learning_rate": 2.757210425157466e-05, + "loss": 2.0104, + "step": 15502000 + }, + { + "epoch": 44.87, + "learning_rate": 2.7571380603927383e-05, + "loss": 2.0234, + "step": 15502500 + }, + { + "epoch": 44.87, + "learning_rate": 2.7570656956280106e-05, + "loss": 2.0164, + "step": 15503000 + }, + { + "epoch": 44.88, + "learning_rate": 2.7569933308632828e-05, + "loss": 2.0387, + "step": 15503500 + }, + { + "epoch": 44.88, + "learning_rate": 2.756920966098555e-05, + "loss": 2.0503, + "step": 15504000 + }, + { + "epoch": 44.88, + "learning_rate": 2.7568486013338272e-05, + "loss": 2.0469, + "step": 15504500 + }, + { + "epoch": 44.88, + "learning_rate": 2.7567762365690998e-05, + "loss": 2.0243, + "step": 15505000 + }, + { + "epoch": 44.88, + "learning_rate": 2.7567040165339013e-05, + "loss": 2.0374, + "step": 15505500 + }, + { + "epoch": 44.88, + "learning_rate": 2.7566316517691736e-05, + "loss": 2.0444, + "step": 15506000 + }, + { + "epoch": 44.88, + "learning_rate": 2.756559287004446e-05, + "loss": 2.0414, + "step": 15506500 + }, + { + "epoch": 44.89, + "learning_rate": 2.7564869222397184e-05, + "loss": 2.0328, + "step": 15507000 + }, + { + "epoch": 44.89, + "learning_rate": 2.7564147022045206e-05, + "loss": 2.0101, + "step": 15507500 + }, + { + "epoch": 44.89, + "learning_rate": 2.7563423374397928e-05, + "loss": 2.0328, + "step": 15508000 + }, + { + "epoch": 44.89, + "learning_rate": 2.756269972675065e-05, + "loss": 2.0595, + "step": 15508500 + }, + { + "epoch": 44.89, + "learning_rate": 2.7561976079103376e-05, + "loss": 2.0125, + "step": 15509000 + }, + { + "epoch": 44.89, + "learning_rate": 2.7561252431456098e-05, + "loss": 2.0431, + "step": 15509500 + }, + { + "epoch": 44.9, + "learning_rate": 2.756052878380882e-05, + "loss": 2.0386, + "step": 15510000 + }, + { + "epoch": 44.9, + "learning_rate": 2.7559806583456836e-05, + "loss": 2.03, + "step": 15510500 + }, + { + "epoch": 44.9, + "learning_rate": 2.755908293580956e-05, + "loss": 2.034, + "step": 15511000 + }, + { + "epoch": 44.9, + "learning_rate": 2.7558359288162284e-05, + "loss": 2.0363, + "step": 15511500 + }, + { + "epoch": 44.9, + "learning_rate": 2.7557635640515006e-05, + "loss": 2.051, + "step": 15512000 + }, + { + "epoch": 44.9, + "learning_rate": 2.755691199286773e-05, + "loss": 2.0279, + "step": 15512500 + }, + { + "epoch": 44.9, + "learning_rate": 2.755618834522045e-05, + "loss": 2.0261, + "step": 15513000 + }, + { + "epoch": 44.91, + "learning_rate": 2.7555464697573176e-05, + "loss": 2.0487, + "step": 15513500 + }, + { + "epoch": 44.91, + "learning_rate": 2.75547410499259e-05, + "loss": 2.0519, + "step": 15514000 + }, + { + "epoch": 44.91, + "learning_rate": 2.7554017402278624e-05, + "loss": 2.0128, + "step": 15514500 + }, + { + "epoch": 44.91, + "learning_rate": 2.755329375463135e-05, + "loss": 2.0384, + "step": 15515000 + }, + { + "epoch": 44.91, + "learning_rate": 2.7552571554279365e-05, + "loss": 2.0343, + "step": 15515500 + }, + { + "epoch": 44.91, + "learning_rate": 2.7551847906632087e-05, + "loss": 2.0423, + "step": 15516000 + }, + { + "epoch": 44.91, + "learning_rate": 2.7551125706280106e-05, + "loss": 2.0418, + "step": 15516500 + }, + { + "epoch": 44.92, + "learning_rate": 2.755040205863283e-05, + "loss": 2.0341, + "step": 15517000 + }, + { + "epoch": 44.92, + "learning_rate": 2.754967841098555e-05, + "loss": 2.037, + "step": 15517500 + }, + { + "epoch": 44.92, + "learning_rate": 2.7548954763338276e-05, + "loss": 2.0146, + "step": 15518000 + }, + { + "epoch": 44.92, + "learning_rate": 2.754823401028159e-05, + "loss": 2.0301, + "step": 15518500 + }, + { + "epoch": 44.92, + "learning_rate": 2.754751036263431e-05, + "loss": 2.0652, + "step": 15519000 + }, + { + "epoch": 44.92, + "learning_rate": 2.7546786714987033e-05, + "loss": 2.0482, + "step": 15519500 + }, + { + "epoch": 44.92, + "learning_rate": 2.7546063067339755e-05, + "loss": 2.0185, + "step": 15520000 + }, + { + "epoch": 44.93, + "learning_rate": 2.7545339419692478e-05, + "loss": 2.0284, + "step": 15520500 + }, + { + "epoch": 44.93, + "learning_rate": 2.75446157720452e-05, + "loss": 2.0114, + "step": 15521000 + }, + { + "epoch": 44.93, + "learning_rate": 2.7543892124397926e-05, + "loss": 2.0166, + "step": 15521500 + }, + { + "epoch": 44.93, + "learning_rate": 2.7543168476750648e-05, + "loss": 2.0452, + "step": 15522000 + }, + { + "epoch": 44.93, + "learning_rate": 2.7542444829103377e-05, + "loss": 2.0083, + "step": 15522500 + }, + { + "epoch": 44.93, + "learning_rate": 2.7541722628751392e-05, + "loss": 2.0219, + "step": 15523000 + }, + { + "epoch": 44.93, + "learning_rate": 2.7540998981104115e-05, + "loss": 2.0275, + "step": 15523500 + }, + { + "epoch": 44.94, + "learning_rate": 2.754027533345684e-05, + "loss": 2.0433, + "step": 15524000 + }, + { + "epoch": 44.94, + "learning_rate": 2.7539551685809562e-05, + "loss": 2.0314, + "step": 15524500 + }, + { + "epoch": 44.94, + "learning_rate": 2.7538828038162285e-05, + "loss": 2.0086, + "step": 15525000 + }, + { + "epoch": 44.94, + "learning_rate": 2.75381058378103e-05, + "loss": 2.0364, + "step": 15525500 + }, + { + "epoch": 44.94, + "learning_rate": 2.7537382190163026e-05, + "loss": 2.0164, + "step": 15526000 + }, + { + "epoch": 44.94, + "learning_rate": 2.7536658542515748e-05, + "loss": 2.043, + "step": 15526500 + }, + { + "epoch": 44.94, + "learning_rate": 2.753593489486847e-05, + "loss": 2.0474, + "step": 15527000 + }, + { + "epoch": 44.95, + "learning_rate": 2.7535211247221193e-05, + "loss": 2.0275, + "step": 15527500 + }, + { + "epoch": 44.95, + "learning_rate": 2.753448904686921e-05, + "loss": 2.0265, + "step": 15528000 + }, + { + "epoch": 44.95, + "learning_rate": 2.7533765399221934e-05, + "loss": 2.0344, + "step": 15528500 + }, + { + "epoch": 44.95, + "learning_rate": 2.7533041751574656e-05, + "loss": 2.043, + "step": 15529000 + }, + { + "epoch": 44.95, + "learning_rate": 2.7532318103927378e-05, + "loss": 2.0273, + "step": 15529500 + }, + { + "epoch": 44.95, + "learning_rate": 2.7531594456280107e-05, + "loss": 2.0377, + "step": 15530000 + }, + { + "epoch": 44.95, + "learning_rate": 2.7530872255928126e-05, + "loss": 2.0448, + "step": 15530500 + }, + { + "epoch": 44.96, + "learning_rate": 2.753014860828085e-05, + "loss": 2.0396, + "step": 15531000 + }, + { + "epoch": 44.96, + "learning_rate": 2.752942496063357e-05, + "loss": 2.0393, + "step": 15531500 + }, + { + "epoch": 44.96, + "learning_rate": 2.752870276028159e-05, + "loss": 1.9955, + "step": 15532000 + }, + { + "epoch": 44.96, + "learning_rate": 2.7527979112634312e-05, + "loss": 2.0444, + "step": 15532500 + }, + { + "epoch": 44.96, + "learning_rate": 2.7527255464987034e-05, + "loss": 2.0264, + "step": 15533000 + }, + { + "epoch": 44.96, + "learning_rate": 2.7526531817339756e-05, + "loss": 2.0382, + "step": 15533500 + }, + { + "epoch": 44.96, + "learning_rate": 2.752580816969248e-05, + "loss": 2.0423, + "step": 15534000 + }, + { + "epoch": 44.97, + "learning_rate": 2.7525084522045204e-05, + "loss": 2.024, + "step": 15534500 + }, + { + "epoch": 44.97, + "learning_rate": 2.752436232169322e-05, + "loss": 2.0256, + "step": 15535000 + }, + { + "epoch": 44.97, + "learning_rate": 2.7523638674045942e-05, + "loss": 2.0522, + "step": 15535500 + }, + { + "epoch": 44.97, + "learning_rate": 2.7522915026398664e-05, + "loss": 2.0165, + "step": 15536000 + }, + { + "epoch": 44.97, + "learning_rate": 2.752219137875139e-05, + "loss": 2.0283, + "step": 15536500 + }, + { + "epoch": 44.97, + "learning_rate": 2.7521467731104112e-05, + "loss": 2.024, + "step": 15537000 + }, + { + "epoch": 44.97, + "learning_rate": 2.752074408345684e-05, + "loss": 2.0311, + "step": 15537500 + }, + { + "epoch": 44.98, + "learning_rate": 2.7520020435809563e-05, + "loss": 2.0281, + "step": 15538000 + }, + { + "epoch": 44.98, + "learning_rate": 2.751929823545758e-05, + "loss": 2.0424, + "step": 15538500 + }, + { + "epoch": 44.98, + "learning_rate": 2.7518574587810304e-05, + "loss": 2.0312, + "step": 15539000 + }, + { + "epoch": 44.98, + "learning_rate": 2.7517850940163027e-05, + "loss": 2.0063, + "step": 15539500 + }, + { + "epoch": 44.98, + "learning_rate": 2.751712729251575e-05, + "loss": 2.0413, + "step": 15540000 + }, + { + "epoch": 44.98, + "learning_rate": 2.751640364486847e-05, + "loss": 2.034, + "step": 15540500 + }, + { + "epoch": 44.98, + "learning_rate": 2.7515679997221193e-05, + "loss": 2.0246, + "step": 15541000 + }, + { + "epoch": 44.99, + "learning_rate": 2.7514956349573916e-05, + "loss": 2.0195, + "step": 15541500 + }, + { + "epoch": 44.99, + "learning_rate": 2.751423270192664e-05, + "loss": 2.0227, + "step": 15542000 + }, + { + "epoch": 44.99, + "learning_rate": 2.7513509054279363e-05, + "loss": 2.0369, + "step": 15542500 + }, + { + "epoch": 44.99, + "learning_rate": 2.7512785406632086e-05, + "loss": 2.0123, + "step": 15543000 + }, + { + "epoch": 44.99, + "learning_rate": 2.7512061758984808e-05, + "loss": 2.0278, + "step": 15543500 + }, + { + "epoch": 44.99, + "learning_rate": 2.7511339558632827e-05, + "loss": 2.0491, + "step": 15544000 + }, + { + "epoch": 44.99, + "learning_rate": 2.751061591098555e-05, + "loss": 2.0339, + "step": 15544500 + }, + { + "epoch": 45.0, + "learning_rate": 2.7509892263338278e-05, + "loss": 2.0192, + "step": 15545000 + }, + { + "epoch": 45.0, + "learning_rate": 2.7509168615691e-05, + "loss": 2.0317, + "step": 15545500 + }, + { + "epoch": 45.0, + "learning_rate": 2.7508444968043723e-05, + "loss": 2.0252, + "step": 15546000 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.6716350750480929, + "eval_accuracy_mlm": 0.6368857422767499, + "eval_accuracy_nsp": 0.8580935227069613, + "eval_loss": 2.1569485664367676, + "eval_runtime": 332.1327, + "eval_samples_per_second": 1313.89, + "eval_steps_per_second": 54.746, + "step": 15546240 + }, + { + "epoch": 45.0, + "learning_rate": 2.7507721320396445e-05, + "loss": 2.0416, + "step": 15546500 + }, + { + "epoch": 45.0, + "learning_rate": 2.7506997672749167e-05, + "loss": 2.0123, + "step": 15547000 + }, + { + "epoch": 45.0, + "learning_rate": 2.7506274025101893e-05, + "loss": 1.9978, + "step": 15547500 + }, + { + "epoch": 45.01, + "learning_rate": 2.7505550377454615e-05, + "loss": 1.9899, + "step": 15548000 + }, + { + "epoch": 45.01, + "learning_rate": 2.7504826729807337e-05, + "loss": 1.9903, + "step": 15548500 + }, + { + "epoch": 45.01, + "learning_rate": 2.750410308216006e-05, + "loss": 2.0151, + "step": 15549000 + }, + { + "epoch": 45.01, + "learning_rate": 2.750337943451278e-05, + "loss": 2.0027, + "step": 15549500 + }, + { + "epoch": 45.01, + "learning_rate": 2.7502655786865504e-05, + "loss": 2.0242, + "step": 15550000 + }, + { + "epoch": 45.01, + "learning_rate": 2.7501933586513523e-05, + "loss": 2.016, + "step": 15550500 + }, + { + "epoch": 45.01, + "learning_rate": 2.7501209938866245e-05, + "loss": 2.0307, + "step": 15551000 + }, + { + "epoch": 45.02, + "learning_rate": 2.7500486291218967e-05, + "loss": 2.0081, + "step": 15551500 + }, + { + "epoch": 45.02, + "learning_rate": 2.7499762643571696e-05, + "loss": 2.0161, + "step": 15552000 + }, + { + "epoch": 45.02, + "learning_rate": 2.7499043337810305e-05, + "loss": 2.0023, + "step": 15552500 + }, + { + "epoch": 45.02, + "learning_rate": 2.7498319690163027e-05, + "loss": 2.0236, + "step": 15553000 + }, + { + "epoch": 45.02, + "learning_rate": 2.749759604251575e-05, + "loss": 2.023, + "step": 15553500 + }, + { + "epoch": 45.02, + "learning_rate": 2.7496872394868472e-05, + "loss": 1.9733, + "step": 15554000 + }, + { + "epoch": 45.02, + "learning_rate": 2.7496148747221194e-05, + "loss": 2.0304, + "step": 15554500 + }, + { + "epoch": 45.03, + "learning_rate": 2.749542509957392e-05, + "loss": 2.0318, + "step": 15555000 + }, + { + "epoch": 45.03, + "learning_rate": 2.7494701451926642e-05, + "loss": 1.9963, + "step": 15555500 + }, + { + "epoch": 45.03, + "learning_rate": 2.7493977804279364e-05, + "loss": 2.0187, + "step": 15556000 + }, + { + "epoch": 45.03, + "learning_rate": 2.7493254156632086e-05, + "loss": 2.0072, + "step": 15556500 + }, + { + "epoch": 45.03, + "learning_rate": 2.74925334035754e-05, + "loss": 1.9833, + "step": 15557000 + }, + { + "epoch": 45.03, + "learning_rate": 2.749180975592812e-05, + "loss": 2.0103, + "step": 15557500 + }, + { + "epoch": 45.03, + "learning_rate": 2.7491086108280843e-05, + "loss": 2.0384, + "step": 15558000 + }, + { + "epoch": 45.04, + "learning_rate": 2.749036246063357e-05, + "loss": 2.0396, + "step": 15558500 + }, + { + "epoch": 45.04, + "learning_rate": 2.748963881298629e-05, + "loss": 1.9975, + "step": 15559000 + }, + { + "epoch": 45.04, + "learning_rate": 2.7488915165339013e-05, + "loss": 2.0022, + "step": 15559500 + }, + { + "epoch": 45.04, + "learning_rate": 2.7488191517691742e-05, + "loss": 2.0316, + "step": 15560000 + }, + { + "epoch": 45.04, + "learning_rate": 2.7487467870044465e-05, + "loss": 2.0042, + "step": 15560500 + }, + { + "epoch": 45.04, + "learning_rate": 2.7486744222397187e-05, + "loss": 2.0254, + "step": 15561000 + }, + { + "epoch": 45.04, + "learning_rate": 2.748602057474991e-05, + "loss": 2.0384, + "step": 15561500 + }, + { + "epoch": 45.05, + "learning_rate": 2.748529692710263e-05, + "loss": 1.9898, + "step": 15562000 + }, + { + "epoch": 45.05, + "learning_rate": 2.7484573279455357e-05, + "loss": 2.0317, + "step": 15562500 + }, + { + "epoch": 45.05, + "learning_rate": 2.7483851079103372e-05, + "loss": 2.0257, + "step": 15563000 + }, + { + "epoch": 45.05, + "learning_rate": 2.7483127431456095e-05, + "loss": 2.0482, + "step": 15563500 + }, + { + "epoch": 45.05, + "learning_rate": 2.748240378380882e-05, + "loss": 2.026, + "step": 15564000 + }, + { + "epoch": 45.05, + "learning_rate": 2.7481681583456836e-05, + "loss": 1.9973, + "step": 15564500 + }, + { + "epoch": 45.05, + "learning_rate": 2.7480957935809558e-05, + "loss": 2.0174, + "step": 15565000 + }, + { + "epoch": 45.06, + "learning_rate": 2.7480234288162284e-05, + "loss": 2.0223, + "step": 15565500 + }, + { + "epoch": 45.06, + "learning_rate": 2.7479510640515006e-05, + "loss": 2.0229, + "step": 15566000 + }, + { + "epoch": 45.06, + "learning_rate": 2.7478786992867728e-05, + "loss": 2.0166, + "step": 15566500 + }, + { + "epoch": 45.06, + "learning_rate": 2.7478064792515744e-05, + "loss": 1.9885, + "step": 15567000 + }, + { + "epoch": 45.06, + "learning_rate": 2.747734259216377e-05, + "loss": 2.0033, + "step": 15567500 + }, + { + "epoch": 45.06, + "learning_rate": 2.747661894451649e-05, + "loss": 1.9823, + "step": 15568000 + }, + { + "epoch": 45.06, + "learning_rate": 2.7475895296869214e-05, + "loss": 2.0091, + "step": 15568500 + }, + { + "epoch": 45.07, + "learning_rate": 2.7475171649221936e-05, + "loss": 2.0008, + "step": 15569000 + }, + { + "epoch": 45.07, + "learning_rate": 2.747444800157466e-05, + "loss": 2.0032, + "step": 15569500 + }, + { + "epoch": 45.07, + "learning_rate": 2.7473724353927384e-05, + "loss": 2.0298, + "step": 15570000 + }, + { + "epoch": 45.07, + "learning_rate": 2.7473000706280106e-05, + "loss": 2.0195, + "step": 15570500 + }, + { + "epoch": 45.07, + "learning_rate": 2.747227705863283e-05, + "loss": 2.0196, + "step": 15571000 + }, + { + "epoch": 45.07, + "learning_rate": 2.747155341098555e-05, + "loss": 2.0206, + "step": 15571500 + }, + { + "epoch": 45.07, + "learning_rate": 2.7470832657928863e-05, + "loss": 2.016, + "step": 15572000 + }, + { + "epoch": 45.08, + "learning_rate": 2.7470109010281585e-05, + "loss": 1.9954, + "step": 15572500 + }, + { + "epoch": 45.08, + "learning_rate": 2.7469385362634307e-05, + "loss": 2.0537, + "step": 15573000 + }, + { + "epoch": 45.08, + "learning_rate": 2.7468661714987033e-05, + "loss": 2.0015, + "step": 15573500 + }, + { + "epoch": 45.08, + "learning_rate": 2.7467938067339755e-05, + "loss": 2.0174, + "step": 15574000 + }, + { + "epoch": 45.08, + "learning_rate": 2.7467214419692477e-05, + "loss": 1.9911, + "step": 15574500 + }, + { + "epoch": 45.08, + "learning_rate": 2.7466490772045206e-05, + "loss": 2.0224, + "step": 15575000 + }, + { + "epoch": 45.08, + "learning_rate": 2.746576712439793e-05, + "loss": 2.0114, + "step": 15575500 + }, + { + "epoch": 45.09, + "learning_rate": 2.746504637134124e-05, + "loss": 2.0323, + "step": 15576000 + }, + { + "epoch": 45.09, + "learning_rate": 2.7464322723693963e-05, + "loss": 2.0221, + "step": 15576500 + }, + { + "epoch": 45.09, + "learning_rate": 2.7463599076046685e-05, + "loss": 2.0279, + "step": 15577000 + }, + { + "epoch": 45.09, + "learning_rate": 2.746287542839941e-05, + "loss": 2.0124, + "step": 15577500 + }, + { + "epoch": 45.09, + "learning_rate": 2.7462151780752133e-05, + "loss": 2.0071, + "step": 15578000 + }, + { + "epoch": 45.09, + "learning_rate": 2.7461428133104856e-05, + "loss": 2.0155, + "step": 15578500 + }, + { + "epoch": 45.09, + "learning_rate": 2.746070593275287e-05, + "loss": 2.0243, + "step": 15579000 + }, + { + "epoch": 45.1, + "learning_rate": 2.7459982285105597e-05, + "loss": 1.9902, + "step": 15579500 + }, + { + "epoch": 45.1, + "learning_rate": 2.7459260084753612e-05, + "loss": 2.0213, + "step": 15580000 + }, + { + "epoch": 45.1, + "learning_rate": 2.7458536437106335e-05, + "loss": 2.0354, + "step": 15580500 + }, + { + "epoch": 45.1, + "learning_rate": 2.745781278945906e-05, + "loss": 1.9913, + "step": 15581000 + }, + { + "epoch": 45.1, + "learning_rate": 2.7457090589107076e-05, + "loss": 2.0354, + "step": 15581500 + }, + { + "epoch": 45.1, + "learning_rate": 2.7456366941459798e-05, + "loss": 2.0346, + "step": 15582000 + }, + { + "epoch": 45.1, + "learning_rate": 2.7455643293812524e-05, + "loss": 2.0215, + "step": 15582500 + }, + { + "epoch": 45.11, + "learning_rate": 2.745491964616525e-05, + "loss": 1.9874, + "step": 15583000 + }, + { + "epoch": 45.11, + "learning_rate": 2.745419599851797e-05, + "loss": 1.9985, + "step": 15583500 + }, + { + "epoch": 45.11, + "learning_rate": 2.7453472350870697e-05, + "loss": 2.0036, + "step": 15584000 + }, + { + "epoch": 45.11, + "learning_rate": 2.745274870322342e-05, + "loss": 1.9983, + "step": 15584500 + }, + { + "epoch": 45.11, + "learning_rate": 2.745202505557614e-05, + "loss": 1.9991, + "step": 15585000 + }, + { + "epoch": 45.11, + "learning_rate": 2.7451301407928864e-05, + "loss": 2.0198, + "step": 15585500 + }, + { + "epoch": 45.12, + "learning_rate": 2.7450577760281586e-05, + "loss": 1.9981, + "step": 15586000 + }, + { + "epoch": 45.12, + "learning_rate": 2.744985411263431e-05, + "loss": 2.0175, + "step": 15586500 + }, + { + "epoch": 45.12, + "learning_rate": 2.7449130464987034e-05, + "loss": 2.0045, + "step": 15587000 + }, + { + "epoch": 45.12, + "learning_rate": 2.7448406817339756e-05, + "loss": 2.0014, + "step": 15587500 + }, + { + "epoch": 45.12, + "learning_rate": 2.7447683169692478e-05, + "loss": 2.0598, + "step": 15588000 + }, + { + "epoch": 45.12, + "learning_rate": 2.74469595220452e-05, + "loss": 2.015, + "step": 15588500 + }, + { + "epoch": 45.12, + "learning_rate": 2.7446235874397923e-05, + "loss": 1.9943, + "step": 15589000 + }, + { + "epoch": 45.13, + "learning_rate": 2.744551222675065e-05, + "loss": 2.016, + "step": 15589500 + }, + { + "epoch": 45.13, + "learning_rate": 2.744479002639867e-05, + "loss": 2.0049, + "step": 15590000 + }, + { + "epoch": 45.13, + "learning_rate": 2.7444066378751393e-05, + "loss": 2.0221, + "step": 15590500 + }, + { + "epoch": 45.13, + "learning_rate": 2.7443342731104115e-05, + "loss": 1.9948, + "step": 15591000 + }, + { + "epoch": 45.13, + "learning_rate": 2.7442619083456837e-05, + "loss": 2.0225, + "step": 15591500 + }, + { + "epoch": 45.13, + "learning_rate": 2.7441896883104856e-05, + "loss": 2.0264, + "step": 15592000 + }, + { + "epoch": 45.13, + "learning_rate": 2.744117323545758e-05, + "loss": 2.0116, + "step": 15592500 + }, + { + "epoch": 45.14, + "learning_rate": 2.744045248240089e-05, + "loss": 2.0177, + "step": 15593000 + }, + { + "epoch": 45.14, + "learning_rate": 2.7439728834753613e-05, + "loss": 2.0192, + "step": 15593500 + }, + { + "epoch": 45.14, + "learning_rate": 2.7439005187106335e-05, + "loss": 2.0326, + "step": 15594000 + }, + { + "epoch": 45.14, + "learning_rate": 2.743828153945906e-05, + "loss": 2.0417, + "step": 15594500 + }, + { + "epoch": 45.14, + "learning_rate": 2.7437557891811783e-05, + "loss": 2.0436, + "step": 15595000 + }, + { + "epoch": 45.14, + "learning_rate": 2.7436834244164505e-05, + "loss": 1.9975, + "step": 15595500 + }, + { + "epoch": 45.14, + "learning_rate": 2.7436110596517228e-05, + "loss": 2.0196, + "step": 15596000 + }, + { + "epoch": 45.15, + "learning_rate": 2.743538694886995e-05, + "loss": 2.0186, + "step": 15596500 + }, + { + "epoch": 45.15, + "learning_rate": 2.7434663301222675e-05, + "loss": 1.9983, + "step": 15597000 + }, + { + "epoch": 45.15, + "learning_rate": 2.74339396535754e-05, + "loss": 2.0021, + "step": 15597500 + }, + { + "epoch": 45.15, + "learning_rate": 2.7433218900518713e-05, + "loss": 2.0101, + "step": 15598000 + }, + { + "epoch": 45.15, + "learning_rate": 2.743249525287144e-05, + "loss": 1.9964, + "step": 15598500 + }, + { + "epoch": 45.15, + "learning_rate": 2.743177160522416e-05, + "loss": 2.0136, + "step": 15599000 + }, + { + "epoch": 45.15, + "learning_rate": 2.7431047957576883e-05, + "loss": 2.0049, + "step": 15599500 + }, + { + "epoch": 45.16, + "learning_rate": 2.7430324309929606e-05, + "loss": 2.0326, + "step": 15600000 + }, + { + "epoch": 45.16, + "learning_rate": 2.7429600662282328e-05, + "loss": 1.9939, + "step": 15600500 + }, + { + "epoch": 45.16, + "learning_rate": 2.742887701463505e-05, + "loss": 2.0278, + "step": 15601000 + }, + { + "epoch": 45.16, + "learning_rate": 2.7428153366987776e-05, + "loss": 2.0067, + "step": 15601500 + }, + { + "epoch": 45.16, + "learning_rate": 2.7427429719340498e-05, + "loss": 2.0199, + "step": 15602000 + }, + { + "epoch": 45.16, + "learning_rate": 2.742670607169322e-05, + "loss": 2.0145, + "step": 15602500 + }, + { + "epoch": 45.16, + "learning_rate": 2.7425982424045942e-05, + "loss": 2.0046, + "step": 15603000 + }, + { + "epoch": 45.17, + "learning_rate": 2.742526022369396e-05, + "loss": 2.025, + "step": 15603500 + }, + { + "epoch": 45.17, + "learning_rate": 2.7424536576046684e-05, + "loss": 2.0459, + "step": 15604000 + }, + { + "epoch": 45.17, + "learning_rate": 2.7423812928399406e-05, + "loss": 2.0333, + "step": 15604500 + }, + { + "epoch": 45.17, + "learning_rate": 2.7423089280752135e-05, + "loss": 2.0395, + "step": 15605000 + }, + { + "epoch": 45.17, + "learning_rate": 2.7422365633104857e-05, + "loss": 2.0247, + "step": 15605500 + }, + { + "epoch": 45.17, + "learning_rate": 2.742164198545758e-05, + "loss": 2.0154, + "step": 15606000 + }, + { + "epoch": 45.17, + "learning_rate": 2.74209197851056e-05, + "loss": 2.022, + "step": 15606500 + }, + { + "epoch": 45.18, + "learning_rate": 2.742019613745832e-05, + "loss": 2.017, + "step": 15607000 + }, + { + "epoch": 45.18, + "learning_rate": 2.7419472489811043e-05, + "loss": 2.04, + "step": 15607500 + }, + { + "epoch": 45.18, + "learning_rate": 2.7418748842163765e-05, + "loss": 2.0086, + "step": 15608000 + }, + { + "epoch": 45.18, + "learning_rate": 2.7418026641811784e-05, + "loss": 1.9932, + "step": 15608500 + }, + { + "epoch": 45.18, + "learning_rate": 2.7417302994164506e-05, + "loss": 1.9986, + "step": 15609000 + }, + { + "epoch": 45.18, + "learning_rate": 2.741657934651723e-05, + "loss": 2.0325, + "step": 15609500 + }, + { + "epoch": 45.18, + "learning_rate": 2.741585569886995e-05, + "loss": 2.0273, + "step": 15610000 + }, + { + "epoch": 45.19, + "learning_rate": 2.7415132051222676e-05, + "loss": 2.0133, + "step": 15610500 + }, + { + "epoch": 45.19, + "learning_rate": 2.74144084035754e-05, + "loss": 2.0477, + "step": 15611000 + }, + { + "epoch": 45.19, + "learning_rate": 2.741368475592812e-05, + "loss": 2.028, + "step": 15611500 + }, + { + "epoch": 45.19, + "learning_rate": 2.7412961108280843e-05, + "loss": 2.0257, + "step": 15612000 + }, + { + "epoch": 45.19, + "learning_rate": 2.7412237460633572e-05, + "loss": 2.0436, + "step": 15612500 + }, + { + "epoch": 45.19, + "learning_rate": 2.7411513812986294e-05, + "loss": 2.0117, + "step": 15613000 + }, + { + "epoch": 45.19, + "learning_rate": 2.7410790165339016e-05, + "loss": 2.021, + "step": 15613500 + }, + { + "epoch": 45.2, + "learning_rate": 2.741006651769174e-05, + "loss": 2.0414, + "step": 15614000 + }, + { + "epoch": 45.2, + "learning_rate": 2.7409342870044464e-05, + "loss": 2.0162, + "step": 15614500 + }, + { + "epoch": 45.2, + "learning_rate": 2.7408619222397187e-05, + "loss": 2.0162, + "step": 15615000 + }, + { + "epoch": 45.2, + "learning_rate": 2.7407897022045202e-05, + "loss": 2.0276, + "step": 15615500 + }, + { + "epoch": 45.2, + "learning_rate": 2.7407173374397928e-05, + "loss": 2.0175, + "step": 15616000 + }, + { + "epoch": 45.2, + "learning_rate": 2.7406451174045943e-05, + "loss": 2.0036, + "step": 15616500 + }, + { + "epoch": 45.2, + "learning_rate": 2.7405727526398666e-05, + "loss": 2.0167, + "step": 15617000 + }, + { + "epoch": 45.21, + "learning_rate": 2.7405005326046684e-05, + "loss": 2.0254, + "step": 15617500 + }, + { + "epoch": 45.21, + "learning_rate": 2.7404281678399407e-05, + "loss": 2.0143, + "step": 15618000 + }, + { + "epoch": 45.21, + "learning_rate": 2.740355803075213e-05, + "loss": 2.023, + "step": 15618500 + }, + { + "epoch": 45.21, + "learning_rate": 2.740283438310485e-05, + "loss": 2.0118, + "step": 15619000 + }, + { + "epoch": 45.21, + "learning_rate": 2.7402110735457577e-05, + "loss": 2.0206, + "step": 15619500 + }, + { + "epoch": 45.21, + "learning_rate": 2.7401387087810302e-05, + "loss": 2.0226, + "step": 15620000 + }, + { + "epoch": 45.21, + "learning_rate": 2.740066488745832e-05, + "loss": 2.0097, + "step": 15620500 + }, + { + "epoch": 45.22, + "learning_rate": 2.7399941239811044e-05, + "loss": 2.0143, + "step": 15621000 + }, + { + "epoch": 45.22, + "learning_rate": 2.7399217592163766e-05, + "loss": 2.0037, + "step": 15621500 + }, + { + "epoch": 45.22, + "learning_rate": 2.739849394451649e-05, + "loss": 2.0159, + "step": 15622000 + }, + { + "epoch": 45.22, + "learning_rate": 2.7397770296869214e-05, + "loss": 1.9923, + "step": 15622500 + }, + { + "epoch": 45.22, + "learning_rate": 2.7397046649221936e-05, + "loss": 2.0072, + "step": 15623000 + }, + { + "epoch": 45.22, + "learning_rate": 2.7396323001574658e-05, + "loss": 2.0296, + "step": 15623500 + }, + { + "epoch": 45.23, + "learning_rate": 2.739559935392738e-05, + "loss": 2.02, + "step": 15624000 + }, + { + "epoch": 45.23, + "learning_rate": 2.7394875706280103e-05, + "loss": 2.0278, + "step": 15624500 + }, + { + "epoch": 45.23, + "learning_rate": 2.7394152058632828e-05, + "loss": 2.0321, + "step": 15625000 + }, + { + "epoch": 45.23, + "learning_rate": 2.739342841098555e-05, + "loss": 2.0246, + "step": 15625500 + }, + { + "epoch": 45.23, + "learning_rate": 2.7392704763338273e-05, + "loss": 2.016, + "step": 15626000 + }, + { + "epoch": 45.23, + "learning_rate": 2.7391981115690995e-05, + "loss": 2.0077, + "step": 15626500 + }, + { + "epoch": 45.23, + "learning_rate": 2.7391257468043724e-05, + "loss": 1.9938, + "step": 15627000 + }, + { + "epoch": 45.24, + "learning_rate": 2.7390533820396446e-05, + "loss": 2.0229, + "step": 15627500 + }, + { + "epoch": 45.24, + "learning_rate": 2.7389811620044465e-05, + "loss": 2.0213, + "step": 15628000 + }, + { + "epoch": 45.24, + "learning_rate": 2.7389087972397187e-05, + "loss": 2.0134, + "step": 15628500 + }, + { + "epoch": 45.24, + "learning_rate": 2.738836432474991e-05, + "loss": 2.0396, + "step": 15629000 + }, + { + "epoch": 45.24, + "learning_rate": 2.7387640677102632e-05, + "loss": 2.0354, + "step": 15629500 + }, + { + "epoch": 45.24, + "learning_rate": 2.7386917029455354e-05, + "loss": 2.0319, + "step": 15630000 + }, + { + "epoch": 45.24, + "learning_rate": 2.738619338180808e-05, + "loss": 2.005, + "step": 15630500 + }, + { + "epoch": 45.25, + "learning_rate": 2.7385471181456095e-05, + "loss": 1.9977, + "step": 15631000 + }, + { + "epoch": 45.25, + "learning_rate": 2.7384747533808817e-05, + "loss": 2.0271, + "step": 15631500 + }, + { + "epoch": 45.25, + "learning_rate": 2.7384023886161543e-05, + "loss": 2.0272, + "step": 15632000 + }, + { + "epoch": 45.25, + "learning_rate": 2.738330168580956e-05, + "loss": 2.0139, + "step": 15632500 + }, + { + "epoch": 45.25, + "learning_rate": 2.738257803816228e-05, + "loss": 2.018, + "step": 15633000 + }, + { + "epoch": 45.25, + "learning_rate": 2.7381854390515006e-05, + "loss": 2.0043, + "step": 15633500 + }, + { + "epoch": 45.25, + "learning_rate": 2.738113074286773e-05, + "loss": 2.0291, + "step": 15634000 + }, + { + "epoch": 45.26, + "learning_rate": 2.7380407095220458e-05, + "loss": 2.0099, + "step": 15634500 + }, + { + "epoch": 45.26, + "learning_rate": 2.737968344757318e-05, + "loss": 2.0053, + "step": 15635000 + }, + { + "epoch": 45.26, + "learning_rate": 2.7378959799925902e-05, + "loss": 2.0116, + "step": 15635500 + }, + { + "epoch": 45.26, + "learning_rate": 2.7378237599573918e-05, + "loss": 2.0162, + "step": 15636000 + }, + { + "epoch": 45.26, + "learning_rate": 2.7377513951926643e-05, + "loss": 2.045, + "step": 15636500 + }, + { + "epoch": 45.26, + "learning_rate": 2.7376790304279366e-05, + "loss": 2.0209, + "step": 15637000 + }, + { + "epoch": 45.26, + "learning_rate": 2.7376066656632088e-05, + "loss": 2.0087, + "step": 15637500 + }, + { + "epoch": 45.27, + "learning_rate": 2.737534300898481e-05, + "loss": 2.0295, + "step": 15638000 + }, + { + "epoch": 45.27, + "learning_rate": 2.7374619361337532e-05, + "loss": 2.0238, + "step": 15638500 + }, + { + "epoch": 45.27, + "learning_rate": 2.737389716098555e-05, + "loss": 2.0139, + "step": 15639000 + }, + { + "epoch": 45.27, + "learning_rate": 2.7373173513338273e-05, + "loss": 2.0092, + "step": 15639500 + }, + { + "epoch": 45.27, + "learning_rate": 2.7372449865690996e-05, + "loss": 2.02, + "step": 15640000 + }, + { + "epoch": 45.27, + "learning_rate": 2.7371727665339015e-05, + "loss": 2.0168, + "step": 15640500 + }, + { + "epoch": 45.27, + "learning_rate": 2.737100546498703e-05, + "loss": 2.0296, + "step": 15641000 + }, + { + "epoch": 45.28, + "learning_rate": 2.7370281817339756e-05, + "loss": 2.0312, + "step": 15641500 + }, + { + "epoch": 45.28, + "learning_rate": 2.7369558169692478e-05, + "loss": 2.014, + "step": 15642000 + }, + { + "epoch": 45.28, + "learning_rate": 2.7368834522045207e-05, + "loss": 2.0096, + "step": 15642500 + }, + { + "epoch": 45.28, + "learning_rate": 2.736811087439793e-05, + "loss": 2.0293, + "step": 15643000 + }, + { + "epoch": 45.28, + "learning_rate": 2.736738722675065e-05, + "loss": 2.035, + "step": 15643500 + }, + { + "epoch": 45.28, + "learning_rate": 2.7366663579103374e-05, + "loss": 2.0193, + "step": 15644000 + }, + { + "epoch": 45.28, + "learning_rate": 2.7365939931456096e-05, + "loss": 2.019, + "step": 15644500 + }, + { + "epoch": 45.29, + "learning_rate": 2.736521628380882e-05, + "loss": 2.0325, + "step": 15645000 + }, + { + "epoch": 45.29, + "learning_rate": 2.7364492636161544e-05, + "loss": 2.0206, + "step": 15645500 + }, + { + "epoch": 45.29, + "learning_rate": 2.7363768988514266e-05, + "loss": 2.0244, + "step": 15646000 + }, + { + "epoch": 45.29, + "learning_rate": 2.736304534086699e-05, + "loss": 2.0211, + "step": 15646500 + }, + { + "epoch": 45.29, + "learning_rate": 2.736232169321971e-05, + "loss": 2.0241, + "step": 15647000 + }, + { + "epoch": 45.29, + "learning_rate": 2.7361598045572433e-05, + "loss": 2.0299, + "step": 15647500 + }, + { + "epoch": 45.29, + "learning_rate": 2.7360875845220452e-05, + "loss": 2.0352, + "step": 15648000 + }, + { + "epoch": 45.3, + "learning_rate": 2.7360152197573174e-05, + "loss": 2.0294, + "step": 15648500 + }, + { + "epoch": 45.3, + "learning_rate": 2.7359428549925896e-05, + "loss": 2.0284, + "step": 15649000 + }, + { + "epoch": 45.3, + "learning_rate": 2.7358704902278625e-05, + "loss": 2.0483, + "step": 15649500 + }, + { + "epoch": 45.3, + "learning_rate": 2.7357981254631347e-05, + "loss": 2.0568, + "step": 15650000 + }, + { + "epoch": 45.3, + "learning_rate": 2.7357259054279366e-05, + "loss": 2.009, + "step": 15650500 + }, + { + "epoch": 45.3, + "learning_rate": 2.7356536853927382e-05, + "loss": 2.0088, + "step": 15651000 + }, + { + "epoch": 45.3, + "learning_rate": 2.7355813206280108e-05, + "loss": 2.0012, + "step": 15651500 + }, + { + "epoch": 45.31, + "learning_rate": 2.735508955863283e-05, + "loss": 2.02, + "step": 15652000 + }, + { + "epoch": 45.31, + "learning_rate": 2.7354365910985552e-05, + "loss": 2.0241, + "step": 15652500 + }, + { + "epoch": 45.31, + "learning_rate": 2.7353642263338274e-05, + "loss": 1.9945, + "step": 15653000 + }, + { + "epoch": 45.31, + "learning_rate": 2.7352918615690997e-05, + "loss": 2.0044, + "step": 15653500 + }, + { + "epoch": 45.31, + "learning_rate": 2.7352194968043722e-05, + "loss": 2.0149, + "step": 15654000 + }, + { + "epoch": 45.31, + "learning_rate": 2.7351472767691738e-05, + "loss": 2.0211, + "step": 15654500 + }, + { + "epoch": 45.31, + "learning_rate": 2.735074912004446e-05, + "loss": 2.0077, + "step": 15655000 + }, + { + "epoch": 45.32, + "learning_rate": 2.7350025472397182e-05, + "loss": 2.0258, + "step": 15655500 + }, + { + "epoch": 45.32, + "learning_rate": 2.7349301824749908e-05, + "loss": 2.0338, + "step": 15656000 + }, + { + "epoch": 45.32, + "learning_rate": 2.734857817710263e-05, + "loss": 2.0149, + "step": 15656500 + }, + { + "epoch": 45.32, + "learning_rate": 2.734785452945536e-05, + "loss": 2.0337, + "step": 15657000 + }, + { + "epoch": 45.32, + "learning_rate": 2.734713088180808e-05, + "loss": 2.0113, + "step": 15657500 + }, + { + "epoch": 45.32, + "learning_rate": 2.7346407234160803e-05, + "loss": 1.996, + "step": 15658000 + }, + { + "epoch": 45.32, + "learning_rate": 2.7345683586513526e-05, + "loss": 2.0085, + "step": 15658500 + }, + { + "epoch": 45.33, + "learning_rate": 2.7344959938866248e-05, + "loss": 2.0119, + "step": 15659000 + }, + { + "epoch": 45.33, + "learning_rate": 2.7344236291218974e-05, + "loss": 2.0201, + "step": 15659500 + }, + { + "epoch": 45.33, + "learning_rate": 2.7343512643571696e-05, + "loss": 2.0195, + "step": 15660000 + }, + { + "epoch": 45.33, + "learning_rate": 2.7342788995924418e-05, + "loss": 1.9933, + "step": 15660500 + }, + { + "epoch": 45.33, + "learning_rate": 2.7342066795572434e-05, + "loss": 2.014, + "step": 15661000 + }, + { + "epoch": 45.33, + "learning_rate": 2.7341344595220453e-05, + "loss": 2.0142, + "step": 15661500 + }, + { + "epoch": 45.34, + "learning_rate": 2.7340620947573175e-05, + "loss": 2.023, + "step": 15662000 + }, + { + "epoch": 45.34, + "learning_rate": 2.7339897299925897e-05, + "loss": 2.0049, + "step": 15662500 + }, + { + "epoch": 45.34, + "learning_rate": 2.7339173652278623e-05, + "loss": 2.0042, + "step": 15663000 + }, + { + "epoch": 45.34, + "learning_rate": 2.7338450004631345e-05, + "loss": 2.0484, + "step": 15663500 + }, + { + "epoch": 45.34, + "learning_rate": 2.7337726356984067e-05, + "loss": 2.023, + "step": 15664000 + }, + { + "epoch": 45.34, + "learning_rate": 2.7337002709336796e-05, + "loss": 2.0012, + "step": 15664500 + }, + { + "epoch": 45.34, + "learning_rate": 2.733627906168952e-05, + "loss": 2.0242, + "step": 15665000 + }, + { + "epoch": 45.35, + "learning_rate": 2.733555541404224e-05, + "loss": 2.0104, + "step": 15665500 + }, + { + "epoch": 45.35, + "learning_rate": 2.7334831766394963e-05, + "loss": 2.0062, + "step": 15666000 + }, + { + "epoch": 45.35, + "learning_rate": 2.7334108118747685e-05, + "loss": 2.0249, + "step": 15666500 + }, + { + "epoch": 45.35, + "learning_rate": 2.7333385918395704e-05, + "loss": 2.0142, + "step": 15667000 + }, + { + "epoch": 45.35, + "learning_rate": 2.7332663718043723e-05, + "loss": 2.0427, + "step": 15667500 + }, + { + "epoch": 45.35, + "learning_rate": 2.7331940070396445e-05, + "loss": 2.012, + "step": 15668000 + }, + { + "epoch": 45.35, + "learning_rate": 2.7331216422749167e-05, + "loss": 2.0141, + "step": 15668500 + }, + { + "epoch": 45.36, + "learning_rate": 2.733049277510189e-05, + "loss": 2.0267, + "step": 15669000 + }, + { + "epoch": 45.36, + "learning_rate": 2.7329769127454612e-05, + "loss": 2.0411, + "step": 15669500 + }, + { + "epoch": 45.36, + "learning_rate": 2.7329045479807337e-05, + "loss": 2.0182, + "step": 15670000 + }, + { + "epoch": 45.36, + "learning_rate": 2.732832183216006e-05, + "loss": 1.9914, + "step": 15670500 + }, + { + "epoch": 45.36, + "learning_rate": 2.7327599631808075e-05, + "loss": 2.034, + "step": 15671000 + }, + { + "epoch": 45.36, + "learning_rate": 2.7326875984160798e-05, + "loss": 1.9993, + "step": 15671500 + }, + { + "epoch": 45.36, + "learning_rate": 2.7326152336513527e-05, + "loss": 2.0332, + "step": 15672000 + }, + { + "epoch": 45.37, + "learning_rate": 2.7325430136161545e-05, + "loss": 2.0269, + "step": 15672500 + }, + { + "epoch": 45.37, + "learning_rate": 2.7324706488514268e-05, + "loss": 2.0238, + "step": 15673000 + }, + { + "epoch": 45.37, + "learning_rate": 2.732398284086699e-05, + "loss": 2.0179, + "step": 15673500 + }, + { + "epoch": 45.37, + "learning_rate": 2.7323259193219712e-05, + "loss": 2.0051, + "step": 15674000 + }, + { + "epoch": 45.37, + "learning_rate": 2.732253699286773e-05, + "loss": 2.036, + "step": 15674500 + }, + { + "epoch": 45.37, + "learning_rate": 2.7321813345220453e-05, + "loss": 2.0339, + "step": 15675000 + }, + { + "epoch": 45.37, + "learning_rate": 2.7321089697573176e-05, + "loss": 2.0465, + "step": 15675500 + }, + { + "epoch": 45.38, + "learning_rate": 2.73203660499259e-05, + "loss": 2.0262, + "step": 15676000 + }, + { + "epoch": 45.38, + "learning_rate": 2.7319642402278623e-05, + "loss": 2.0218, + "step": 15676500 + }, + { + "epoch": 45.38, + "learning_rate": 2.7318918754631346e-05, + "loss": 2.0297, + "step": 15677000 + }, + { + "epoch": 45.38, + "learning_rate": 2.7318195106984068e-05, + "loss": 2.0409, + "step": 15677500 + }, + { + "epoch": 45.38, + "learning_rate": 2.731747145933679e-05, + "loss": 2.0333, + "step": 15678000 + }, + { + "epoch": 45.38, + "learning_rate": 2.7316747811689512e-05, + "loss": 2.0318, + "step": 15678500 + }, + { + "epoch": 45.38, + "learning_rate": 2.731602561133753e-05, + "loss": 2.0131, + "step": 15679000 + }, + { + "epoch": 45.39, + "learning_rate": 2.731530196369026e-05, + "loss": 1.9895, + "step": 15679500 + }, + { + "epoch": 45.39, + "learning_rate": 2.7314578316042983e-05, + "loss": 2.0303, + "step": 15680000 + }, + { + "epoch": 45.39, + "learning_rate": 2.7313854668395705e-05, + "loss": 2.0146, + "step": 15680500 + }, + { + "epoch": 45.39, + "learning_rate": 2.7313132468043724e-05, + "loss": 2.0344, + "step": 15681000 + }, + { + "epoch": 45.39, + "learning_rate": 2.7312408820396446e-05, + "loss": 2.0317, + "step": 15681500 + }, + { + "epoch": 45.39, + "learning_rate": 2.7311685172749168e-05, + "loss": 1.9903, + "step": 15682000 + }, + { + "epoch": 45.39, + "learning_rate": 2.731096152510189e-05, + "loss": 2.008, + "step": 15682500 + }, + { + "epoch": 45.4, + "learning_rate": 2.7310237877454613e-05, + "loss": 2.0159, + "step": 15683000 + }, + { + "epoch": 45.4, + "learning_rate": 2.7309514229807338e-05, + "loss": 2.0214, + "step": 15683500 + }, + { + "epoch": 45.4, + "learning_rate": 2.730879058216006e-05, + "loss": 2.0328, + "step": 15684000 + }, + { + "epoch": 45.4, + "learning_rate": 2.7308066934512783e-05, + "loss": 2.0455, + "step": 15684500 + }, + { + "epoch": 45.4, + "learning_rate": 2.7307344734160802e-05, + "loss": 2.0417, + "step": 15685000 + }, + { + "epoch": 45.4, + "learning_rate": 2.7306621086513524e-05, + "loss": 2.0043, + "step": 15685500 + }, + { + "epoch": 45.4, + "learning_rate": 2.7305897438866246e-05, + "loss": 2.0195, + "step": 15686000 + }, + { + "epoch": 45.41, + "learning_rate": 2.730517379121897e-05, + "loss": 2.0138, + "step": 15686500 + }, + { + "epoch": 45.41, + "learning_rate": 2.7304450143571697e-05, + "loss": 2.023, + "step": 15687000 + }, + { + "epoch": 45.41, + "learning_rate": 2.730372649592442e-05, + "loss": 2.0381, + "step": 15687500 + }, + { + "epoch": 45.41, + "learning_rate": 2.730300429557244e-05, + "loss": 2.0223, + "step": 15688000 + }, + { + "epoch": 45.41, + "learning_rate": 2.730228064792516e-05, + "loss": 2.0203, + "step": 15688500 + }, + { + "epoch": 45.41, + "learning_rate": 2.7301557000277883e-05, + "loss": 2.019, + "step": 15689000 + }, + { + "epoch": 45.41, + "learning_rate": 2.7300833352630605e-05, + "loss": 2.001, + "step": 15689500 + }, + { + "epoch": 45.42, + "learning_rate": 2.7300111152278624e-05, + "loss": 2.0316, + "step": 15690000 + }, + { + "epoch": 45.42, + "learning_rate": 2.7299387504631346e-05, + "loss": 2.0367, + "step": 15690500 + }, + { + "epoch": 45.42, + "learning_rate": 2.729866385698407e-05, + "loss": 2.0258, + "step": 15691000 + }, + { + "epoch": 45.42, + "learning_rate": 2.729794020933679e-05, + "loss": 2.0095, + "step": 15691500 + }, + { + "epoch": 45.42, + "learning_rate": 2.7297216561689513e-05, + "loss": 2.0261, + "step": 15692000 + }, + { + "epoch": 45.42, + "learning_rate": 2.729649291404224e-05, + "loss": 2.0092, + "step": 15692500 + }, + { + "epoch": 45.42, + "learning_rate": 2.729576926639496e-05, + "loss": 2.033, + "step": 15693000 + }, + { + "epoch": 45.43, + "learning_rate": 2.7295045618747683e-05, + "loss": 2.0441, + "step": 15693500 + }, + { + "epoch": 45.43, + "learning_rate": 2.7294321971100405e-05, + "loss": 2.0519, + "step": 15694000 + }, + { + "epoch": 45.43, + "learning_rate": 2.7293598323453134e-05, + "loss": 2.037, + "step": 15694500 + }, + { + "epoch": 45.43, + "learning_rate": 2.7292876123101153e-05, + "loss": 2.0221, + "step": 15695000 + }, + { + "epoch": 45.43, + "learning_rate": 2.7292152475453876e-05, + "loss": 2.0529, + "step": 15695500 + }, + { + "epoch": 45.43, + "learning_rate": 2.7291428827806598e-05, + "loss": 2.0316, + "step": 15696000 + }, + { + "epoch": 45.43, + "learning_rate": 2.729070518015932e-05, + "loss": 2.0325, + "step": 15696500 + }, + { + "epoch": 45.44, + "learning_rate": 2.7289981532512042e-05, + "loss": 2.0256, + "step": 15697000 + }, + { + "epoch": 45.44, + "learning_rate": 2.728925933216006e-05, + "loss": 2.0127, + "step": 15697500 + }, + { + "epoch": 45.44, + "learning_rate": 2.7288535684512784e-05, + "loss": 2.0209, + "step": 15698000 + }, + { + "epoch": 45.44, + "learning_rate": 2.7287813484160802e-05, + "loss": 2.0193, + "step": 15698500 + }, + { + "epoch": 45.44, + "learning_rate": 2.7287089836513525e-05, + "loss": 2.0299, + "step": 15699000 + }, + { + "epoch": 45.44, + "learning_rate": 2.7286366188866247e-05, + "loss": 2.0158, + "step": 15699500 + }, + { + "epoch": 45.45, + "learning_rate": 2.728564254121897e-05, + "loss": 1.9998, + "step": 15700000 + }, + { + "epoch": 45.45, + "learning_rate": 2.728491889357169e-05, + "loss": 2.0068, + "step": 15700500 + }, + { + "epoch": 45.45, + "learning_rate": 2.7284195245924417e-05, + "loss": 2.0274, + "step": 15701000 + }, + { + "epoch": 45.45, + "learning_rate": 2.728347159827714e-05, + "loss": 2.0021, + "step": 15701500 + }, + { + "epoch": 45.45, + "learning_rate": 2.7282747950629868e-05, + "loss": 2.0482, + "step": 15702000 + }, + { + "epoch": 45.45, + "learning_rate": 2.728202430298259e-05, + "loss": 2.0158, + "step": 15702500 + }, + { + "epoch": 45.45, + "learning_rate": 2.7281302102630606e-05, + "loss": 2.0078, + "step": 15703000 + }, + { + "epoch": 45.46, + "learning_rate": 2.728057845498333e-05, + "loss": 2.0231, + "step": 15703500 + }, + { + "epoch": 45.46, + "learning_rate": 2.7279854807336054e-05, + "loss": 2.0153, + "step": 15704000 + }, + { + "epoch": 45.46, + "learning_rate": 2.7279131159688776e-05, + "loss": 2.0264, + "step": 15704500 + }, + { + "epoch": 45.46, + "learning_rate": 2.7278408959336792e-05, + "loss": 2.029, + "step": 15705000 + }, + { + "epoch": 45.46, + "learning_rate": 2.7277685311689517e-05, + "loss": 2.0362, + "step": 15705500 + }, + { + "epoch": 45.46, + "learning_rate": 2.727696166404224e-05, + "loss": 2.0339, + "step": 15706000 + }, + { + "epoch": 45.46, + "learning_rate": 2.7276239463690255e-05, + "loss": 2.0305, + "step": 15706500 + }, + { + "epoch": 45.47, + "learning_rate": 2.727551581604298e-05, + "loss": 2.0241, + "step": 15707000 + }, + { + "epoch": 45.47, + "learning_rate": 2.7274792168395703e-05, + "loss": 2.0242, + "step": 15707500 + }, + { + "epoch": 45.47, + "learning_rate": 2.7274068520748425e-05, + "loss": 2.023, + "step": 15708000 + }, + { + "epoch": 45.47, + "learning_rate": 2.7273344873101147e-05, + "loss": 1.9901, + "step": 15708500 + }, + { + "epoch": 45.47, + "learning_rate": 2.727262122545387e-05, + "loss": 2.0261, + "step": 15709000 + }, + { + "epoch": 45.47, + "learning_rate": 2.72718975778066e-05, + "loss": 2.0336, + "step": 15709500 + }, + { + "epoch": 45.47, + "learning_rate": 2.727117393015932e-05, + "loss": 2.0033, + "step": 15710000 + }, + { + "epoch": 45.48, + "learning_rate": 2.727045172980734e-05, + "loss": 2.054, + "step": 15710500 + }, + { + "epoch": 45.48, + "learning_rate": 2.7269728082160062e-05, + "loss": 2.0548, + "step": 15711000 + }, + { + "epoch": 45.48, + "learning_rate": 2.7269004434512784e-05, + "loss": 2.0318, + "step": 15711500 + }, + { + "epoch": 45.48, + "learning_rate": 2.7268280786865507e-05, + "loss": 2.0351, + "step": 15712000 + }, + { + "epoch": 45.48, + "learning_rate": 2.7267557139218232e-05, + "loss": 2.0408, + "step": 15712500 + }, + { + "epoch": 45.48, + "learning_rate": 2.7266833491570954e-05, + "loss": 2.0184, + "step": 15713000 + }, + { + "epoch": 45.48, + "learning_rate": 2.7266109843923677e-05, + "loss": 1.9892, + "step": 15713500 + }, + { + "epoch": 45.49, + "learning_rate": 2.72653861962764e-05, + "loss": 2.0121, + "step": 15714000 + }, + { + "epoch": 45.49, + "learning_rate": 2.7264663995924418e-05, + "loss": 2.0276, + "step": 15714500 + }, + { + "epoch": 45.49, + "learning_rate": 2.726394034827714e-05, + "loss": 2.0345, + "step": 15715000 + }, + { + "epoch": 45.49, + "learning_rate": 2.7263216700629862e-05, + "loss": 2.0451, + "step": 15715500 + }, + { + "epoch": 45.49, + "learning_rate": 2.7262493052982585e-05, + "loss": 2.0007, + "step": 15716000 + }, + { + "epoch": 45.49, + "learning_rate": 2.7261769405335314e-05, + "loss": 2.0116, + "step": 15716500 + }, + { + "epoch": 45.49, + "learning_rate": 2.7261045757688036e-05, + "loss": 2.0366, + "step": 15717000 + }, + { + "epoch": 45.5, + "learning_rate": 2.7260322110040758e-05, + "loss": 2.0168, + "step": 15717500 + }, + { + "epoch": 45.5, + "learning_rate": 2.725959846239348e-05, + "loss": 2.0157, + "step": 15718000 + }, + { + "epoch": 45.5, + "learning_rate": 2.7258874814746206e-05, + "loss": 2.0235, + "step": 15718500 + }, + { + "epoch": 45.5, + "learning_rate": 2.7258151167098928e-05, + "loss": 2.0082, + "step": 15719000 + }, + { + "epoch": 45.5, + "learning_rate": 2.725742751945165e-05, + "loss": 2.0206, + "step": 15719500 + }, + { + "epoch": 45.5, + "learning_rate": 2.7256706766394963e-05, + "loss": 2.014, + "step": 15720000 + }, + { + "epoch": 45.5, + "learning_rate": 2.7255983118747685e-05, + "loss": 2.026, + "step": 15720500 + }, + { + "epoch": 45.51, + "learning_rate": 2.7255259471100407e-05, + "loss": 2.0324, + "step": 15721000 + }, + { + "epoch": 45.51, + "learning_rate": 2.7254535823453133e-05, + "loss": 2.0481, + "step": 15721500 + }, + { + "epoch": 45.51, + "learning_rate": 2.7253812175805855e-05, + "loss": 2.0191, + "step": 15722000 + }, + { + "epoch": 45.51, + "learning_rate": 2.7253088528158577e-05, + "loss": 2.0258, + "step": 15722500 + }, + { + "epoch": 45.51, + "learning_rate": 2.72523648805113e-05, + "loss": 2.0458, + "step": 15723000 + }, + { + "epoch": 45.51, + "learning_rate": 2.725164123286402e-05, + "loss": 2.0284, + "step": 15723500 + }, + { + "epoch": 45.51, + "learning_rate": 2.725091758521675e-05, + "loss": 2.0261, + "step": 15724000 + }, + { + "epoch": 45.52, + "learning_rate": 2.7250193937569473e-05, + "loss": 2.0263, + "step": 15724500 + }, + { + "epoch": 45.52, + "learning_rate": 2.7249471737217492e-05, + "loss": 2.0065, + "step": 15725000 + }, + { + "epoch": 45.52, + "learning_rate": 2.7248748089570214e-05, + "loss": 2.0252, + "step": 15725500 + }, + { + "epoch": 45.52, + "learning_rate": 2.7248024441922936e-05, + "loss": 2.0141, + "step": 15726000 + }, + { + "epoch": 45.52, + "learning_rate": 2.7247302241570955e-05, + "loss": 2.0328, + "step": 15726500 + }, + { + "epoch": 45.52, + "learning_rate": 2.7246578593923677e-05, + "loss": 2.0038, + "step": 15727000 + }, + { + "epoch": 45.52, + "learning_rate": 2.72458549462764e-05, + "loss": 2.012, + "step": 15727500 + }, + { + "epoch": 45.53, + "learning_rate": 2.7245131298629122e-05, + "loss": 2.043, + "step": 15728000 + }, + { + "epoch": 45.53, + "learning_rate": 2.7244407650981844e-05, + "loss": 2.0314, + "step": 15728500 + }, + { + "epoch": 45.53, + "learning_rate": 2.7243686897925156e-05, + "loss": 2.0219, + "step": 15729000 + }, + { + "epoch": 45.53, + "learning_rate": 2.7242963250277882e-05, + "loss": 2.0184, + "step": 15729500 + }, + { + "epoch": 45.53, + "learning_rate": 2.7242239602630604e-05, + "loss": 2.0085, + "step": 15730000 + }, + { + "epoch": 45.53, + "learning_rate": 2.7241515954983327e-05, + "loss": 2.0125, + "step": 15730500 + }, + { + "epoch": 45.53, + "learning_rate": 2.724079230733605e-05, + "loss": 1.9994, + "step": 15731000 + }, + { + "epoch": 45.54, + "learning_rate": 2.724006865968877e-05, + "loss": 2.0331, + "step": 15731500 + }, + { + "epoch": 45.54, + "learning_rate": 2.72393450120415e-05, + "loss": 2.0227, + "step": 15732000 + }, + { + "epoch": 45.54, + "learning_rate": 2.723862281168952e-05, + "loss": 2.0151, + "step": 15732500 + }, + { + "epoch": 45.54, + "learning_rate": 2.723789916404224e-05, + "loss": 2.0129, + "step": 15733000 + }, + { + "epoch": 45.54, + "learning_rate": 2.7237175516394963e-05, + "loss": 2.0116, + "step": 15733500 + }, + { + "epoch": 45.54, + "learning_rate": 2.7236451868747686e-05, + "loss": 2.0155, + "step": 15734000 + }, + { + "epoch": 45.54, + "learning_rate": 2.7235729668395705e-05, + "loss": 2.0022, + "step": 15734500 + }, + { + "epoch": 45.55, + "learning_rate": 2.7235006020748427e-05, + "loss": 2.0189, + "step": 15735000 + }, + { + "epoch": 45.55, + "learning_rate": 2.723428237310115e-05, + "loss": 2.0149, + "step": 15735500 + }, + { + "epoch": 45.55, + "learning_rate": 2.723355872545387e-05, + "loss": 2.0296, + "step": 15736000 + }, + { + "epoch": 45.55, + "learning_rate": 2.7232835077806597e-05, + "loss": 2.0212, + "step": 15736500 + }, + { + "epoch": 45.55, + "learning_rate": 2.723211143015932e-05, + "loss": 2.0276, + "step": 15737000 + }, + { + "epoch": 45.55, + "learning_rate": 2.723138778251204e-05, + "loss": 2.0181, + "step": 15737500 + }, + { + "epoch": 45.56, + "learning_rate": 2.7230664134864764e-05, + "loss": 2.033, + "step": 15738000 + }, + { + "epoch": 45.56, + "learning_rate": 2.7229941934512783e-05, + "loss": 2.0281, + "step": 15738500 + }, + { + "epoch": 45.56, + "learning_rate": 2.7229218286865505e-05, + "loss": 2.0259, + "step": 15739000 + }, + { + "epoch": 45.56, + "learning_rate": 2.7228496086513527e-05, + "loss": 2.0274, + "step": 15739500 + }, + { + "epoch": 45.56, + "learning_rate": 2.722777243886625e-05, + "loss": 2.034, + "step": 15740000 + }, + { + "epoch": 45.56, + "learning_rate": 2.722704879121897e-05, + "loss": 2.0406, + "step": 15740500 + }, + { + "epoch": 45.56, + "learning_rate": 2.7226325143571697e-05, + "loss": 2.035, + "step": 15741000 + }, + { + "epoch": 45.57, + "learning_rate": 2.722560149592442e-05, + "loss": 2.0317, + "step": 15741500 + }, + { + "epoch": 45.57, + "learning_rate": 2.722487784827714e-05, + "loss": 2.0047, + "step": 15742000 + }, + { + "epoch": 45.57, + "learning_rate": 2.7224154200629864e-05, + "loss": 2.0259, + "step": 15742500 + }, + { + "epoch": 45.57, + "learning_rate": 2.7223430552982586e-05, + "loss": 2.05, + "step": 15743000 + }, + { + "epoch": 45.57, + "learning_rate": 2.7222706905335312e-05, + "loss": 2.0305, + "step": 15743500 + }, + { + "epoch": 45.57, + "learning_rate": 2.7221983257688034e-05, + "loss": 2.0518, + "step": 15744000 + }, + { + "epoch": 45.57, + "learning_rate": 2.722126105733605e-05, + "loss": 2.0372, + "step": 15744500 + }, + { + "epoch": 45.58, + "learning_rate": 2.722053885698407e-05, + "loss": 2.0415, + "step": 15745000 + }, + { + "epoch": 45.58, + "learning_rate": 2.721981520933679e-05, + "loss": 2.0083, + "step": 15745500 + }, + { + "epoch": 45.58, + "learning_rate": 2.7219091561689513e-05, + "loss": 2.0333, + "step": 15746000 + }, + { + "epoch": 45.58, + "learning_rate": 2.7218367914042235e-05, + "loss": 2.0164, + "step": 15746500 + }, + { + "epoch": 45.58, + "learning_rate": 2.7217644266394964e-05, + "loss": 2.0265, + "step": 15747000 + }, + { + "epoch": 45.58, + "learning_rate": 2.7216920618747686e-05, + "loss": 2.047, + "step": 15747500 + }, + { + "epoch": 45.58, + "learning_rate": 2.7216198418395705e-05, + "loss": 2.0437, + "step": 15748000 + }, + { + "epoch": 45.59, + "learning_rate": 2.7215474770748428e-05, + "loss": 1.987, + "step": 15748500 + }, + { + "epoch": 45.59, + "learning_rate": 2.721475112310115e-05, + "loss": 2.017, + "step": 15749000 + }, + { + "epoch": 45.59, + "learning_rate": 2.7214027475453872e-05, + "loss": 2.0209, + "step": 15749500 + }, + { + "epoch": 45.59, + "learning_rate": 2.7213303827806598e-05, + "loss": 2.0105, + "step": 15750000 + }, + { + "epoch": 45.59, + "learning_rate": 2.721258018015932e-05, + "loss": 2.023, + "step": 15750500 + }, + { + "epoch": 45.59, + "learning_rate": 2.7211856532512042e-05, + "loss": 2.0091, + "step": 15751000 + }, + { + "epoch": 45.59, + "learning_rate": 2.7211132884864764e-05, + "loss": 2.018, + "step": 15751500 + }, + { + "epoch": 45.6, + "learning_rate": 2.7210410684512783e-05, + "loss": 2.0312, + "step": 15752000 + }, + { + "epoch": 45.6, + "learning_rate": 2.72096884841608e-05, + "loss": 2.0301, + "step": 15752500 + }, + { + "epoch": 45.6, + "learning_rate": 2.7208964836513525e-05, + "loss": 2.0205, + "step": 15753000 + }, + { + "epoch": 45.6, + "learning_rate": 2.7208241188866247e-05, + "loss": 2.0194, + "step": 15753500 + }, + { + "epoch": 45.6, + "learning_rate": 2.720751754121897e-05, + "loss": 2.0216, + "step": 15754000 + }, + { + "epoch": 45.6, + "learning_rate": 2.7206793893571698e-05, + "loss": 2.014, + "step": 15754500 + }, + { + "epoch": 45.6, + "learning_rate": 2.720607024592442e-05, + "loss": 2.0088, + "step": 15755000 + }, + { + "epoch": 45.61, + "learning_rate": 2.7205346598277142e-05, + "loss": 2.0369, + "step": 15755500 + }, + { + "epoch": 45.61, + "learning_rate": 2.7204622950629865e-05, + "loss": 2.0458, + "step": 15756000 + }, + { + "epoch": 45.61, + "learning_rate": 2.7203899302982587e-05, + "loss": 2.0498, + "step": 15756500 + }, + { + "epoch": 45.61, + "learning_rate": 2.7203175655335313e-05, + "loss": 2.0213, + "step": 15757000 + }, + { + "epoch": 45.61, + "learning_rate": 2.7202452007688035e-05, + "loss": 2.0289, + "step": 15757500 + }, + { + "epoch": 45.61, + "learning_rate": 2.7201728360040757e-05, + "loss": 2.0153, + "step": 15758000 + }, + { + "epoch": 45.61, + "learning_rate": 2.720100471239348e-05, + "loss": 2.0121, + "step": 15758500 + }, + { + "epoch": 45.62, + "learning_rate": 2.72002810647462e-05, + "loss": 2.0423, + "step": 15759000 + }, + { + "epoch": 45.62, + "learning_rate": 2.719955886439422e-05, + "loss": 2.0082, + "step": 15759500 + }, + { + "epoch": 45.62, + "learning_rate": 2.7198835216746943e-05, + "loss": 2.0169, + "step": 15760000 + }, + { + "epoch": 45.62, + "learning_rate": 2.7198111569099665e-05, + "loss": 2.0596, + "step": 15760500 + }, + { + "epoch": 45.62, + "learning_rate": 2.7197387921452387e-05, + "loss": 2.0341, + "step": 15761000 + }, + { + "epoch": 45.62, + "learning_rate": 2.7196664273805116e-05, + "loss": 2.0148, + "step": 15761500 + }, + { + "epoch": 45.62, + "learning_rate": 2.719594062615784e-05, + "loss": 2.054, + "step": 15762000 + }, + { + "epoch": 45.63, + "learning_rate": 2.7195216978510564e-05, + "loss": 2.0297, + "step": 15762500 + }, + { + "epoch": 45.63, + "learning_rate": 2.7194493330863286e-05, + "loss": 2.0386, + "step": 15763000 + }, + { + "epoch": 45.63, + "learning_rate": 2.7193771130511302e-05, + "loss": 2.0307, + "step": 15763500 + }, + { + "epoch": 45.63, + "learning_rate": 2.7193047482864027e-05, + "loss": 2.0172, + "step": 15764000 + }, + { + "epoch": 45.63, + "learning_rate": 2.719232383521675e-05, + "loss": 2.0297, + "step": 15764500 + }, + { + "epoch": 45.63, + "learning_rate": 2.7191601634864765e-05, + "loss": 2.0455, + "step": 15765000 + }, + { + "epoch": 45.63, + "learning_rate": 2.7190877987217487e-05, + "loss": 2.0332, + "step": 15765500 + }, + { + "epoch": 45.64, + "learning_rate": 2.7190154339570213e-05, + "loss": 2.0202, + "step": 15766000 + }, + { + "epoch": 45.64, + "learning_rate": 2.718943213921823e-05, + "loss": 2.0601, + "step": 15766500 + }, + { + "epoch": 45.64, + "learning_rate": 2.718870849157095e-05, + "loss": 2.011, + "step": 15767000 + }, + { + "epoch": 45.64, + "learning_rate": 2.7187984843923676e-05, + "loss": 2.0248, + "step": 15767500 + }, + { + "epoch": 45.64, + "learning_rate": 2.7187262643571692e-05, + "loss": 2.0157, + "step": 15768000 + }, + { + "epoch": 45.64, + "learning_rate": 2.7186538995924414e-05, + "loss": 2.0207, + "step": 15768500 + }, + { + "epoch": 45.64, + "learning_rate": 2.718581534827714e-05, + "loss": 2.0122, + "step": 15769000 + }, + { + "epoch": 45.65, + "learning_rate": 2.7185091700629865e-05, + "loss": 2.0231, + "step": 15769500 + }, + { + "epoch": 45.65, + "learning_rate": 2.718436805298259e-05, + "loss": 2.0317, + "step": 15770000 + }, + { + "epoch": 45.65, + "learning_rate": 2.7183644405335313e-05, + "loss": 2.0345, + "step": 15770500 + }, + { + "epoch": 45.65, + "learning_rate": 2.7182920757688036e-05, + "loss": 2.0475, + "step": 15771000 + }, + { + "epoch": 45.65, + "learning_rate": 2.7182197110040758e-05, + "loss": 2.0016, + "step": 15771500 + }, + { + "epoch": 45.65, + "learning_rate": 2.718147346239348e-05, + "loss": 2.0258, + "step": 15772000 + }, + { + "epoch": 45.65, + "learning_rate": 2.7180749814746202e-05, + "loss": 1.9978, + "step": 15772500 + }, + { + "epoch": 45.66, + "learning_rate": 2.7180026167098928e-05, + "loss": 2.0245, + "step": 15773000 + }, + { + "epoch": 45.66, + "learning_rate": 2.717930251945165e-05, + "loss": 2.044, + "step": 15773500 + }, + { + "epoch": 45.66, + "learning_rate": 2.7178578871804372e-05, + "loss": 2.0124, + "step": 15774000 + }, + { + "epoch": 45.66, + "learning_rate": 2.7177855224157095e-05, + "loss": 2.019, + "step": 15774500 + }, + { + "epoch": 45.66, + "learning_rate": 2.7177131576509817e-05, + "loss": 2.0329, + "step": 15775000 + }, + { + "epoch": 45.66, + "learning_rate": 2.717640792886254e-05, + "loss": 2.0141, + "step": 15775500 + }, + { + "epoch": 45.67, + "learning_rate": 2.7175685728510558e-05, + "loss": 2.0107, + "step": 15776000 + }, + { + "epoch": 45.67, + "learning_rate": 2.7174962080863287e-05, + "loss": 2.0254, + "step": 15776500 + }, + { + "epoch": 45.67, + "learning_rate": 2.7174239880511303e-05, + "loss": 2.0264, + "step": 15777000 + }, + { + "epoch": 45.67, + "learning_rate": 2.7173516232864028e-05, + "loss": 2.0222, + "step": 15777500 + }, + { + "epoch": 45.67, + "learning_rate": 2.717279258521675e-05, + "loss": 2.0257, + "step": 15778000 + }, + { + "epoch": 45.67, + "learning_rate": 2.7172070384864766e-05, + "loss": 2.0201, + "step": 15778500 + }, + { + "epoch": 45.67, + "learning_rate": 2.717134673721749e-05, + "loss": 2.0274, + "step": 15779000 + }, + { + "epoch": 45.68, + "learning_rate": 2.7170623089570214e-05, + "loss": 2.0202, + "step": 15779500 + }, + { + "epoch": 45.68, + "learning_rate": 2.7169899441922936e-05, + "loss": 2.034, + "step": 15780000 + }, + { + "epoch": 45.68, + "learning_rate": 2.716917579427566e-05, + "loss": 2.02, + "step": 15780500 + }, + { + "epoch": 45.68, + "learning_rate": 2.716845214662838e-05, + "loss": 2.0153, + "step": 15781000 + }, + { + "epoch": 45.68, + "learning_rate": 2.7167728498981103e-05, + "loss": 2.0303, + "step": 15781500 + }, + { + "epoch": 45.68, + "learning_rate": 2.716700485133383e-05, + "loss": 2.0227, + "step": 15782000 + }, + { + "epoch": 45.68, + "learning_rate": 2.716628120368655e-05, + "loss": 2.0077, + "step": 15782500 + }, + { + "epoch": 45.69, + "learning_rate": 2.7165557556039273e-05, + "loss": 2.0318, + "step": 15783000 + }, + { + "epoch": 45.69, + "learning_rate": 2.7164835355687292e-05, + "loss": 2.0304, + "step": 15783500 + }, + { + "epoch": 45.69, + "learning_rate": 2.7164111708040017e-05, + "loss": 2.0475, + "step": 15784000 + }, + { + "epoch": 45.69, + "learning_rate": 2.7163388060392743e-05, + "loss": 2.006, + "step": 15784500 + }, + { + "epoch": 45.69, + "learning_rate": 2.7162664412745465e-05, + "loss": 2.0276, + "step": 15785000 + }, + { + "epoch": 45.69, + "learning_rate": 2.716194221239348e-05, + "loss": 2.016, + "step": 15785500 + }, + { + "epoch": 45.69, + "learning_rate": 2.7161218564746203e-05, + "loss": 2.0252, + "step": 15786000 + }, + { + "epoch": 45.7, + "learning_rate": 2.716049491709893e-05, + "loss": 2.0418, + "step": 15786500 + }, + { + "epoch": 45.7, + "learning_rate": 2.715977126945165e-05, + "loss": 2.0366, + "step": 15787000 + }, + { + "epoch": 45.7, + "learning_rate": 2.7159047621804373e-05, + "loss": 2.0328, + "step": 15787500 + }, + { + "epoch": 45.7, + "learning_rate": 2.7158323974157095e-05, + "loss": 2.0385, + "step": 15788000 + }, + { + "epoch": 45.7, + "learning_rate": 2.7157600326509818e-05, + "loss": 2.0388, + "step": 15788500 + }, + { + "epoch": 45.7, + "learning_rate": 2.7156876678862543e-05, + "loss": 2.0334, + "step": 15789000 + }, + { + "epoch": 45.7, + "learning_rate": 2.715615447851056e-05, + "loss": 2.0311, + "step": 15789500 + }, + { + "epoch": 45.71, + "learning_rate": 2.715543083086328e-05, + "loss": 2.0264, + "step": 15790000 + }, + { + "epoch": 45.71, + "learning_rate": 2.7154707183216003e-05, + "loss": 2.011, + "step": 15790500 + }, + { + "epoch": 45.71, + "learning_rate": 2.715398353556873e-05, + "loss": 2.0081, + "step": 15791000 + }, + { + "epoch": 45.71, + "learning_rate": 2.7153259887921455e-05, + "loss": 2.032, + "step": 15791500 + }, + { + "epoch": 45.71, + "learning_rate": 2.7152537687569473e-05, + "loss": 2.0077, + "step": 15792000 + }, + { + "epoch": 45.71, + "learning_rate": 2.7151815487217492e-05, + "loss": 2.0461, + "step": 15792500 + }, + { + "epoch": 45.71, + "learning_rate": 2.7151091839570215e-05, + "loss": 2.0412, + "step": 15793000 + }, + { + "epoch": 45.72, + "learning_rate": 2.7150368191922937e-05, + "loss": 2.023, + "step": 15793500 + }, + { + "epoch": 45.72, + "learning_rate": 2.714964454427566e-05, + "loss": 2.042, + "step": 15794000 + }, + { + "epoch": 45.72, + "learning_rate": 2.714892089662838e-05, + "loss": 2.0213, + "step": 15794500 + }, + { + "epoch": 45.72, + "learning_rate": 2.7148197248981107e-05, + "loss": 2.0548, + "step": 15795000 + }, + { + "epoch": 45.72, + "learning_rate": 2.714747360133383e-05, + "loss": 2.029, + "step": 15795500 + }, + { + "epoch": 45.72, + "learning_rate": 2.714674995368655e-05, + "loss": 2.0255, + "step": 15796000 + }, + { + "epoch": 45.72, + "learning_rate": 2.7146027753334567e-05, + "loss": 2.0278, + "step": 15796500 + }, + { + "epoch": 45.73, + "learning_rate": 2.7145304105687293e-05, + "loss": 2.0335, + "step": 15797000 + }, + { + "epoch": 45.73, + "learning_rate": 2.7144580458040015e-05, + "loss": 2.0184, + "step": 15797500 + }, + { + "epoch": 45.73, + "learning_rate": 2.7143856810392737e-05, + "loss": 2.0302, + "step": 15798000 + }, + { + "epoch": 45.73, + "learning_rate": 2.714313316274546e-05, + "loss": 2.0164, + "step": 15798500 + }, + { + "epoch": 45.73, + "learning_rate": 2.714240951509819e-05, + "loss": 2.0343, + "step": 15799000 + }, + { + "epoch": 45.73, + "learning_rate": 2.714168586745091e-05, + "loss": 2.0279, + "step": 15799500 + }, + { + "epoch": 45.73, + "learning_rate": 2.7140962219803633e-05, + "loss": 2.0217, + "step": 15800000 + }, + { + "epoch": 45.74, + "learning_rate": 2.7140240019451652e-05, + "loss": 2.0338, + "step": 15800500 + }, + { + "epoch": 45.74, + "learning_rate": 2.713951781909967e-05, + "loss": 2.0157, + "step": 15801000 + }, + { + "epoch": 45.74, + "learning_rate": 2.7138794171452393e-05, + "loss": 2.0413, + "step": 15801500 + }, + { + "epoch": 45.74, + "learning_rate": 2.713807197110041e-05, + "loss": 2.0432, + "step": 15802000 + }, + { + "epoch": 45.74, + "learning_rate": 2.713734832345313e-05, + "loss": 2.0343, + "step": 15802500 + }, + { + "epoch": 45.74, + "learning_rate": 2.7136624675805856e-05, + "loss": 2.016, + "step": 15803000 + }, + { + "epoch": 45.74, + "learning_rate": 2.713590102815858e-05, + "loss": 2.0364, + "step": 15803500 + }, + { + "epoch": 45.75, + "learning_rate": 2.71351773805113e-05, + "loss": 2.0388, + "step": 15804000 + }, + { + "epoch": 45.75, + "learning_rate": 2.7134453732864023e-05, + "loss": 2.0059, + "step": 15804500 + }, + { + "epoch": 45.75, + "learning_rate": 2.7133731532512042e-05, + "loss": 2.0422, + "step": 15805000 + }, + { + "epoch": 45.75, + "learning_rate": 2.7133007884864764e-05, + "loss": 2.0059, + "step": 15805500 + }, + { + "epoch": 45.75, + "learning_rate": 2.7132284237217486e-05, + "loss": 2.0362, + "step": 15806000 + }, + { + "epoch": 45.75, + "learning_rate": 2.7131560589570215e-05, + "loss": 2.0143, + "step": 15806500 + }, + { + "epoch": 45.75, + "learning_rate": 2.7130839836513528e-05, + "loss": 2.0054, + "step": 15807000 + }, + { + "epoch": 45.76, + "learning_rate": 2.713011618886625e-05, + "loss": 2.0567, + "step": 15807500 + }, + { + "epoch": 45.76, + "learning_rate": 2.7129392541218972e-05, + "loss": 2.0125, + "step": 15808000 + }, + { + "epoch": 45.76, + "learning_rate": 2.7128668893571694e-05, + "loss": 2.049, + "step": 15808500 + }, + { + "epoch": 45.76, + "learning_rate": 2.712794524592442e-05, + "loss": 2.0025, + "step": 15809000 + }, + { + "epoch": 45.76, + "learning_rate": 2.7127221598277142e-05, + "loss": 2.033, + "step": 15809500 + }, + { + "epoch": 45.76, + "learning_rate": 2.7126497950629864e-05, + "loss": 2.0085, + "step": 15810000 + }, + { + "epoch": 45.76, + "learning_rate": 2.7125774302982587e-05, + "loss": 2.0193, + "step": 15810500 + }, + { + "epoch": 45.77, + "learning_rate": 2.712505065533531e-05, + "loss": 2.0263, + "step": 15811000 + }, + { + "epoch": 45.77, + "learning_rate": 2.7124327007688035e-05, + "loss": 2.0286, + "step": 15811500 + }, + { + "epoch": 45.77, + "learning_rate": 2.7123603360040757e-05, + "loss": 2.0276, + "step": 15812000 + }, + { + "epoch": 45.77, + "learning_rate": 2.712287971239348e-05, + "loss": 2.0491, + "step": 15812500 + }, + { + "epoch": 45.77, + "learning_rate": 2.71221560647462e-05, + "loss": 2.0364, + "step": 15813000 + }, + { + "epoch": 45.77, + "learning_rate": 2.7121432417098924e-05, + "loss": 2.0487, + "step": 15813500 + }, + { + "epoch": 45.78, + "learning_rate": 2.7120708769451653e-05, + "loss": 2.0417, + "step": 15814000 + }, + { + "epoch": 45.78, + "learning_rate": 2.7119985121804375e-05, + "loss": 2.0279, + "step": 15814500 + }, + { + "epoch": 45.78, + "learning_rate": 2.7119261474157097e-05, + "loss": 2.0206, + "step": 15815000 + }, + { + "epoch": 45.78, + "learning_rate": 2.7118537826509823e-05, + "loss": 2.0369, + "step": 15815500 + }, + { + "epoch": 45.78, + "learning_rate": 2.7117814178862545e-05, + "loss": 2.0275, + "step": 15816000 + }, + { + "epoch": 45.78, + "learning_rate": 2.7117090531215267e-05, + "loss": 2.017, + "step": 15816500 + }, + { + "epoch": 45.78, + "learning_rate": 2.711636688356799e-05, + "loss": 2.0494, + "step": 15817000 + }, + { + "epoch": 45.79, + "learning_rate": 2.711564323592071e-05, + "loss": 2.0405, + "step": 15817500 + }, + { + "epoch": 45.79, + "learning_rate": 2.711492103556873e-05, + "loss": 2.0324, + "step": 15818000 + }, + { + "epoch": 45.79, + "learning_rate": 2.7114197387921453e-05, + "loss": 2.0427, + "step": 15818500 + }, + { + "epoch": 45.79, + "learning_rate": 2.7113473740274175e-05, + "loss": 2.0188, + "step": 15819000 + }, + { + "epoch": 45.79, + "learning_rate": 2.7112750092626897e-05, + "loss": 2.0234, + "step": 15819500 + }, + { + "epoch": 45.79, + "learning_rate": 2.7112026444979623e-05, + "loss": 2.0519, + "step": 15820000 + }, + { + "epoch": 45.79, + "learning_rate": 2.711130424462764e-05, + "loss": 2.024, + "step": 15820500 + }, + { + "epoch": 45.8, + "learning_rate": 2.711058059698036e-05, + "loss": 2.0285, + "step": 15821000 + }, + { + "epoch": 45.8, + "learning_rate": 2.710985694933309e-05, + "loss": 2.0311, + "step": 15821500 + }, + { + "epoch": 45.8, + "learning_rate": 2.7109133301685812e-05, + "loss": 2.0303, + "step": 15822000 + }, + { + "epoch": 45.8, + "learning_rate": 2.710841110133383e-05, + "loss": 2.0245, + "step": 15822500 + }, + { + "epoch": 45.8, + "learning_rate": 2.7107687453686553e-05, + "loss": 2.013, + "step": 15823000 + }, + { + "epoch": 45.8, + "learning_rate": 2.7106965253334572e-05, + "loss": 2.0419, + "step": 15823500 + }, + { + "epoch": 45.8, + "learning_rate": 2.7106241605687294e-05, + "loss": 2.0237, + "step": 15824000 + }, + { + "epoch": 45.81, + "learning_rate": 2.7105517958040016e-05, + "loss": 1.9835, + "step": 15824500 + }, + { + "epoch": 45.81, + "learning_rate": 2.710479431039274e-05, + "loss": 2.0439, + "step": 15825000 + }, + { + "epoch": 45.81, + "learning_rate": 2.710407066274546e-05, + "loss": 2.0395, + "step": 15825500 + }, + { + "epoch": 45.81, + "learning_rate": 2.7103347015098187e-05, + "loss": 2.041, + "step": 15826000 + }, + { + "epoch": 45.81, + "learning_rate": 2.7102624814746202e-05, + "loss": 2.0132, + "step": 15826500 + }, + { + "epoch": 45.81, + "learning_rate": 2.7101901167098924e-05, + "loss": 2.0173, + "step": 15827000 + }, + { + "epoch": 45.81, + "learning_rate": 2.7101177519451647e-05, + "loss": 2.0405, + "step": 15827500 + }, + { + "epoch": 45.82, + "learning_rate": 2.7100453871804372e-05, + "loss": 2.0335, + "step": 15828000 + }, + { + "epoch": 45.82, + "learning_rate": 2.7099730224157094e-05, + "loss": 2.0294, + "step": 15828500 + }, + { + "epoch": 45.82, + "learning_rate": 2.7099006576509823e-05, + "loss": 2.0333, + "step": 15829000 + }, + { + "epoch": 45.82, + "learning_rate": 2.7098282928862546e-05, + "loss": 2.0319, + "step": 15829500 + }, + { + "epoch": 45.82, + "learning_rate": 2.7097559281215268e-05, + "loss": 2.0249, + "step": 15830000 + }, + { + "epoch": 45.82, + "learning_rate": 2.709683563356799e-05, + "loss": 2.0293, + "step": 15830500 + }, + { + "epoch": 45.82, + "learning_rate": 2.7096111985920712e-05, + "loss": 2.036, + "step": 15831000 + }, + { + "epoch": 45.83, + "learning_rate": 2.7095388338273438e-05, + "loss": 2.048, + "step": 15831500 + }, + { + "epoch": 45.83, + "learning_rate": 2.709466469062616e-05, + "loss": 2.0318, + "step": 15832000 + }, + { + "epoch": 45.83, + "learning_rate": 2.7093943937569472e-05, + "loss": 2.0185, + "step": 15832500 + }, + { + "epoch": 45.83, + "learning_rate": 2.7093220289922195e-05, + "loss": 2.0232, + "step": 15833000 + }, + { + "epoch": 45.83, + "learning_rate": 2.7092496642274917e-05, + "loss": 2.0241, + "step": 15833500 + }, + { + "epoch": 45.83, + "learning_rate": 2.709177299462764e-05, + "loss": 2.0398, + "step": 15834000 + }, + { + "epoch": 45.83, + "learning_rate": 2.709104934698036e-05, + "loss": 1.9963, + "step": 15834500 + }, + { + "epoch": 45.84, + "learning_rate": 2.7090325699333087e-05, + "loss": 2.0288, + "step": 15835000 + }, + { + "epoch": 45.84, + "learning_rate": 2.708960205168581e-05, + "loss": 2.0099, + "step": 15835500 + }, + { + "epoch": 45.84, + "learning_rate": 2.7088878404038538e-05, + "loss": 2.0276, + "step": 15836000 + }, + { + "epoch": 45.84, + "learning_rate": 2.7088156203686554e-05, + "loss": 2.0532, + "step": 15836500 + }, + { + "epoch": 45.84, + "learning_rate": 2.7087432556039276e-05, + "loss": 2.0514, + "step": 15837000 + }, + { + "epoch": 45.84, + "learning_rate": 2.7086708908392e-05, + "loss": 2.0098, + "step": 15837500 + }, + { + "epoch": 45.84, + "learning_rate": 2.7085985260744724e-05, + "loss": 2.0243, + "step": 15838000 + }, + { + "epoch": 45.85, + "learning_rate": 2.7085261613097446e-05, + "loss": 2.0201, + "step": 15838500 + }, + { + "epoch": 45.85, + "learning_rate": 2.708453796545017e-05, + "loss": 2.0177, + "step": 15839000 + }, + { + "epoch": 45.85, + "learning_rate": 2.708381431780289e-05, + "loss": 2.0482, + "step": 15839500 + }, + { + "epoch": 45.85, + "learning_rate": 2.7083090670155613e-05, + "loss": 2.0139, + "step": 15840000 + }, + { + "epoch": 45.85, + "learning_rate": 2.7082368469803632e-05, + "loss": 2.027, + "step": 15840500 + }, + { + "epoch": 45.85, + "learning_rate": 2.7081644822156354e-05, + "loss": 2.0169, + "step": 15841000 + }, + { + "epoch": 45.85, + "learning_rate": 2.7080921174509076e-05, + "loss": 2.0378, + "step": 15841500 + }, + { + "epoch": 45.86, + "learning_rate": 2.7080198974157095e-05, + "loss": 2.0375, + "step": 15842000 + }, + { + "epoch": 45.86, + "learning_rate": 2.7079475326509817e-05, + "loss": 2.0083, + "step": 15842500 + }, + { + "epoch": 45.86, + "learning_rate": 2.707875167886254e-05, + "loss": 2.0235, + "step": 15843000 + }, + { + "epoch": 45.86, + "learning_rate": 2.7078028031215262e-05, + "loss": 2.0512, + "step": 15843500 + }, + { + "epoch": 45.86, + "learning_rate": 2.707730438356799e-05, + "loss": 2.0053, + "step": 15844000 + }, + { + "epoch": 45.86, + "learning_rate": 2.7076580735920713e-05, + "loss": 2.034, + "step": 15844500 + }, + { + "epoch": 45.86, + "learning_rate": 2.707585708827344e-05, + "loss": 2.021, + "step": 15845000 + }, + { + "epoch": 45.87, + "learning_rate": 2.707513344062616e-05, + "loss": 2.0365, + "step": 15845500 + }, + { + "epoch": 45.87, + "learning_rate": 2.7074409792978883e-05, + "loss": 2.0238, + "step": 15846000 + }, + { + "epoch": 45.87, + "learning_rate": 2.7073686145331605e-05, + "loss": 2.0365, + "step": 15846500 + }, + { + "epoch": 45.87, + "learning_rate": 2.7072963944979624e-05, + "loss": 2.0183, + "step": 15847000 + }, + { + "epoch": 45.87, + "learning_rate": 2.7072240297332347e-05, + "loss": 2.0268, + "step": 15847500 + }, + { + "epoch": 45.87, + "learning_rate": 2.7071518096980366e-05, + "loss": 2.0096, + "step": 15848000 + }, + { + "epoch": 45.87, + "learning_rate": 2.7070794449333088e-05, + "loss": 2.0496, + "step": 15848500 + }, + { + "epoch": 45.88, + "learning_rate": 2.707007080168581e-05, + "loss": 2.0309, + "step": 15849000 + }, + { + "epoch": 45.88, + "learning_rate": 2.7069347154038532e-05, + "loss": 2.0186, + "step": 15849500 + }, + { + "epoch": 45.88, + "learning_rate": 2.7068623506391255e-05, + "loss": 2.041, + "step": 15850000 + }, + { + "epoch": 45.88, + "learning_rate": 2.7067899858743977e-05, + "loss": 2.0364, + "step": 15850500 + }, + { + "epoch": 45.88, + "learning_rate": 2.7067176211096706e-05, + "loss": 2.0244, + "step": 15851000 + }, + { + "epoch": 45.88, + "learning_rate": 2.7066452563449428e-05, + "loss": 1.9888, + "step": 15851500 + }, + { + "epoch": 45.89, + "learning_rate": 2.7065728915802154e-05, + "loss": 2.0394, + "step": 15852000 + }, + { + "epoch": 45.89, + "learning_rate": 2.7065005268154876e-05, + "loss": 2.0155, + "step": 15852500 + }, + { + "epoch": 45.89, + "learning_rate": 2.706428306780289e-05, + "loss": 2.0015, + "step": 15853000 + }, + { + "epoch": 45.89, + "learning_rate": 2.706356086745091e-05, + "loss": 2.0178, + "step": 15853500 + }, + { + "epoch": 45.89, + "learning_rate": 2.7062837219803633e-05, + "loss": 2.0217, + "step": 15854000 + }, + { + "epoch": 45.89, + "learning_rate": 2.7062113572156355e-05, + "loss": 2.0196, + "step": 15854500 + }, + { + "epoch": 45.89, + "learning_rate": 2.7061389924509077e-05, + "loss": 2.0111, + "step": 15855000 + }, + { + "epoch": 45.9, + "learning_rate": 2.7060666276861803e-05, + "loss": 2.0334, + "step": 15855500 + }, + { + "epoch": 45.9, + "learning_rate": 2.7059942629214525e-05, + "loss": 2.0317, + "step": 15856000 + }, + { + "epoch": 45.9, + "learning_rate": 2.7059218981567247e-05, + "loss": 2.0198, + "step": 15856500 + }, + { + "epoch": 45.9, + "learning_rate": 2.705849533391997e-05, + "loss": 2.0246, + "step": 15857000 + }, + { + "epoch": 45.9, + "learning_rate": 2.705777168627269e-05, + "loss": 2.0468, + "step": 15857500 + }, + { + "epoch": 45.9, + "learning_rate": 2.7057048038625414e-05, + "loss": 2.0407, + "step": 15858000 + }, + { + "epoch": 45.9, + "learning_rate": 2.7056324390978143e-05, + "loss": 2.0446, + "step": 15858500 + }, + { + "epoch": 45.91, + "learning_rate": 2.7055602190626162e-05, + "loss": 2.0012, + "step": 15859000 + }, + { + "epoch": 45.91, + "learning_rate": 2.7054879990274177e-05, + "loss": 2.0082, + "step": 15859500 + }, + { + "epoch": 45.91, + "learning_rate": 2.7054156342626903e-05, + "loss": 2.0313, + "step": 15860000 + }, + { + "epoch": 45.91, + "learning_rate": 2.7053432694979625e-05, + "loss": 2.0259, + "step": 15860500 + }, + { + "epoch": 45.91, + "learning_rate": 2.7052709047332347e-05, + "loss": 2.0078, + "step": 15861000 + }, + { + "epoch": 45.91, + "learning_rate": 2.7051986846980366e-05, + "loss": 2.0017, + "step": 15861500 + }, + { + "epoch": 45.91, + "learning_rate": 2.705126319933309e-05, + "loss": 2.0401, + "step": 15862000 + }, + { + "epoch": 45.92, + "learning_rate": 2.705053955168581e-05, + "loss": 2.0483, + "step": 15862500 + }, + { + "epoch": 45.92, + "learning_rate": 2.7049815904038533e-05, + "loss": 2.0086, + "step": 15863000 + }, + { + "epoch": 45.92, + "learning_rate": 2.7049092256391255e-05, + "loss": 2.0355, + "step": 15863500 + }, + { + "epoch": 45.92, + "learning_rate": 2.7048368608743978e-05, + "loss": 2.0418, + "step": 15864000 + }, + { + "epoch": 45.92, + "learning_rate": 2.7047646408391996e-05, + "loss": 2.0066, + "step": 15864500 + }, + { + "epoch": 45.92, + "learning_rate": 2.704692276074472e-05, + "loss": 2.0252, + "step": 15865000 + }, + { + "epoch": 45.92, + "learning_rate": 2.704619911309744e-05, + "loss": 2.0377, + "step": 15865500 + }, + { + "epoch": 45.93, + "learning_rate": 2.704547546545017e-05, + "loss": 2.0401, + "step": 15866000 + }, + { + "epoch": 45.93, + "learning_rate": 2.7044751817802892e-05, + "loss": 2.0403, + "step": 15866500 + }, + { + "epoch": 45.93, + "learning_rate": 2.704402961745091e-05, + "loss": 2.0631, + "step": 15867000 + }, + { + "epoch": 45.93, + "learning_rate": 2.7043305969803633e-05, + "loss": 2.0179, + "step": 15867500 + }, + { + "epoch": 45.93, + "learning_rate": 2.7042582322156356e-05, + "loss": 2.0401, + "step": 15868000 + }, + { + "epoch": 45.93, + "learning_rate": 2.704185867450908e-05, + "loss": 2.0209, + "step": 15868500 + }, + { + "epoch": 45.93, + "learning_rate": 2.7041135026861803e-05, + "loss": 2.0095, + "step": 15869000 + }, + { + "epoch": 45.94, + "learning_rate": 2.7040411379214526e-05, + "loss": 2.0352, + "step": 15869500 + }, + { + "epoch": 45.94, + "learning_rate": 2.7039687731567248e-05, + "loss": 2.0303, + "step": 15870000 + }, + { + "epoch": 45.94, + "learning_rate": 2.703896408391997e-05, + "loss": 2.0351, + "step": 15870500 + }, + { + "epoch": 45.94, + "learning_rate": 2.7038240436272692e-05, + "loss": 2.0017, + "step": 15871000 + }, + { + "epoch": 45.94, + "learning_rate": 2.7037516788625418e-05, + "loss": 2.0409, + "step": 15871500 + }, + { + "epoch": 45.94, + "learning_rate": 2.703679314097814e-05, + "loss": 2.0493, + "step": 15872000 + }, + { + "epoch": 45.94, + "learning_rate": 2.7036069493330862e-05, + "loss": 2.0444, + "step": 15872500 + }, + { + "epoch": 45.95, + "learning_rate": 2.7035345845683585e-05, + "loss": 2.0073, + "step": 15873000 + }, + { + "epoch": 45.95, + "learning_rate": 2.7034623645331607e-05, + "loss": 2.0462, + "step": 15873500 + }, + { + "epoch": 45.95, + "learning_rate": 2.7033901444979626e-05, + "loss": 2.0324, + "step": 15874000 + }, + { + "epoch": 45.95, + "learning_rate": 2.7033177797332348e-05, + "loss": 2.0257, + "step": 15874500 + }, + { + "epoch": 45.95, + "learning_rate": 2.703245414968507e-05, + "loss": 2.0121, + "step": 15875000 + }, + { + "epoch": 45.95, + "learning_rate": 2.7031730502037793e-05, + "loss": 2.0352, + "step": 15875500 + }, + { + "epoch": 45.95, + "learning_rate": 2.703100685439052e-05, + "loss": 2.0278, + "step": 15876000 + }, + { + "epoch": 45.96, + "learning_rate": 2.7030284654038534e-05, + "loss": 2.0306, + "step": 15876500 + }, + { + "epoch": 45.96, + "learning_rate": 2.7029562453686553e-05, + "loss": 2.0268, + "step": 15877000 + }, + { + "epoch": 45.96, + "learning_rate": 2.702884314792516e-05, + "loss": 2.0432, + "step": 15877500 + }, + { + "epoch": 45.96, + "learning_rate": 2.702811950027788e-05, + "loss": 2.0415, + "step": 15878000 + }, + { + "epoch": 45.96, + "learning_rate": 2.7027395852630606e-05, + "loss": 2.046, + "step": 15878500 + }, + { + "epoch": 45.96, + "learning_rate": 2.702667220498333e-05, + "loss": 2.0302, + "step": 15879000 + }, + { + "epoch": 45.96, + "learning_rate": 2.702594855733605e-05, + "loss": 2.0348, + "step": 15879500 + }, + { + "epoch": 45.97, + "learning_rate": 2.7025224909688773e-05, + "loss": 2.022, + "step": 15880000 + }, + { + "epoch": 45.97, + "learning_rate": 2.7024501262041495e-05, + "loss": 2.0384, + "step": 15880500 + }, + { + "epoch": 45.97, + "learning_rate": 2.7023777614394217e-05, + "loss": 2.0358, + "step": 15881000 + }, + { + "epoch": 45.97, + "learning_rate": 2.7023055414042236e-05, + "loss": 2.0211, + "step": 15881500 + }, + { + "epoch": 45.97, + "learning_rate": 2.7022331766394965e-05, + "loss": 2.0233, + "step": 15882000 + }, + { + "epoch": 45.97, + "learning_rate": 2.7021608118747688e-05, + "loss": 2.0589, + "step": 15882500 + }, + { + "epoch": 45.97, + "learning_rate": 2.702088447110041e-05, + "loss": 2.0349, + "step": 15883000 + }, + { + "epoch": 45.98, + "learning_rate": 2.7020160823453132e-05, + "loss": 2.0187, + "step": 15883500 + }, + { + "epoch": 45.98, + "learning_rate": 2.7019437175805858e-05, + "loss": 2.0231, + "step": 15884000 + }, + { + "epoch": 45.98, + "learning_rate": 2.701871352815858e-05, + "loss": 2.0444, + "step": 15884500 + }, + { + "epoch": 45.98, + "learning_rate": 2.7017989880511302e-05, + "loss": 2.0273, + "step": 15885000 + }, + { + "epoch": 45.98, + "learning_rate": 2.7017266232864024e-05, + "loss": 2.0156, + "step": 15885500 + }, + { + "epoch": 45.98, + "learning_rate": 2.7016542585216747e-05, + "loss": 1.9957, + "step": 15886000 + }, + { + "epoch": 45.98, + "learning_rate": 2.701581893756947e-05, + "loss": 2.0297, + "step": 15886500 + }, + { + "epoch": 45.99, + "learning_rate": 2.7015095289922194e-05, + "loss": 2.0135, + "step": 15887000 + }, + { + "epoch": 45.99, + "learning_rate": 2.7014371642274917e-05, + "loss": 2.0436, + "step": 15887500 + }, + { + "epoch": 45.99, + "learning_rate": 2.701364799462764e-05, + "loss": 2.009, + "step": 15888000 + }, + { + "epoch": 45.99, + "learning_rate": 2.701292434698036e-05, + "loss": 2.0197, + "step": 15888500 + }, + { + "epoch": 45.99, + "learning_rate": 2.701220069933309e-05, + "loss": 2.0243, + "step": 15889000 + }, + { + "epoch": 45.99, + "learning_rate": 2.7011477051685812e-05, + "loss": 2.0323, + "step": 15889500 + }, + { + "epoch": 46.0, + "learning_rate": 2.701075485133383e-05, + "loss": 2.0312, + "step": 15890000 + }, + { + "epoch": 46.0, + "learning_rate": 2.7010032650981847e-05, + "loss": 2.0394, + "step": 15890500 + }, + { + "epoch": 46.0, + "learning_rate": 2.700930900333457e-05, + "loss": 2.0192, + "step": 15891000 + }, + { + "epoch": 46.0, + "learning_rate": 2.7008585355687295e-05, + "loss": 2.0276, + "step": 15891500 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.6718168801260671, + "eval_accuracy_mlm": 0.6375680241316996, + "eval_accuracy_nsp": 0.8556094833473118, + "eval_loss": 2.1730716228485107, + "eval_runtime": 331.3659, + "eval_samples_per_second": 1316.931, + "eval_steps_per_second": 54.873, + "step": 15891712 + }, + { + "epoch": 46.0, + "learning_rate": 2.7007861708040017e-05, + "loss": 1.9985, + "step": 15892000 + }, + { + "epoch": 46.0, + "learning_rate": 2.700713806039274e-05, + "loss": 2.0023, + "step": 15892500 + }, + { + "epoch": 46.0, + "learning_rate": 2.700641441274546e-05, + "loss": 2.035, + "step": 15893000 + }, + { + "epoch": 46.01, + "learning_rate": 2.7005690765098184e-05, + "loss": 1.9903, + "step": 15893500 + }, + { + "epoch": 46.01, + "learning_rate": 2.700496711745091e-05, + "loss": 1.9988, + "step": 15894000 + }, + { + "epoch": 46.01, + "learning_rate": 2.700424346980363e-05, + "loss": 1.9995, + "step": 15894500 + }, + { + "epoch": 46.01, + "learning_rate": 2.7003519822156354e-05, + "loss": 1.9948, + "step": 15895000 + }, + { + "epoch": 46.01, + "learning_rate": 2.7002796174509076e-05, + "loss": 1.9956, + "step": 15895500 + }, + { + "epoch": 46.01, + "learning_rate": 2.7002072526861805e-05, + "loss": 1.993, + "step": 15896000 + }, + { + "epoch": 46.01, + "learning_rate": 2.7001348879214527e-05, + "loss": 2.01, + "step": 15896500 + }, + { + "epoch": 46.02, + "learning_rate": 2.700062523156725e-05, + "loss": 2.0034, + "step": 15897000 + }, + { + "epoch": 46.02, + "learning_rate": 2.6999901583919972e-05, + "loss": 2.0214, + "step": 15897500 + }, + { + "epoch": 46.02, + "learning_rate": 2.6999177936272697e-05, + "loss": 2.0176, + "step": 15898000 + }, + { + "epoch": 46.02, + "learning_rate": 2.699845428862542e-05, + "loss": 2.0045, + "step": 15898500 + }, + { + "epoch": 46.02, + "learning_rate": 2.6997732088273435e-05, + "loss": 1.9933, + "step": 15899000 + }, + { + "epoch": 46.02, + "learning_rate": 2.699700844062616e-05, + "loss": 1.9905, + "step": 15899500 + }, + { + "epoch": 46.02, + "learning_rate": 2.6996284792978883e-05, + "loss": 2.0168, + "step": 15900000 + }, + { + "epoch": 46.03, + "learning_rate": 2.69955625926269e-05, + "loss": 2.0133, + "step": 15900500 + }, + { + "epoch": 46.03, + "learning_rate": 2.6994840392274918e-05, + "loss": 2.0365, + "step": 15901000 + }, + { + "epoch": 46.03, + "learning_rate": 2.699411674462764e-05, + "loss": 2.0242, + "step": 15901500 + }, + { + "epoch": 46.03, + "learning_rate": 2.6993393096980362e-05, + "loss": 2.0142, + "step": 15902000 + }, + { + "epoch": 46.03, + "learning_rate": 2.699267089662838e-05, + "loss": 2.0126, + "step": 15902500 + }, + { + "epoch": 46.03, + "learning_rate": 2.6991948696276396e-05, + "loss": 1.9939, + "step": 15903000 + }, + { + "epoch": 46.03, + "learning_rate": 2.6991225048629122e-05, + "loss": 2.0054, + "step": 15903500 + }, + { + "epoch": 46.04, + "learning_rate": 2.6990501400981848e-05, + "loss": 1.9977, + "step": 15904000 + }, + { + "epoch": 46.04, + "learning_rate": 2.6989777753334573e-05, + "loss": 2.0106, + "step": 15904500 + }, + { + "epoch": 46.04, + "learning_rate": 2.6989054105687296e-05, + "loss": 2.0104, + "step": 15905000 + }, + { + "epoch": 46.04, + "learning_rate": 2.6988330458040018e-05, + "loss": 2.0162, + "step": 15905500 + }, + { + "epoch": 46.04, + "learning_rate": 2.698760681039274e-05, + "loss": 2.0092, + "step": 15906000 + }, + { + "epoch": 46.04, + "learning_rate": 2.6986883162745462e-05, + "loss": 2.0157, + "step": 15906500 + }, + { + "epoch": 46.04, + "learning_rate": 2.6986159515098185e-05, + "loss": 2.0011, + "step": 15907000 + }, + { + "epoch": 46.05, + "learning_rate": 2.698543586745091e-05, + "loss": 2.0162, + "step": 15907500 + }, + { + "epoch": 46.05, + "learning_rate": 2.6984712219803632e-05, + "loss": 2.0117, + "step": 15908000 + }, + { + "epoch": 46.05, + "learning_rate": 2.6983988572156355e-05, + "loss": 2.0073, + "step": 15908500 + }, + { + "epoch": 46.05, + "learning_rate": 2.6983266371804374e-05, + "loss": 2.0058, + "step": 15909000 + }, + { + "epoch": 46.05, + "learning_rate": 2.6982542724157096e-05, + "loss": 1.9911, + "step": 15909500 + }, + { + "epoch": 46.05, + "learning_rate": 2.6981819076509818e-05, + "loss": 1.9951, + "step": 15910000 + }, + { + "epoch": 46.05, + "learning_rate": 2.6981096876157837e-05, + "loss": 2.0266, + "step": 15910500 + }, + { + "epoch": 46.06, + "learning_rate": 2.698037322851056e-05, + "loss": 2.0092, + "step": 15911000 + }, + { + "epoch": 46.06, + "learning_rate": 2.6979649580863288e-05, + "loss": 2.0122, + "step": 15911500 + }, + { + "epoch": 46.06, + "learning_rate": 2.697892593321601e-05, + "loss": 2.0094, + "step": 15912000 + }, + { + "epoch": 46.06, + "learning_rate": 2.6978202285568733e-05, + "loss": 2.0108, + "step": 15912500 + }, + { + "epoch": 46.06, + "learning_rate": 2.6977478637921455e-05, + "loss": 2.0112, + "step": 15913000 + }, + { + "epoch": 46.06, + "learning_rate": 2.6976754990274177e-05, + "loss": 1.9968, + "step": 15913500 + }, + { + "epoch": 46.06, + "learning_rate": 2.69760313426269e-05, + "loss": 1.9882, + "step": 15914000 + }, + { + "epoch": 46.07, + "learning_rate": 2.6975307694979625e-05, + "loss": 2.0421, + "step": 15914500 + }, + { + "epoch": 46.07, + "learning_rate": 2.6974584047332347e-05, + "loss": 2.0322, + "step": 15915000 + }, + { + "epoch": 46.07, + "learning_rate": 2.697386039968507e-05, + "loss": 2.0063, + "step": 15915500 + }, + { + "epoch": 46.07, + "learning_rate": 2.6973136752037792e-05, + "loss": 1.9937, + "step": 15916000 + }, + { + "epoch": 46.07, + "learning_rate": 2.6972413104390514e-05, + "loss": 2.0137, + "step": 15916500 + }, + { + "epoch": 46.07, + "learning_rate": 2.6971689456743236e-05, + "loss": 2.0021, + "step": 15917000 + }, + { + "epoch": 46.07, + "learning_rate": 2.6970965809095962e-05, + "loss": 2.0094, + "step": 15917500 + }, + { + "epoch": 46.08, + "learning_rate": 2.6970242161448684e-05, + "loss": 2.0142, + "step": 15918000 + }, + { + "epoch": 46.08, + "learning_rate": 2.6969519961096706e-05, + "loss": 2.0127, + "step": 15918500 + }, + { + "epoch": 46.08, + "learning_rate": 2.6968797760744725e-05, + "loss": 2.0056, + "step": 15919000 + }, + { + "epoch": 46.08, + "learning_rate": 2.6968074113097448e-05, + "loss": 2.038, + "step": 15919500 + }, + { + "epoch": 46.08, + "learning_rate": 2.696735046545017e-05, + "loss": 2.0184, + "step": 15920000 + }, + { + "epoch": 46.08, + "learning_rate": 2.6966626817802892e-05, + "loss": 2.0286, + "step": 15920500 + }, + { + "epoch": 46.08, + "learning_rate": 2.696590461745091e-05, + "loss": 2.0169, + "step": 15921000 + }, + { + "epoch": 46.09, + "learning_rate": 2.6965180969803633e-05, + "loss": 2.0142, + "step": 15921500 + }, + { + "epoch": 46.09, + "learning_rate": 2.6964457322156355e-05, + "loss": 1.9985, + "step": 15922000 + }, + { + "epoch": 46.09, + "learning_rate": 2.6963733674509078e-05, + "loss": 2.0167, + "step": 15922500 + }, + { + "epoch": 46.09, + "learning_rate": 2.69630100268618e-05, + "loss": 2.0377, + "step": 15923000 + }, + { + "epoch": 46.09, + "learning_rate": 2.6962286379214525e-05, + "loss": 2.0031, + "step": 15923500 + }, + { + "epoch": 46.09, + "learning_rate": 2.6961562731567248e-05, + "loss": 2.0127, + "step": 15924000 + }, + { + "epoch": 46.09, + "learning_rate": 2.6960840531215263e-05, + "loss": 2.0001, + "step": 15924500 + }, + { + "epoch": 46.1, + "learning_rate": 2.696011688356799e-05, + "loss": 2.0058, + "step": 15925000 + }, + { + "epoch": 46.1, + "learning_rate": 2.695939323592071e-05, + "loss": 2.0104, + "step": 15925500 + }, + { + "epoch": 46.1, + "learning_rate": 2.695866958827344e-05, + "loss": 2.0137, + "step": 15926000 + }, + { + "epoch": 46.1, + "learning_rate": 2.6957945940626162e-05, + "loss": 2.0023, + "step": 15926500 + }, + { + "epoch": 46.1, + "learning_rate": 2.6957222292978885e-05, + "loss": 2.0173, + "step": 15927000 + }, + { + "epoch": 46.1, + "learning_rate": 2.6956498645331607e-05, + "loss": 2.0341, + "step": 15927500 + }, + { + "epoch": 46.11, + "learning_rate": 2.695577499768433e-05, + "loss": 1.9917, + "step": 15928000 + }, + { + "epoch": 46.11, + "learning_rate": 2.695505135003705e-05, + "loss": 2.0276, + "step": 15928500 + }, + { + "epoch": 46.11, + "learning_rate": 2.6954327702389777e-05, + "loss": 2.0088, + "step": 15929000 + }, + { + "epoch": 46.11, + "learning_rate": 2.69536040547425e-05, + "loss": 2.0115, + "step": 15929500 + }, + { + "epoch": 46.11, + "learning_rate": 2.6952881854390515e-05, + "loss": 2.0278, + "step": 15930000 + }, + { + "epoch": 46.11, + "learning_rate": 2.6952159654038534e-05, + "loss": 2.0032, + "step": 15930500 + }, + { + "epoch": 46.11, + "learning_rate": 2.6951436006391256e-05, + "loss": 2.0107, + "step": 15931000 + }, + { + "epoch": 46.12, + "learning_rate": 2.6950712358743978e-05, + "loss": 2.0136, + "step": 15931500 + }, + { + "epoch": 46.12, + "learning_rate": 2.69499887110967e-05, + "loss": 2.0139, + "step": 15932000 + }, + { + "epoch": 46.12, + "learning_rate": 2.6949265063449426e-05, + "loss": 2.0094, + "step": 15932500 + }, + { + "epoch": 46.12, + "learning_rate": 2.6948541415802148e-05, + "loss": 2.0302, + "step": 15933000 + }, + { + "epoch": 46.12, + "learning_rate": 2.6947817768154877e-05, + "loss": 2.0268, + "step": 15933500 + }, + { + "epoch": 46.12, + "learning_rate": 2.69470941205076e-05, + "loss": 2.0146, + "step": 15934000 + }, + { + "epoch": 46.12, + "learning_rate": 2.6946371920155615e-05, + "loss": 2.0107, + "step": 15934500 + }, + { + "epoch": 46.13, + "learning_rate": 2.694564827250834e-05, + "loss": 2.0306, + "step": 15935000 + }, + { + "epoch": 46.13, + "learning_rate": 2.6944924624861063e-05, + "loss": 2.0149, + "step": 15935500 + }, + { + "epoch": 46.13, + "learning_rate": 2.6944200977213785e-05, + "loss": 2.0089, + "step": 15936000 + }, + { + "epoch": 46.13, + "learning_rate": 2.6943478776861804e-05, + "loss": 2.0092, + "step": 15936500 + }, + { + "epoch": 46.13, + "learning_rate": 2.694275657650982e-05, + "loss": 2.03, + "step": 15937000 + }, + { + "epoch": 46.13, + "learning_rate": 2.6942032928862542e-05, + "loss": 1.9878, + "step": 15937500 + }, + { + "epoch": 46.13, + "learning_rate": 2.6941309281215264e-05, + "loss": 2.003, + "step": 15938000 + }, + { + "epoch": 46.14, + "learning_rate": 2.6940587080863283e-05, + "loss": 2.0201, + "step": 15938500 + }, + { + "epoch": 46.14, + "learning_rate": 2.6939863433216005e-05, + "loss": 2.0296, + "step": 15939000 + }, + { + "epoch": 46.14, + "learning_rate": 2.6939139785568727e-05, + "loss": 2.0137, + "step": 15939500 + }, + { + "epoch": 46.14, + "learning_rate": 2.6938416137921453e-05, + "loss": 2.0063, + "step": 15940000 + }, + { + "epoch": 46.14, + "learning_rate": 2.6937692490274175e-05, + "loss": 2.0145, + "step": 15940500 + }, + { + "epoch": 46.14, + "learning_rate": 2.6936968842626904e-05, + "loss": 2.0143, + "step": 15941000 + }, + { + "epoch": 46.14, + "learning_rate": 2.6936245194979627e-05, + "loss": 1.9999, + "step": 15941500 + }, + { + "epoch": 46.15, + "learning_rate": 2.693552154733235e-05, + "loss": 2.0374, + "step": 15942000 + }, + { + "epoch": 46.15, + "learning_rate": 2.693479789968507e-05, + "loss": 2.0146, + "step": 15942500 + }, + { + "epoch": 46.15, + "learning_rate": 2.6934074252037793e-05, + "loss": 2.0187, + "step": 15943000 + }, + { + "epoch": 46.15, + "learning_rate": 2.6933350604390516e-05, + "loss": 2.0102, + "step": 15943500 + }, + { + "epoch": 46.15, + "learning_rate": 2.693262695674324e-05, + "loss": 1.9936, + "step": 15944000 + }, + { + "epoch": 46.15, + "learning_rate": 2.6931903309095963e-05, + "loss": 2.0111, + "step": 15944500 + }, + { + "epoch": 46.15, + "learning_rate": 2.6931179661448686e-05, + "loss": 2.0024, + "step": 15945000 + }, + { + "epoch": 46.16, + "learning_rate": 2.6930457461096705e-05, + "loss": 2.021, + "step": 15945500 + }, + { + "epoch": 46.16, + "learning_rate": 2.6929733813449427e-05, + "loss": 2.0228, + "step": 15946000 + }, + { + "epoch": 46.16, + "learning_rate": 2.6929011613097442e-05, + "loss": 2.0188, + "step": 15946500 + }, + { + "epoch": 46.16, + "learning_rate": 2.6928287965450168e-05, + "loss": 2.0219, + "step": 15947000 + }, + { + "epoch": 46.16, + "learning_rate": 2.692756431780289e-05, + "loss": 2.005, + "step": 15947500 + }, + { + "epoch": 46.16, + "learning_rate": 2.6926840670155612e-05, + "loss": 2.0074, + "step": 15948000 + }, + { + "epoch": 46.16, + "learning_rate": 2.6926118469803628e-05, + "loss": 2.0188, + "step": 15948500 + }, + { + "epoch": 46.17, + "learning_rate": 2.6925394822156357e-05, + "loss": 2.0114, + "step": 15949000 + }, + { + "epoch": 46.17, + "learning_rate": 2.692467117450908e-05, + "loss": 2.006, + "step": 15949500 + }, + { + "epoch": 46.17, + "learning_rate": 2.6923947526861805e-05, + "loss": 1.9915, + "step": 15950000 + }, + { + "epoch": 46.17, + "learning_rate": 2.6923223879214527e-05, + "loss": 2.0103, + "step": 15950500 + }, + { + "epoch": 46.17, + "learning_rate": 2.6922501678862543e-05, + "loss": 1.9995, + "step": 15951000 + }, + { + "epoch": 46.17, + "learning_rate": 2.6921778031215268e-05, + "loss": 1.9913, + "step": 15951500 + }, + { + "epoch": 46.17, + "learning_rate": 2.692105438356799e-05, + "loss": 2.0216, + "step": 15952000 + }, + { + "epoch": 46.18, + "learning_rate": 2.6920330735920713e-05, + "loss": 1.9647, + "step": 15952500 + }, + { + "epoch": 46.18, + "learning_rate": 2.6919607088273435e-05, + "loss": 2.0086, + "step": 15953000 + }, + { + "epoch": 46.18, + "learning_rate": 2.6918883440626157e-05, + "loss": 2.0057, + "step": 15953500 + }, + { + "epoch": 46.18, + "learning_rate": 2.691815979297888e-05, + "loss": 2.0096, + "step": 15954000 + }, + { + "epoch": 46.18, + "learning_rate": 2.6917436145331605e-05, + "loss": 2.0168, + "step": 15954500 + }, + { + "epoch": 46.18, + "learning_rate": 2.6916712497684327e-05, + "loss": 2.0318, + "step": 15955000 + }, + { + "epoch": 46.18, + "learning_rate": 2.691598885003705e-05, + "loss": 2.0257, + "step": 15955500 + }, + { + "epoch": 46.19, + "learning_rate": 2.691526520238978e-05, + "loss": 2.0384, + "step": 15956000 + }, + { + "epoch": 46.19, + "learning_rate": 2.69145415547425e-05, + "loss": 2.0052, + "step": 15956500 + }, + { + "epoch": 46.19, + "learning_rate": 2.6913817907095223e-05, + "loss": 2.005, + "step": 15957000 + }, + { + "epoch": 46.19, + "learning_rate": 2.6913097154038535e-05, + "loss": 1.9957, + "step": 15957500 + }, + { + "epoch": 46.19, + "learning_rate": 2.6912374953686554e-05, + "loss": 2.0122, + "step": 15958000 + }, + { + "epoch": 46.19, + "learning_rate": 2.6911651306039276e-05, + "loss": 2.0174, + "step": 15958500 + }, + { + "epoch": 46.19, + "learning_rate": 2.6910927658392e-05, + "loss": 1.9995, + "step": 15959000 + }, + { + "epoch": 46.2, + "learning_rate": 2.691020401074472e-05, + "loss": 2.0227, + "step": 15959500 + }, + { + "epoch": 46.2, + "learning_rate": 2.6909480363097443e-05, + "loss": 2.007, + "step": 15960000 + }, + { + "epoch": 46.2, + "learning_rate": 2.690875671545017e-05, + "loss": 2.0094, + "step": 15960500 + }, + { + "epoch": 46.2, + "learning_rate": 2.690803306780289e-05, + "loss": 2.0037, + "step": 15961000 + }, + { + "epoch": 46.2, + "learning_rate": 2.6907309420155613e-05, + "loss": 2.0274, + "step": 15961500 + }, + { + "epoch": 46.2, + "learning_rate": 2.6906585772508335e-05, + "loss": 2.0305, + "step": 15962000 + }, + { + "epoch": 46.2, + "learning_rate": 2.6905863572156354e-05, + "loss": 2.0074, + "step": 15962500 + }, + { + "epoch": 46.21, + "learning_rate": 2.690514137180437e-05, + "loss": 2.0111, + "step": 15963000 + }, + { + "epoch": 46.21, + "learning_rate": 2.6904417724157092e-05, + "loss": 2.0154, + "step": 15963500 + }, + { + "epoch": 46.21, + "learning_rate": 2.690369407650982e-05, + "loss": 2.0065, + "step": 15964000 + }, + { + "epoch": 46.21, + "learning_rate": 2.6902970428862543e-05, + "loss": 2.0041, + "step": 15964500 + }, + { + "epoch": 46.21, + "learning_rate": 2.690224678121527e-05, + "loss": 2.0229, + "step": 15965000 + }, + { + "epoch": 46.21, + "learning_rate": 2.6901524580863285e-05, + "loss": 2.0535, + "step": 15965500 + }, + { + "epoch": 46.22, + "learning_rate": 2.6900800933216007e-05, + "loss": 2.0124, + "step": 15966000 + }, + { + "epoch": 46.22, + "learning_rate": 2.6900077285568732e-05, + "loss": 2.0179, + "step": 15966500 + }, + { + "epoch": 46.22, + "learning_rate": 2.6899353637921455e-05, + "loss": 2.0049, + "step": 15967000 + }, + { + "epoch": 46.22, + "learning_rate": 2.6898629990274177e-05, + "loss": 2.0434, + "step": 15967500 + }, + { + "epoch": 46.22, + "learning_rate": 2.6897907789922196e-05, + "loss": 2.0348, + "step": 15968000 + }, + { + "epoch": 46.22, + "learning_rate": 2.6897184142274918e-05, + "loss": 2.0111, + "step": 15968500 + }, + { + "epoch": 46.22, + "learning_rate": 2.6896461941922934e-05, + "loss": 2.0153, + "step": 15969000 + }, + { + "epoch": 46.23, + "learning_rate": 2.6895738294275656e-05, + "loss": 2.0025, + "step": 15969500 + }, + { + "epoch": 46.23, + "learning_rate": 2.689501464662838e-05, + "loss": 2.0005, + "step": 15970000 + }, + { + "epoch": 46.23, + "learning_rate": 2.6894290998981104e-05, + "loss": 1.997, + "step": 15970500 + }, + { + "epoch": 46.23, + "learning_rate": 2.6893567351333826e-05, + "loss": 2.0179, + "step": 15971000 + }, + { + "epoch": 46.23, + "learning_rate": 2.6892843703686555e-05, + "loss": 2.0, + "step": 15971500 + }, + { + "epoch": 46.23, + "learning_rate": 2.6892120056039277e-05, + "loss": 2.0334, + "step": 15972000 + }, + { + "epoch": 46.23, + "learning_rate": 2.6891396408392e-05, + "loss": 2.0032, + "step": 15972500 + }, + { + "epoch": 46.24, + "learning_rate": 2.6890672760744722e-05, + "loss": 2.0126, + "step": 15973000 + }, + { + "epoch": 46.24, + "learning_rate": 2.6889949113097447e-05, + "loss": 2.0198, + "step": 15973500 + }, + { + "epoch": 46.24, + "learning_rate": 2.688922546545017e-05, + "loss": 2.0013, + "step": 15974000 + }, + { + "epoch": 46.24, + "learning_rate": 2.6888501817802892e-05, + "loss": 2.0048, + "step": 15974500 + }, + { + "epoch": 46.24, + "learning_rate": 2.6887781064746204e-05, + "loss": 2.0135, + "step": 15975000 + }, + { + "epoch": 46.24, + "learning_rate": 2.6887057417098926e-05, + "loss": 2.0265, + "step": 15975500 + }, + { + "epoch": 46.24, + "learning_rate": 2.688633376945165e-05, + "loss": 2.0002, + "step": 15976000 + }, + { + "epoch": 46.25, + "learning_rate": 2.688561012180437e-05, + "loss": 2.0234, + "step": 15976500 + }, + { + "epoch": 46.25, + "learning_rate": 2.688488792145239e-05, + "loss": 2.0172, + "step": 15977000 + }, + { + "epoch": 46.25, + "learning_rate": 2.6884164273805112e-05, + "loss": 2.0148, + "step": 15977500 + }, + { + "epoch": 46.25, + "learning_rate": 2.6883440626157834e-05, + "loss": 2.0063, + "step": 15978000 + }, + { + "epoch": 46.25, + "learning_rate": 2.6882718425805853e-05, + "loss": 2.0123, + "step": 15978500 + }, + { + "epoch": 46.25, + "learning_rate": 2.6881994778158582e-05, + "loss": 2.0326, + "step": 15979000 + }, + { + "epoch": 46.25, + "learning_rate": 2.6881271130511304e-05, + "loss": 1.9967, + "step": 15979500 + }, + { + "epoch": 46.26, + "learning_rate": 2.6880547482864027e-05, + "loss": 2.0239, + "step": 15980000 + }, + { + "epoch": 46.26, + "learning_rate": 2.687982383521675e-05, + "loss": 2.0271, + "step": 15980500 + }, + { + "epoch": 46.26, + "learning_rate": 2.687910018756947e-05, + "loss": 2.0217, + "step": 15981000 + }, + { + "epoch": 46.26, + "learning_rate": 2.6878376539922197e-05, + "loss": 2.015, + "step": 15981500 + }, + { + "epoch": 46.26, + "learning_rate": 2.687765289227492e-05, + "loss": 2.0208, + "step": 15982000 + }, + { + "epoch": 46.26, + "learning_rate": 2.687692924462764e-05, + "loss": 2.0178, + "step": 15982500 + }, + { + "epoch": 46.26, + "learning_rate": 2.6876205596980363e-05, + "loss": 1.9887, + "step": 15983000 + }, + { + "epoch": 46.27, + "learning_rate": 2.6875481949333086e-05, + "loss": 2.0085, + "step": 15983500 + }, + { + "epoch": 46.27, + "learning_rate": 2.687475830168581e-05, + "loss": 2.0137, + "step": 15984000 + }, + { + "epoch": 46.27, + "learning_rate": 2.6874034654038533e-05, + "loss": 2.0159, + "step": 15984500 + }, + { + "epoch": 46.27, + "learning_rate": 2.6873311006391256e-05, + "loss": 2.0105, + "step": 15985000 + }, + { + "epoch": 46.27, + "learning_rate": 2.6872587358743978e-05, + "loss": 2.0174, + "step": 15985500 + }, + { + "epoch": 46.27, + "learning_rate": 2.6871865158391997e-05, + "loss": 2.0261, + "step": 15986000 + }, + { + "epoch": 46.27, + "learning_rate": 2.6871141510744723e-05, + "loss": 2.0242, + "step": 15986500 + }, + { + "epoch": 46.28, + "learning_rate": 2.687041931039274e-05, + "loss": 2.0059, + "step": 15987000 + }, + { + "epoch": 46.28, + "learning_rate": 2.6869695662745464e-05, + "loss": 2.0145, + "step": 15987500 + }, + { + "epoch": 46.28, + "learning_rate": 2.6868972015098186e-05, + "loss": 2.0448, + "step": 15988000 + }, + { + "epoch": 46.28, + "learning_rate": 2.686824836745091e-05, + "loss": 2.0079, + "step": 15988500 + }, + { + "epoch": 46.28, + "learning_rate": 2.6867524719803634e-05, + "loss": 2.0397, + "step": 15989000 + }, + { + "epoch": 46.28, + "learning_rate": 2.686680251945165e-05, + "loss": 2.0275, + "step": 15989500 + }, + { + "epoch": 46.28, + "learning_rate": 2.686607887180437e-05, + "loss": 2.0108, + "step": 15990000 + }, + { + "epoch": 46.29, + "learning_rate": 2.6865355224157097e-05, + "loss": 2.0304, + "step": 15990500 + }, + { + "epoch": 46.29, + "learning_rate": 2.686463157650982e-05, + "loss": 2.0043, + "step": 15991000 + }, + { + "epoch": 46.29, + "learning_rate": 2.6863909376157835e-05, + "loss": 1.9993, + "step": 15991500 + }, + { + "epoch": 46.29, + "learning_rate": 2.686318572851056e-05, + "loss": 1.9863, + "step": 15992000 + }, + { + "epoch": 46.29, + "learning_rate": 2.6862462080863283e-05, + "loss": 2.0138, + "step": 15992500 + }, + { + "epoch": 46.29, + "learning_rate": 2.6861738433216005e-05, + "loss": 1.989, + "step": 15993000 + }, + { + "epoch": 46.29, + "learning_rate": 2.6861014785568727e-05, + "loss": 2.0312, + "step": 15993500 + }, + { + "epoch": 46.3, + "learning_rate": 2.6860291137921456e-05, + "loss": 1.9926, + "step": 15994000 + }, + { + "epoch": 46.3, + "learning_rate": 2.685956749027418e-05, + "loss": 2.0391, + "step": 15994500 + }, + { + "epoch": 46.3, + "learning_rate": 2.68588438426269e-05, + "loss": 2.0125, + "step": 15995000 + }, + { + "epoch": 46.3, + "learning_rate": 2.6858120194979623e-05, + "loss": 2.0152, + "step": 15995500 + }, + { + "epoch": 46.3, + "learning_rate": 2.685739654733235e-05, + "loss": 2.0086, + "step": 15996000 + }, + { + "epoch": 46.3, + "learning_rate": 2.6856674346980364e-05, + "loss": 1.9981, + "step": 15996500 + }, + { + "epoch": 46.3, + "learning_rate": 2.6855952146628383e-05, + "loss": 2.0041, + "step": 15997000 + }, + { + "epoch": 46.31, + "learning_rate": 2.6855228498981105e-05, + "loss": 2.024, + "step": 15997500 + }, + { + "epoch": 46.31, + "learning_rate": 2.6854504851333828e-05, + "loss": 1.9922, + "step": 15998000 + }, + { + "epoch": 46.31, + "learning_rate": 2.685378120368655e-05, + "loss": 2.0332, + "step": 15998500 + }, + { + "epoch": 46.31, + "learning_rate": 2.685305900333457e-05, + "loss": 2.006, + "step": 15999000 + }, + { + "epoch": 46.31, + "learning_rate": 2.685233535568729e-05, + "loss": 1.9998, + "step": 15999500 + }, + { + "epoch": 46.31, + "learning_rate": 2.6851611708040013e-05, + "loss": 2.0511, + "step": 16000000 + }, + { + "epoch": 46.31, + "learning_rate": 2.6850888060392735e-05, + "loss": 2.0407, + "step": 16000500 + }, + { + "epoch": 46.32, + "learning_rate": 2.685016441274546e-05, + "loss": 2.0295, + "step": 16001000 + }, + { + "epoch": 46.32, + "learning_rate": 2.6849440765098187e-05, + "loss": 2.0244, + "step": 16001500 + }, + { + "epoch": 46.32, + "learning_rate": 2.6848717117450912e-05, + "loss": 2.01, + "step": 16002000 + }, + { + "epoch": 46.32, + "learning_rate": 2.6847993469803635e-05, + "loss": 2.0331, + "step": 16002500 + }, + { + "epoch": 46.32, + "learning_rate": 2.6847269822156357e-05, + "loss": 1.9945, + "step": 16003000 + }, + { + "epoch": 46.32, + "learning_rate": 2.684654617450908e-05, + "loss": 2.0342, + "step": 16003500 + }, + { + "epoch": 46.33, + "learning_rate": 2.68458225268618e-05, + "loss": 2.0183, + "step": 16004000 + }, + { + "epoch": 46.33, + "learning_rate": 2.684510032650982e-05, + "loss": 2.0128, + "step": 16004500 + }, + { + "epoch": 46.33, + "learning_rate": 2.684437812615784e-05, + "loss": 2.0004, + "step": 16005000 + }, + { + "epoch": 46.33, + "learning_rate": 2.684365447851056e-05, + "loss": 2.0425, + "step": 16005500 + }, + { + "epoch": 46.33, + "learning_rate": 2.6842930830863284e-05, + "loss": 2.0194, + "step": 16006000 + }, + { + "epoch": 46.33, + "learning_rate": 2.6842207183216006e-05, + "loss": 2.0424, + "step": 16006500 + }, + { + "epoch": 46.33, + "learning_rate": 2.6841483535568728e-05, + "loss": 2.0077, + "step": 16007000 + }, + { + "epoch": 46.34, + "learning_rate": 2.684075988792145e-05, + "loss": 2.0282, + "step": 16007500 + }, + { + "epoch": 46.34, + "learning_rate": 2.6840036240274176e-05, + "loss": 2.0077, + "step": 16008000 + }, + { + "epoch": 46.34, + "learning_rate": 2.68393125926269e-05, + "loss": 2.0077, + "step": 16008500 + }, + { + "epoch": 46.34, + "learning_rate": 2.6838588944979627e-05, + "loss": 2.0142, + "step": 16009000 + }, + { + "epoch": 46.34, + "learning_rate": 2.683786529733235e-05, + "loss": 2.0272, + "step": 16009500 + }, + { + "epoch": 46.34, + "learning_rate": 2.683714164968507e-05, + "loss": 2.0041, + "step": 16010000 + }, + { + "epoch": 46.34, + "learning_rate": 2.6836418002037794e-05, + "loss": 1.9908, + "step": 16010500 + }, + { + "epoch": 46.35, + "learning_rate": 2.6835695801685813e-05, + "loss": 2.0176, + "step": 16011000 + }, + { + "epoch": 46.35, + "learning_rate": 2.6834972154038535e-05, + "loss": 2.0233, + "step": 16011500 + }, + { + "epoch": 46.35, + "learning_rate": 2.6834248506391257e-05, + "loss": 2.0154, + "step": 16012000 + }, + { + "epoch": 46.35, + "learning_rate": 2.683352485874398e-05, + "loss": 2.023, + "step": 16012500 + }, + { + "epoch": 46.35, + "learning_rate": 2.6832801211096702e-05, + "loss": 2.0283, + "step": 16013000 + }, + { + "epoch": 46.35, + "learning_rate": 2.6832077563449427e-05, + "loss": 2.0041, + "step": 16013500 + }, + { + "epoch": 46.35, + "learning_rate": 2.683135391580215e-05, + "loss": 1.9918, + "step": 16014000 + }, + { + "epoch": 46.36, + "learning_rate": 2.6830630268154872e-05, + "loss": 2.0165, + "step": 16014500 + }, + { + "epoch": 46.36, + "learning_rate": 2.6829906620507594e-05, + "loss": 2.0054, + "step": 16015000 + }, + { + "epoch": 46.36, + "learning_rate": 2.6829182972860316e-05, + "loss": 2.0215, + "step": 16015500 + }, + { + "epoch": 46.36, + "learning_rate": 2.6828459325213045e-05, + "loss": 2.0009, + "step": 16016000 + }, + { + "epoch": 46.36, + "learning_rate": 2.6827735677565768e-05, + "loss": 2.0304, + "step": 16016500 + }, + { + "epoch": 46.36, + "learning_rate": 2.682701202991849e-05, + "loss": 2.023, + "step": 16017000 + }, + { + "epoch": 46.36, + "learning_rate": 2.6826288382271215e-05, + "loss": 2.022, + "step": 16017500 + }, + { + "epoch": 46.37, + "learning_rate": 2.682556618191923e-05, + "loss": 2.0334, + "step": 16018000 + }, + { + "epoch": 46.37, + "learning_rate": 2.6824842534271953e-05, + "loss": 2.0024, + "step": 16018500 + }, + { + "epoch": 46.37, + "learning_rate": 2.682411888662468e-05, + "loss": 2.0219, + "step": 16019000 + }, + { + "epoch": 46.37, + "learning_rate": 2.68233952389774e-05, + "loss": 2.0204, + "step": 16019500 + }, + { + "epoch": 46.37, + "learning_rate": 2.6822671591330123e-05, + "loss": 1.9871, + "step": 16020000 + }, + { + "epoch": 46.37, + "learning_rate": 2.6821947943682846e-05, + "loss": 2.0036, + "step": 16020500 + }, + { + "epoch": 46.37, + "learning_rate": 2.6821224296035568e-05, + "loss": 2.0164, + "step": 16021000 + }, + { + "epoch": 46.38, + "learning_rate": 2.682050064838829e-05, + "loss": 2.0123, + "step": 16021500 + }, + { + "epoch": 46.38, + "learning_rate": 2.6819777000741016e-05, + "loss": 2.0025, + "step": 16022000 + }, + { + "epoch": 46.38, + "learning_rate": 2.6819053353093738e-05, + "loss": 2.0383, + "step": 16022500 + }, + { + "epoch": 46.38, + "learning_rate": 2.6818329705446467e-05, + "loss": 2.0121, + "step": 16023000 + }, + { + "epoch": 46.38, + "learning_rate": 2.681760605779919e-05, + "loss": 2.0066, + "step": 16023500 + }, + { + "epoch": 46.38, + "learning_rate": 2.681688241015191e-05, + "loss": 2.0085, + "step": 16024000 + }, + { + "epoch": 46.38, + "learning_rate": 2.6816158762504634e-05, + "loss": 2.0258, + "step": 16024500 + }, + { + "epoch": 46.39, + "learning_rate": 2.6815436562152653e-05, + "loss": 1.9954, + "step": 16025000 + }, + { + "epoch": 46.39, + "learning_rate": 2.6814712914505375e-05, + "loss": 1.9894, + "step": 16025500 + }, + { + "epoch": 46.39, + "learning_rate": 2.681399071415339e-05, + "loss": 2.0518, + "step": 16026000 + }, + { + "epoch": 46.39, + "learning_rate": 2.6813267066506116e-05, + "loss": 1.9916, + "step": 16026500 + }, + { + "epoch": 46.39, + "learning_rate": 2.6812543418858838e-05, + "loss": 2.0222, + "step": 16027000 + }, + { + "epoch": 46.39, + "learning_rate": 2.681181977121156e-05, + "loss": 2.0279, + "step": 16027500 + }, + { + "epoch": 46.39, + "learning_rate": 2.681109757085958e-05, + "loss": 2.0129, + "step": 16028000 + }, + { + "epoch": 46.4, + "learning_rate": 2.6810375370507595e-05, + "loss": 2.0234, + "step": 16028500 + }, + { + "epoch": 46.4, + "learning_rate": 2.6809651722860317e-05, + "loss": 2.0283, + "step": 16029000 + }, + { + "epoch": 46.4, + "learning_rate": 2.6808928075213043e-05, + "loss": 2.0239, + "step": 16029500 + }, + { + "epoch": 46.4, + "learning_rate": 2.6808204427565765e-05, + "loss": 2.0145, + "step": 16030000 + }, + { + "epoch": 46.4, + "learning_rate": 2.6807480779918494e-05, + "loss": 2.0193, + "step": 16030500 + }, + { + "epoch": 46.4, + "learning_rate": 2.6806757132271216e-05, + "loss": 2.0226, + "step": 16031000 + }, + { + "epoch": 46.4, + "learning_rate": 2.680603348462394e-05, + "loss": 2.0186, + "step": 16031500 + }, + { + "epoch": 46.41, + "learning_rate": 2.6805311284271954e-05, + "loss": 2.0179, + "step": 16032000 + }, + { + "epoch": 46.41, + "learning_rate": 2.680458763662468e-05, + "loss": 2.0103, + "step": 16032500 + }, + { + "epoch": 46.41, + "learning_rate": 2.6803865436272695e-05, + "loss": 2.0116, + "step": 16033000 + }, + { + "epoch": 46.41, + "learning_rate": 2.6803143235920714e-05, + "loss": 2.024, + "step": 16033500 + }, + { + "epoch": 46.41, + "learning_rate": 2.6802419588273436e-05, + "loss": 2.0299, + "step": 16034000 + }, + { + "epoch": 46.41, + "learning_rate": 2.6801697387921455e-05, + "loss": 2.0438, + "step": 16034500 + }, + { + "epoch": 46.41, + "learning_rate": 2.6800973740274178e-05, + "loss": 2.027, + "step": 16035000 + }, + { + "epoch": 46.42, + "learning_rate": 2.68002500926269e-05, + "loss": 2.0263, + "step": 16035500 + }, + { + "epoch": 46.42, + "learning_rate": 2.6799526444979622e-05, + "loss": 2.0323, + "step": 16036000 + }, + { + "epoch": 46.42, + "learning_rate": 2.6798802797332344e-05, + "loss": 1.9995, + "step": 16036500 + }, + { + "epoch": 46.42, + "learning_rate": 2.6798079149685066e-05, + "loss": 2.0279, + "step": 16037000 + }, + { + "epoch": 46.42, + "learning_rate": 2.6797355502037792e-05, + "loss": 1.979, + "step": 16037500 + }, + { + "epoch": 46.42, + "learning_rate": 2.6796631854390514e-05, + "loss": 2.0211, + "step": 16038000 + }, + { + "epoch": 46.42, + "learning_rate": 2.6795908206743243e-05, + "loss": 2.0112, + "step": 16038500 + }, + { + "epoch": 46.43, + "learning_rate": 2.679518600639126e-05, + "loss": 2.0282, + "step": 16039000 + }, + { + "epoch": 46.43, + "learning_rate": 2.679446235874398e-05, + "loss": 2.0115, + "step": 16039500 + }, + { + "epoch": 46.43, + "learning_rate": 2.6793738711096707e-05, + "loss": 2.0398, + "step": 16040000 + }, + { + "epoch": 46.43, + "learning_rate": 2.679301506344943e-05, + "loss": 2.0148, + "step": 16040500 + }, + { + "epoch": 46.43, + "learning_rate": 2.679229141580215e-05, + "loss": 1.9981, + "step": 16041000 + }, + { + "epoch": 46.43, + "learning_rate": 2.6791567768154873e-05, + "loss": 2.0014, + "step": 16041500 + }, + { + "epoch": 46.44, + "learning_rate": 2.6790844120507596e-05, + "loss": 2.0178, + "step": 16042000 + }, + { + "epoch": 46.44, + "learning_rate": 2.6790120472860318e-05, + "loss": 2.0154, + "step": 16042500 + }, + { + "epoch": 46.44, + "learning_rate": 2.6789396825213044e-05, + "loss": 2.0105, + "step": 16043000 + }, + { + "epoch": 46.44, + "learning_rate": 2.6788673177565766e-05, + "loss": 2.0089, + "step": 16043500 + }, + { + "epoch": 46.44, + "learning_rate": 2.6787949529918488e-05, + "loss": 2.0169, + "step": 16044000 + }, + { + "epoch": 46.44, + "learning_rate": 2.678722588227121e-05, + "loss": 2.0115, + "step": 16044500 + }, + { + "epoch": 46.44, + "learning_rate": 2.6786502234623932e-05, + "loss": 2.0155, + "step": 16045000 + }, + { + "epoch": 46.45, + "learning_rate": 2.678577858697666e-05, + "loss": 2.0263, + "step": 16045500 + }, + { + "epoch": 46.45, + "learning_rate": 2.6785054939329384e-05, + "loss": 2.0096, + "step": 16046000 + }, + { + "epoch": 46.45, + "learning_rate": 2.678433129168211e-05, + "loss": 2.0232, + "step": 16046500 + }, + { + "epoch": 46.45, + "learning_rate": 2.678360764403483e-05, + "loss": 2.0139, + "step": 16047000 + }, + { + "epoch": 46.45, + "learning_rate": 2.6782885443682847e-05, + "loss": 2.0018, + "step": 16047500 + }, + { + "epoch": 46.45, + "learning_rate": 2.678216179603557e-05, + "loss": 2.0288, + "step": 16048000 + }, + { + "epoch": 46.45, + "learning_rate": 2.6781438148388295e-05, + "loss": 2.0217, + "step": 16048500 + }, + { + "epoch": 46.46, + "learning_rate": 2.678071594803631e-05, + "loss": 2.0036, + "step": 16049000 + }, + { + "epoch": 46.46, + "learning_rate": 2.6779992300389033e-05, + "loss": 2.0338, + "step": 16049500 + }, + { + "epoch": 46.46, + "learning_rate": 2.677926865274176e-05, + "loss": 2.0212, + "step": 16050000 + }, + { + "epoch": 46.46, + "learning_rate": 2.677854500509448e-05, + "loss": 2.0053, + "step": 16050500 + }, + { + "epoch": 46.46, + "learning_rate": 2.6777821357447203e-05, + "loss": 2.0179, + "step": 16051000 + }, + { + "epoch": 46.46, + "learning_rate": 2.6777099157095222e-05, + "loss": 2.018, + "step": 16051500 + }, + { + "epoch": 46.46, + "learning_rate": 2.6776375509447944e-05, + "loss": 2.0057, + "step": 16052000 + }, + { + "epoch": 46.47, + "learning_rate": 2.6775651861800666e-05, + "loss": 2.0345, + "step": 16052500 + }, + { + "epoch": 46.47, + "learning_rate": 2.677493110874398e-05, + "loss": 2.0015, + "step": 16053000 + }, + { + "epoch": 46.47, + "learning_rate": 2.6774207461096708e-05, + "loss": 2.0263, + "step": 16053500 + }, + { + "epoch": 46.47, + "learning_rate": 2.677348381344943e-05, + "loss": 2.0269, + "step": 16054000 + }, + { + "epoch": 46.47, + "learning_rate": 2.6772760165802152e-05, + "loss": 2.0121, + "step": 16054500 + }, + { + "epoch": 46.47, + "learning_rate": 2.6772036518154874e-05, + "loss": 2.0224, + "step": 16055000 + }, + { + "epoch": 46.47, + "learning_rate": 2.6771312870507596e-05, + "loss": 2.011, + "step": 16055500 + }, + { + "epoch": 46.48, + "learning_rate": 2.6770589222860322e-05, + "loss": 2.0278, + "step": 16056000 + }, + { + "epoch": 46.48, + "learning_rate": 2.6769865575213044e-05, + "loss": 2.0198, + "step": 16056500 + }, + { + "epoch": 46.48, + "learning_rate": 2.6769141927565767e-05, + "loss": 2.0276, + "step": 16057000 + }, + { + "epoch": 46.48, + "learning_rate": 2.676841827991849e-05, + "loss": 2.0137, + "step": 16057500 + }, + { + "epoch": 46.48, + "learning_rate": 2.676769463227121e-05, + "loss": 2.0012, + "step": 16058000 + }, + { + "epoch": 46.48, + "learning_rate": 2.6766970984623933e-05, + "loss": 2.0175, + "step": 16058500 + }, + { + "epoch": 46.48, + "learning_rate": 2.6766248784271952e-05, + "loss": 1.9908, + "step": 16059000 + }, + { + "epoch": 46.49, + "learning_rate": 2.676552658391997e-05, + "loss": 2.0202, + "step": 16059500 + }, + { + "epoch": 46.49, + "learning_rate": 2.6764802936272693e-05, + "loss": 2.0213, + "step": 16060000 + }, + { + "epoch": 46.49, + "learning_rate": 2.6764079288625416e-05, + "loss": 2.0156, + "step": 16060500 + }, + { + "epoch": 46.49, + "learning_rate": 2.6763355640978145e-05, + "loss": 2.0071, + "step": 16061000 + }, + { + "epoch": 46.49, + "learning_rate": 2.6762631993330867e-05, + "loss": 2.0159, + "step": 16061500 + }, + { + "epoch": 46.49, + "learning_rate": 2.676190834568359e-05, + "loss": 2.0041, + "step": 16062000 + }, + { + "epoch": 46.49, + "learning_rate": 2.676118469803631e-05, + "loss": 2.0298, + "step": 16062500 + }, + { + "epoch": 46.5, + "learning_rate": 2.6760461050389034e-05, + "loss": 2.0211, + "step": 16063000 + }, + { + "epoch": 46.5, + "learning_rate": 2.6759738850037053e-05, + "loss": 2.0162, + "step": 16063500 + }, + { + "epoch": 46.5, + "learning_rate": 2.6759015202389775e-05, + "loss": 2.0047, + "step": 16064000 + }, + { + "epoch": 46.5, + "learning_rate": 2.6758291554742497e-05, + "loss": 2.0116, + "step": 16064500 + }, + { + "epoch": 46.5, + "learning_rate": 2.6757567907095223e-05, + "loss": 1.9965, + "step": 16065000 + }, + { + "epoch": 46.5, + "learning_rate": 2.6756844259447945e-05, + "loss": 2.0051, + "step": 16065500 + }, + { + "epoch": 46.5, + "learning_rate": 2.6756120611800667e-05, + "loss": 2.0377, + "step": 16066000 + }, + { + "epoch": 46.51, + "learning_rate": 2.675539696415339e-05, + "loss": 2.0138, + "step": 16066500 + }, + { + "epoch": 46.51, + "learning_rate": 2.675467331650611e-05, + "loss": 2.025, + "step": 16067000 + }, + { + "epoch": 46.51, + "learning_rate": 2.6753949668858834e-05, + "loss": 2.0229, + "step": 16067500 + }, + { + "epoch": 46.51, + "learning_rate": 2.6753226021211563e-05, + "loss": 2.011, + "step": 16068000 + }, + { + "epoch": 46.51, + "learning_rate": 2.6752502373564285e-05, + "loss": 2.0031, + "step": 16068500 + }, + { + "epoch": 46.51, + "learning_rate": 2.675177872591701e-05, + "loss": 2.0457, + "step": 16069000 + }, + { + "epoch": 46.51, + "learning_rate": 2.6751055078269733e-05, + "loss": 2.0275, + "step": 16069500 + }, + { + "epoch": 46.52, + "learning_rate": 2.6750331430622455e-05, + "loss": 2.0298, + "step": 16070000 + }, + { + "epoch": 46.52, + "learning_rate": 2.6749607782975177e-05, + "loss": 2.0342, + "step": 16070500 + }, + { + "epoch": 46.52, + "learning_rate": 2.6748885582623196e-05, + "loss": 2.0385, + "step": 16071000 + }, + { + "epoch": 46.52, + "learning_rate": 2.674816193497592e-05, + "loss": 2.0278, + "step": 16071500 + }, + { + "epoch": 46.52, + "learning_rate": 2.674743828732864e-05, + "loss": 2.0265, + "step": 16072000 + }, + { + "epoch": 46.52, + "learning_rate": 2.6746717534271953e-05, + "loss": 2.0052, + "step": 16072500 + }, + { + "epoch": 46.52, + "learning_rate": 2.6745995333919972e-05, + "loss": 2.008, + "step": 16073000 + }, + { + "epoch": 46.53, + "learning_rate": 2.6745271686272694e-05, + "loss": 2.0119, + "step": 16073500 + }, + { + "epoch": 46.53, + "learning_rate": 2.6744548038625416e-05, + "loss": 2.0243, + "step": 16074000 + }, + { + "epoch": 46.53, + "learning_rate": 2.674382439097814e-05, + "loss": 2.0259, + "step": 16074500 + }, + { + "epoch": 46.53, + "learning_rate": 2.674310074333086e-05, + "loss": 2.0047, + "step": 16075000 + }, + { + "epoch": 46.53, + "learning_rate": 2.6742377095683586e-05, + "loss": 1.9962, + "step": 16075500 + }, + { + "epoch": 46.53, + "learning_rate": 2.6741653448036312e-05, + "loss": 2.0038, + "step": 16076000 + }, + { + "epoch": 46.53, + "learning_rate": 2.6740929800389038e-05, + "loss": 2.0027, + "step": 16076500 + }, + { + "epoch": 46.54, + "learning_rate": 2.6740207600037053e-05, + "loss": 2.0114, + "step": 16077000 + }, + { + "epoch": 46.54, + "learning_rate": 2.6739483952389776e-05, + "loss": 2.0488, + "step": 16077500 + }, + { + "epoch": 46.54, + "learning_rate": 2.67387603047425e-05, + "loss": 2.0176, + "step": 16078000 + }, + { + "epoch": 46.54, + "learning_rate": 2.6738036657095223e-05, + "loss": 1.9989, + "step": 16078500 + }, + { + "epoch": 46.54, + "learning_rate": 2.6737313009447946e-05, + "loss": 2.0229, + "step": 16079000 + }, + { + "epoch": 46.54, + "learning_rate": 2.6736589361800668e-05, + "loss": 1.9998, + "step": 16079500 + }, + { + "epoch": 46.55, + "learning_rate": 2.6735867161448687e-05, + "loss": 1.9984, + "step": 16080000 + }, + { + "epoch": 46.55, + "learning_rate": 2.673514351380141e-05, + "loss": 2.0102, + "step": 16080500 + }, + { + "epoch": 46.55, + "learning_rate": 2.673441986615413e-05, + "loss": 2.0248, + "step": 16081000 + }, + { + "epoch": 46.55, + "learning_rate": 2.6733696218506853e-05, + "loss": 2.0283, + "step": 16081500 + }, + { + "epoch": 46.55, + "learning_rate": 2.6732972570859576e-05, + "loss": 2.0455, + "step": 16082000 + }, + { + "epoch": 46.55, + "learning_rate": 2.67322489232123e-05, + "loss": 2.0105, + "step": 16082500 + }, + { + "epoch": 46.55, + "learning_rate": 2.6731525275565027e-05, + "loss": 2.0236, + "step": 16083000 + }, + { + "epoch": 46.56, + "learning_rate": 2.673080162791775e-05, + "loss": 2.0298, + "step": 16083500 + }, + { + "epoch": 46.56, + "learning_rate": 2.6730077980270475e-05, + "loss": 2.0033, + "step": 16084000 + }, + { + "epoch": 46.56, + "learning_rate": 2.6729354332623197e-05, + "loss": 2.0202, + "step": 16084500 + }, + { + "epoch": 46.56, + "learning_rate": 2.672863068497592e-05, + "loss": 2.0119, + "step": 16085000 + }, + { + "epoch": 46.56, + "learning_rate": 2.672790703732864e-05, + "loss": 2.0056, + "step": 16085500 + }, + { + "epoch": 46.56, + "learning_rate": 2.672718483697666e-05, + "loss": 2.0057, + "step": 16086000 + }, + { + "epoch": 46.56, + "learning_rate": 2.6726462636624676e-05, + "loss": 2.0129, + "step": 16086500 + }, + { + "epoch": 46.57, + "learning_rate": 2.67257389889774e-05, + "loss": 2.0222, + "step": 16087000 + }, + { + "epoch": 46.57, + "learning_rate": 2.6725016788625417e-05, + "loss": 2.0139, + "step": 16087500 + }, + { + "epoch": 46.57, + "learning_rate": 2.672429314097814e-05, + "loss": 2.0104, + "step": 16088000 + }, + { + "epoch": 46.57, + "learning_rate": 2.672356949333086e-05, + "loss": 2.0298, + "step": 16088500 + }, + { + "epoch": 46.57, + "learning_rate": 2.6722845845683587e-05, + "loss": 2.0233, + "step": 16089000 + }, + { + "epoch": 46.57, + "learning_rate": 2.672212219803631e-05, + "loss": 2.0126, + "step": 16089500 + }, + { + "epoch": 46.57, + "learning_rate": 2.6721398550389032e-05, + "loss": 2.024, + "step": 16090000 + }, + { + "epoch": 46.58, + "learning_rate": 2.672067635003705e-05, + "loss": 2.0376, + "step": 16090500 + }, + { + "epoch": 46.58, + "learning_rate": 2.6719952702389776e-05, + "loss": 2.0003, + "step": 16091000 + }, + { + "epoch": 46.58, + "learning_rate": 2.6719229054742502e-05, + "loss": 2.0412, + "step": 16091500 + }, + { + "epoch": 46.58, + "learning_rate": 2.6718505407095224e-05, + "loss": 2.0183, + "step": 16092000 + }, + { + "epoch": 46.58, + "learning_rate": 2.6717781759447946e-05, + "loss": 2.0097, + "step": 16092500 + }, + { + "epoch": 46.58, + "learning_rate": 2.671705811180067e-05, + "loss": 2.0024, + "step": 16093000 + }, + { + "epoch": 46.58, + "learning_rate": 2.6716335911448688e-05, + "loss": 2.0237, + "step": 16093500 + }, + { + "epoch": 46.59, + "learning_rate": 2.671561226380141e-05, + "loss": 2.0217, + "step": 16094000 + }, + { + "epoch": 46.59, + "learning_rate": 2.6714888616154132e-05, + "loss": 2.0187, + "step": 16094500 + }, + { + "epoch": 46.59, + "learning_rate": 2.671416641580215e-05, + "loss": 2.0303, + "step": 16095000 + }, + { + "epoch": 46.59, + "learning_rate": 2.6713442768154873e-05, + "loss": 2.0096, + "step": 16095500 + }, + { + "epoch": 46.59, + "learning_rate": 2.6712719120507595e-05, + "loss": 2.0278, + "step": 16096000 + }, + { + "epoch": 46.59, + "learning_rate": 2.6711995472860318e-05, + "loss": 2.0357, + "step": 16096500 + }, + { + "epoch": 46.59, + "learning_rate": 2.671127182521304e-05, + "loss": 2.0281, + "step": 16097000 + }, + { + "epoch": 46.6, + "learning_rate": 2.671054962486106e-05, + "loss": 2.0228, + "step": 16097500 + }, + { + "epoch": 46.6, + "learning_rate": 2.670982597721378e-05, + "loss": 2.012, + "step": 16098000 + }, + { + "epoch": 46.6, + "learning_rate": 2.670910232956651e-05, + "loss": 2.0267, + "step": 16098500 + }, + { + "epoch": 46.6, + "learning_rate": 2.6708378681919232e-05, + "loss": 2.027, + "step": 16099000 + }, + { + "epoch": 46.6, + "learning_rate": 2.6707655034271955e-05, + "loss": 1.9977, + "step": 16099500 + }, + { + "epoch": 46.6, + "learning_rate": 2.6706931386624677e-05, + "loss": 2.0142, + "step": 16100000 + }, + { + "epoch": 46.6, + "learning_rate": 2.6706207738977402e-05, + "loss": 2.0088, + "step": 16100500 + }, + { + "epoch": 46.61, + "learning_rate": 2.6705484091330125e-05, + "loss": 2.0238, + "step": 16101000 + }, + { + "epoch": 46.61, + "learning_rate": 2.6704760443682847e-05, + "loss": 2.0004, + "step": 16101500 + }, + { + "epoch": 46.61, + "learning_rate": 2.670403679603557e-05, + "loss": 2.0295, + "step": 16102000 + }, + { + "epoch": 46.61, + "learning_rate": 2.670331314838829e-05, + "loss": 1.9974, + "step": 16102500 + }, + { + "epoch": 46.61, + "learning_rate": 2.6702589500741017e-05, + "loss": 2.0426, + "step": 16103000 + }, + { + "epoch": 46.61, + "learning_rate": 2.670186585309374e-05, + "loss": 1.9883, + "step": 16103500 + }, + { + "epoch": 46.61, + "learning_rate": 2.670114220544646e-05, + "loss": 1.9998, + "step": 16104000 + }, + { + "epoch": 46.62, + "learning_rate": 2.6700418557799184e-05, + "loss": 2.0267, + "step": 16104500 + }, + { + "epoch": 46.62, + "learning_rate": 2.6699696357447203e-05, + "loss": 2.0051, + "step": 16105000 + }, + { + "epoch": 46.62, + "learning_rate": 2.6698972709799928e-05, + "loss": 2.0326, + "step": 16105500 + }, + { + "epoch": 46.62, + "learning_rate": 2.6698249062152654e-05, + "loss": 2.0083, + "step": 16106000 + }, + { + "epoch": 46.62, + "learning_rate": 2.6697525414505376e-05, + "loss": 2.011, + "step": 16106500 + }, + { + "epoch": 46.62, + "learning_rate": 2.66968017668581e-05, + "loss": 2.0041, + "step": 16107000 + }, + { + "epoch": 46.62, + "learning_rate": 2.6696079566506117e-05, + "loss": 2.0098, + "step": 16107500 + }, + { + "epoch": 46.63, + "learning_rate": 2.669535591885884e-05, + "loss": 2.0193, + "step": 16108000 + }, + { + "epoch": 46.63, + "learning_rate": 2.6694632271211562e-05, + "loss": 2.0233, + "step": 16108500 + }, + { + "epoch": 46.63, + "learning_rate": 2.6693908623564284e-05, + "loss": 2.0108, + "step": 16109000 + }, + { + "epoch": 46.63, + "learning_rate": 2.6693184975917006e-05, + "loss": 2.0363, + "step": 16109500 + }, + { + "epoch": 46.63, + "learning_rate": 2.669246132826973e-05, + "loss": 2.0229, + "step": 16110000 + }, + { + "epoch": 46.63, + "learning_rate": 2.6691737680622454e-05, + "loss": 2.0101, + "step": 16110500 + }, + { + "epoch": 46.63, + "learning_rate": 2.669101548027047e-05, + "loss": 2.0221, + "step": 16111000 + }, + { + "epoch": 46.64, + "learning_rate": 2.6690291832623192e-05, + "loss": 2.0343, + "step": 16111500 + }, + { + "epoch": 46.64, + "learning_rate": 2.6689568184975917e-05, + "loss": 2.0007, + "step": 16112000 + }, + { + "epoch": 46.64, + "learning_rate": 2.668884453732864e-05, + "loss": 2.0102, + "step": 16112500 + }, + { + "epoch": 46.64, + "learning_rate": 2.668812088968137e-05, + "loss": 2.0127, + "step": 16113000 + }, + { + "epoch": 46.64, + "learning_rate": 2.668739724203409e-05, + "loss": 2.0235, + "step": 16113500 + }, + { + "epoch": 46.64, + "learning_rate": 2.6686675041682107e-05, + "loss": 1.9965, + "step": 16114000 + }, + { + "epoch": 46.64, + "learning_rate": 2.6685951394034832e-05, + "loss": 2.0218, + "step": 16114500 + }, + { + "epoch": 46.65, + "learning_rate": 2.6685227746387554e-05, + "loss": 2.0315, + "step": 16115000 + }, + { + "epoch": 46.65, + "learning_rate": 2.6684504098740277e-05, + "loss": 2.0364, + "step": 16115500 + }, + { + "epoch": 46.65, + "learning_rate": 2.6683780451093e-05, + "loss": 2.0128, + "step": 16116000 + }, + { + "epoch": 46.65, + "learning_rate": 2.668305680344572e-05, + "loss": 2.0241, + "step": 16116500 + }, + { + "epoch": 46.65, + "learning_rate": 2.668233460309374e-05, + "loss": 2.0025, + "step": 16117000 + }, + { + "epoch": 46.65, + "learning_rate": 2.6681610955446462e-05, + "loss": 2.0173, + "step": 16117500 + }, + { + "epoch": 46.66, + "learning_rate": 2.668088875509448e-05, + "loss": 1.9961, + "step": 16118000 + }, + { + "epoch": 46.66, + "learning_rate": 2.6680165107447203e-05, + "loss": 2.0198, + "step": 16118500 + }, + { + "epoch": 46.66, + "learning_rate": 2.6679441459799926e-05, + "loss": 2.018, + "step": 16119000 + }, + { + "epoch": 46.66, + "learning_rate": 2.6678717812152648e-05, + "loss": 2.0136, + "step": 16119500 + }, + { + "epoch": 46.66, + "learning_rate": 2.667799416450537e-05, + "loss": 2.0081, + "step": 16120000 + }, + { + "epoch": 46.66, + "learning_rate": 2.66772705168581e-05, + "loss": 2.0129, + "step": 16120500 + }, + { + "epoch": 46.66, + "learning_rate": 2.667654686921082e-05, + "loss": 1.999, + "step": 16121000 + }, + { + "epoch": 46.67, + "learning_rate": 2.6675823221563544e-05, + "loss": 2.0203, + "step": 16121500 + }, + { + "epoch": 46.67, + "learning_rate": 2.6675101021211563e-05, + "loss": 2.0109, + "step": 16122000 + }, + { + "epoch": 46.67, + "learning_rate": 2.6674377373564285e-05, + "loss": 2.0431, + "step": 16122500 + }, + { + "epoch": 46.67, + "learning_rate": 2.6673653725917007e-05, + "loss": 2.0023, + "step": 16123000 + }, + { + "epoch": 46.67, + "learning_rate": 2.6672930078269733e-05, + "loss": 2.0003, + "step": 16123500 + }, + { + "epoch": 46.67, + "learning_rate": 2.6672206430622455e-05, + "loss": 2.0329, + "step": 16124000 + }, + { + "epoch": 46.67, + "learning_rate": 2.6671482782975177e-05, + "loss": 2.0103, + "step": 16124500 + }, + { + "epoch": 46.68, + "learning_rate": 2.66707591353279e-05, + "loss": 2.0266, + "step": 16125000 + }, + { + "epoch": 46.68, + "learning_rate": 2.667003548768062e-05, + "loss": 2.0291, + "step": 16125500 + }, + { + "epoch": 46.68, + "learning_rate": 2.666931328732864e-05, + "loss": 1.9999, + "step": 16126000 + }, + { + "epoch": 46.68, + "learning_rate": 2.6668589639681363e-05, + "loss": 2.0435, + "step": 16126500 + }, + { + "epoch": 46.68, + "learning_rate": 2.6667865992034085e-05, + "loss": 2.0041, + "step": 16127000 + }, + { + "epoch": 46.68, + "learning_rate": 2.6667142344386807e-05, + "loss": 2.0314, + "step": 16127500 + }, + { + "epoch": 46.68, + "learning_rate": 2.6666418696739536e-05, + "loss": 2.0175, + "step": 16128000 + }, + { + "epoch": 46.69, + "learning_rate": 2.666569504909226e-05, + "loss": 2.0316, + "step": 16128500 + }, + { + "epoch": 46.69, + "learning_rate": 2.6664971401444984e-05, + "loss": 2.0211, + "step": 16129000 + }, + { + "epoch": 46.69, + "learning_rate": 2.6664247753797706e-05, + "loss": 2.0142, + "step": 16129500 + }, + { + "epoch": 46.69, + "learning_rate": 2.6663525553445722e-05, + "loss": 2.0231, + "step": 16130000 + }, + { + "epoch": 46.69, + "learning_rate": 2.6662801905798444e-05, + "loss": 1.999, + "step": 16130500 + }, + { + "epoch": 46.69, + "learning_rate": 2.666207825815117e-05, + "loss": 2.0161, + "step": 16131000 + }, + { + "epoch": 46.69, + "learning_rate": 2.6661354610503892e-05, + "loss": 2.0431, + "step": 16131500 + }, + { + "epoch": 46.7, + "learning_rate": 2.6660632410151908e-05, + "loss": 2.041, + "step": 16132000 + }, + { + "epoch": 46.7, + "learning_rate": 2.6659908762504633e-05, + "loss": 2.0007, + "step": 16132500 + }, + { + "epoch": 46.7, + "learning_rate": 2.665918656215265e-05, + "loss": 2.0428, + "step": 16133000 + }, + { + "epoch": 46.7, + "learning_rate": 2.665846291450537e-05, + "loss": 2.0174, + "step": 16133500 + }, + { + "epoch": 46.7, + "learning_rate": 2.6657739266858097e-05, + "loss": 2.0265, + "step": 16134000 + }, + { + "epoch": 46.7, + "learning_rate": 2.665701561921082e-05, + "loss": 1.9991, + "step": 16134500 + }, + { + "epoch": 46.7, + "learning_rate": 2.665629197156354e-05, + "loss": 2.0102, + "step": 16135000 + }, + { + "epoch": 46.71, + "learning_rate": 2.665556832391627e-05, + "loss": 2.0031, + "step": 16135500 + }, + { + "epoch": 46.71, + "learning_rate": 2.6654844676268992e-05, + "loss": 2.0305, + "step": 16136000 + }, + { + "epoch": 46.71, + "learning_rate": 2.6654121028621715e-05, + "loss": 2.0038, + "step": 16136500 + }, + { + "epoch": 46.71, + "learning_rate": 2.6653397380974437e-05, + "loss": 2.0287, + "step": 16137000 + }, + { + "epoch": 46.71, + "learning_rate": 2.6652675180622456e-05, + "loss": 2.0168, + "step": 16137500 + }, + { + "epoch": 46.71, + "learning_rate": 2.6651951532975178e-05, + "loss": 2.0342, + "step": 16138000 + }, + { + "epoch": 46.71, + "learning_rate": 2.66512278853279e-05, + "loss": 2.0241, + "step": 16138500 + }, + { + "epoch": 46.72, + "learning_rate": 2.6650504237680622e-05, + "loss": 2.0284, + "step": 16139000 + }, + { + "epoch": 46.72, + "learning_rate": 2.6649780590033348e-05, + "loss": 2.025, + "step": 16139500 + }, + { + "epoch": 46.72, + "learning_rate": 2.6649058389681364e-05, + "loss": 2.0278, + "step": 16140000 + }, + { + "epoch": 46.72, + "learning_rate": 2.6648336189329383e-05, + "loss": 2.0396, + "step": 16140500 + }, + { + "epoch": 46.72, + "learning_rate": 2.6647612541682105e-05, + "loss": 2.0032, + "step": 16141000 + }, + { + "epoch": 46.72, + "learning_rate": 2.6646888894034827e-05, + "loss": 2.0042, + "step": 16141500 + }, + { + "epoch": 46.72, + "learning_rate": 2.664616524638755e-05, + "loss": 2.0307, + "step": 16142000 + }, + { + "epoch": 46.73, + "learning_rate": 2.6645443046035568e-05, + "loss": 2.0051, + "step": 16142500 + }, + { + "epoch": 46.73, + "learning_rate": 2.6644720845683584e-05, + "loss": 2.0013, + "step": 16143000 + }, + { + "epoch": 46.73, + "learning_rate": 2.6643997198036313e-05, + "loss": 2.0118, + "step": 16143500 + }, + { + "epoch": 46.73, + "learning_rate": 2.6643273550389035e-05, + "loss": 2.0153, + "step": 16144000 + }, + { + "epoch": 46.73, + "learning_rate": 2.664254990274176e-05, + "loss": 2.0094, + "step": 16144500 + }, + { + "epoch": 46.73, + "learning_rate": 2.6641826255094483e-05, + "loss": 2.01, + "step": 16145000 + }, + { + "epoch": 46.73, + "learning_rate": 2.6641102607447205e-05, + "loss": 2.0137, + "step": 16145500 + }, + { + "epoch": 46.74, + "learning_rate": 2.6640378959799927e-05, + "loss": 2.02, + "step": 16146000 + }, + { + "epoch": 46.74, + "learning_rate": 2.663965531215265e-05, + "loss": 2.0233, + "step": 16146500 + }, + { + "epoch": 46.74, + "learning_rate": 2.6638931664505372e-05, + "loss": 2.0197, + "step": 16147000 + }, + { + "epoch": 46.74, + "learning_rate": 2.663820946415339e-05, + "loss": 2.0135, + "step": 16147500 + }, + { + "epoch": 46.74, + "learning_rate": 2.6637485816506113e-05, + "loss": 2.0608, + "step": 16148000 + }, + { + "epoch": 46.74, + "learning_rate": 2.6636762168858835e-05, + "loss": 2.0296, + "step": 16148500 + }, + { + "epoch": 46.74, + "learning_rate": 2.663603852121156e-05, + "loss": 2.0118, + "step": 16149000 + }, + { + "epoch": 46.75, + "learning_rate": 2.6635314873564283e-05, + "loss": 2.014, + "step": 16149500 + }, + { + "epoch": 46.75, + "learning_rate": 2.6634591225917005e-05, + "loss": 2.0394, + "step": 16150000 + }, + { + "epoch": 46.75, + "learning_rate": 2.6633867578269734e-05, + "loss": 2.002, + "step": 16150500 + }, + { + "epoch": 46.75, + "learning_rate": 2.663314537791775e-05, + "loss": 1.9866, + "step": 16151000 + }, + { + "epoch": 46.75, + "learning_rate": 2.6632421730270472e-05, + "loss": 2.024, + "step": 16151500 + }, + { + "epoch": 46.75, + "learning_rate": 2.6631698082623198e-05, + "loss": 2.0262, + "step": 16152000 + }, + { + "epoch": 46.75, + "learning_rate": 2.663097443497592e-05, + "loss": 2.0409, + "step": 16152500 + }, + { + "epoch": 46.76, + "learning_rate": 2.6630252234623935e-05, + "loss": 2.0236, + "step": 16153000 + }, + { + "epoch": 46.76, + "learning_rate": 2.662952858697666e-05, + "loss": 2.0089, + "step": 16153500 + }, + { + "epoch": 46.76, + "learning_rate": 2.6628804939329383e-05, + "loss": 2.0156, + "step": 16154000 + }, + { + "epoch": 46.76, + "learning_rate": 2.6628081291682106e-05, + "loss": 2.0134, + "step": 16154500 + }, + { + "epoch": 46.76, + "learning_rate": 2.6627359091330124e-05, + "loss": 2.0546, + "step": 16155000 + }, + { + "epoch": 46.76, + "learning_rate": 2.6626635443682847e-05, + "loss": 2.0335, + "step": 16155500 + }, + { + "epoch": 46.77, + "learning_rate": 2.662591179603557e-05, + "loss": 2.0257, + "step": 16156000 + }, + { + "epoch": 46.77, + "learning_rate": 2.6625189595683584e-05, + "loss": 2.0221, + "step": 16156500 + }, + { + "epoch": 46.77, + "learning_rate": 2.662446594803631e-05, + "loss": 1.9984, + "step": 16157000 + }, + { + "epoch": 46.77, + "learning_rate": 2.6623742300389032e-05, + "loss": 2.0454, + "step": 16157500 + }, + { + "epoch": 46.77, + "learning_rate": 2.662301865274176e-05, + "loss": 2.0263, + "step": 16158000 + }, + { + "epoch": 46.77, + "learning_rate": 2.6622295005094484e-05, + "loss": 2.0292, + "step": 16158500 + }, + { + "epoch": 46.77, + "learning_rate": 2.6621571357447206e-05, + "loss": 2.0551, + "step": 16159000 + }, + { + "epoch": 46.78, + "learning_rate": 2.6620847709799928e-05, + "loss": 2.0193, + "step": 16159500 + }, + { + "epoch": 46.78, + "learning_rate": 2.662012695674324e-05, + "loss": 2.0207, + "step": 16160000 + }, + { + "epoch": 46.78, + "learning_rate": 2.6619403309095963e-05, + "loss": 2.0165, + "step": 16160500 + }, + { + "epoch": 46.78, + "learning_rate": 2.6618679661448688e-05, + "loss": 1.9945, + "step": 16161000 + }, + { + "epoch": 46.78, + "learning_rate": 2.661795601380141e-05, + "loss": 2.0121, + "step": 16161500 + }, + { + "epoch": 46.78, + "learning_rate": 2.6617232366154133e-05, + "loss": 2.0368, + "step": 16162000 + }, + { + "epoch": 46.78, + "learning_rate": 2.6616508718506855e-05, + "loss": 2.0504, + "step": 16162500 + }, + { + "epoch": 46.79, + "learning_rate": 2.6615785070859577e-05, + "loss": 2.0282, + "step": 16163000 + }, + { + "epoch": 46.79, + "learning_rate": 2.66150614232123e-05, + "loss": 2.0324, + "step": 16163500 + }, + { + "epoch": 46.79, + "learning_rate": 2.6614337775565025e-05, + "loss": 2.0044, + "step": 16164000 + }, + { + "epoch": 46.79, + "learning_rate": 2.6613614127917747e-05, + "loss": 2.0123, + "step": 16164500 + }, + { + "epoch": 46.79, + "learning_rate": 2.661289048027047e-05, + "loss": 2.0198, + "step": 16165000 + }, + { + "epoch": 46.79, + "learning_rate": 2.66121668326232e-05, + "loss": 2.0013, + "step": 16165500 + }, + { + "epoch": 46.79, + "learning_rate": 2.661144318497592e-05, + "loss": 2.0412, + "step": 16166000 + }, + { + "epoch": 46.8, + "learning_rate": 2.661072098462394e-05, + "loss": 2.0281, + "step": 16166500 + }, + { + "epoch": 46.8, + "learning_rate": 2.6609998784271955e-05, + "loss": 1.9957, + "step": 16167000 + }, + { + "epoch": 46.8, + "learning_rate": 2.6609275136624677e-05, + "loss": 1.9999, + "step": 16167500 + }, + { + "epoch": 46.8, + "learning_rate": 2.66085514889774e-05, + "loss": 1.9973, + "step": 16168000 + }, + { + "epoch": 46.8, + "learning_rate": 2.6607827841330125e-05, + "loss": 2.0115, + "step": 16168500 + }, + { + "epoch": 46.8, + "learning_rate": 2.6607104193682848e-05, + "loss": 2.0239, + "step": 16169000 + }, + { + "epoch": 46.8, + "learning_rate": 2.660638054603557e-05, + "loss": 2.0118, + "step": 16169500 + }, + { + "epoch": 46.81, + "learning_rate": 2.6605656898388292e-05, + "loss": 2.0099, + "step": 16170000 + }, + { + "epoch": 46.81, + "learning_rate": 2.6604933250741014e-05, + "loss": 2.0065, + "step": 16170500 + }, + { + "epoch": 46.81, + "learning_rate": 2.660420960309374e-05, + "loss": 2.0138, + "step": 16171000 + }, + { + "epoch": 46.81, + "learning_rate": 2.6603485955446462e-05, + "loss": 2.0115, + "step": 16171500 + }, + { + "epoch": 46.81, + "learning_rate": 2.6602762307799184e-05, + "loss": 2.0273, + "step": 16172000 + }, + { + "epoch": 46.81, + "learning_rate": 2.6602038660151907e-05, + "loss": 2.0362, + "step": 16172500 + }, + { + "epoch": 46.81, + "learning_rate": 2.660131645979993e-05, + "loss": 2.0397, + "step": 16173000 + }, + { + "epoch": 46.82, + "learning_rate": 2.660059281215265e-05, + "loss": 2.0096, + "step": 16173500 + }, + { + "epoch": 46.82, + "learning_rate": 2.6599869164505377e-05, + "loss": 2.0123, + "step": 16174000 + }, + { + "epoch": 46.82, + "learning_rate": 2.65991455168581e-05, + "loss": 1.9935, + "step": 16174500 + }, + { + "epoch": 46.82, + "learning_rate": 2.6598423316506115e-05, + "loss": 2.0065, + "step": 16175000 + }, + { + "epoch": 46.82, + "learning_rate": 2.659769966885884e-05, + "loss": 2.0295, + "step": 16175500 + }, + { + "epoch": 46.82, + "learning_rate": 2.6596977468506856e-05, + "loss": 1.9913, + "step": 16176000 + }, + { + "epoch": 46.82, + "learning_rate": 2.6596253820859578e-05, + "loss": 2.0194, + "step": 16176500 + }, + { + "epoch": 46.83, + "learning_rate": 2.6595530173212304e-05, + "loss": 2.0305, + "step": 16177000 + }, + { + "epoch": 46.83, + "learning_rate": 2.6594806525565026e-05, + "loss": 2.0338, + "step": 16177500 + }, + { + "epoch": 46.83, + "learning_rate": 2.659408432521304e-05, + "loss": 2.0305, + "step": 16178000 + }, + { + "epoch": 46.83, + "learning_rate": 2.6593360677565764e-05, + "loss": 2.01, + "step": 16178500 + }, + { + "epoch": 46.83, + "learning_rate": 2.659263702991849e-05, + "loss": 2.0462, + "step": 16179000 + }, + { + "epoch": 46.83, + "learning_rate": 2.659191338227121e-05, + "loss": 1.9937, + "step": 16179500 + }, + { + "epoch": 46.83, + "learning_rate": 2.6591189734623934e-05, + "loss": 2.0395, + "step": 16180000 + }, + { + "epoch": 46.84, + "learning_rate": 2.6590466086976663e-05, + "loss": 2.0129, + "step": 16180500 + }, + { + "epoch": 46.84, + "learning_rate": 2.6589742439329385e-05, + "loss": 2.0166, + "step": 16181000 + }, + { + "epoch": 46.84, + "learning_rate": 2.6589018791682107e-05, + "loss": 2.0012, + "step": 16181500 + }, + { + "epoch": 46.84, + "learning_rate": 2.658829514403483e-05, + "loss": 2.0459, + "step": 16182000 + }, + { + "epoch": 46.84, + "learning_rate": 2.6587572943682848e-05, + "loss": 2.0319, + "step": 16182500 + }, + { + "epoch": 46.84, + "learning_rate": 2.658684929603557e-05, + "loss": 2.0199, + "step": 16183000 + }, + { + "epoch": 46.84, + "learning_rate": 2.6586125648388293e-05, + "loss": 2.0261, + "step": 16183500 + }, + { + "epoch": 46.85, + "learning_rate": 2.6585403448036312e-05, + "loss": 2.0057, + "step": 16184000 + }, + { + "epoch": 46.85, + "learning_rate": 2.6584679800389034e-05, + "loss": 2.0306, + "step": 16184500 + }, + { + "epoch": 46.85, + "learning_rate": 2.6583956152741756e-05, + "loss": 2.0344, + "step": 16185000 + }, + { + "epoch": 46.85, + "learning_rate": 2.658323250509448e-05, + "loss": 2.019, + "step": 16185500 + }, + { + "epoch": 46.85, + "learning_rate": 2.6582508857447204e-05, + "loss": 2.0428, + "step": 16186000 + }, + { + "epoch": 46.85, + "learning_rate": 2.6581785209799926e-05, + "loss": 2.0176, + "step": 16186500 + }, + { + "epoch": 46.85, + "learning_rate": 2.658106156215265e-05, + "loss": 2.0474, + "step": 16187000 + }, + { + "epoch": 46.86, + "learning_rate": 2.658033791450537e-05, + "loss": 2.0284, + "step": 16187500 + }, + { + "epoch": 46.86, + "learning_rate": 2.65796142668581e-05, + "loss": 2.0192, + "step": 16188000 + }, + { + "epoch": 46.86, + "learning_rate": 2.6578890619210822e-05, + "loss": 2.023, + "step": 16188500 + }, + { + "epoch": 46.86, + "learning_rate": 2.6578166971563544e-05, + "loss": 2.0093, + "step": 16189000 + }, + { + "epoch": 46.86, + "learning_rate": 2.6577443323916266e-05, + "loss": 2.0157, + "step": 16189500 + }, + { + "epoch": 46.86, + "learning_rate": 2.6576719676268992e-05, + "loss": 1.9902, + "step": 16190000 + }, + { + "epoch": 46.86, + "learning_rate": 2.6575998923212304e-05, + "loss": 2.0011, + "step": 16190500 + }, + { + "epoch": 46.87, + "learning_rate": 2.6575275275565027e-05, + "loss": 2.01, + "step": 16191000 + }, + { + "epoch": 46.87, + "learning_rate": 2.657455162791775e-05, + "loss": 2.0154, + "step": 16191500 + }, + { + "epoch": 46.87, + "learning_rate": 2.657382798027047e-05, + "loss": 2.0055, + "step": 16192000 + }, + { + "epoch": 46.87, + "learning_rate": 2.6573104332623193e-05, + "loss": 2.0125, + "step": 16192500 + }, + { + "epoch": 46.87, + "learning_rate": 2.6572382132271212e-05, + "loss": 2.0261, + "step": 16193000 + }, + { + "epoch": 46.87, + "learning_rate": 2.6571658484623934e-05, + "loss": 1.9999, + "step": 16193500 + }, + { + "epoch": 46.88, + "learning_rate": 2.6570934836976657e-05, + "loss": 2.0306, + "step": 16194000 + }, + { + "epoch": 46.88, + "learning_rate": 2.6570212636624676e-05, + "loss": 2.0389, + "step": 16194500 + }, + { + "epoch": 46.88, + "learning_rate": 2.6569488988977398e-05, + "loss": 2.022, + "step": 16195000 + }, + { + "epoch": 46.88, + "learning_rate": 2.6568765341330127e-05, + "loss": 1.9915, + "step": 16195500 + }, + { + "epoch": 46.88, + "learning_rate": 2.656804169368285e-05, + "loss": 2.0139, + "step": 16196000 + }, + { + "epoch": 46.88, + "learning_rate": 2.656731804603557e-05, + "loss": 2.013, + "step": 16196500 + }, + { + "epoch": 46.88, + "learning_rate": 2.6566594398388294e-05, + "loss": 2.0115, + "step": 16197000 + }, + { + "epoch": 46.89, + "learning_rate": 2.6565872198036313e-05, + "loss": 1.9834, + "step": 16197500 + }, + { + "epoch": 46.89, + "learning_rate": 2.6565148550389035e-05, + "loss": 2.0116, + "step": 16198000 + }, + { + "epoch": 46.89, + "learning_rate": 2.6564424902741757e-05, + "loss": 2.0067, + "step": 16198500 + }, + { + "epoch": 46.89, + "learning_rate": 2.656370125509448e-05, + "loss": 2.0466, + "step": 16199000 + }, + { + "epoch": 46.89, + "learning_rate": 2.6562979054742498e-05, + "loss": 2.0388, + "step": 16199500 + }, + { + "epoch": 46.89, + "learning_rate": 2.656225540709522e-05, + "loss": 2.0394, + "step": 16200000 + }, + { + "epoch": 46.89, + "learning_rate": 2.6561531759447943e-05, + "loss": 2.0123, + "step": 16200500 + }, + { + "epoch": 46.9, + "learning_rate": 2.6560808111800668e-05, + "loss": 2.0032, + "step": 16201000 + }, + { + "epoch": 46.9, + "learning_rate": 2.656008446415339e-05, + "loss": 2.0047, + "step": 16201500 + }, + { + "epoch": 46.9, + "learning_rate": 2.6559360816506113e-05, + "loss": 2.0289, + "step": 16202000 + }, + { + "epoch": 46.9, + "learning_rate": 2.6558637168858835e-05, + "loss": 2.0293, + "step": 16202500 + }, + { + "epoch": 46.9, + "learning_rate": 2.6557913521211564e-05, + "loss": 2.0212, + "step": 16203000 + }, + { + "epoch": 46.9, + "learning_rate": 2.6557189873564286e-05, + "loss": 2.0385, + "step": 16203500 + }, + { + "epoch": 46.9, + "learning_rate": 2.655646622591701e-05, + "loss": 2.0195, + "step": 16204000 + }, + { + "epoch": 46.91, + "learning_rate": 2.655574257826973e-05, + "loss": 2.0249, + "step": 16204500 + }, + { + "epoch": 46.91, + "learning_rate": 2.6555018930622456e-05, + "loss": 2.019, + "step": 16205000 + }, + { + "epoch": 46.91, + "learning_rate": 2.655429528297518e-05, + "loss": 2.0348, + "step": 16205500 + }, + { + "epoch": 46.91, + "learning_rate": 2.65535716353279e-05, + "loss": 2.0277, + "step": 16206000 + }, + { + "epoch": 46.91, + "learning_rate": 2.655284943497592e-05, + "loss": 2.0257, + "step": 16206500 + }, + { + "epoch": 46.91, + "learning_rate": 2.6552125787328642e-05, + "loss": 2.0328, + "step": 16207000 + }, + { + "epoch": 46.91, + "learning_rate": 2.6551402139681364e-05, + "loss": 2.0506, + "step": 16207500 + }, + { + "epoch": 46.92, + "learning_rate": 2.6550678492034086e-05, + "loss": 2.0613, + "step": 16208000 + }, + { + "epoch": 46.92, + "learning_rate": 2.654995484438681e-05, + "loss": 1.9983, + "step": 16208500 + }, + { + "epoch": 46.92, + "learning_rate": 2.654923119673953e-05, + "loss": 2.0021, + "step": 16209000 + }, + { + "epoch": 46.92, + "learning_rate": 2.6548507549092256e-05, + "loss": 2.0177, + "step": 16209500 + }, + { + "epoch": 46.92, + "learning_rate": 2.6547783901444982e-05, + "loss": 2.0078, + "step": 16210000 + }, + { + "epoch": 46.92, + "learning_rate": 2.6547060253797708e-05, + "loss": 2.0201, + "step": 16210500 + }, + { + "epoch": 46.92, + "learning_rate": 2.6546338053445723e-05, + "loss": 2.0336, + "step": 16211000 + }, + { + "epoch": 46.93, + "learning_rate": 2.6545614405798446e-05, + "loss": 2.0209, + "step": 16211500 + }, + { + "epoch": 46.93, + "learning_rate": 2.654489075815117e-05, + "loss": 2.0285, + "step": 16212000 + }, + { + "epoch": 46.93, + "learning_rate": 2.6544167110503893e-05, + "loss": 2.0313, + "step": 16212500 + }, + { + "epoch": 46.93, + "learning_rate": 2.6543443462856616e-05, + "loss": 2.0029, + "step": 16213000 + }, + { + "epoch": 46.93, + "learning_rate": 2.6542722709799928e-05, + "loss": 2.0234, + "step": 16213500 + }, + { + "epoch": 46.93, + "learning_rate": 2.654199906215265e-05, + "loss": 2.0117, + "step": 16214000 + }, + { + "epoch": 46.93, + "learning_rate": 2.6541275414505372e-05, + "loss": 2.0235, + "step": 16214500 + }, + { + "epoch": 46.94, + "learning_rate": 2.6540551766858095e-05, + "loss": 2.0301, + "step": 16215000 + }, + { + "epoch": 46.94, + "learning_rate": 2.653982811921082e-05, + "loss": 2.0358, + "step": 16215500 + }, + { + "epoch": 46.94, + "learning_rate": 2.6539104471563542e-05, + "loss": 2.0192, + "step": 16216000 + }, + { + "epoch": 46.94, + "learning_rate": 2.6538380823916265e-05, + "loss": 2.0148, + "step": 16216500 + }, + { + "epoch": 46.94, + "learning_rate": 2.6537657176268987e-05, + "loss": 2.0015, + "step": 16217000 + }, + { + "epoch": 46.94, + "learning_rate": 2.6536934975917006e-05, + "loss": 2.026, + "step": 16217500 + }, + { + "epoch": 46.94, + "learning_rate": 2.6536211328269735e-05, + "loss": 2.0174, + "step": 16218000 + }, + { + "epoch": 46.95, + "learning_rate": 2.6535487680622457e-05, + "loss": 2.0093, + "step": 16218500 + }, + { + "epoch": 46.95, + "learning_rate": 2.653476403297518e-05, + "loss": 2.0194, + "step": 16219000 + }, + { + "epoch": 46.95, + "learning_rate": 2.65340403853279e-05, + "loss": 2.0544, + "step": 16219500 + }, + { + "epoch": 46.95, + "learning_rate": 2.6533316737680624e-05, + "loss": 2.0305, + "step": 16220000 + }, + { + "epoch": 46.95, + "learning_rate": 2.6532593090033346e-05, + "loss": 2.0287, + "step": 16220500 + }, + { + "epoch": 46.95, + "learning_rate": 2.653186944238607e-05, + "loss": 2.029, + "step": 16221000 + }, + { + "epoch": 46.95, + "learning_rate": 2.6531145794738794e-05, + "loss": 2.0467, + "step": 16221500 + }, + { + "epoch": 46.96, + "learning_rate": 2.6530422147091516e-05, + "loss": 2.0261, + "step": 16222000 + }, + { + "epoch": 46.96, + "learning_rate": 2.6529699946739535e-05, + "loss": 2.0302, + "step": 16222500 + }, + { + "epoch": 46.96, + "learning_rate": 2.6528976299092257e-05, + "loss": 2.0492, + "step": 16223000 + }, + { + "epoch": 46.96, + "learning_rate": 2.652825265144498e-05, + "loss": 2.0154, + "step": 16223500 + }, + { + "epoch": 46.96, + "learning_rate": 2.6527529003797702e-05, + "loss": 1.9939, + "step": 16224000 + }, + { + "epoch": 46.96, + "learning_rate": 2.6526805356150424e-05, + "loss": 2.0316, + "step": 16224500 + }, + { + "epoch": 46.96, + "learning_rate": 2.6526083155798443e-05, + "loss": 2.0216, + "step": 16225000 + }, + { + "epoch": 46.97, + "learning_rate": 2.6525359508151172e-05, + "loss": 1.9904, + "step": 16225500 + }, + { + "epoch": 46.97, + "learning_rate": 2.6524635860503894e-05, + "loss": 2.0194, + "step": 16226000 + }, + { + "epoch": 46.97, + "learning_rate": 2.6523912212856616e-05, + "loss": 2.024, + "step": 16226500 + }, + { + "epoch": 46.97, + "learning_rate": 2.652318856520934e-05, + "loss": 2.0107, + "step": 16227000 + }, + { + "epoch": 46.97, + "learning_rate": 2.652246491756206e-05, + "loss": 2.0238, + "step": 16227500 + }, + { + "epoch": 46.97, + "learning_rate": 2.6521741269914786e-05, + "loss": 2.0366, + "step": 16228000 + }, + { + "epoch": 46.97, + "learning_rate": 2.65210205168581e-05, + "loss": 2.0183, + "step": 16228500 + }, + { + "epoch": 46.98, + "learning_rate": 2.652029686921082e-05, + "loss": 2.0229, + "step": 16229000 + }, + { + "epoch": 46.98, + "learning_rate": 2.6519573221563543e-05, + "loss": 2.0183, + "step": 16229500 + }, + { + "epoch": 46.98, + "learning_rate": 2.6518849573916265e-05, + "loss": 2.0177, + "step": 16230000 + }, + { + "epoch": 46.98, + "learning_rate": 2.6518125926268988e-05, + "loss": 2.031, + "step": 16230500 + }, + { + "epoch": 46.98, + "learning_rate": 2.651740227862171e-05, + "loss": 2.0306, + "step": 16231000 + }, + { + "epoch": 46.98, + "learning_rate": 2.6516678630974436e-05, + "loss": 2.0178, + "step": 16231500 + }, + { + "epoch": 46.98, + "learning_rate": 2.651595643062245e-05, + "loss": 2.0021, + "step": 16232000 + }, + { + "epoch": 46.99, + "learning_rate": 2.6515232782975173e-05, + "loss": 2.0334, + "step": 16232500 + }, + { + "epoch": 46.99, + "learning_rate": 2.6514509135327902e-05, + "loss": 2.0101, + "step": 16233000 + }, + { + "epoch": 46.99, + "learning_rate": 2.6513785487680625e-05, + "loss": 2.0134, + "step": 16233500 + }, + { + "epoch": 46.99, + "learning_rate": 2.651306184003335e-05, + "loss": 2.0177, + "step": 16234000 + }, + { + "epoch": 46.99, + "learning_rate": 2.6512338192386072e-05, + "loss": 2.0161, + "step": 16234500 + }, + { + "epoch": 46.99, + "learning_rate": 2.6511614544738795e-05, + "loss": 2.0203, + "step": 16235000 + }, + { + "epoch": 47.0, + "learning_rate": 2.6510890897091517e-05, + "loss": 2.0128, + "step": 16235500 + }, + { + "epoch": 47.0, + "learning_rate": 2.6510168696739536e-05, + "loss": 2.0218, + "step": 16236000 + }, + { + "epoch": 47.0, + "learning_rate": 2.650944649638755e-05, + "loss": 2.0444, + "step": 16236500 + }, + { + "epoch": 47.0, + "learning_rate": 2.6508722848740274e-05, + "loss": 2.0357, + "step": 16237000 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.672088574466875, + "eval_accuracy_mlm": 0.6375967336898031, + "eval_accuracy_nsp": 0.8571585706232555, + "eval_loss": 2.169609785079956, + "eval_runtime": 331.6707, + "eval_samples_per_second": 1315.721, + "eval_steps_per_second": 54.822, + "step": 16237184 + }, + { + "epoch": 47.0, + "learning_rate": 2.6507999201093e-05, + "loss": 2.0167, + "step": 16237500 + }, + { + "epoch": 47.0, + "learning_rate": 2.650727555344572e-05, + "loss": 2.0055, + "step": 16238000 + }, + { + "epoch": 47.0, + "learning_rate": 2.6506553353093737e-05, + "loss": 2.028, + "step": 16238500 + }, + { + "epoch": 47.01, + "learning_rate": 2.6505829705446463e-05, + "loss": 1.9944, + "step": 16239000 + }, + { + "epoch": 47.01, + "learning_rate": 2.6505106057799185e-05, + "loss": 2.0081, + "step": 16239500 + }, + { + "epoch": 47.01, + "learning_rate": 2.6504382410151907e-05, + "loss": 2.0141, + "step": 16240000 + }, + { + "epoch": 47.01, + "learning_rate": 2.6503658762504636e-05, + "loss": 1.9945, + "step": 16240500 + }, + { + "epoch": 47.01, + "learning_rate": 2.650293511485736e-05, + "loss": 1.9809, + "step": 16241000 + }, + { + "epoch": 47.01, + "learning_rate": 2.650221146721008e-05, + "loss": 2.0042, + "step": 16241500 + }, + { + "epoch": 47.01, + "learning_rate": 2.6501487819562803e-05, + "loss": 1.9931, + "step": 16242000 + }, + { + "epoch": 47.02, + "learning_rate": 2.6500764171915525e-05, + "loss": 1.9765, + "step": 16242500 + }, + { + "epoch": 47.02, + "learning_rate": 2.650004052426825e-05, + "loss": 1.9911, + "step": 16243000 + }, + { + "epoch": 47.02, + "learning_rate": 2.6499316876620973e-05, + "loss": 1.9891, + "step": 16243500 + }, + { + "epoch": 47.02, + "learning_rate": 2.6498593228973695e-05, + "loss": 2.0185, + "step": 16244000 + }, + { + "epoch": 47.02, + "learning_rate": 2.6497871028621714e-05, + "loss": 2.0038, + "step": 16244500 + }, + { + "epoch": 47.02, + "learning_rate": 2.649714882826973e-05, + "loss": 1.9875, + "step": 16245000 + }, + { + "epoch": 47.02, + "learning_rate": 2.6496425180622452e-05, + "loss": 1.9972, + "step": 16245500 + }, + { + "epoch": 47.03, + "learning_rate": 2.6495701532975174e-05, + "loss": 2.0023, + "step": 16246000 + }, + { + "epoch": 47.03, + "learning_rate": 2.64949778853279e-05, + "loss": 1.9806, + "step": 16246500 + }, + { + "epoch": 47.03, + "learning_rate": 2.6494254237680622e-05, + "loss": 1.9785, + "step": 16247000 + }, + { + "epoch": 47.03, + "learning_rate": 2.6493532037328638e-05, + "loss": 1.9971, + "step": 16247500 + }, + { + "epoch": 47.03, + "learning_rate": 2.6492808389681367e-05, + "loss": 1.9984, + "step": 16248000 + }, + { + "epoch": 47.03, + "learning_rate": 2.649208474203409e-05, + "loss": 2.0143, + "step": 16248500 + }, + { + "epoch": 47.03, + "learning_rate": 2.6491361094386814e-05, + "loss": 2.0059, + "step": 16249000 + }, + { + "epoch": 47.04, + "learning_rate": 2.6490637446739537e-05, + "loss": 2.0115, + "step": 16249500 + }, + { + "epoch": 47.04, + "learning_rate": 2.648991379909226e-05, + "loss": 1.9961, + "step": 16250000 + }, + { + "epoch": 47.04, + "learning_rate": 2.6489191598740278e-05, + "loss": 1.9944, + "step": 16250500 + }, + { + "epoch": 47.04, + "learning_rate": 2.6488467951093e-05, + "loss": 2.0279, + "step": 16251000 + }, + { + "epoch": 47.04, + "learning_rate": 2.6487744303445722e-05, + "loss": 1.9967, + "step": 16251500 + }, + { + "epoch": 47.04, + "learning_rate": 2.6487020655798445e-05, + "loss": 2.01, + "step": 16252000 + }, + { + "epoch": 47.04, + "learning_rate": 2.6486297008151167e-05, + "loss": 1.9854, + "step": 16252500 + }, + { + "epoch": 47.05, + "learning_rate": 2.648557336050389e-05, + "loss": 2.0177, + "step": 16253000 + }, + { + "epoch": 47.05, + "learning_rate": 2.6484851160151908e-05, + "loss": 1.9834, + "step": 16253500 + }, + { + "epoch": 47.05, + "learning_rate": 2.648412751250463e-05, + "loss": 2.0051, + "step": 16254000 + }, + { + "epoch": 47.05, + "learning_rate": 2.6483403864857352e-05, + "loss": 2.0042, + "step": 16254500 + }, + { + "epoch": 47.05, + "learning_rate": 2.6482680217210075e-05, + "loss": 2.0084, + "step": 16255000 + }, + { + "epoch": 47.05, + "learning_rate": 2.64819580168581e-05, + "loss": 2.0143, + "step": 16255500 + }, + { + "epoch": 47.05, + "learning_rate": 2.6481234369210823e-05, + "loss": 2.0037, + "step": 16256000 + }, + { + "epoch": 47.06, + "learning_rate": 2.6480510721563545e-05, + "loss": 1.9933, + "step": 16256500 + }, + { + "epoch": 47.06, + "learning_rate": 2.6479787073916267e-05, + "loss": 1.9866, + "step": 16257000 + }, + { + "epoch": 47.06, + "learning_rate": 2.647906342626899e-05, + "loss": 2.0103, + "step": 16257500 + }, + { + "epoch": 47.06, + "learning_rate": 2.6478339778621715e-05, + "loss": 1.9992, + "step": 16258000 + }, + { + "epoch": 47.06, + "learning_rate": 2.6477619025565027e-05, + "loss": 2.0001, + "step": 16258500 + }, + { + "epoch": 47.06, + "learning_rate": 2.647689537791775e-05, + "loss": 1.9887, + "step": 16259000 + }, + { + "epoch": 47.06, + "learning_rate": 2.647617173027047e-05, + "loss": 2.0086, + "step": 16259500 + }, + { + "epoch": 47.07, + "learning_rate": 2.6475448082623194e-05, + "loss": 1.9814, + "step": 16260000 + }, + { + "epoch": 47.07, + "learning_rate": 2.6474724434975916e-05, + "loss": 2.0141, + "step": 16260500 + }, + { + "epoch": 47.07, + "learning_rate": 2.6474002234623935e-05, + "loss": 1.9882, + "step": 16261000 + }, + { + "epoch": 47.07, + "learning_rate": 2.6473278586976657e-05, + "loss": 2.0177, + "step": 16261500 + }, + { + "epoch": 47.07, + "learning_rate": 2.647255493932938e-05, + "loss": 2.0047, + "step": 16262000 + }, + { + "epoch": 47.07, + "learning_rate": 2.6471831291682102e-05, + "loss": 2.0122, + "step": 16262500 + }, + { + "epoch": 47.07, + "learning_rate": 2.647110764403483e-05, + "loss": 1.9908, + "step": 16263000 + }, + { + "epoch": 47.08, + "learning_rate": 2.6470383996387553e-05, + "loss": 1.9781, + "step": 16263500 + }, + { + "epoch": 47.08, + "learning_rate": 2.646966034874028e-05, + "loss": 2.0095, + "step": 16264000 + }, + { + "epoch": 47.08, + "learning_rate": 2.6468936701093e-05, + "loss": 2.0017, + "step": 16264500 + }, + { + "epoch": 47.08, + "learning_rate": 2.6468213053445723e-05, + "loss": 2.0256, + "step": 16265000 + }, + { + "epoch": 47.08, + "learning_rate": 2.6467490853093742e-05, + "loss": 2.0126, + "step": 16265500 + }, + { + "epoch": 47.08, + "learning_rate": 2.6466767205446464e-05, + "loss": 1.987, + "step": 16266000 + }, + { + "epoch": 47.08, + "learning_rate": 2.646604500509448e-05, + "loss": 1.9914, + "step": 16266500 + }, + { + "epoch": 47.09, + "learning_rate": 2.6465321357447202e-05, + "loss": 1.9869, + "step": 16267000 + }, + { + "epoch": 47.09, + "learning_rate": 2.6464597709799928e-05, + "loss": 2.0148, + "step": 16267500 + }, + { + "epoch": 47.09, + "learning_rate": 2.646387406215265e-05, + "loss": 1.9782, + "step": 16268000 + }, + { + "epoch": 47.09, + "learning_rate": 2.6463150414505372e-05, + "loss": 2.0241, + "step": 16268500 + }, + { + "epoch": 47.09, + "learning_rate": 2.6462426766858094e-05, + "loss": 1.9896, + "step": 16269000 + }, + { + "epoch": 47.09, + "learning_rate": 2.6461703119210817e-05, + "loss": 2.0107, + "step": 16269500 + }, + { + "epoch": 47.09, + "learning_rate": 2.646098236615413e-05, + "loss": 2.0062, + "step": 16270000 + }, + { + "epoch": 47.1, + "learning_rate": 2.6460258718506854e-05, + "loss": 2.0045, + "step": 16270500 + }, + { + "epoch": 47.1, + "learning_rate": 2.645953507085958e-05, + "loss": 2.0029, + "step": 16271000 + }, + { + "epoch": 47.1, + "learning_rate": 2.6458811423212306e-05, + "loss": 2.0221, + "step": 16271500 + }, + { + "epoch": 47.1, + "learning_rate": 2.645808922286032e-05, + "loss": 1.9869, + "step": 16272000 + }, + { + "epoch": 47.1, + "learning_rate": 2.645736702250834e-05, + "loss": 2.0297, + "step": 16272500 + }, + { + "epoch": 47.1, + "learning_rate": 2.6456643374861062e-05, + "loss": 2.0038, + "step": 16273000 + }, + { + "epoch": 47.11, + "learning_rate": 2.6455919727213785e-05, + "loss": 2.0056, + "step": 16273500 + }, + { + "epoch": 47.11, + "learning_rate": 2.6455196079566507e-05, + "loss": 2.0079, + "step": 16274000 + }, + { + "epoch": 47.11, + "learning_rate": 2.645447243191923e-05, + "loss": 1.9754, + "step": 16274500 + }, + { + "epoch": 47.11, + "learning_rate": 2.6453748784271955e-05, + "loss": 1.9992, + "step": 16275000 + }, + { + "epoch": 47.11, + "learning_rate": 2.6453025136624677e-05, + "loss": 1.9686, + "step": 16275500 + }, + { + "epoch": 47.11, + "learning_rate": 2.64523014889774e-05, + "loss": 2.0061, + "step": 16276000 + }, + { + "epoch": 47.11, + "learning_rate": 2.645157784133012e-05, + "loss": 1.9908, + "step": 16276500 + }, + { + "epoch": 47.12, + "learning_rate": 2.6450854193682844e-05, + "loss": 1.9912, + "step": 16277000 + }, + { + "epoch": 47.12, + "learning_rate": 2.6450130546035566e-05, + "loss": 1.9964, + "step": 16277500 + }, + { + "epoch": 47.12, + "learning_rate": 2.6449406898388295e-05, + "loss": 2.0016, + "step": 16278000 + }, + { + "epoch": 47.12, + "learning_rate": 2.6448683250741017e-05, + "loss": 2.0154, + "step": 16278500 + }, + { + "epoch": 47.12, + "learning_rate": 2.6447959603093743e-05, + "loss": 2.0106, + "step": 16279000 + }, + { + "epoch": 47.12, + "learning_rate": 2.6447235955446465e-05, + "loss": 1.9801, + "step": 16279500 + }, + { + "epoch": 47.12, + "learning_rate": 2.644651375509448e-05, + "loss": 2.0191, + "step": 16280000 + }, + { + "epoch": 47.13, + "learning_rate": 2.6445790107447206e-05, + "loss": 1.9984, + "step": 16280500 + }, + { + "epoch": 47.13, + "learning_rate": 2.6445067907095222e-05, + "loss": 1.9883, + "step": 16281000 + }, + { + "epoch": 47.13, + "learning_rate": 2.6444344259447944e-05, + "loss": 2.0342, + "step": 16281500 + }, + { + "epoch": 47.13, + "learning_rate": 2.644362061180067e-05, + "loss": 2.0042, + "step": 16282000 + }, + { + "epoch": 47.13, + "learning_rate": 2.6442896964153392e-05, + "loss": 1.989, + "step": 16282500 + }, + { + "epoch": 47.13, + "learning_rate": 2.6442173316506114e-05, + "loss": 2.0144, + "step": 16283000 + }, + { + "epoch": 47.13, + "learning_rate": 2.6441449668858836e-05, + "loss": 2.0145, + "step": 16283500 + }, + { + "epoch": 47.14, + "learning_rate": 2.644072602121156e-05, + "loss": 1.9841, + "step": 16284000 + }, + { + "epoch": 47.14, + "learning_rate": 2.644000237356428e-05, + "loss": 2.0091, + "step": 16284500 + }, + { + "epoch": 47.14, + "learning_rate": 2.6439281620507593e-05, + "loss": 2.0015, + "step": 16285000 + }, + { + "epoch": 47.14, + "learning_rate": 2.643855797286032e-05, + "loss": 2.0131, + "step": 16285500 + }, + { + "epoch": 47.14, + "learning_rate": 2.6437834325213044e-05, + "loss": 1.9874, + "step": 16286000 + }, + { + "epoch": 47.14, + "learning_rate": 2.643711067756577e-05, + "loss": 2.049, + "step": 16286500 + }, + { + "epoch": 47.14, + "learning_rate": 2.6436387029918492e-05, + "loss": 2.0233, + "step": 16287000 + }, + { + "epoch": 47.15, + "learning_rate": 2.6435663382271214e-05, + "loss": 1.9926, + "step": 16287500 + }, + { + "epoch": 47.15, + "learning_rate": 2.6434939734623937e-05, + "loss": 1.9898, + "step": 16288000 + }, + { + "epoch": 47.15, + "learning_rate": 2.643421608697666e-05, + "loss": 2.0219, + "step": 16288500 + }, + { + "epoch": 47.15, + "learning_rate": 2.643349243932938e-05, + "loss": 2.0217, + "step": 16289000 + }, + { + "epoch": 47.15, + "learning_rate": 2.6432768791682107e-05, + "loss": 1.9784, + "step": 16289500 + }, + { + "epoch": 47.15, + "learning_rate": 2.643204514403483e-05, + "loss": 2.0124, + "step": 16290000 + }, + { + "epoch": 47.15, + "learning_rate": 2.643132149638755e-05, + "loss": 2.007, + "step": 16290500 + }, + { + "epoch": 47.16, + "learning_rate": 2.643059929603557e-05, + "loss": 1.9855, + "step": 16291000 + }, + { + "epoch": 47.16, + "learning_rate": 2.6429875648388292e-05, + "loss": 2.0095, + "step": 16291500 + }, + { + "epoch": 47.16, + "learning_rate": 2.6429152000741015e-05, + "loss": 2.017, + "step": 16292000 + }, + { + "epoch": 47.16, + "learning_rate": 2.6428428353093737e-05, + "loss": 1.9794, + "step": 16292500 + }, + { + "epoch": 47.16, + "learning_rate": 2.6427704705446466e-05, + "loss": 2.0038, + "step": 16293000 + }, + { + "epoch": 47.16, + "learning_rate": 2.642698250509448e-05, + "loss": 2.0104, + "step": 16293500 + }, + { + "epoch": 47.16, + "learning_rate": 2.64262603047425e-05, + "loss": 1.9943, + "step": 16294000 + }, + { + "epoch": 47.17, + "learning_rate": 2.6425536657095223e-05, + "loss": 2.0119, + "step": 16294500 + }, + { + "epoch": 47.17, + "learning_rate": 2.642481445674324e-05, + "loss": 2.0061, + "step": 16295000 + }, + { + "epoch": 47.17, + "learning_rate": 2.6424092256391257e-05, + "loss": 2.0041, + "step": 16295500 + }, + { + "epoch": 47.17, + "learning_rate": 2.6423368608743983e-05, + "loss": 2.0168, + "step": 16296000 + }, + { + "epoch": 47.17, + "learning_rate": 2.6422644961096705e-05, + "loss": 1.983, + "step": 16296500 + }, + { + "epoch": 47.17, + "learning_rate": 2.6421921313449427e-05, + "loss": 2.0104, + "step": 16297000 + }, + { + "epoch": 47.17, + "learning_rate": 2.6421199113097446e-05, + "loss": 1.9985, + "step": 16297500 + }, + { + "epoch": 47.18, + "learning_rate": 2.642047546545017e-05, + "loss": 1.9915, + "step": 16298000 + }, + { + "epoch": 47.18, + "learning_rate": 2.641975181780289e-05, + "loss": 2.0136, + "step": 16298500 + }, + { + "epoch": 47.18, + "learning_rate": 2.6419028170155613e-05, + "loss": 2.0119, + "step": 16299000 + }, + { + "epoch": 47.18, + "learning_rate": 2.6418304522508335e-05, + "loss": 2.0058, + "step": 16299500 + }, + { + "epoch": 47.18, + "learning_rate": 2.6417582322156354e-05, + "loss": 2.0085, + "step": 16300000 + }, + { + "epoch": 47.18, + "learning_rate": 2.6416858674509076e-05, + "loss": 2.0056, + "step": 16300500 + }, + { + "epoch": 47.18, + "learning_rate": 2.6416135026861805e-05, + "loss": 2.0153, + "step": 16301000 + }, + { + "epoch": 47.19, + "learning_rate": 2.6415411379214527e-05, + "loss": 2.0066, + "step": 16301500 + }, + { + "epoch": 47.19, + "learning_rate": 2.641468773156725e-05, + "loss": 1.9834, + "step": 16302000 + }, + { + "epoch": 47.19, + "learning_rate": 2.6413964083919972e-05, + "loss": 2.0004, + "step": 16302500 + }, + { + "epoch": 47.19, + "learning_rate": 2.6413240436272698e-05, + "loss": 2.0015, + "step": 16303000 + }, + { + "epoch": 47.19, + "learning_rate": 2.641251678862542e-05, + "loss": 2.0234, + "step": 16303500 + }, + { + "epoch": 47.19, + "learning_rate": 2.6411793140978142e-05, + "loss": 2.0069, + "step": 16304000 + }, + { + "epoch": 47.19, + "learning_rate": 2.6411069493330864e-05, + "loss": 1.9965, + "step": 16304500 + }, + { + "epoch": 47.2, + "learning_rate": 2.6410345845683586e-05, + "loss": 1.9851, + "step": 16305000 + }, + { + "epoch": 47.2, + "learning_rate": 2.640962219803631e-05, + "loss": 2.0069, + "step": 16305500 + }, + { + "epoch": 47.2, + "learning_rate": 2.6408898550389034e-05, + "loss": 1.9979, + "step": 16306000 + }, + { + "epoch": 47.2, + "learning_rate": 2.6408174902741757e-05, + "loss": 2.0327, + "step": 16306500 + }, + { + "epoch": 47.2, + "learning_rate": 2.6407452702389772e-05, + "loss": 2.001, + "step": 16307000 + }, + { + "epoch": 47.2, + "learning_rate": 2.6406729054742498e-05, + "loss": 2.0299, + "step": 16307500 + }, + { + "epoch": 47.2, + "learning_rate": 2.640600540709522e-05, + "loss": 1.9893, + "step": 16308000 + }, + { + "epoch": 47.21, + "learning_rate": 2.640528175944795e-05, + "loss": 2.0143, + "step": 16308500 + }, + { + "epoch": 47.21, + "learning_rate": 2.640455811180067e-05, + "loss": 2.0184, + "step": 16309000 + }, + { + "epoch": 47.21, + "learning_rate": 2.6403834464153393e-05, + "loss": 1.9848, + "step": 16309500 + }, + { + "epoch": 47.21, + "learning_rate": 2.6403110816506116e-05, + "loss": 2.0062, + "step": 16310000 + }, + { + "epoch": 47.21, + "learning_rate": 2.6402387168858838e-05, + "loss": 1.9871, + "step": 16310500 + }, + { + "epoch": 47.21, + "learning_rate": 2.6401664968506857e-05, + "loss": 1.9977, + "step": 16311000 + }, + { + "epoch": 47.22, + "learning_rate": 2.640094132085958e-05, + "loss": 2.0173, + "step": 16311500 + }, + { + "epoch": 47.22, + "learning_rate": 2.64002176732123e-05, + "loss": 2.0125, + "step": 16312000 + }, + { + "epoch": 47.22, + "learning_rate": 2.6399494025565024e-05, + "loss": 2.013, + "step": 16312500 + }, + { + "epoch": 47.22, + "learning_rate": 2.639877037791775e-05, + "loss": 1.9997, + "step": 16313000 + }, + { + "epoch": 47.22, + "learning_rate": 2.639804673027047e-05, + "loss": 2.0101, + "step": 16313500 + }, + { + "epoch": 47.22, + "learning_rate": 2.6397323082623194e-05, + "loss": 1.9984, + "step": 16314000 + }, + { + "epoch": 47.22, + "learning_rate": 2.6396599434975916e-05, + "loss": 2.0268, + "step": 16314500 + }, + { + "epoch": 47.23, + "learning_rate": 2.6395875787328638e-05, + "loss": 2.0094, + "step": 16315000 + }, + { + "epoch": 47.23, + "learning_rate": 2.6395152139681367e-05, + "loss": 2.0193, + "step": 16315500 + }, + { + "epoch": 47.23, + "learning_rate": 2.639442849203409e-05, + "loss": 2.0044, + "step": 16316000 + }, + { + "epoch": 47.23, + "learning_rate": 2.6393706291682108e-05, + "loss": 2.0221, + "step": 16316500 + }, + { + "epoch": 47.23, + "learning_rate": 2.6392984091330124e-05, + "loss": 2.0269, + "step": 16317000 + }, + { + "epoch": 47.23, + "learning_rate": 2.639226044368285e-05, + "loss": 2.0133, + "step": 16317500 + }, + { + "epoch": 47.23, + "learning_rate": 2.6391536796035572e-05, + "loss": 1.9977, + "step": 16318000 + }, + { + "epoch": 47.24, + "learning_rate": 2.6390813148388294e-05, + "loss": 2.0048, + "step": 16318500 + }, + { + "epoch": 47.24, + "learning_rate": 2.6390089500741016e-05, + "loss": 1.9914, + "step": 16319000 + }, + { + "epoch": 47.24, + "learning_rate": 2.6389367300389035e-05, + "loss": 2.0082, + "step": 16319500 + }, + { + "epoch": 47.24, + "learning_rate": 2.6388643652741757e-05, + "loss": 1.9893, + "step": 16320000 + }, + { + "epoch": 47.24, + "learning_rate": 2.638792000509448e-05, + "loss": 1.99, + "step": 16320500 + }, + { + "epoch": 47.24, + "learning_rate": 2.6387196357447202e-05, + "loss": 2.0123, + "step": 16321000 + }, + { + "epoch": 47.24, + "learning_rate": 2.6386472709799924e-05, + "loss": 2.004, + "step": 16321500 + }, + { + "epoch": 47.25, + "learning_rate": 2.638574906215265e-05, + "loss": 1.9895, + "step": 16322000 + }, + { + "epoch": 47.25, + "learning_rate": 2.6385025414505372e-05, + "loss": 2.0231, + "step": 16322500 + }, + { + "epoch": 47.25, + "learning_rate": 2.63843017668581e-05, + "loss": 2.0211, + "step": 16323000 + }, + { + "epoch": 47.25, + "learning_rate": 2.6383578119210823e-05, + "loss": 2.0197, + "step": 16323500 + }, + { + "epoch": 47.25, + "learning_rate": 2.6382854471563545e-05, + "loss": 2.0166, + "step": 16324000 + }, + { + "epoch": 47.25, + "learning_rate": 2.6382130823916268e-05, + "loss": 2.0057, + "step": 16324500 + }, + { + "epoch": 47.25, + "learning_rate": 2.638140717626899e-05, + "loss": 1.9987, + "step": 16325000 + }, + { + "epoch": 47.26, + "learning_rate": 2.6380683528621712e-05, + "loss": 1.987, + "step": 16325500 + }, + { + "epoch": 47.26, + "learning_rate": 2.6379959880974438e-05, + "loss": 1.9817, + "step": 16326000 + }, + { + "epoch": 47.26, + "learning_rate": 2.637923623332716e-05, + "loss": 1.9871, + "step": 16326500 + }, + { + "epoch": 47.26, + "learning_rate": 2.6378512585679882e-05, + "loss": 2.0226, + "step": 16327000 + }, + { + "epoch": 47.26, + "learning_rate": 2.63777903853279e-05, + "loss": 2.0067, + "step": 16327500 + }, + { + "epoch": 47.26, + "learning_rate": 2.6377068184975917e-05, + "loss": 1.9891, + "step": 16328000 + }, + { + "epoch": 47.26, + "learning_rate": 2.637634453732864e-05, + "loss": 2.0269, + "step": 16328500 + }, + { + "epoch": 47.27, + "learning_rate": 2.637562088968136e-05, + "loss": 2.0247, + "step": 16329000 + }, + { + "epoch": 47.27, + "learning_rate": 2.6374897242034087e-05, + "loss": 2.0083, + "step": 16329500 + }, + { + "epoch": 47.27, + "learning_rate": 2.637417359438681e-05, + "loss": 2.0155, + "step": 16330000 + }, + { + "epoch": 47.27, + "learning_rate": 2.6373449946739538e-05, + "loss": 2.0135, + "step": 16330500 + }, + { + "epoch": 47.27, + "learning_rate": 2.637272629909226e-05, + "loss": 1.9891, + "step": 16331000 + }, + { + "epoch": 47.27, + "learning_rate": 2.6372002651444982e-05, + "loss": 2.0146, + "step": 16331500 + }, + { + "epoch": 47.27, + "learning_rate": 2.6371279003797705e-05, + "loss": 2.0242, + "step": 16332000 + }, + { + "epoch": 47.28, + "learning_rate": 2.6370555356150427e-05, + "loss": 2.0138, + "step": 16332500 + }, + { + "epoch": 47.28, + "learning_rate": 2.6369833155798446e-05, + "loss": 2.0022, + "step": 16333000 + }, + { + "epoch": 47.28, + "learning_rate": 2.6369109508151168e-05, + "loss": 2.0029, + "step": 16333500 + }, + { + "epoch": 47.28, + "learning_rate": 2.6368387307799187e-05, + "loss": 1.9876, + "step": 16334000 + }, + { + "epoch": 47.28, + "learning_rate": 2.6367665107447203e-05, + "loss": 2.0318, + "step": 16334500 + }, + { + "epoch": 47.28, + "learning_rate": 2.6366941459799925e-05, + "loss": 2.0194, + "step": 16335000 + }, + { + "epoch": 47.28, + "learning_rate": 2.636621781215265e-05, + "loss": 2.0375, + "step": 16335500 + }, + { + "epoch": 47.29, + "learning_rate": 2.6365494164505373e-05, + "loss": 1.9954, + "step": 16336000 + }, + { + "epoch": 47.29, + "learning_rate": 2.6364770516858095e-05, + "loss": 2.0106, + "step": 16336500 + }, + { + "epoch": 47.29, + "learning_rate": 2.6364046869210817e-05, + "loss": 1.9903, + "step": 16337000 + }, + { + "epoch": 47.29, + "learning_rate": 2.636332322156354e-05, + "loss": 2.004, + "step": 16337500 + }, + { + "epoch": 47.29, + "learning_rate": 2.636259957391627e-05, + "loss": 2.0019, + "step": 16338000 + }, + { + "epoch": 47.29, + "learning_rate": 2.636187592626899e-05, + "loss": 1.9963, + "step": 16338500 + }, + { + "epoch": 47.29, + "learning_rate": 2.6361152278621716e-05, + "loss": 2.0387, + "step": 16339000 + }, + { + "epoch": 47.3, + "learning_rate": 2.636042863097444e-05, + "loss": 1.9958, + "step": 16339500 + }, + { + "epoch": 47.3, + "learning_rate": 2.6359706430622454e-05, + "loss": 2.0255, + "step": 16340000 + }, + { + "epoch": 47.3, + "learning_rate": 2.6358982782975176e-05, + "loss": 1.9951, + "step": 16340500 + }, + { + "epoch": 47.3, + "learning_rate": 2.6358259135327902e-05, + "loss": 2.0139, + "step": 16341000 + }, + { + "epoch": 47.3, + "learning_rate": 2.6357536934975917e-05, + "loss": 2.018, + "step": 16341500 + }, + { + "epoch": 47.3, + "learning_rate": 2.635681328732864e-05, + "loss": 2.0176, + "step": 16342000 + }, + { + "epoch": 47.3, + "learning_rate": 2.6356089639681365e-05, + "loss": 1.993, + "step": 16342500 + }, + { + "epoch": 47.31, + "learning_rate": 2.6355365992034088e-05, + "loss": 2.0066, + "step": 16343000 + }, + { + "epoch": 47.31, + "learning_rate": 2.6354643791682103e-05, + "loss": 1.9846, + "step": 16343500 + }, + { + "epoch": 47.31, + "learning_rate": 2.635392014403483e-05, + "loss": 2.0079, + "step": 16344000 + }, + { + "epoch": 47.31, + "learning_rate": 2.635319649638755e-05, + "loss": 2.0094, + "step": 16344500 + }, + { + "epoch": 47.31, + "learning_rate": 2.6352472848740273e-05, + "loss": 1.9937, + "step": 16345000 + }, + { + "epoch": 47.31, + "learning_rate": 2.6351749201093002e-05, + "loss": 2.0138, + "step": 16345500 + }, + { + "epoch": 47.31, + "learning_rate": 2.6351027000741018e-05, + "loss": 1.9858, + "step": 16346000 + }, + { + "epoch": 47.32, + "learning_rate": 2.635030335309374e-05, + "loss": 1.9942, + "step": 16346500 + }, + { + "epoch": 47.32, + "learning_rate": 2.6349579705446466e-05, + "loss": 1.9924, + "step": 16347000 + }, + { + "epoch": 47.32, + "learning_rate": 2.6348856057799188e-05, + "loss": 1.9803, + "step": 16347500 + }, + { + "epoch": 47.32, + "learning_rate": 2.634813241015191e-05, + "loss": 2.0046, + "step": 16348000 + }, + { + "epoch": 47.32, + "learning_rate": 2.6347408762504632e-05, + "loss": 2.0043, + "step": 16348500 + }, + { + "epoch": 47.32, + "learning_rate": 2.634668656215265e-05, + "loss": 2.0243, + "step": 16349000 + }, + { + "epoch": 47.33, + "learning_rate": 2.6345962914505374e-05, + "loss": 2.0111, + "step": 16349500 + }, + { + "epoch": 47.33, + "learning_rate": 2.6345239266858096e-05, + "loss": 2.0081, + "step": 16350000 + }, + { + "epoch": 47.33, + "learning_rate": 2.6344515619210818e-05, + "loss": 1.9986, + "step": 16350500 + }, + { + "epoch": 47.33, + "learning_rate": 2.634379197156354e-05, + "loss": 2.0231, + "step": 16351000 + }, + { + "epoch": 47.33, + "learning_rate": 2.6343068323916266e-05, + "loss": 2.0094, + "step": 16351500 + }, + { + "epoch": 47.33, + "learning_rate": 2.6342344676268988e-05, + "loss": 2.0184, + "step": 16352000 + }, + { + "epoch": 47.33, + "learning_rate": 2.6341621028621717e-05, + "loss": 1.9829, + "step": 16352500 + }, + { + "epoch": 47.34, + "learning_rate": 2.6340898828269733e-05, + "loss": 2.0146, + "step": 16353000 + }, + { + "epoch": 47.34, + "learning_rate": 2.6340175180622455e-05, + "loss": 2.0034, + "step": 16353500 + }, + { + "epoch": 47.34, + "learning_rate": 2.633945153297518e-05, + "loss": 2.0014, + "step": 16354000 + }, + { + "epoch": 47.34, + "learning_rate": 2.6338727885327903e-05, + "loss": 2.0199, + "step": 16354500 + }, + { + "epoch": 47.34, + "learning_rate": 2.6338005684975918e-05, + "loss": 2.0218, + "step": 16355000 + }, + { + "epoch": 47.34, + "learning_rate": 2.633728203732864e-05, + "loss": 2.0042, + "step": 16355500 + }, + { + "epoch": 47.34, + "learning_rate": 2.6336558389681366e-05, + "loss": 1.9926, + "step": 16356000 + }, + { + "epoch": 47.35, + "learning_rate": 2.633583474203409e-05, + "loss": 1.9974, + "step": 16356500 + }, + { + "epoch": 47.35, + "learning_rate": 2.633511109438681e-05, + "loss": 2.008, + "step": 16357000 + }, + { + "epoch": 47.35, + "learning_rate": 2.633438889403483e-05, + "loss": 2.0049, + "step": 16357500 + }, + { + "epoch": 47.35, + "learning_rate": 2.6333665246387552e-05, + "loss": 2.0176, + "step": 16358000 + }, + { + "epoch": 47.35, + "learning_rate": 2.6332941598740274e-05, + "loss": 2.0239, + "step": 16358500 + }, + { + "epoch": 47.35, + "learning_rate": 2.6332217951092996e-05, + "loss": 2.0147, + "step": 16359000 + }, + { + "epoch": 47.35, + "learning_rate": 2.6331495750741015e-05, + "loss": 2.0387, + "step": 16359500 + }, + { + "epoch": 47.36, + "learning_rate": 2.6330772103093737e-05, + "loss": 2.0079, + "step": 16360000 + }, + { + "epoch": 47.36, + "learning_rate": 2.6330048455446466e-05, + "loss": 2.0159, + "step": 16360500 + }, + { + "epoch": 47.36, + "learning_rate": 2.632932480779919e-05, + "loss": 2.037, + "step": 16361000 + }, + { + "epoch": 47.36, + "learning_rate": 2.6328602607447204e-05, + "loss": 2.0206, + "step": 16361500 + }, + { + "epoch": 47.36, + "learning_rate": 2.632787895979993e-05, + "loss": 2.0081, + "step": 16362000 + }, + { + "epoch": 47.36, + "learning_rate": 2.6327155312152652e-05, + "loss": 2.0134, + "step": 16362500 + }, + { + "epoch": 47.36, + "learning_rate": 2.6326431664505374e-05, + "loss": 2.0279, + "step": 16363000 + }, + { + "epoch": 47.37, + "learning_rate": 2.6325708016858097e-05, + "loss": 2.0289, + "step": 16363500 + }, + { + "epoch": 47.37, + "learning_rate": 2.632498436921082e-05, + "loss": 2.0225, + "step": 16364000 + }, + { + "epoch": 47.37, + "learning_rate": 2.6324260721563544e-05, + "loss": 2.0154, + "step": 16364500 + }, + { + "epoch": 47.37, + "learning_rate": 2.6323537073916267e-05, + "loss": 2.0336, + "step": 16365000 + }, + { + "epoch": 47.37, + "learning_rate": 2.632281342626899e-05, + "loss": 2.003, + "step": 16365500 + }, + { + "epoch": 47.37, + "learning_rate": 2.632208977862171e-05, + "loss": 1.9761, + "step": 16366000 + }, + { + "epoch": 47.37, + "learning_rate": 2.632136757826973e-05, + "loss": 1.9819, + "step": 16366500 + }, + { + "epoch": 47.38, + "learning_rate": 2.6320643930622452e-05, + "loss": 2.0061, + "step": 16367000 + }, + { + "epoch": 47.38, + "learning_rate": 2.6319920282975175e-05, + "loss": 2.0131, + "step": 16367500 + }, + { + "epoch": 47.38, + "learning_rate": 2.6319196635327904e-05, + "loss": 1.9965, + "step": 16368000 + }, + { + "epoch": 47.38, + "learning_rate": 2.6318472987680626e-05, + "loss": 2.0203, + "step": 16368500 + }, + { + "epoch": 47.38, + "learning_rate": 2.6317750787328645e-05, + "loss": 2.0057, + "step": 16369000 + }, + { + "epoch": 47.38, + "learning_rate": 2.6317027139681367e-05, + "loss": 2.0288, + "step": 16369500 + }, + { + "epoch": 47.38, + "learning_rate": 2.631630349203409e-05, + "loss": 2.0132, + "step": 16370000 + }, + { + "epoch": 47.39, + "learning_rate": 2.631557984438681e-05, + "loss": 2.0267, + "step": 16370500 + }, + { + "epoch": 47.39, + "learning_rate": 2.6314856196739534e-05, + "loss": 2.0316, + "step": 16371000 + }, + { + "epoch": 47.39, + "learning_rate": 2.6314132549092256e-05, + "loss": 1.997, + "step": 16371500 + }, + { + "epoch": 47.39, + "learning_rate": 2.631340890144498e-05, + "loss": 1.9998, + "step": 16372000 + }, + { + "epoch": 47.39, + "learning_rate": 2.6312685253797704e-05, + "loss": 2.0143, + "step": 16372500 + }, + { + "epoch": 47.39, + "learning_rate": 2.6311964500741016e-05, + "loss": 2.0033, + "step": 16373000 + }, + { + "epoch": 47.39, + "learning_rate": 2.6311240853093738e-05, + "loss": 2.0248, + "step": 16373500 + }, + { + "epoch": 47.4, + "learning_rate": 2.631051720544646e-05, + "loss": 1.9824, + "step": 16374000 + }, + { + "epoch": 47.4, + "learning_rate": 2.630979500509448e-05, + "loss": 2.0423, + "step": 16374500 + }, + { + "epoch": 47.4, + "learning_rate": 2.63090713574472e-05, + "loss": 2.0303, + "step": 16375000 + }, + { + "epoch": 47.4, + "learning_rate": 2.630834770979993e-05, + "loss": 1.9959, + "step": 16375500 + }, + { + "epoch": 47.4, + "learning_rate": 2.6307624062152653e-05, + "loss": 2.0308, + "step": 16376000 + }, + { + "epoch": 47.4, + "learning_rate": 2.6306900414505375e-05, + "loss": 2.02, + "step": 16376500 + }, + { + "epoch": 47.4, + "learning_rate": 2.6306176766858097e-05, + "loss": 1.9993, + "step": 16377000 + }, + { + "epoch": 47.41, + "learning_rate": 2.630545311921082e-05, + "loss": 2.012, + "step": 16377500 + }, + { + "epoch": 47.41, + "learning_rate": 2.6304729471563545e-05, + "loss": 1.9946, + "step": 16378000 + }, + { + "epoch": 47.41, + "learning_rate": 2.6304005823916267e-05, + "loss": 2.0324, + "step": 16378500 + }, + { + "epoch": 47.41, + "learning_rate": 2.630328217626899e-05, + "loss": 1.9805, + "step": 16379000 + }, + { + "epoch": 47.41, + "learning_rate": 2.6302558528621712e-05, + "loss": 1.9956, + "step": 16379500 + }, + { + "epoch": 47.41, + "learning_rate": 2.6301834880974434e-05, + "loss": 2.0126, + "step": 16380000 + }, + { + "epoch": 47.41, + "learning_rate": 2.630111123332716e-05, + "loss": 1.9921, + "step": 16380500 + }, + { + "epoch": 47.42, + "learning_rate": 2.6300389032975175e-05, + "loss": 2.0083, + "step": 16381000 + }, + { + "epoch": 47.42, + "learning_rate": 2.6299665385327898e-05, + "loss": 2.0235, + "step": 16381500 + }, + { + "epoch": 47.42, + "learning_rate": 2.6298943184975916e-05, + "loss": 2.0389, + "step": 16382000 + }, + { + "epoch": 47.42, + "learning_rate": 2.629821953732864e-05, + "loss": 2.0282, + "step": 16382500 + }, + { + "epoch": 47.42, + "learning_rate": 2.6297495889681368e-05, + "loss": 1.9951, + "step": 16383000 + }, + { + "epoch": 47.42, + "learning_rate": 2.629677224203409e-05, + "loss": 2.0261, + "step": 16383500 + }, + { + "epoch": 47.42, + "learning_rate": 2.6296048594386812e-05, + "loss": 2.0066, + "step": 16384000 + }, + { + "epoch": 47.43, + "learning_rate": 2.629532639403483e-05, + "loss": 2.0181, + "step": 16384500 + }, + { + "epoch": 47.43, + "learning_rate": 2.6294602746387553e-05, + "loss": 2.0007, + "step": 16385000 + }, + { + "epoch": 47.43, + "learning_rate": 2.6293879098740276e-05, + "loss": 2.0155, + "step": 16385500 + }, + { + "epoch": 47.43, + "learning_rate": 2.6293155451092998e-05, + "loss": 2.002, + "step": 16386000 + }, + { + "epoch": 47.43, + "learning_rate": 2.6292431803445723e-05, + "loss": 2.0183, + "step": 16386500 + }, + { + "epoch": 47.43, + "learning_rate": 2.6291708155798446e-05, + "loss": 2.0276, + "step": 16387000 + }, + { + "epoch": 47.44, + "learning_rate": 2.6290984508151168e-05, + "loss": 2.0376, + "step": 16387500 + }, + { + "epoch": 47.44, + "learning_rate": 2.629026086050389e-05, + "loss": 2.0428, + "step": 16388000 + }, + { + "epoch": 47.44, + "learning_rate": 2.6289537212856612e-05, + "loss": 1.9948, + "step": 16388500 + }, + { + "epoch": 47.44, + "learning_rate": 2.628881501250463e-05, + "loss": 2.0283, + "step": 16389000 + }, + { + "epoch": 47.44, + "learning_rate": 2.6288091364857354e-05, + "loss": 2.0138, + "step": 16389500 + }, + { + "epoch": 47.44, + "learning_rate": 2.6287367717210076e-05, + "loss": 2.0037, + "step": 16390000 + }, + { + "epoch": 47.44, + "learning_rate": 2.6286645516858098e-05, + "loss": 2.0117, + "step": 16390500 + }, + { + "epoch": 47.45, + "learning_rate": 2.6285921869210824e-05, + "loss": 2.0153, + "step": 16391000 + }, + { + "epoch": 47.45, + "learning_rate": 2.6285198221563546e-05, + "loss": 1.996, + "step": 16391500 + }, + { + "epoch": 47.45, + "learning_rate": 2.6284474573916268e-05, + "loss": 2.0256, + "step": 16392000 + }, + { + "epoch": 47.45, + "learning_rate": 2.628375092626899e-05, + "loss": 2.0133, + "step": 16392500 + }, + { + "epoch": 47.45, + "learning_rate": 2.6283027278621713e-05, + "loss": 1.9783, + "step": 16393000 + }, + { + "epoch": 47.45, + "learning_rate": 2.6282303630974435e-05, + "loss": 2.0093, + "step": 16393500 + }, + { + "epoch": 47.45, + "learning_rate": 2.6281581430622454e-05, + "loss": 2.0034, + "step": 16394000 + }, + { + "epoch": 47.46, + "learning_rate": 2.6280857782975176e-05, + "loss": 2.0135, + "step": 16394500 + }, + { + "epoch": 47.46, + "learning_rate": 2.62801341353279e-05, + "loss": 2.0092, + "step": 16395000 + }, + { + "epoch": 47.46, + "learning_rate": 2.6279410487680624e-05, + "loss": 2.0232, + "step": 16395500 + }, + { + "epoch": 47.46, + "learning_rate": 2.6278686840033346e-05, + "loss": 2.0079, + "step": 16396000 + }, + { + "epoch": 47.46, + "learning_rate": 2.6277964639681362e-05, + "loss": 2.0002, + "step": 16396500 + }, + { + "epoch": 47.46, + "learning_rate": 2.6277240992034084e-05, + "loss": 2.0299, + "step": 16397000 + }, + { + "epoch": 47.46, + "learning_rate": 2.627651734438681e-05, + "loss": 2.0163, + "step": 16397500 + }, + { + "epoch": 47.47, + "learning_rate": 2.6275793696739535e-05, + "loss": 2.0058, + "step": 16398000 + }, + { + "epoch": 47.47, + "learning_rate": 2.6275071496387554e-05, + "loss": 1.9934, + "step": 16398500 + }, + { + "epoch": 47.47, + "learning_rate": 2.6274347848740276e-05, + "loss": 2.0191, + "step": 16399000 + }, + { + "epoch": 47.47, + "learning_rate": 2.6273625648388295e-05, + "loss": 2.0023, + "step": 16399500 + }, + { + "epoch": 47.47, + "learning_rate": 2.6272902000741018e-05, + "loss": 2.0137, + "step": 16400000 + }, + { + "epoch": 47.47, + "learning_rate": 2.627217835309374e-05, + "loss": 2.0005, + "step": 16400500 + }, + { + "epoch": 47.47, + "learning_rate": 2.6271454705446462e-05, + "loss": 1.9948, + "step": 16401000 + }, + { + "epoch": 47.48, + "learning_rate": 2.6270731057799188e-05, + "loss": 2.0085, + "step": 16401500 + }, + { + "epoch": 47.48, + "learning_rate": 2.627000741015191e-05, + "loss": 2.0158, + "step": 16402000 + }, + { + "epoch": 47.48, + "learning_rate": 2.6269283762504632e-05, + "loss": 2.0282, + "step": 16402500 + }, + { + "epoch": 47.48, + "learning_rate": 2.6268560114857354e-05, + "loss": 2.0144, + "step": 16403000 + }, + { + "epoch": 47.48, + "learning_rate": 2.6267836467210077e-05, + "loss": 2.002, + "step": 16403500 + }, + { + "epoch": 47.48, + "learning_rate": 2.62671128195628e-05, + "loss": 2.0173, + "step": 16404000 + }, + { + "epoch": 47.48, + "learning_rate": 2.6266390619210818e-05, + "loss": 2.0076, + "step": 16404500 + }, + { + "epoch": 47.49, + "learning_rate": 2.626566697156354e-05, + "loss": 1.9791, + "step": 16405000 + }, + { + "epoch": 47.49, + "learning_rate": 2.626494332391627e-05, + "loss": 1.9964, + "step": 16405500 + }, + { + "epoch": 47.49, + "learning_rate": 2.626421967626899e-05, + "loss": 2.0081, + "step": 16406000 + }, + { + "epoch": 47.49, + "learning_rate": 2.626349747591701e-05, + "loss": 2.002, + "step": 16406500 + }, + { + "epoch": 47.49, + "learning_rate": 2.6262773828269732e-05, + "loss": 2.0224, + "step": 16407000 + }, + { + "epoch": 47.49, + "learning_rate": 2.6262050180622455e-05, + "loss": 2.023, + "step": 16407500 + }, + { + "epoch": 47.49, + "learning_rate": 2.6261326532975177e-05, + "loss": 2.017, + "step": 16408000 + }, + { + "epoch": 47.5, + "learning_rate": 2.62606028853279e-05, + "loss": 2.019, + "step": 16408500 + }, + { + "epoch": 47.5, + "learning_rate": 2.6259880684975918e-05, + "loss": 2.0249, + "step": 16409000 + }, + { + "epoch": 47.5, + "learning_rate": 2.625915703732864e-05, + "loss": 2.0192, + "step": 16409500 + }, + { + "epoch": 47.5, + "learning_rate": 2.625843483697666e-05, + "loss": 2.0133, + "step": 16410000 + }, + { + "epoch": 47.5, + "learning_rate": 2.625771118932938e-05, + "loss": 2.0151, + "step": 16410500 + }, + { + "epoch": 47.5, + "learning_rate": 2.6256987541682104e-05, + "loss": 2.0081, + "step": 16411000 + }, + { + "epoch": 47.5, + "learning_rate": 2.6256263894034826e-05, + "loss": 1.9954, + "step": 16411500 + }, + { + "epoch": 47.51, + "learning_rate": 2.625554024638755e-05, + "loss": 2.0016, + "step": 16412000 + }, + { + "epoch": 47.51, + "learning_rate": 2.6254816598740274e-05, + "loss": 2.0156, + "step": 16412500 + }, + { + "epoch": 47.51, + "learning_rate": 2.6254092951093003e-05, + "loss": 1.9923, + "step": 16413000 + }, + { + "epoch": 47.51, + "learning_rate": 2.6253369303445725e-05, + "loss": 1.9882, + "step": 16413500 + }, + { + "epoch": 47.51, + "learning_rate": 2.6252645655798447e-05, + "loss": 2.0145, + "step": 16414000 + }, + { + "epoch": 47.51, + "learning_rate": 2.625192200815117e-05, + "loss": 2.0059, + "step": 16414500 + }, + { + "epoch": 47.51, + "learning_rate": 2.6251198360503892e-05, + "loss": 1.9963, + "step": 16415000 + }, + { + "epoch": 47.52, + "learning_rate": 2.625047616015191e-05, + "loss": 2.0113, + "step": 16415500 + }, + { + "epoch": 47.52, + "learning_rate": 2.6249752512504633e-05, + "loss": 2.0239, + "step": 16416000 + }, + { + "epoch": 47.52, + "learning_rate": 2.6249028864857355e-05, + "loss": 2.0054, + "step": 16416500 + }, + { + "epoch": 47.52, + "learning_rate": 2.6248305217210077e-05, + "loss": 2.0156, + "step": 16417000 + }, + { + "epoch": 47.52, + "learning_rate": 2.6247581569562803e-05, + "loss": 1.9949, + "step": 16417500 + }, + { + "epoch": 47.52, + "learning_rate": 2.6246857921915525e-05, + "loss": 2.0006, + "step": 16418000 + }, + { + "epoch": 47.52, + "learning_rate": 2.624613572156354e-05, + "loss": 2.0106, + "step": 16418500 + }, + { + "epoch": 47.53, + "learning_rate": 2.6245412073916263e-05, + "loss": 2.0399, + "step": 16419000 + }, + { + "epoch": 47.53, + "learning_rate": 2.624468842626899e-05, + "loss": 2.0177, + "step": 16419500 + }, + { + "epoch": 47.53, + "learning_rate": 2.6243964778621714e-05, + "loss": 2.0142, + "step": 16420000 + }, + { + "epoch": 47.53, + "learning_rate": 2.624324113097444e-05, + "loss": 2.0021, + "step": 16420500 + }, + { + "epoch": 47.53, + "learning_rate": 2.6242517483327162e-05, + "loss": 2.0072, + "step": 16421000 + }, + { + "epoch": 47.53, + "learning_rate": 2.6241793835679884e-05, + "loss": 2.0204, + "step": 16421500 + }, + { + "epoch": 47.53, + "learning_rate": 2.6241071635327903e-05, + "loss": 1.9889, + "step": 16422000 + }, + { + "epoch": 47.54, + "learning_rate": 2.6240347987680626e-05, + "loss": 2.0089, + "step": 16422500 + }, + { + "epoch": 47.54, + "learning_rate": 2.6239624340033348e-05, + "loss": 1.9755, + "step": 16423000 + }, + { + "epoch": 47.54, + "learning_rate": 2.623890069238607e-05, + "loss": 2.013, + "step": 16423500 + }, + { + "epoch": 47.54, + "learning_rate": 2.6238177044738792e-05, + "loss": 2.02, + "step": 16424000 + }, + { + "epoch": 47.54, + "learning_rate": 2.6237453397091514e-05, + "loss": 2.0034, + "step": 16424500 + }, + { + "epoch": 47.54, + "learning_rate": 2.623672974944424e-05, + "loss": 2.0141, + "step": 16425000 + }, + { + "epoch": 47.55, + "learning_rate": 2.6236007549092256e-05, + "loss": 1.993, + "step": 16425500 + }, + { + "epoch": 47.55, + "learning_rate": 2.6235283901444978e-05, + "loss": 2.0122, + "step": 16426000 + }, + { + "epoch": 47.55, + "learning_rate": 2.6234560253797704e-05, + "loss": 2.0107, + "step": 16426500 + }, + { + "epoch": 47.55, + "learning_rate": 2.6233836606150426e-05, + "loss": 2.0276, + "step": 16427000 + }, + { + "epoch": 47.55, + "learning_rate": 2.6233112958503155e-05, + "loss": 2.0161, + "step": 16427500 + }, + { + "epoch": 47.55, + "learning_rate": 2.6232389310855877e-05, + "loss": 1.9924, + "step": 16428000 + }, + { + "epoch": 47.55, + "learning_rate": 2.62316656632086e-05, + "loss": 2.0359, + "step": 16428500 + }, + { + "epoch": 47.56, + "learning_rate": 2.623094201556132e-05, + "loss": 1.9987, + "step": 16429000 + }, + { + "epoch": 47.56, + "learning_rate": 2.6230218367914044e-05, + "loss": 2.0006, + "step": 16429500 + }, + { + "epoch": 47.56, + "learning_rate": 2.6229494720266766e-05, + "loss": 2.0022, + "step": 16430000 + }, + { + "epoch": 47.56, + "learning_rate": 2.6228772519914785e-05, + "loss": 2.0208, + "step": 16430500 + }, + { + "epoch": 47.56, + "learning_rate": 2.6228048872267507e-05, + "loss": 2.0285, + "step": 16431000 + }, + { + "epoch": 47.56, + "learning_rate": 2.622732522462023e-05, + "loss": 2.0319, + "step": 16431500 + }, + { + "epoch": 47.56, + "learning_rate": 2.622660447156354e-05, + "loss": 2.0046, + "step": 16432000 + }, + { + "epoch": 47.57, + "learning_rate": 2.6225880823916267e-05, + "loss": 2.0144, + "step": 16432500 + }, + { + "epoch": 47.57, + "learning_rate": 2.622515717626899e-05, + "loss": 2.0066, + "step": 16433000 + }, + { + "epoch": 47.57, + "learning_rate": 2.622443352862171e-05, + "loss": 1.9969, + "step": 16433500 + }, + { + "epoch": 47.57, + "learning_rate": 2.6223709880974434e-05, + "loss": 1.9998, + "step": 16434000 + }, + { + "epoch": 47.57, + "learning_rate": 2.6222987680622453e-05, + "loss": 1.9973, + "step": 16434500 + }, + { + "epoch": 47.57, + "learning_rate": 2.6222264032975175e-05, + "loss": 2.0095, + "step": 16435000 + }, + { + "epoch": 47.57, + "learning_rate": 2.6221540385327904e-05, + "loss": 2.0239, + "step": 16435500 + }, + { + "epoch": 47.58, + "learning_rate": 2.6220816737680626e-05, + "loss": 2.0055, + "step": 16436000 + }, + { + "epoch": 47.58, + "learning_rate": 2.6220094537328642e-05, + "loss": 2.0246, + "step": 16436500 + }, + { + "epoch": 47.58, + "learning_rate": 2.6219370889681368e-05, + "loss": 2.0227, + "step": 16437000 + }, + { + "epoch": 47.58, + "learning_rate": 2.621864724203409e-05, + "loss": 2.007, + "step": 16437500 + }, + { + "epoch": 47.58, + "learning_rate": 2.6217923594386812e-05, + "loss": 2.0323, + "step": 16438000 + }, + { + "epoch": 47.58, + "learning_rate": 2.6217199946739534e-05, + "loss": 2.0138, + "step": 16438500 + }, + { + "epoch": 47.58, + "learning_rate": 2.6216476299092256e-05, + "loss": 2.0164, + "step": 16439000 + }, + { + "epoch": 47.59, + "learning_rate": 2.621575265144498e-05, + "loss": 2.0208, + "step": 16439500 + }, + { + "epoch": 47.59, + "learning_rate": 2.6215030451092998e-05, + "loss": 2.0453, + "step": 16440000 + }, + { + "epoch": 47.59, + "learning_rate": 2.621430680344572e-05, + "loss": 2.0222, + "step": 16440500 + }, + { + "epoch": 47.59, + "learning_rate": 2.621358460309374e-05, + "loss": 2.0007, + "step": 16441000 + }, + { + "epoch": 47.59, + "learning_rate": 2.621286095544646e-05, + "loss": 2.0023, + "step": 16441500 + }, + { + "epoch": 47.59, + "learning_rate": 2.6212137307799183e-05, + "loss": 1.9812, + "step": 16442000 + }, + { + "epoch": 47.59, + "learning_rate": 2.6211413660151906e-05, + "loss": 2.032, + "step": 16442500 + }, + { + "epoch": 47.6, + "learning_rate": 2.6210690012504635e-05, + "loss": 2.0153, + "step": 16443000 + }, + { + "epoch": 47.6, + "learning_rate": 2.6209966364857357e-05, + "loss": 2.0308, + "step": 16443500 + }, + { + "epoch": 47.6, + "learning_rate": 2.6209242717210082e-05, + "loss": 2.009, + "step": 16444000 + }, + { + "epoch": 47.6, + "learning_rate": 2.6208519069562805e-05, + "loss": 2.0077, + "step": 16444500 + }, + { + "epoch": 47.6, + "learning_rate": 2.620779686921082e-05, + "loss": 2.015, + "step": 16445000 + }, + { + "epoch": 47.6, + "learning_rate": 2.6207073221563542e-05, + "loss": 2.0279, + "step": 16445500 + }, + { + "epoch": 47.6, + "learning_rate": 2.620635102121156e-05, + "loss": 2.0074, + "step": 16446000 + }, + { + "epoch": 47.61, + "learning_rate": 2.620562882085958e-05, + "loss": 2.0173, + "step": 16446500 + }, + { + "epoch": 47.61, + "learning_rate": 2.6204905173212303e-05, + "loss": 2.0228, + "step": 16447000 + }, + { + "epoch": 47.61, + "learning_rate": 2.6204181525565025e-05, + "loss": 2.0211, + "step": 16447500 + }, + { + "epoch": 47.61, + "learning_rate": 2.6203457877917747e-05, + "loss": 2.0023, + "step": 16448000 + }, + { + "epoch": 47.61, + "learning_rate": 2.620273423027047e-05, + "loss": 2.0191, + "step": 16448500 + }, + { + "epoch": 47.61, + "learning_rate": 2.6202012029918488e-05, + "loss": 2.0132, + "step": 16449000 + }, + { + "epoch": 47.61, + "learning_rate": 2.620128838227121e-05, + "loss": 2.0186, + "step": 16449500 + }, + { + "epoch": 47.62, + "learning_rate": 2.6200564734623933e-05, + "loss": 2.0093, + "step": 16450000 + }, + { + "epoch": 47.62, + "learning_rate": 2.619984108697666e-05, + "loss": 1.9901, + "step": 16450500 + }, + { + "epoch": 47.62, + "learning_rate": 2.6199117439329384e-05, + "loss": 2.0089, + "step": 16451000 + }, + { + "epoch": 47.62, + "learning_rate": 2.6198393791682106e-05, + "loss": 2.0226, + "step": 16451500 + }, + { + "epoch": 47.62, + "learning_rate": 2.6197670144034832e-05, + "loss": 2.0172, + "step": 16452000 + }, + { + "epoch": 47.62, + "learning_rate": 2.6196946496387554e-05, + "loss": 2.005, + "step": 16452500 + }, + { + "epoch": 47.62, + "learning_rate": 2.6196222848740276e-05, + "loss": 2.0184, + "step": 16453000 + }, + { + "epoch": 47.63, + "learning_rate": 2.6195499201093e-05, + "loss": 2.0096, + "step": 16453500 + }, + { + "epoch": 47.63, + "learning_rate": 2.619477555344572e-05, + "loss": 2.0126, + "step": 16454000 + }, + { + "epoch": 47.63, + "learning_rate": 2.6194051905798446e-05, + "loss": 2.0163, + "step": 16454500 + }, + { + "epoch": 47.63, + "learning_rate": 2.619332825815117e-05, + "loss": 2.0134, + "step": 16455000 + }, + { + "epoch": 47.63, + "learning_rate": 2.619260461050389e-05, + "loss": 2.0084, + "step": 16455500 + }, + { + "epoch": 47.63, + "learning_rate": 2.6191880962856613e-05, + "loss": 1.9979, + "step": 16456000 + }, + { + "epoch": 47.63, + "learning_rate": 2.6191157315209335e-05, + "loss": 2.0206, + "step": 16456500 + }, + { + "epoch": 47.64, + "learning_rate": 2.6190435114857354e-05, + "loss": 2.0128, + "step": 16457000 + }, + { + "epoch": 47.64, + "learning_rate": 2.6189711467210076e-05, + "loss": 2.0239, + "step": 16457500 + }, + { + "epoch": 47.64, + "learning_rate": 2.6188987819562805e-05, + "loss": 2.0077, + "step": 16458000 + }, + { + "epoch": 47.64, + "learning_rate": 2.6188264171915528e-05, + "loss": 2.0276, + "step": 16458500 + }, + { + "epoch": 47.64, + "learning_rate": 2.618754052426825e-05, + "loss": 2.0049, + "step": 16459000 + }, + { + "epoch": 47.64, + "learning_rate": 2.6186816876620972e-05, + "loss": 2.0126, + "step": 16459500 + }, + { + "epoch": 47.64, + "learning_rate": 2.6186093228973694e-05, + "loss": 2.0108, + "step": 16460000 + }, + { + "epoch": 47.65, + "learning_rate": 2.618536958132642e-05, + "loss": 2.0261, + "step": 16460500 + }, + { + "epoch": 47.65, + "learning_rate": 2.6184645933679142e-05, + "loss": 2.0298, + "step": 16461000 + }, + { + "epoch": 47.65, + "learning_rate": 2.6183923733327158e-05, + "loss": 2.0395, + "step": 16461500 + }, + { + "epoch": 47.65, + "learning_rate": 2.6183200085679883e-05, + "loss": 2.0235, + "step": 16462000 + }, + { + "epoch": 47.65, + "learning_rate": 2.6182476438032606e-05, + "loss": 2.0274, + "step": 16462500 + }, + { + "epoch": 47.65, + "learning_rate": 2.6181752790385328e-05, + "loss": 1.9983, + "step": 16463000 + }, + { + "epoch": 47.66, + "learning_rate": 2.618102914273805e-05, + "loss": 2.0386, + "step": 16463500 + }, + { + "epoch": 47.66, + "learning_rate": 2.6180305495090772e-05, + "loss": 1.9961, + "step": 16464000 + }, + { + "epoch": 47.66, + "learning_rate": 2.6179581847443495e-05, + "loss": 2.0145, + "step": 16464500 + }, + { + "epoch": 47.66, + "learning_rate": 2.617885964709152e-05, + "loss": 2.0203, + "step": 16465000 + }, + { + "epoch": 47.66, + "learning_rate": 2.6178135999444242e-05, + "loss": 2.0087, + "step": 16465500 + }, + { + "epoch": 47.66, + "learning_rate": 2.6177412351796965e-05, + "loss": 1.9872, + "step": 16466000 + }, + { + "epoch": 47.66, + "learning_rate": 2.6176690151444984e-05, + "loss": 2.0184, + "step": 16466500 + }, + { + "epoch": 47.67, + "learning_rate": 2.6175966503797706e-05, + "loss": 2.0363, + "step": 16467000 + }, + { + "epoch": 47.67, + "learning_rate": 2.6175242856150428e-05, + "loss": 2.0197, + "step": 16467500 + }, + { + "epoch": 47.67, + "learning_rate": 2.617451920850315e-05, + "loss": 2.0042, + "step": 16468000 + }, + { + "epoch": 47.67, + "learning_rate": 2.6173795560855873e-05, + "loss": 2.0245, + "step": 16468500 + }, + { + "epoch": 47.67, + "learning_rate": 2.6173071913208598e-05, + "loss": 2.0087, + "step": 16469000 + }, + { + "epoch": 47.67, + "learning_rate": 2.6172349712856614e-05, + "loss": 2.0033, + "step": 16469500 + }, + { + "epoch": 47.67, + "learning_rate": 2.6171626065209336e-05, + "loss": 1.9989, + "step": 16470000 + }, + { + "epoch": 47.68, + "learning_rate": 2.6170902417562058e-05, + "loss": 2.0101, + "step": 16470500 + }, + { + "epoch": 47.68, + "learning_rate": 2.6170178769914784e-05, + "loss": 2.0037, + "step": 16471000 + }, + { + "epoch": 47.68, + "learning_rate": 2.6169455122267506e-05, + "loss": 1.9838, + "step": 16471500 + }, + { + "epoch": 47.68, + "learning_rate": 2.616873147462023e-05, + "loss": 2.0089, + "step": 16472000 + }, + { + "epoch": 47.68, + "learning_rate": 2.6168007826972957e-05, + "loss": 2.0172, + "step": 16472500 + }, + { + "epoch": 47.68, + "learning_rate": 2.616728417932568e-05, + "loss": 2.0054, + "step": 16473000 + }, + { + "epoch": 47.68, + "learning_rate": 2.6166560531678402e-05, + "loss": 2.0122, + "step": 16473500 + }, + { + "epoch": 47.69, + "learning_rate": 2.6165836884031124e-05, + "loss": 1.9878, + "step": 16474000 + }, + { + "epoch": 47.69, + "learning_rate": 2.6165114683679143e-05, + "loss": 2.0214, + "step": 16474500 + }, + { + "epoch": 47.69, + "learning_rate": 2.6164391036031865e-05, + "loss": 2.0222, + "step": 16475000 + }, + { + "epoch": 47.69, + "learning_rate": 2.6163667388384587e-05, + "loss": 2.0353, + "step": 16475500 + }, + { + "epoch": 47.69, + "learning_rate": 2.616294374073731e-05, + "loss": 2.0336, + "step": 16476000 + }, + { + "epoch": 47.69, + "learning_rate": 2.6162220093090035e-05, + "loss": 2.0132, + "step": 16476500 + }, + { + "epoch": 47.69, + "learning_rate": 2.6161496445442758e-05, + "loss": 2.0079, + "step": 16477000 + }, + { + "epoch": 47.7, + "learning_rate": 2.616077279779548e-05, + "loss": 1.9968, + "step": 16477500 + }, + { + "epoch": 47.7, + "learning_rate": 2.6160049150148202e-05, + "loss": 2.0178, + "step": 16478000 + }, + { + "epoch": 47.7, + "learning_rate": 2.6159325502500924e-05, + "loss": 2.0029, + "step": 16478500 + }, + { + "epoch": 47.7, + "learning_rate": 2.615860185485365e-05, + "loss": 2.0024, + "step": 16479000 + }, + { + "epoch": 47.7, + "learning_rate": 2.6157878207206375e-05, + "loss": 2.0351, + "step": 16479500 + }, + { + "epoch": 47.7, + "learning_rate": 2.61571545595591e-05, + "loss": 2.0048, + "step": 16480000 + }, + { + "epoch": 47.7, + "learning_rate": 2.6156430911911823e-05, + "loss": 1.9937, + "step": 16480500 + }, + { + "epoch": 47.71, + "learning_rate": 2.615570871155984e-05, + "loss": 2.0192, + "step": 16481000 + }, + { + "epoch": 47.71, + "learning_rate": 2.615498506391256e-05, + "loss": 2.007, + "step": 16481500 + }, + { + "epoch": 47.71, + "learning_rate": 2.615426286356058e-05, + "loss": 2.024, + "step": 16482000 + }, + { + "epoch": 47.71, + "learning_rate": 2.6153539215913302e-05, + "loss": 2.0016, + "step": 16482500 + }, + { + "epoch": 47.71, + "learning_rate": 2.6152815568266025e-05, + "loss": 2.0195, + "step": 16483000 + }, + { + "epoch": 47.71, + "learning_rate": 2.615209192061875e-05, + "loss": 2.0247, + "step": 16483500 + }, + { + "epoch": 47.71, + "learning_rate": 2.6151368272971472e-05, + "loss": 2.0028, + "step": 16484000 + }, + { + "epoch": 47.72, + "learning_rate": 2.6150644625324195e-05, + "loss": 2.031, + "step": 16484500 + }, + { + "epoch": 47.72, + "learning_rate": 2.6149920977676917e-05, + "loss": 2.0452, + "step": 16485000 + }, + { + "epoch": 47.72, + "learning_rate": 2.6149198777324936e-05, + "loss": 2.01, + "step": 16485500 + }, + { + "epoch": 47.72, + "learning_rate": 2.6148475129677658e-05, + "loss": 2.0249, + "step": 16486000 + }, + { + "epoch": 47.72, + "learning_rate": 2.614775148203038e-05, + "loss": 2.0292, + "step": 16486500 + }, + { + "epoch": 47.72, + "learning_rate": 2.614702783438311e-05, + "loss": 2.0207, + "step": 16487000 + }, + { + "epoch": 47.72, + "learning_rate": 2.614630418673583e-05, + "loss": 2.0141, + "step": 16487500 + }, + { + "epoch": 47.73, + "learning_rate": 2.6145580539088554e-05, + "loss": 2.019, + "step": 16488000 + }, + { + "epoch": 47.73, + "learning_rate": 2.6144856891441276e-05, + "loss": 2.0158, + "step": 16488500 + }, + { + "epoch": 47.73, + "learning_rate": 2.6144133243794e-05, + "loss": 2.0228, + "step": 16489000 + }, + { + "epoch": 47.73, + "learning_rate": 2.6143409596146724e-05, + "loss": 2.0079, + "step": 16489500 + }, + { + "epoch": 47.73, + "learning_rate": 2.6142685948499446e-05, + "loss": 2.0218, + "step": 16490000 + }, + { + "epoch": 47.73, + "learning_rate": 2.6141962300852168e-05, + "loss": 2.0142, + "step": 16490500 + }, + { + "epoch": 47.73, + "learning_rate": 2.614123865320489e-05, + "loss": 2.021, + "step": 16491000 + }, + { + "epoch": 47.74, + "learning_rate": 2.6140515005557613e-05, + "loss": 2.0316, + "step": 16491500 + }, + { + "epoch": 47.74, + "learning_rate": 2.6139792805205632e-05, + "loss": 2.0112, + "step": 16492000 + }, + { + "epoch": 47.74, + "learning_rate": 2.6139072052148944e-05, + "loss": 2.0289, + "step": 16492500 + }, + { + "epoch": 47.74, + "learning_rate": 2.613835274638755e-05, + "loss": 2.0284, + "step": 16493000 + }, + { + "epoch": 47.74, + "learning_rate": 2.6137629098740275e-05, + "loss": 2.0181, + "step": 16493500 + }, + { + "epoch": 47.74, + "learning_rate": 2.6136905451092997e-05, + "loss": 2.0128, + "step": 16494000 + }, + { + "epoch": 47.74, + "learning_rate": 2.613618180344572e-05, + "loss": 2.0045, + "step": 16494500 + }, + { + "epoch": 47.75, + "learning_rate": 2.6135458155798442e-05, + "loss": 2.0055, + "step": 16495000 + }, + { + "epoch": 47.75, + "learning_rate": 2.613473450815117e-05, + "loss": 2.0012, + "step": 16495500 + }, + { + "epoch": 47.75, + "learning_rate": 2.6134010860503893e-05, + "loss": 2.0211, + "step": 16496000 + }, + { + "epoch": 47.75, + "learning_rate": 2.6133291554742502e-05, + "loss": 2.046, + "step": 16496500 + }, + { + "epoch": 47.75, + "learning_rate": 2.6132567907095224e-05, + "loss": 2.0706, + "step": 16497000 + }, + { + "epoch": 47.75, + "learning_rate": 2.6131844259447947e-05, + "loss": 2.0104, + "step": 16497500 + }, + { + "epoch": 47.75, + "learning_rate": 2.613112061180067e-05, + "loss": 2.02, + "step": 16498000 + }, + { + "epoch": 47.76, + "learning_rate": 2.613039696415339e-05, + "loss": 2.0103, + "step": 16498500 + }, + { + "epoch": 47.76, + "learning_rate": 2.6129673316506113e-05, + "loss": 2.0073, + "step": 16499000 + }, + { + "epoch": 47.76, + "learning_rate": 2.612894966885884e-05, + "loss": 2.0256, + "step": 16499500 + }, + { + "epoch": 47.76, + "learning_rate": 2.612822602121156e-05, + "loss": 2.0203, + "step": 16500000 + }, + { + "epoch": 47.76, + "learning_rate": 2.6127502373564283e-05, + "loss": 2.0095, + "step": 16500500 + }, + { + "epoch": 47.76, + "learning_rate": 2.6126778725917006e-05, + "loss": 2.011, + "step": 16501000 + }, + { + "epoch": 47.77, + "learning_rate": 2.6126055078269728e-05, + "loss": 2.0105, + "step": 16501500 + }, + { + "epoch": 47.77, + "learning_rate": 2.612533143062245e-05, + "loss": 2.0139, + "step": 16502000 + }, + { + "epoch": 47.77, + "learning_rate": 2.6124607782975176e-05, + "loss": 2.0296, + "step": 16502500 + }, + { + "epoch": 47.77, + "learning_rate": 2.61238841353279e-05, + "loss": 2.0275, + "step": 16503000 + }, + { + "epoch": 47.77, + "learning_rate": 2.6123160487680627e-05, + "loss": 1.9994, + "step": 16503500 + }, + { + "epoch": 47.77, + "learning_rate": 2.612243684003335e-05, + "loss": 2.0088, + "step": 16504000 + }, + { + "epoch": 47.77, + "learning_rate": 2.612171319238607e-05, + "loss": 2.024, + "step": 16504500 + }, + { + "epoch": 47.78, + "learning_rate": 2.6120989544738794e-05, + "loss": 2.0437, + "step": 16505000 + }, + { + "epoch": 47.78, + "learning_rate": 2.6120265897091516e-05, + "loss": 2.0048, + "step": 16505500 + }, + { + "epoch": 47.78, + "learning_rate": 2.6119543696739535e-05, + "loss": 2.0066, + "step": 16506000 + }, + { + "epoch": 47.78, + "learning_rate": 2.6118820049092257e-05, + "loss": 2.0295, + "step": 16506500 + }, + { + "epoch": 47.78, + "learning_rate": 2.611809640144498e-05, + "loss": 2.0129, + "step": 16507000 + }, + { + "epoch": 47.78, + "learning_rate": 2.61173727537977e-05, + "loss": 2.0167, + "step": 16507500 + }, + { + "epoch": 47.78, + "learning_rate": 2.6116649106150427e-05, + "loss": 2.0313, + "step": 16508000 + }, + { + "epoch": 47.79, + "learning_rate": 2.611592545850315e-05, + "loss": 1.9929, + "step": 16508500 + }, + { + "epoch": 47.79, + "learning_rate": 2.611520181085587e-05, + "loss": 2.0028, + "step": 16509000 + }, + { + "epoch": 47.79, + "learning_rate": 2.6114478163208594e-05, + "loss": 2.0331, + "step": 16509500 + }, + { + "epoch": 47.79, + "learning_rate": 2.6113754515561323e-05, + "loss": 2.0007, + "step": 16510000 + }, + { + "epoch": 47.79, + "learning_rate": 2.6113030867914045e-05, + "loss": 2.0001, + "step": 16510500 + }, + { + "epoch": 47.79, + "learning_rate": 2.6112307220266767e-05, + "loss": 2.015, + "step": 16511000 + }, + { + "epoch": 47.79, + "learning_rate": 2.6111583572619493e-05, + "loss": 2.0053, + "step": 16511500 + }, + { + "epoch": 47.8, + "learning_rate": 2.6110859924972215e-05, + "loss": 1.9972, + "step": 16512000 + }, + { + "epoch": 47.8, + "learning_rate": 2.6110136277324937e-05, + "loss": 1.9995, + "step": 16512500 + }, + { + "epoch": 47.8, + "learning_rate": 2.610941262967766e-05, + "loss": 2.0346, + "step": 16513000 + }, + { + "epoch": 47.8, + "learning_rate": 2.6108688982030382e-05, + "loss": 2.0162, + "step": 16513500 + }, + { + "epoch": 47.8, + "learning_rate": 2.6107968228973694e-05, + "loss": 2.0156, + "step": 16514000 + }, + { + "epoch": 47.8, + "learning_rate": 2.6107246028621713e-05, + "loss": 2.0085, + "step": 16514500 + }, + { + "epoch": 47.8, + "learning_rate": 2.6106522380974435e-05, + "loss": 2.0156, + "step": 16515000 + }, + { + "epoch": 47.81, + "learning_rate": 2.6105798733327158e-05, + "loss": 2.0203, + "step": 16515500 + }, + { + "epoch": 47.81, + "learning_rate": 2.610507508567988e-05, + "loss": 2.0291, + "step": 16516000 + }, + { + "epoch": 47.81, + "learning_rate": 2.6104351438032605e-05, + "loss": 2.0198, + "step": 16516500 + }, + { + "epoch": 47.81, + "learning_rate": 2.6103627790385328e-05, + "loss": 1.9924, + "step": 16517000 + }, + { + "epoch": 47.81, + "learning_rate": 2.6102904142738057e-05, + "loss": 2.0169, + "step": 16517500 + }, + { + "epoch": 47.81, + "learning_rate": 2.610218049509078e-05, + "loss": 2.0096, + "step": 16518000 + }, + { + "epoch": 47.81, + "learning_rate": 2.61014568474435e-05, + "loss": 2.0314, + "step": 16518500 + }, + { + "epoch": 47.82, + "learning_rate": 2.6100733199796223e-05, + "loss": 2.011, + "step": 16519000 + }, + { + "epoch": 47.82, + "learning_rate": 2.6100009552148946e-05, + "loss": 2.0148, + "step": 16519500 + }, + { + "epoch": 47.82, + "learning_rate": 2.6099285904501668e-05, + "loss": 2.0172, + "step": 16520000 + }, + { + "epoch": 47.82, + "learning_rate": 2.6098562256854393e-05, + "loss": 2.0628, + "step": 16520500 + }, + { + "epoch": 47.82, + "learning_rate": 2.6097838609207116e-05, + "loss": 2.0224, + "step": 16521000 + }, + { + "epoch": 47.82, + "learning_rate": 2.6097114961559838e-05, + "loss": 2.0335, + "step": 16521500 + }, + { + "epoch": 47.82, + "learning_rate": 2.6096392761207857e-05, + "loss": 2.021, + "step": 16522000 + }, + { + "epoch": 47.83, + "learning_rate": 2.609566911356058e-05, + "loss": 2.0101, + "step": 16522500 + }, + { + "epoch": 47.83, + "learning_rate": 2.60949454659133e-05, + "loss": 2.0287, + "step": 16523000 + }, + { + "epoch": 47.83, + "learning_rate": 2.6094223265561317e-05, + "loss": 2.0177, + "step": 16523500 + }, + { + "epoch": 47.83, + "learning_rate": 2.6093499617914042e-05, + "loss": 2.0136, + "step": 16524000 + }, + { + "epoch": 47.83, + "learning_rate": 2.6092775970266765e-05, + "loss": 2.0332, + "step": 16524500 + }, + { + "epoch": 47.83, + "learning_rate": 2.6092052322619494e-05, + "loss": 2.0135, + "step": 16525000 + }, + { + "epoch": 47.83, + "learning_rate": 2.6091328674972216e-05, + "loss": 2.0016, + "step": 16525500 + }, + { + "epoch": 47.84, + "learning_rate": 2.609060647462023e-05, + "loss": 2.0189, + "step": 16526000 + }, + { + "epoch": 47.84, + "learning_rate": 2.608988427426825e-05, + "loss": 2.016, + "step": 16526500 + }, + { + "epoch": 47.84, + "learning_rate": 2.6089160626620973e-05, + "loss": 2.0256, + "step": 16527000 + }, + { + "epoch": 47.84, + "learning_rate": 2.6088436978973695e-05, + "loss": 1.9924, + "step": 16527500 + }, + { + "epoch": 47.84, + "learning_rate": 2.6087713331326417e-05, + "loss": 2.0231, + "step": 16528000 + }, + { + "epoch": 47.84, + "learning_rate": 2.6086989683679143e-05, + "loss": 1.9967, + "step": 16528500 + }, + { + "epoch": 47.84, + "learning_rate": 2.6086266036031865e-05, + "loss": 2.0209, + "step": 16529000 + }, + { + "epoch": 47.85, + "learning_rate": 2.6085542388384587e-05, + "loss": 2.0127, + "step": 16529500 + }, + { + "epoch": 47.85, + "learning_rate": 2.608481874073731e-05, + "loss": 2.0141, + "step": 16530000 + }, + { + "epoch": 47.85, + "learning_rate": 2.6084095093090032e-05, + "loss": 2.0303, + "step": 16530500 + }, + { + "epoch": 47.85, + "learning_rate": 2.6083371445442757e-05, + "loss": 1.9991, + "step": 16531000 + }, + { + "epoch": 47.85, + "learning_rate": 2.608264779779548e-05, + "loss": 2.0296, + "step": 16531500 + }, + { + "epoch": 47.85, + "learning_rate": 2.608192415014821e-05, + "loss": 2.0148, + "step": 16532000 + }, + { + "epoch": 47.85, + "learning_rate": 2.608120050250093e-05, + "loss": 2.0237, + "step": 16532500 + }, + { + "epoch": 47.86, + "learning_rate": 2.6080476854853653e-05, + "loss": 1.985, + "step": 16533000 + }, + { + "epoch": 47.86, + "learning_rate": 2.6079753207206375e-05, + "loss": 1.9955, + "step": 16533500 + }, + { + "epoch": 47.86, + "learning_rate": 2.6079029559559098e-05, + "loss": 1.9841, + "step": 16534000 + }, + { + "epoch": 47.86, + "learning_rate": 2.6078307359207116e-05, + "loss": 2.0266, + "step": 16534500 + }, + { + "epoch": 47.86, + "learning_rate": 2.6077585158855132e-05, + "loss": 2.0053, + "step": 16535000 + }, + { + "epoch": 47.86, + "learning_rate": 2.6076861511207858e-05, + "loss": 1.9952, + "step": 16535500 + }, + { + "epoch": 47.86, + "learning_rate": 2.607613786356058e-05, + "loss": 2.0188, + "step": 16536000 + }, + { + "epoch": 47.87, + "learning_rate": 2.6075414215913302e-05, + "loss": 2.0231, + "step": 16536500 + }, + { + "epoch": 47.87, + "learning_rate": 2.6074690568266024e-05, + "loss": 2.0253, + "step": 16537000 + }, + { + "epoch": 47.87, + "learning_rate": 2.6073966920618747e-05, + "loss": 2.0285, + "step": 16537500 + }, + { + "epoch": 47.87, + "learning_rate": 2.607324327297147e-05, + "loss": 2.0218, + "step": 16538000 + }, + { + "epoch": 47.87, + "learning_rate": 2.6072519625324194e-05, + "loss": 2.0481, + "step": 16538500 + }, + { + "epoch": 47.87, + "learning_rate": 2.6071795977676917e-05, + "loss": 1.9992, + "step": 16539000 + }, + { + "epoch": 47.88, + "learning_rate": 2.607107377732494e-05, + "loss": 1.9858, + "step": 16539500 + }, + { + "epoch": 47.88, + "learning_rate": 2.607035012967766e-05, + "loss": 2.015, + "step": 16540000 + }, + { + "epoch": 47.88, + "learning_rate": 2.6069626482030383e-05, + "loss": 2.0117, + "step": 16540500 + }, + { + "epoch": 47.88, + "learning_rate": 2.606890283438311e-05, + "loss": 2.0371, + "step": 16541000 + }, + { + "epoch": 47.88, + "learning_rate": 2.606817918673583e-05, + "loss": 2.0309, + "step": 16541500 + }, + { + "epoch": 47.88, + "learning_rate": 2.6067456986383847e-05, + "loss": 2.0118, + "step": 16542000 + }, + { + "epoch": 47.88, + "learning_rate": 2.606673623332716e-05, + "loss": 2.0005, + "step": 16542500 + }, + { + "epoch": 47.89, + "learning_rate": 2.6066012585679885e-05, + "loss": 1.9945, + "step": 16543000 + }, + { + "epoch": 47.89, + "learning_rate": 2.6065288938032607e-05, + "loss": 2.0138, + "step": 16543500 + }, + { + "epoch": 47.89, + "learning_rate": 2.606456529038533e-05, + "loss": 2.0057, + "step": 16544000 + }, + { + "epoch": 47.89, + "learning_rate": 2.6063843090033345e-05, + "loss": 2.0058, + "step": 16544500 + }, + { + "epoch": 47.89, + "learning_rate": 2.606311944238607e-05, + "loss": 2.019, + "step": 16545000 + }, + { + "epoch": 47.89, + "learning_rate": 2.6062395794738793e-05, + "loss": 2.0154, + "step": 16545500 + }, + { + "epoch": 47.89, + "learning_rate": 2.6061672147091515e-05, + "loss": 2.04, + "step": 16546000 + }, + { + "epoch": 47.9, + "learning_rate": 2.6060948499444237e-05, + "loss": 2.0019, + "step": 16546500 + }, + { + "epoch": 47.9, + "learning_rate": 2.6060226299092256e-05, + "loss": 2.0092, + "step": 16547000 + }, + { + "epoch": 47.9, + "learning_rate": 2.6059502651444985e-05, + "loss": 2.0384, + "step": 16547500 + }, + { + "epoch": 47.9, + "learning_rate": 2.6058779003797707e-05, + "loss": 2.005, + "step": 16548000 + }, + { + "epoch": 47.9, + "learning_rate": 2.605805535615043e-05, + "loss": 2.0226, + "step": 16548500 + }, + { + "epoch": 47.9, + "learning_rate": 2.605733315579845e-05, + "loss": 2.0074, + "step": 16549000 + }, + { + "epoch": 47.9, + "learning_rate": 2.605660950815117e-05, + "loss": 1.9913, + "step": 16549500 + }, + { + "epoch": 47.91, + "learning_rate": 2.6055885860503893e-05, + "loss": 2.0255, + "step": 16550000 + }, + { + "epoch": 47.91, + "learning_rate": 2.6055162212856615e-05, + "loss": 2.0296, + "step": 16550500 + }, + { + "epoch": 47.91, + "learning_rate": 2.6054438565209337e-05, + "loss": 2.0274, + "step": 16551000 + }, + { + "epoch": 47.91, + "learning_rate": 2.605371491756206e-05, + "loss": 2.0208, + "step": 16551500 + }, + { + "epoch": 47.91, + "learning_rate": 2.6052991269914785e-05, + "loss": 2.0285, + "step": 16552000 + }, + { + "epoch": 47.91, + "learning_rate": 2.6052267622267507e-05, + "loss": 1.9884, + "step": 16552500 + }, + { + "epoch": 47.91, + "learning_rate": 2.605154397462023e-05, + "loss": 2.0162, + "step": 16553000 + }, + { + "epoch": 47.92, + "learning_rate": 2.6050820326972952e-05, + "loss": 2.0141, + "step": 16553500 + }, + { + "epoch": 47.92, + "learning_rate": 2.6050096679325674e-05, + "loss": 1.9977, + "step": 16554000 + }, + { + "epoch": 47.92, + "learning_rate": 2.6049373031678396e-05, + "loss": 2.0303, + "step": 16554500 + }, + { + "epoch": 47.92, + "learning_rate": 2.6048650831326422e-05, + "loss": 2.0048, + "step": 16555000 + }, + { + "epoch": 47.92, + "learning_rate": 2.6047927183679144e-05, + "loss": 2.0163, + "step": 16555500 + }, + { + "epoch": 47.92, + "learning_rate": 2.6047203536031867e-05, + "loss": 2.0164, + "step": 16556000 + }, + { + "epoch": 47.92, + "learning_rate": 2.604647988838459e-05, + "loss": 2.0243, + "step": 16556500 + }, + { + "epoch": 47.93, + "learning_rate": 2.6045757688032608e-05, + "loss": 2.0285, + "step": 16557000 + }, + { + "epoch": 47.93, + "learning_rate": 2.604503404038533e-05, + "loss": 2.0346, + "step": 16557500 + }, + { + "epoch": 47.93, + "learning_rate": 2.6044310392738052e-05, + "loss": 2.0388, + "step": 16558000 + }, + { + "epoch": 47.93, + "learning_rate": 2.604358819238607e-05, + "loss": 2.0052, + "step": 16558500 + }, + { + "epoch": 47.93, + "learning_rate": 2.6042864544738793e-05, + "loss": 2.02, + "step": 16559000 + }, + { + "epoch": 47.93, + "learning_rate": 2.6042140897091516e-05, + "loss": 2.0247, + "step": 16559500 + }, + { + "epoch": 47.93, + "learning_rate": 2.6041417249444238e-05, + "loss": 2.0152, + "step": 16560000 + }, + { + "epoch": 47.94, + "learning_rate": 2.604069360179696e-05, + "loss": 2.0407, + "step": 16560500 + }, + { + "epoch": 47.94, + "learning_rate": 2.603997140144498e-05, + "loss": 1.9922, + "step": 16561000 + }, + { + "epoch": 47.94, + "learning_rate": 2.60392477537977e-05, + "loss": 2.02, + "step": 16561500 + }, + { + "epoch": 47.94, + "learning_rate": 2.6038524106150424e-05, + "loss": 2.0239, + "step": 16562000 + }, + { + "epoch": 47.94, + "learning_rate": 2.6037800458503153e-05, + "loss": 2.0137, + "step": 16562500 + }, + { + "epoch": 47.94, + "learning_rate": 2.6037076810855875e-05, + "loss": 2.0181, + "step": 16563000 + }, + { + "epoch": 47.94, + "learning_rate": 2.60363531632086e-05, + "loss": 2.0154, + "step": 16563500 + }, + { + "epoch": 47.95, + "learning_rate": 2.6035629515561323e-05, + "loss": 2.0037, + "step": 16564000 + }, + { + "epoch": 47.95, + "learning_rate": 2.6034905867914045e-05, + "loss": 2.0053, + "step": 16564500 + }, + { + "epoch": 47.95, + "learning_rate": 2.6034182220266767e-05, + "loss": 2.0177, + "step": 16565000 + }, + { + "epoch": 47.95, + "learning_rate": 2.603345857261949e-05, + "loss": 2.0047, + "step": 16565500 + }, + { + "epoch": 47.95, + "learning_rate": 2.603273492497221e-05, + "loss": 2.0063, + "step": 16566000 + }, + { + "epoch": 47.95, + "learning_rate": 2.6032011277324937e-05, + "loss": 2.005, + "step": 16566500 + }, + { + "epoch": 47.95, + "learning_rate": 2.603128762967766e-05, + "loss": 2.0106, + "step": 16567000 + }, + { + "epoch": 47.96, + "learning_rate": 2.6030565429325675e-05, + "loss": 2.0004, + "step": 16567500 + }, + { + "epoch": 47.96, + "learning_rate": 2.60298417816784e-05, + "loss": 1.9813, + "step": 16568000 + }, + { + "epoch": 47.96, + "learning_rate": 2.6029118134031123e-05, + "loss": 2.0217, + "step": 16568500 + }, + { + "epoch": 47.96, + "learning_rate": 2.6028394486383845e-05, + "loss": 2.0139, + "step": 16569000 + }, + { + "epoch": 47.96, + "learning_rate": 2.602767228603186e-05, + "loss": 2.0038, + "step": 16569500 + }, + { + "epoch": 47.96, + "learning_rate": 2.602694863838459e-05, + "loss": 2.0123, + "step": 16570000 + }, + { + "epoch": 47.96, + "learning_rate": 2.602622643803261e-05, + "loss": 2.0323, + "step": 16570500 + }, + { + "epoch": 47.97, + "learning_rate": 2.602550279038533e-05, + "loss": 2.0187, + "step": 16571000 + }, + { + "epoch": 47.97, + "learning_rate": 2.602478059003335e-05, + "loss": 2.0152, + "step": 16571500 + }, + { + "epoch": 47.97, + "learning_rate": 2.6024056942386072e-05, + "loss": 1.9897, + "step": 16572000 + }, + { + "epoch": 47.97, + "learning_rate": 2.6023333294738794e-05, + "loss": 2.0136, + "step": 16572500 + }, + { + "epoch": 47.97, + "learning_rate": 2.6022609647091516e-05, + "loss": 2.0093, + "step": 16573000 + }, + { + "epoch": 47.97, + "learning_rate": 2.602188599944424e-05, + "loss": 1.9944, + "step": 16573500 + }, + { + "epoch": 47.97, + "learning_rate": 2.6021162351796964e-05, + "loss": 2.0217, + "step": 16574000 + }, + { + "epoch": 47.98, + "learning_rate": 2.6020438704149687e-05, + "loss": 2.0159, + "step": 16574500 + }, + { + "epoch": 47.98, + "learning_rate": 2.601971505650241e-05, + "loss": 2.0156, + "step": 16575000 + }, + { + "epoch": 47.98, + "learning_rate": 2.601899140885513e-05, + "loss": 2.0112, + "step": 16575500 + }, + { + "epoch": 47.98, + "learning_rate": 2.601826920850315e-05, + "loss": 2.0098, + "step": 16576000 + }, + { + "epoch": 47.98, + "learning_rate": 2.6017545560855872e-05, + "loss": 2.0277, + "step": 16576500 + }, + { + "epoch": 47.98, + "learning_rate": 2.6016821913208594e-05, + "loss": 2.0026, + "step": 16577000 + }, + { + "epoch": 47.99, + "learning_rate": 2.6016098265561323e-05, + "loss": 2.015, + "step": 16577500 + }, + { + "epoch": 47.99, + "learning_rate": 2.6015374617914046e-05, + "loss": 2.033, + "step": 16578000 + }, + { + "epoch": 47.99, + "learning_rate": 2.6014650970266768e-05, + "loss": 2.0095, + "step": 16578500 + }, + { + "epoch": 47.99, + "learning_rate": 2.601392732261949e-05, + "loss": 2.0216, + "step": 16579000 + }, + { + "epoch": 47.99, + "learning_rate": 2.601320512226751e-05, + "loss": 2.0012, + "step": 16579500 + }, + { + "epoch": 47.99, + "learning_rate": 2.601248147462023e-05, + "loss": 1.9971, + "step": 16580000 + }, + { + "epoch": 47.99, + "learning_rate": 2.6011757826972954e-05, + "loss": 2.0165, + "step": 16580500 + }, + { + "epoch": 48.0, + "learning_rate": 2.6011034179325676e-05, + "loss": 2.0261, + "step": 16581000 + }, + { + "epoch": 48.0, + "learning_rate": 2.60103105316784e-05, + "loss": 2.0208, + "step": 16581500 + }, + { + "epoch": 48.0, + "learning_rate": 2.6009586884031124e-05, + "loss": 1.9938, + "step": 16582000 + }, + { + "epoch": 48.0, + "learning_rate": 2.6008863236383846e-05, + "loss": 2.0093, + "step": 16582500 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.6727647036358356, + "eval_accuracy_mlm": 0.6387673823676272, + "eval_accuracy_nsp": 0.8549999312535232, + "eval_loss": 2.163412570953369, + "eval_runtime": 331.6025, + "eval_samples_per_second": 1315.991, + "eval_steps_per_second": 54.834, + "step": 16582656 + }, + { + "epoch": 48.0, + "learning_rate": 2.6008139588736568e-05, + "loss": 2.0014, + "step": 16583000 + }, + { + "epoch": 48.0, + "learning_rate": 2.600741594108929e-05, + "loss": 1.9942, + "step": 16583500 + }, + { + "epoch": 48.0, + "learning_rate": 2.6006692293442016e-05, + "loss": 1.9808, + "step": 16584000 + }, + { + "epoch": 48.01, + "learning_rate": 2.600596864579474e-05, + "loss": 1.9759, + "step": 16584500 + }, + { + "epoch": 48.01, + "learning_rate": 2.6005244998147467e-05, + "loss": 2.0071, + "step": 16585000 + }, + { + "epoch": 48.01, + "learning_rate": 2.600452135050019e-05, + "loss": 2.0084, + "step": 16585500 + }, + { + "epoch": 48.01, + "learning_rate": 2.6003799150148205e-05, + "loss": 2.0025, + "step": 16586000 + }, + { + "epoch": 48.01, + "learning_rate": 2.6003078397091517e-05, + "loss": 1.9974, + "step": 16586500 + }, + { + "epoch": 48.01, + "learning_rate": 2.600235474944424e-05, + "loss": 1.9839, + "step": 16587000 + }, + { + "epoch": 48.01, + "learning_rate": 2.6001631101796965e-05, + "loss": 1.978, + "step": 16587500 + }, + { + "epoch": 48.02, + "learning_rate": 2.6000907454149687e-05, + "loss": 2.0232, + "step": 16588000 + }, + { + "epoch": 48.02, + "learning_rate": 2.600018380650241e-05, + "loss": 2.0117, + "step": 16588500 + }, + { + "epoch": 48.02, + "learning_rate": 2.5999463053445722e-05, + "loss": 1.9927, + "step": 16589000 + }, + { + "epoch": 48.02, + "learning_rate": 2.599874085309374e-05, + "loss": 2.0049, + "step": 16589500 + }, + { + "epoch": 48.02, + "learning_rate": 2.5998017205446463e-05, + "loss": 1.9852, + "step": 16590000 + }, + { + "epoch": 48.02, + "learning_rate": 2.5997293557799185e-05, + "loss": 1.9818, + "step": 16590500 + }, + { + "epoch": 48.02, + "learning_rate": 2.5996569910151907e-05, + "loss": 2.023, + "step": 16591000 + }, + { + "epoch": 48.03, + "learning_rate": 2.599584626250463e-05, + "loss": 1.9989, + "step": 16591500 + }, + { + "epoch": 48.03, + "learning_rate": 2.5995122614857352e-05, + "loss": 1.9887, + "step": 16592000 + }, + { + "epoch": 48.03, + "learning_rate": 2.5994398967210078e-05, + "loss": 1.9811, + "step": 16592500 + }, + { + "epoch": 48.03, + "learning_rate": 2.5993675319562803e-05, + "loss": 1.9907, + "step": 16593000 + }, + { + "epoch": 48.03, + "learning_rate": 2.599295167191553e-05, + "loss": 1.9865, + "step": 16593500 + }, + { + "epoch": 48.03, + "learning_rate": 2.5992229471563544e-05, + "loss": 1.9976, + "step": 16594000 + }, + { + "epoch": 48.03, + "learning_rate": 2.5991505823916267e-05, + "loss": 2.0073, + "step": 16594500 + }, + { + "epoch": 48.04, + "learning_rate": 2.5990782176268992e-05, + "loss": 1.9724, + "step": 16595000 + }, + { + "epoch": 48.04, + "learning_rate": 2.5990058528621714e-05, + "loss": 2.0018, + "step": 16595500 + }, + { + "epoch": 48.04, + "learning_rate": 2.5989334880974437e-05, + "loss": 2.0171, + "step": 16596000 + }, + { + "epoch": 48.04, + "learning_rate": 2.598861123332716e-05, + "loss": 1.9986, + "step": 16596500 + }, + { + "epoch": 48.04, + "learning_rate": 2.598788758567988e-05, + "loss": 2.0129, + "step": 16597000 + }, + { + "epoch": 48.04, + "learning_rate": 2.5987163938032603e-05, + "loss": 1.9889, + "step": 16597500 + }, + { + "epoch": 48.04, + "learning_rate": 2.598644029038533e-05, + "loss": 1.9941, + "step": 16598000 + }, + { + "epoch": 48.05, + "learning_rate": 2.598571664273805e-05, + "loss": 2.0047, + "step": 16598500 + }, + { + "epoch": 48.05, + "learning_rate": 2.5984992995090773e-05, + "loss": 2.0055, + "step": 16599000 + }, + { + "epoch": 48.05, + "learning_rate": 2.5984269347443496e-05, + "loss": 1.9859, + "step": 16599500 + }, + { + "epoch": 48.05, + "learning_rate": 2.5983547147091518e-05, + "loss": 2.0024, + "step": 16600000 + }, + { + "epoch": 48.05, + "learning_rate": 2.5982823499444244e-05, + "loss": 1.9872, + "step": 16600500 + }, + { + "epoch": 48.05, + "learning_rate": 2.5982099851796966e-05, + "loss": 1.9964, + "step": 16601000 + }, + { + "epoch": 48.05, + "learning_rate": 2.5981376204149688e-05, + "loss": 2.001, + "step": 16601500 + }, + { + "epoch": 48.06, + "learning_rate": 2.5980654003797704e-05, + "loss": 1.9917, + "step": 16602000 + }, + { + "epoch": 48.06, + "learning_rate": 2.597993035615043e-05, + "loss": 1.9906, + "step": 16602500 + }, + { + "epoch": 48.06, + "learning_rate": 2.597920670850315e-05, + "loss": 1.9932, + "step": 16603000 + }, + { + "epoch": 48.06, + "learning_rate": 2.5978483060855874e-05, + "loss": 2.0174, + "step": 16603500 + }, + { + "epoch": 48.06, + "learning_rate": 2.5977759413208596e-05, + "loss": 1.9911, + "step": 16604000 + }, + { + "epoch": 48.06, + "learning_rate": 2.5977035765561318e-05, + "loss": 2.0098, + "step": 16604500 + }, + { + "epoch": 48.06, + "learning_rate": 2.5976312117914044e-05, + "loss": 1.9951, + "step": 16605000 + }, + { + "epoch": 48.07, + "learning_rate": 2.597558991756206e-05, + "loss": 1.9922, + "step": 16605500 + }, + { + "epoch": 48.07, + "learning_rate": 2.597486626991478e-05, + "loss": 2.0194, + "step": 16606000 + }, + { + "epoch": 48.07, + "learning_rate": 2.5974142622267504e-05, + "loss": 1.9991, + "step": 16606500 + }, + { + "epoch": 48.07, + "learning_rate": 2.597341897462023e-05, + "loss": 1.9729, + "step": 16607000 + }, + { + "epoch": 48.07, + "learning_rate": 2.5972695326972955e-05, + "loss": 1.9586, + "step": 16607500 + }, + { + "epoch": 48.07, + "learning_rate": 2.5971973126620974e-05, + "loss": 1.9924, + "step": 16608000 + }, + { + "epoch": 48.07, + "learning_rate": 2.5971249478973696e-05, + "loss": 2.0101, + "step": 16608500 + }, + { + "epoch": 48.08, + "learning_rate": 2.597052583132642e-05, + "loss": 1.9957, + "step": 16609000 + }, + { + "epoch": 48.08, + "learning_rate": 2.5969802183679144e-05, + "loss": 2.0081, + "step": 16609500 + }, + { + "epoch": 48.08, + "learning_rate": 2.5969078536031866e-05, + "loss": 1.9904, + "step": 16610000 + }, + { + "epoch": 48.08, + "learning_rate": 2.596835488838459e-05, + "loss": 2.0035, + "step": 16610500 + }, + { + "epoch": 48.08, + "learning_rate": 2.5967632688032608e-05, + "loss": 1.9966, + "step": 16611000 + }, + { + "epoch": 48.08, + "learning_rate": 2.5966910487680623e-05, + "loss": 1.9789, + "step": 16611500 + }, + { + "epoch": 48.08, + "learning_rate": 2.5966186840033345e-05, + "loss": 2.0032, + "step": 16612000 + }, + { + "epoch": 48.09, + "learning_rate": 2.5965464639681364e-05, + "loss": 1.9877, + "step": 16612500 + }, + { + "epoch": 48.09, + "learning_rate": 2.5964740992034087e-05, + "loss": 2.0065, + "step": 16613000 + }, + { + "epoch": 48.09, + "learning_rate": 2.596401734438681e-05, + "loss": 2.023, + "step": 16613500 + }, + { + "epoch": 48.09, + "learning_rate": 2.596329369673953e-05, + "loss": 1.9961, + "step": 16614000 + }, + { + "epoch": 48.09, + "learning_rate": 2.5962570049092257e-05, + "loss": 1.9986, + "step": 16614500 + }, + { + "epoch": 48.09, + "learning_rate": 2.5961846401444982e-05, + "loss": 1.9982, + "step": 16615000 + }, + { + "epoch": 48.1, + "learning_rate": 2.5961122753797708e-05, + "loss": 1.9836, + "step": 16615500 + }, + { + "epoch": 48.1, + "learning_rate": 2.596039910615043e-05, + "loss": 1.9785, + "step": 16616000 + }, + { + "epoch": 48.1, + "learning_rate": 2.5959675458503152e-05, + "loss": 1.9943, + "step": 16616500 + }, + { + "epoch": 48.1, + "learning_rate": 2.595895325815117e-05, + "loss": 1.995, + "step": 16617000 + }, + { + "epoch": 48.1, + "learning_rate": 2.5958229610503894e-05, + "loss": 2.0007, + "step": 16617500 + }, + { + "epoch": 48.1, + "learning_rate": 2.5957505962856616e-05, + "loss": 2.0049, + "step": 16618000 + }, + { + "epoch": 48.1, + "learning_rate": 2.5956782315209338e-05, + "loss": 1.9961, + "step": 16618500 + }, + { + "epoch": 48.11, + "learning_rate": 2.595605866756206e-05, + "loss": 1.9886, + "step": 16619000 + }, + { + "epoch": 48.11, + "learning_rate": 2.5955335019914782e-05, + "loss": 1.9742, + "step": 16619500 + }, + { + "epoch": 48.11, + "learning_rate": 2.5954611372267508e-05, + "loss": 1.9964, + "step": 16620000 + }, + { + "epoch": 48.11, + "learning_rate": 2.5953889171915524e-05, + "loss": 2.0015, + "step": 16620500 + }, + { + "epoch": 48.11, + "learning_rate": 2.5953165524268246e-05, + "loss": 2.001, + "step": 16621000 + }, + { + "epoch": 48.11, + "learning_rate": 2.595244187662097e-05, + "loss": 1.9848, + "step": 16621500 + }, + { + "epoch": 48.11, + "learning_rate": 2.5951718228973694e-05, + "loss": 2.004, + "step": 16622000 + }, + { + "epoch": 48.12, + "learning_rate": 2.595099458132642e-05, + "loss": 2.001, + "step": 16622500 + }, + { + "epoch": 48.12, + "learning_rate": 2.5950270933679145e-05, + "loss": 1.9664, + "step": 16623000 + }, + { + "epoch": 48.12, + "learning_rate": 2.5949547286031867e-05, + "loss": 1.9905, + "step": 16623500 + }, + { + "epoch": 48.12, + "learning_rate": 2.594882363838459e-05, + "loss": 2.006, + "step": 16624000 + }, + { + "epoch": 48.12, + "learning_rate": 2.594810143803261e-05, + "loss": 1.9986, + "step": 16624500 + }, + { + "epoch": 48.12, + "learning_rate": 2.594737779038533e-05, + "loss": 2.0104, + "step": 16625000 + }, + { + "epoch": 48.12, + "learning_rate": 2.5946654142738053e-05, + "loss": 2.001, + "step": 16625500 + }, + { + "epoch": 48.13, + "learning_rate": 2.5945930495090775e-05, + "loss": 1.98, + "step": 16626000 + }, + { + "epoch": 48.13, + "learning_rate": 2.5945206847443497e-05, + "loss": 1.985, + "step": 16626500 + }, + { + "epoch": 48.13, + "learning_rate": 2.5944484647091516e-05, + "loss": 1.993, + "step": 16627000 + }, + { + "epoch": 48.13, + "learning_rate": 2.594376099944424e-05, + "loss": 1.9892, + "step": 16627500 + }, + { + "epoch": 48.13, + "learning_rate": 2.594303735179696e-05, + "loss": 1.9778, + "step": 16628000 + }, + { + "epoch": 48.13, + "learning_rate": 2.5942313704149683e-05, + "loss": 2.0139, + "step": 16628500 + }, + { + "epoch": 48.13, + "learning_rate": 2.594159005650241e-05, + "loss": 1.9955, + "step": 16629000 + }, + { + "epoch": 48.14, + "learning_rate": 2.594086640885513e-05, + "loss": 2.0036, + "step": 16629500 + }, + { + "epoch": 48.14, + "learning_rate": 2.594014276120786e-05, + "loss": 2.0082, + "step": 16630000 + }, + { + "epoch": 48.14, + "learning_rate": 2.5939419113560582e-05, + "loss": 1.9896, + "step": 16630500 + }, + { + "epoch": 48.14, + "learning_rate": 2.5938695465913304e-05, + "loss": 2.0129, + "step": 16631000 + }, + { + "epoch": 48.14, + "learning_rate": 2.5937971818266027e-05, + "loss": 1.9901, + "step": 16631500 + }, + { + "epoch": 48.14, + "learning_rate": 2.593724817061875e-05, + "loss": 1.9911, + "step": 16632000 + }, + { + "epoch": 48.14, + "learning_rate": 2.593652452297147e-05, + "loss": 1.9994, + "step": 16632500 + }, + { + "epoch": 48.15, + "learning_rate": 2.5935800875324197e-05, + "loss": 1.9901, + "step": 16633000 + }, + { + "epoch": 48.15, + "learning_rate": 2.5935078674972212e-05, + "loss": 2.0107, + "step": 16633500 + }, + { + "epoch": 48.15, + "learning_rate": 2.5934355027324934e-05, + "loss": 1.9941, + "step": 16634000 + }, + { + "epoch": 48.15, + "learning_rate": 2.5933632826972953e-05, + "loss": 1.9971, + "step": 16634500 + }, + { + "epoch": 48.15, + "learning_rate": 2.5932909179325676e-05, + "loss": 2.0302, + "step": 16635000 + }, + { + "epoch": 48.15, + "learning_rate": 2.5932186978973695e-05, + "loss": 2.0021, + "step": 16635500 + }, + { + "epoch": 48.15, + "learning_rate": 2.5931463331326417e-05, + "loss": 2.0054, + "step": 16636000 + }, + { + "epoch": 48.16, + "learning_rate": 2.593073968367914e-05, + "loss": 2.0086, + "step": 16636500 + }, + { + "epoch": 48.16, + "learning_rate": 2.593001603603186e-05, + "loss": 2.0114, + "step": 16637000 + }, + { + "epoch": 48.16, + "learning_rate": 2.592929238838459e-05, + "loss": 1.9852, + "step": 16637500 + }, + { + "epoch": 48.16, + "learning_rate": 2.5928568740737312e-05, + "loss": 1.9982, + "step": 16638000 + }, + { + "epoch": 48.16, + "learning_rate": 2.592784654038533e-05, + "loss": 1.9838, + "step": 16638500 + }, + { + "epoch": 48.16, + "learning_rate": 2.5927122892738054e-05, + "loss": 1.9771, + "step": 16639000 + }, + { + "epoch": 48.16, + "learning_rate": 2.5926400692386073e-05, + "loss": 1.9954, + "step": 16639500 + }, + { + "epoch": 48.17, + "learning_rate": 2.5925677044738795e-05, + "loss": 1.9898, + "step": 16640000 + }, + { + "epoch": 48.17, + "learning_rate": 2.5924953397091517e-05, + "loss": 1.9983, + "step": 16640500 + }, + { + "epoch": 48.17, + "learning_rate": 2.592422974944424e-05, + "loss": 1.9834, + "step": 16641000 + }, + { + "epoch": 48.17, + "learning_rate": 2.592350610179696e-05, + "loss": 2.025, + "step": 16641500 + }, + { + "epoch": 48.17, + "learning_rate": 2.5922782454149687e-05, + "loss": 2.0247, + "step": 16642000 + }, + { + "epoch": 48.17, + "learning_rate": 2.592205880650241e-05, + "loss": 2.0048, + "step": 16642500 + }, + { + "epoch": 48.17, + "learning_rate": 2.592133515885513e-05, + "loss": 2.0096, + "step": 16643000 + }, + { + "epoch": 48.18, + "learning_rate": 2.5920611511207854e-05, + "loss": 2.0137, + "step": 16643500 + }, + { + "epoch": 48.18, + "learning_rate": 2.5919887863560576e-05, + "loss": 2.038, + "step": 16644000 + }, + { + "epoch": 48.18, + "learning_rate": 2.5919164215913305e-05, + "loss": 2.0313, + "step": 16644500 + }, + { + "epoch": 48.18, + "learning_rate": 2.5918440568266027e-05, + "loss": 2.0196, + "step": 16645000 + }, + { + "epoch": 48.18, + "learning_rate": 2.5917718367914046e-05, + "loss": 1.9965, + "step": 16645500 + }, + { + "epoch": 48.18, + "learning_rate": 2.591699472026677e-05, + "loss": 2.0092, + "step": 16646000 + }, + { + "epoch": 48.18, + "learning_rate": 2.591627107261949e-05, + "loss": 2.0292, + "step": 16646500 + }, + { + "epoch": 48.19, + "learning_rate": 2.591554887226751e-05, + "loss": 1.9765, + "step": 16647000 + }, + { + "epoch": 48.19, + "learning_rate": 2.5914825224620232e-05, + "loss": 2.0156, + "step": 16647500 + }, + { + "epoch": 48.19, + "learning_rate": 2.5914101576972954e-05, + "loss": 2.0085, + "step": 16648000 + }, + { + "epoch": 48.19, + "learning_rate": 2.5913377929325676e-05, + "loss": 1.9953, + "step": 16648500 + }, + { + "epoch": 48.19, + "learning_rate": 2.59126542816784e-05, + "loss": 2.0102, + "step": 16649000 + }, + { + "epoch": 48.19, + "learning_rate": 2.5911932081326418e-05, + "loss": 2.004, + "step": 16649500 + }, + { + "epoch": 48.19, + "learning_rate": 2.591120843367914e-05, + "loss": 2.0075, + "step": 16650000 + }, + { + "epoch": 48.2, + "learning_rate": 2.5910484786031862e-05, + "loss": 1.999, + "step": 16650500 + }, + { + "epoch": 48.2, + "learning_rate": 2.5909761138384588e-05, + "loss": 1.9961, + "step": 16651000 + }, + { + "epoch": 48.2, + "learning_rate": 2.590903749073731e-05, + "loss": 1.9949, + "step": 16651500 + }, + { + "epoch": 48.2, + "learning_rate": 2.5908313843090032e-05, + "loss": 2.017, + "step": 16652000 + }, + { + "epoch": 48.2, + "learning_rate": 2.5907591642738054e-05, + "loss": 2.012, + "step": 16652500 + }, + { + "epoch": 48.2, + "learning_rate": 2.5906867995090777e-05, + "loss": 2.0146, + "step": 16653000 + }, + { + "epoch": 48.21, + "learning_rate": 2.5906144347443502e-05, + "loss": 2.0002, + "step": 16653500 + }, + { + "epoch": 48.21, + "learning_rate": 2.5905420699796225e-05, + "loss": 2.021, + "step": 16654000 + }, + { + "epoch": 48.21, + "learning_rate": 2.5904697052148947e-05, + "loss": 2.0023, + "step": 16654500 + }, + { + "epoch": 48.21, + "learning_rate": 2.590397340450167e-05, + "loss": 1.9919, + "step": 16655000 + }, + { + "epoch": 48.21, + "learning_rate": 2.590324975685439e-05, + "loss": 1.9787, + "step": 16655500 + }, + { + "epoch": 48.21, + "learning_rate": 2.5902526109207113e-05, + "loss": 2.0071, + "step": 16656000 + }, + { + "epoch": 48.21, + "learning_rate": 2.590180246155984e-05, + "loss": 2.0246, + "step": 16656500 + }, + { + "epoch": 48.22, + "learning_rate": 2.590108170850315e-05, + "loss": 2.0095, + "step": 16657000 + }, + { + "epoch": 48.22, + "learning_rate": 2.5900358060855874e-05, + "loss": 2.0032, + "step": 16657500 + }, + { + "epoch": 48.22, + "learning_rate": 2.5899634413208596e-05, + "loss": 2.0007, + "step": 16658000 + }, + { + "epoch": 48.22, + "learning_rate": 2.5898910765561318e-05, + "loss": 2.004, + "step": 16658500 + }, + { + "epoch": 48.22, + "learning_rate": 2.589818711791404e-05, + "loss": 2.038, + "step": 16659000 + }, + { + "epoch": 48.22, + "learning_rate": 2.5897463470266763e-05, + "loss": 1.9832, + "step": 16659500 + }, + { + "epoch": 48.22, + "learning_rate": 2.589673982261949e-05, + "loss": 2.0024, + "step": 16660000 + }, + { + "epoch": 48.23, + "learning_rate": 2.5896016174972214e-05, + "loss": 2.0035, + "step": 16660500 + }, + { + "epoch": 48.23, + "learning_rate": 2.589529252732494e-05, + "loss": 2.0222, + "step": 16661000 + }, + { + "epoch": 48.23, + "learning_rate": 2.5894570326972955e-05, + "loss": 1.9875, + "step": 16661500 + }, + { + "epoch": 48.23, + "learning_rate": 2.5893846679325677e-05, + "loss": 2.008, + "step": 16662000 + }, + { + "epoch": 48.23, + "learning_rate": 2.5893123031678403e-05, + "loss": 1.9869, + "step": 16662500 + }, + { + "epoch": 48.23, + "learning_rate": 2.5892399384031125e-05, + "loss": 1.9839, + "step": 16663000 + }, + { + "epoch": 48.23, + "learning_rate": 2.589167718367914e-05, + "loss": 2.0055, + "step": 16663500 + }, + { + "epoch": 48.24, + "learning_rate": 2.589095498332716e-05, + "loss": 1.9752, + "step": 16664000 + }, + { + "epoch": 48.24, + "learning_rate": 2.5890231335679882e-05, + "loss": 1.9866, + "step": 16664500 + }, + { + "epoch": 48.24, + "learning_rate": 2.58895091353279e-05, + "loss": 2.0118, + "step": 16665000 + }, + { + "epoch": 48.24, + "learning_rate": 2.5888785487680623e-05, + "loss": 2.0068, + "step": 16665500 + }, + { + "epoch": 48.24, + "learning_rate": 2.5888061840033345e-05, + "loss": 1.9997, + "step": 16666000 + }, + { + "epoch": 48.24, + "learning_rate": 2.5887338192386067e-05, + "loss": 1.9943, + "step": 16666500 + }, + { + "epoch": 48.24, + "learning_rate": 2.5886615992034086e-05, + "loss": 1.9984, + "step": 16667000 + }, + { + "epoch": 48.25, + "learning_rate": 2.588589234438681e-05, + "loss": 1.9911, + "step": 16667500 + }, + { + "epoch": 48.25, + "learning_rate": 2.5885168696739538e-05, + "loss": 1.9836, + "step": 16668000 + }, + { + "epoch": 48.25, + "learning_rate": 2.588444504909226e-05, + "loss": 1.9949, + "step": 16668500 + }, + { + "epoch": 48.25, + "learning_rate": 2.5883721401444982e-05, + "loss": 1.9899, + "step": 16669000 + }, + { + "epoch": 48.25, + "learning_rate": 2.5882999201093e-05, + "loss": 1.9809, + "step": 16669500 + }, + { + "epoch": 48.25, + "learning_rate": 2.5882275553445723e-05, + "loss": 1.9928, + "step": 16670000 + }, + { + "epoch": 48.25, + "learning_rate": 2.5881551905798445e-05, + "loss": 1.9952, + "step": 16670500 + }, + { + "epoch": 48.26, + "learning_rate": 2.5880828258151168e-05, + "loss": 2.0224, + "step": 16671000 + }, + { + "epoch": 48.26, + "learning_rate": 2.588010461050389e-05, + "loss": 2.03, + "step": 16671500 + }, + { + "epoch": 48.26, + "learning_rate": 2.5879380962856616e-05, + "loss": 1.9937, + "step": 16672000 + }, + { + "epoch": 48.26, + "learning_rate": 2.5878657315209338e-05, + "loss": 1.9901, + "step": 16672500 + }, + { + "epoch": 48.26, + "learning_rate": 2.587793366756206e-05, + "loss": 2.0094, + "step": 16673000 + }, + { + "epoch": 48.26, + "learning_rate": 2.5877210019914782e-05, + "loss": 2.025, + "step": 16673500 + }, + { + "epoch": 48.26, + "learning_rate": 2.5876486372267504e-05, + "loss": 1.9978, + "step": 16674000 + }, + { + "epoch": 48.27, + "learning_rate": 2.5875762724620227e-05, + "loss": 2.0098, + "step": 16674500 + }, + { + "epoch": 48.27, + "learning_rate": 2.5875039076972956e-05, + "loss": 2.0082, + "step": 16675000 + }, + { + "epoch": 48.27, + "learning_rate": 2.5874315429325678e-05, + "loss": 2.0003, + "step": 16675500 + }, + { + "epoch": 48.27, + "learning_rate": 2.5873591781678404e-05, + "loss": 1.995, + "step": 16676000 + }, + { + "epoch": 48.27, + "learning_rate": 2.5872868134031126e-05, + "loss": 2.0023, + "step": 16676500 + }, + { + "epoch": 48.27, + "learning_rate": 2.587214593367914e-05, + "loss": 2.0175, + "step": 16677000 + }, + { + "epoch": 48.27, + "learning_rate": 2.5871422286031867e-05, + "loss": 2.0287, + "step": 16677500 + }, + { + "epoch": 48.28, + "learning_rate": 2.587069863838459e-05, + "loss": 2.0133, + "step": 16678000 + }, + { + "epoch": 48.28, + "learning_rate": 2.5869976438032605e-05, + "loss": 1.9973, + "step": 16678500 + }, + { + "epoch": 48.28, + "learning_rate": 2.5869254237680624e-05, + "loss": 1.9893, + "step": 16679000 + }, + { + "epoch": 48.28, + "learning_rate": 2.5868532037328643e-05, + "loss": 1.9957, + "step": 16679500 + }, + { + "epoch": 48.28, + "learning_rate": 2.5867808389681365e-05, + "loss": 2.0128, + "step": 16680000 + }, + { + "epoch": 48.28, + "learning_rate": 2.586708618932938e-05, + "loss": 1.9991, + "step": 16680500 + }, + { + "epoch": 48.28, + "learning_rate": 2.5866362541682103e-05, + "loss": 1.9993, + "step": 16681000 + }, + { + "epoch": 48.29, + "learning_rate": 2.586563889403483e-05, + "loss": 2.0171, + "step": 16681500 + }, + { + "epoch": 48.29, + "learning_rate": 2.586491524638755e-05, + "loss": 2.0086, + "step": 16682000 + }, + { + "epoch": 48.29, + "learning_rate": 2.5864191598740273e-05, + "loss": 1.9928, + "step": 16682500 + }, + { + "epoch": 48.29, + "learning_rate": 2.5863467951093002e-05, + "loss": 1.9985, + "step": 16683000 + }, + { + "epoch": 48.29, + "learning_rate": 2.5862744303445724e-05, + "loss": 1.9936, + "step": 16683500 + }, + { + "epoch": 48.29, + "learning_rate": 2.5862020655798446e-05, + "loss": 1.9914, + "step": 16684000 + }, + { + "epoch": 48.29, + "learning_rate": 2.586129700815117e-05, + "loss": 1.9803, + "step": 16684500 + }, + { + "epoch": 48.3, + "learning_rate": 2.5860573360503894e-05, + "loss": 2.0307, + "step": 16685000 + }, + { + "epoch": 48.3, + "learning_rate": 2.5859849712856616e-05, + "loss": 1.9882, + "step": 16685500 + }, + { + "epoch": 48.3, + "learning_rate": 2.585912606520934e-05, + "loss": 1.996, + "step": 16686000 + }, + { + "epoch": 48.3, + "learning_rate": 2.585840241756206e-05, + "loss": 2.0015, + "step": 16686500 + }, + { + "epoch": 48.3, + "learning_rate": 2.5857678769914783e-05, + "loss": 1.9958, + "step": 16687000 + }, + { + "epoch": 48.3, + "learning_rate": 2.5856955122267505e-05, + "loss": 1.9996, + "step": 16687500 + }, + { + "epoch": 48.3, + "learning_rate": 2.585623147462023e-05, + "loss": 2.0227, + "step": 16688000 + }, + { + "epoch": 48.31, + "learning_rate": 2.5855509274268246e-05, + "loss": 2.013, + "step": 16688500 + }, + { + "epoch": 48.31, + "learning_rate": 2.585478562662097e-05, + "loss": 2.007, + "step": 16689000 + }, + { + "epoch": 48.31, + "learning_rate": 2.5854061978973694e-05, + "loss": 1.9998, + "step": 16689500 + }, + { + "epoch": 48.31, + "learning_rate": 2.585333833132642e-05, + "loss": 2.0022, + "step": 16690000 + }, + { + "epoch": 48.31, + "learning_rate": 2.5852614683679146e-05, + "loss": 2.0229, + "step": 16690500 + }, + { + "epoch": 48.31, + "learning_rate": 2.5851891036031868e-05, + "loss": 2.0104, + "step": 16691000 + }, + { + "epoch": 48.32, + "learning_rate": 2.585116738838459e-05, + "loss": 1.9947, + "step": 16691500 + }, + { + "epoch": 48.32, + "learning_rate": 2.5850443740737312e-05, + "loss": 2.0037, + "step": 16692000 + }, + { + "epoch": 48.32, + "learning_rate": 2.5849720093090034e-05, + "loss": 2.0058, + "step": 16692500 + }, + { + "epoch": 48.32, + "learning_rate": 2.5848996445442757e-05, + "loss": 2.0015, + "step": 16693000 + }, + { + "epoch": 48.32, + "learning_rate": 2.5848274245090776e-05, + "loss": 2.0149, + "step": 16693500 + }, + { + "epoch": 48.32, + "learning_rate": 2.5847550597443498e-05, + "loss": 2.0304, + "step": 16694000 + }, + { + "epoch": 48.32, + "learning_rate": 2.584682694979622e-05, + "loss": 1.965, + "step": 16694500 + }, + { + "epoch": 48.33, + "learning_rate": 2.5846103302148942e-05, + "loss": 2.0143, + "step": 16695000 + }, + { + "epoch": 48.33, + "learning_rate": 2.5845379654501668e-05, + "loss": 2.0073, + "step": 16695500 + }, + { + "epoch": 48.33, + "learning_rate": 2.584465890144498e-05, + "loss": 1.9647, + "step": 16696000 + }, + { + "epoch": 48.33, + "learning_rate": 2.5843935253797702e-05, + "loss": 2.0124, + "step": 16696500 + }, + { + "epoch": 48.33, + "learning_rate": 2.5843211606150425e-05, + "loss": 2.0209, + "step": 16697000 + }, + { + "epoch": 48.33, + "learning_rate": 2.5842487958503154e-05, + "loss": 2.0258, + "step": 16697500 + }, + { + "epoch": 48.33, + "learning_rate": 2.5841764310855876e-05, + "loss": 2.0202, + "step": 16698000 + }, + { + "epoch": 48.34, + "learning_rate": 2.5841040663208598e-05, + "loss": 2.0139, + "step": 16698500 + }, + { + "epoch": 48.34, + "learning_rate": 2.5840318462856617e-05, + "loss": 2.0052, + "step": 16699000 + }, + { + "epoch": 48.34, + "learning_rate": 2.583959481520934e-05, + "loss": 2.0385, + "step": 16699500 + }, + { + "epoch": 48.34, + "learning_rate": 2.583887116756206e-05, + "loss": 2.0097, + "step": 16700000 + }, + { + "epoch": 48.34, + "learning_rate": 2.5838147519914784e-05, + "loss": 2.0107, + "step": 16700500 + }, + { + "epoch": 48.34, + "learning_rate": 2.5837423872267506e-05, + "loss": 2.0102, + "step": 16701000 + }, + { + "epoch": 48.34, + "learning_rate": 2.583670022462023e-05, + "loss": 1.9968, + "step": 16701500 + }, + { + "epoch": 48.35, + "learning_rate": 2.5835976576972954e-05, + "loss": 1.9946, + "step": 16702000 + }, + { + "epoch": 48.35, + "learning_rate": 2.583525437662097e-05, + "loss": 2.0321, + "step": 16702500 + }, + { + "epoch": 48.35, + "learning_rate": 2.5834530728973695e-05, + "loss": 2.0107, + "step": 16703000 + }, + { + "epoch": 48.35, + "learning_rate": 2.583380852862171e-05, + "loss": 1.9749, + "step": 16703500 + }, + { + "epoch": 48.35, + "learning_rate": 2.5833084880974433e-05, + "loss": 2.002, + "step": 16704000 + }, + { + "epoch": 48.35, + "learning_rate": 2.583236123332716e-05, + "loss": 2.0037, + "step": 16704500 + }, + { + "epoch": 48.35, + "learning_rate": 2.5831637585679884e-05, + "loss": 1.9901, + "step": 16705000 + }, + { + "epoch": 48.36, + "learning_rate": 2.583091393803261e-05, + "loss": 1.9874, + "step": 16705500 + }, + { + "epoch": 48.36, + "learning_rate": 2.5830190290385332e-05, + "loss": 1.9729, + "step": 16706000 + }, + { + "epoch": 48.36, + "learning_rate": 2.5829466642738054e-05, + "loss": 1.9712, + "step": 16706500 + }, + { + "epoch": 48.36, + "learning_rate": 2.5828742995090776e-05, + "loss": 2.0083, + "step": 16707000 + }, + { + "epoch": 48.36, + "learning_rate": 2.58280193474435e-05, + "loss": 1.9939, + "step": 16707500 + }, + { + "epoch": 48.36, + "learning_rate": 2.582729569979622e-05, + "loss": 2.0024, + "step": 16708000 + }, + { + "epoch": 48.36, + "learning_rate": 2.5826572052148947e-05, + "loss": 2.0001, + "step": 16708500 + }, + { + "epoch": 48.37, + "learning_rate": 2.582584840450167e-05, + "loss": 1.9976, + "step": 16709000 + }, + { + "epoch": 48.37, + "learning_rate": 2.5825126204149684e-05, + "loss": 2.0158, + "step": 16709500 + }, + { + "epoch": 48.37, + "learning_rate": 2.582440255650241e-05, + "loss": 2.0078, + "step": 16710000 + }, + { + "epoch": 48.37, + "learning_rate": 2.5823678908855132e-05, + "loss": 2.0015, + "step": 16710500 + }, + { + "epoch": 48.37, + "learning_rate": 2.5822955261207854e-05, + "loss": 1.999, + "step": 16711000 + }, + { + "epoch": 48.37, + "learning_rate": 2.582223306085587e-05, + "loss": 1.9943, + "step": 16711500 + }, + { + "epoch": 48.37, + "learning_rate": 2.5821509413208596e-05, + "loss": 2.0174, + "step": 16712000 + }, + { + "epoch": 48.38, + "learning_rate": 2.582078576556132e-05, + "loss": 1.9871, + "step": 16712500 + }, + { + "epoch": 48.38, + "learning_rate": 2.5820062117914047e-05, + "loss": 2.0, + "step": 16713000 + }, + { + "epoch": 48.38, + "learning_rate": 2.581933847026677e-05, + "loss": 2.0, + "step": 16713500 + }, + { + "epoch": 48.38, + "learning_rate": 2.581861482261949e-05, + "loss": 2.013, + "step": 16714000 + }, + { + "epoch": 48.38, + "learning_rate": 2.5817891174972214e-05, + "loss": 1.9961, + "step": 16714500 + }, + { + "epoch": 48.38, + "learning_rate": 2.5817167527324936e-05, + "loss": 1.9917, + "step": 16715000 + }, + { + "epoch": 48.38, + "learning_rate": 2.581644387967766e-05, + "loss": 2.0037, + "step": 16715500 + }, + { + "epoch": 48.39, + "learning_rate": 2.5815723126620974e-05, + "loss": 1.9941, + "step": 16716000 + }, + { + "epoch": 48.39, + "learning_rate": 2.5814999478973696e-05, + "loss": 2.0215, + "step": 16716500 + }, + { + "epoch": 48.39, + "learning_rate": 2.5814275831326418e-05, + "loss": 2.0146, + "step": 16717000 + }, + { + "epoch": 48.39, + "learning_rate": 2.581355218367914e-05, + "loss": 2.0165, + "step": 16717500 + }, + { + "epoch": 48.39, + "learning_rate": 2.5812828536031863e-05, + "loss": 1.9953, + "step": 16718000 + }, + { + "epoch": 48.39, + "learning_rate": 2.5812104888384585e-05, + "loss": 1.9853, + "step": 16718500 + }, + { + "epoch": 48.39, + "learning_rate": 2.581138124073731e-05, + "loss": 1.9875, + "step": 16719000 + }, + { + "epoch": 48.4, + "learning_rate": 2.5810657593090033e-05, + "loss": 2.007, + "step": 16719500 + }, + { + "epoch": 48.4, + "learning_rate": 2.5809935392738055e-05, + "loss": 2.0141, + "step": 16720000 + }, + { + "epoch": 48.4, + "learning_rate": 2.5809213192386074e-05, + "loss": 2.0066, + "step": 16720500 + }, + { + "epoch": 48.4, + "learning_rate": 2.580849099203409e-05, + "loss": 2.0141, + "step": 16721000 + }, + { + "epoch": 48.4, + "learning_rate": 2.5807767344386812e-05, + "loss": 2.0044, + "step": 16721500 + }, + { + "epoch": 48.4, + "learning_rate": 2.5807043696739537e-05, + "loss": 2.009, + "step": 16722000 + }, + { + "epoch": 48.4, + "learning_rate": 2.580632004909226e-05, + "loss": 1.9849, + "step": 16722500 + }, + { + "epoch": 48.41, + "learning_rate": 2.5805596401444982e-05, + "loss": 2.0019, + "step": 16723000 + }, + { + "epoch": 48.41, + "learning_rate": 2.5804872753797704e-05, + "loss": 2.022, + "step": 16723500 + }, + { + "epoch": 48.41, + "learning_rate": 2.5804149106150426e-05, + "loss": 1.9962, + "step": 16724000 + }, + { + "epoch": 48.41, + "learning_rate": 2.580342545850315e-05, + "loss": 1.983, + "step": 16724500 + }, + { + "epoch": 48.41, + "learning_rate": 2.5802701810855874e-05, + "loss": 2.0245, + "step": 16725000 + }, + { + "epoch": 48.41, + "learning_rate": 2.5801978163208596e-05, + "loss": 2.0045, + "step": 16725500 + }, + { + "epoch": 48.41, + "learning_rate": 2.580125451556132e-05, + "loss": 2.009, + "step": 16726000 + }, + { + "epoch": 48.42, + "learning_rate": 2.580053086791404e-05, + "loss": 2.0197, + "step": 16726500 + }, + { + "epoch": 48.42, + "learning_rate": 2.579980866756206e-05, + "loss": 2.0221, + "step": 16727000 + }, + { + "epoch": 48.42, + "learning_rate": 2.5799085019914785e-05, + "loss": 1.9979, + "step": 16727500 + }, + { + "epoch": 48.42, + "learning_rate": 2.579836137226751e-05, + "loss": 2.006, + "step": 16728000 + }, + { + "epoch": 48.42, + "learning_rate": 2.5797639171915527e-05, + "loss": 1.9862, + "step": 16728500 + }, + { + "epoch": 48.42, + "learning_rate": 2.579691552426825e-05, + "loss": 2.0002, + "step": 16729000 + }, + { + "epoch": 48.43, + "learning_rate": 2.5796191876620974e-05, + "loss": 2.0182, + "step": 16729500 + }, + { + "epoch": 48.43, + "learning_rate": 2.5795468228973697e-05, + "loss": 1.9796, + "step": 16730000 + }, + { + "epoch": 48.43, + "learning_rate": 2.579474458132642e-05, + "loss": 1.9946, + "step": 16730500 + }, + { + "epoch": 48.43, + "learning_rate": 2.579402093367914e-05, + "loss": 1.9918, + "step": 16731000 + }, + { + "epoch": 48.43, + "learning_rate": 2.579329873332716e-05, + "loss": 2.0049, + "step": 16731500 + }, + { + "epoch": 48.43, + "learning_rate": 2.5792575085679882e-05, + "loss": 2.008, + "step": 16732000 + }, + { + "epoch": 48.43, + "learning_rate": 2.5791851438032605e-05, + "loss": 1.9648, + "step": 16732500 + }, + { + "epoch": 48.44, + "learning_rate": 2.5791127790385327e-05, + "loss": 2.0067, + "step": 16733000 + }, + { + "epoch": 48.44, + "learning_rate": 2.579040414273805e-05, + "loss": 2.0199, + "step": 16733500 + }, + { + "epoch": 48.44, + "learning_rate": 2.5789681942386068e-05, + "loss": 1.997, + "step": 16734000 + }, + { + "epoch": 48.44, + "learning_rate": 2.578895829473879e-05, + "loss": 1.9986, + "step": 16734500 + }, + { + "epoch": 48.44, + "learning_rate": 2.578823464709152e-05, + "loss": 2.019, + "step": 16735000 + }, + { + "epoch": 48.44, + "learning_rate": 2.578751099944424e-05, + "loss": 2.0242, + "step": 16735500 + }, + { + "epoch": 48.44, + "learning_rate": 2.578678879909226e-05, + "loss": 2.0098, + "step": 16736000 + }, + { + "epoch": 48.45, + "learning_rate": 2.5786066598740276e-05, + "loss": 2.0182, + "step": 16736500 + }, + { + "epoch": 48.45, + "learning_rate": 2.5785342951093e-05, + "loss": 1.9957, + "step": 16737000 + }, + { + "epoch": 48.45, + "learning_rate": 2.5784619303445724e-05, + "loss": 2.0195, + "step": 16737500 + }, + { + "epoch": 48.45, + "learning_rate": 2.5783895655798446e-05, + "loss": 2.0245, + "step": 16738000 + }, + { + "epoch": 48.45, + "learning_rate": 2.5783172008151168e-05, + "loss": 2.0029, + "step": 16738500 + }, + { + "epoch": 48.45, + "learning_rate": 2.5782449807799187e-05, + "loss": 1.9981, + "step": 16739000 + }, + { + "epoch": 48.45, + "learning_rate": 2.5781727607447203e-05, + "loss": 1.965, + "step": 16739500 + }, + { + "epoch": 48.46, + "learning_rate": 2.5781003959799925e-05, + "loss": 1.9813, + "step": 16740000 + }, + { + "epoch": 48.46, + "learning_rate": 2.5780283206743237e-05, + "loss": 2.0129, + "step": 16740500 + }, + { + "epoch": 48.46, + "learning_rate": 2.5779559559095963e-05, + "loss": 1.9986, + "step": 16741000 + }, + { + "epoch": 48.46, + "learning_rate": 2.5778835911448685e-05, + "loss": 2.0496, + "step": 16741500 + }, + { + "epoch": 48.46, + "learning_rate": 2.5778112263801407e-05, + "loss": 2.0098, + "step": 16742000 + }, + { + "epoch": 48.46, + "learning_rate": 2.577738861615413e-05, + "loss": 1.9919, + "step": 16742500 + }, + { + "epoch": 48.46, + "learning_rate": 2.5776664968506852e-05, + "loss": 2.006, + "step": 16743000 + }, + { + "epoch": 48.47, + "learning_rate": 2.577594132085958e-05, + "loss": 2.0262, + "step": 16743500 + }, + { + "epoch": 48.47, + "learning_rate": 2.5775217673212303e-05, + "loss": 2.0073, + "step": 16744000 + }, + { + "epoch": 48.47, + "learning_rate": 2.5774494025565025e-05, + "loss": 2.0056, + "step": 16744500 + }, + { + "epoch": 48.47, + "learning_rate": 2.577377037791775e-05, + "loss": 1.9859, + "step": 16745000 + }, + { + "epoch": 48.47, + "learning_rate": 2.5773046730270473e-05, + "loss": 1.9906, + "step": 16745500 + }, + { + "epoch": 48.47, + "learning_rate": 2.5772323082623195e-05, + "loss": 2.0001, + "step": 16746000 + }, + { + "epoch": 48.47, + "learning_rate": 2.5771599434975918e-05, + "loss": 1.9944, + "step": 16746500 + }, + { + "epoch": 48.48, + "learning_rate": 2.577087578732864e-05, + "loss": 1.9843, + "step": 16747000 + }, + { + "epoch": 48.48, + "learning_rate": 2.5770152139681365e-05, + "loss": 1.9976, + "step": 16747500 + }, + { + "epoch": 48.48, + "learning_rate": 2.5769428492034088e-05, + "loss": 2.0087, + "step": 16748000 + }, + { + "epoch": 48.48, + "learning_rate": 2.576870484438681e-05, + "loss": 2.0075, + "step": 16748500 + }, + { + "epoch": 48.48, + "learning_rate": 2.5767981196739532e-05, + "loss": 2.0239, + "step": 16749000 + }, + { + "epoch": 48.48, + "learning_rate": 2.5767257549092254e-05, + "loss": 1.9882, + "step": 16749500 + }, + { + "epoch": 48.48, + "learning_rate": 2.5766533901444983e-05, + "loss": 1.9916, + "step": 16750000 + }, + { + "epoch": 48.49, + "learning_rate": 2.5765810253797706e-05, + "loss": 2.0029, + "step": 16750500 + }, + { + "epoch": 48.49, + "learning_rate": 2.5765086606150428e-05, + "loss": 2.02, + "step": 16751000 + }, + { + "epoch": 48.49, + "learning_rate": 2.5764364405798447e-05, + "loss": 1.9968, + "step": 16751500 + }, + { + "epoch": 48.49, + "learning_rate": 2.576364075815117e-05, + "loss": 2.0275, + "step": 16752000 + }, + { + "epoch": 48.49, + "learning_rate": 2.576291711050389e-05, + "loss": 2.0104, + "step": 16752500 + }, + { + "epoch": 48.49, + "learning_rate": 2.576219491015191e-05, + "loss": 2.0001, + "step": 16753000 + }, + { + "epoch": 48.49, + "learning_rate": 2.5761471262504632e-05, + "loss": 1.9949, + "step": 16753500 + }, + { + "epoch": 48.5, + "learning_rate": 2.5760747614857355e-05, + "loss": 2.0205, + "step": 16754000 + }, + { + "epoch": 48.5, + "learning_rate": 2.5760023967210077e-05, + "loss": 2.0184, + "step": 16754500 + }, + { + "epoch": 48.5, + "learning_rate": 2.5759300319562803e-05, + "loss": 2.0009, + "step": 16755000 + }, + { + "epoch": 48.5, + "learning_rate": 2.5758576671915525e-05, + "loss": 2.023, + "step": 16755500 + }, + { + "epoch": 48.5, + "learning_rate": 2.5757853024268247e-05, + "loss": 2.0174, + "step": 16756000 + }, + { + "epoch": 48.5, + "learning_rate": 2.5757130823916266e-05, + "loss": 1.9904, + "step": 16756500 + }, + { + "epoch": 48.5, + "learning_rate": 2.5756407176268988e-05, + "loss": 2.0112, + "step": 16757000 + }, + { + "epoch": 48.51, + "learning_rate": 2.5755683528621717e-05, + "loss": 2.0065, + "step": 16757500 + }, + { + "epoch": 48.51, + "learning_rate": 2.575495988097444e-05, + "loss": 2.0, + "step": 16758000 + }, + { + "epoch": 48.51, + "learning_rate": 2.5754236233327162e-05, + "loss": 2.0099, + "step": 16758500 + }, + { + "epoch": 48.51, + "learning_rate": 2.5753512585679884e-05, + "loss": 2.0144, + "step": 16759000 + }, + { + "epoch": 48.51, + "learning_rate": 2.5752788938032606e-05, + "loss": 2.0073, + "step": 16759500 + }, + { + "epoch": 48.51, + "learning_rate": 2.575206529038533e-05, + "loss": 2.0142, + "step": 16760000 + }, + { + "epoch": 48.51, + "learning_rate": 2.5751341642738054e-05, + "loss": 1.9875, + "step": 16760500 + }, + { + "epoch": 48.52, + "learning_rate": 2.5750617995090776e-05, + "loss": 2.0232, + "step": 16761000 + }, + { + "epoch": 48.52, + "learning_rate": 2.57498943474435e-05, + "loss": 2.0138, + "step": 16761500 + }, + { + "epoch": 48.52, + "learning_rate": 2.574917069979622e-05, + "loss": 1.9913, + "step": 16762000 + }, + { + "epoch": 48.52, + "learning_rate": 2.5748447052148943e-05, + "loss": 2.017, + "step": 16762500 + }, + { + "epoch": 48.52, + "learning_rate": 2.5747724851796962e-05, + "loss": 1.998, + "step": 16763000 + }, + { + "epoch": 48.52, + "learning_rate": 2.5747001204149684e-05, + "loss": 2.0099, + "step": 16763500 + }, + { + "epoch": 48.52, + "learning_rate": 2.5746277556502406e-05, + "loss": 2.0068, + "step": 16764000 + }, + { + "epoch": 48.53, + "learning_rate": 2.5745555356150425e-05, + "loss": 2.0, + "step": 16764500 + }, + { + "epoch": 48.53, + "learning_rate": 2.5744831708503154e-05, + "loss": 1.9768, + "step": 16765000 + }, + { + "epoch": 48.53, + "learning_rate": 2.5744108060855877e-05, + "loss": 2.0019, + "step": 16765500 + }, + { + "epoch": 48.53, + "learning_rate": 2.57433844132086e-05, + "loss": 1.9932, + "step": 16766000 + }, + { + "epoch": 48.53, + "learning_rate": 2.574266076556132e-05, + "loss": 2.0094, + "step": 16766500 + }, + { + "epoch": 48.53, + "learning_rate": 2.5741937117914043e-05, + "loss": 1.9716, + "step": 16767000 + }, + { + "epoch": 48.54, + "learning_rate": 2.574121347026677e-05, + "loss": 1.9952, + "step": 16767500 + }, + { + "epoch": 48.54, + "learning_rate": 2.574048982261949e-05, + "loss": 2.0013, + "step": 16768000 + }, + { + "epoch": 48.54, + "learning_rate": 2.5739766174972213e-05, + "loss": 1.9878, + "step": 16768500 + }, + { + "epoch": 48.54, + "learning_rate": 2.573904397462023e-05, + "loss": 2.0099, + "step": 16769000 + }, + { + "epoch": 48.54, + "learning_rate": 2.5738320326972955e-05, + "loss": 2.0119, + "step": 16769500 + }, + { + "epoch": 48.54, + "learning_rate": 2.5737596679325677e-05, + "loss": 2.0169, + "step": 16770000 + }, + { + "epoch": 48.54, + "learning_rate": 2.57368730316784e-05, + "loss": 2.0052, + "step": 16770500 + }, + { + "epoch": 48.55, + "learning_rate": 2.5736150831326418e-05, + "loss": 2.002, + "step": 16771000 + }, + { + "epoch": 48.55, + "learning_rate": 2.573542718367914e-05, + "loss": 2.0171, + "step": 16771500 + }, + { + "epoch": 48.55, + "learning_rate": 2.5734703536031862e-05, + "loss": 1.9949, + "step": 16772000 + }, + { + "epoch": 48.55, + "learning_rate": 2.573397988838459e-05, + "loss": 2.0089, + "step": 16772500 + }, + { + "epoch": 48.55, + "learning_rate": 2.5733256240737314e-05, + "loss": 1.9781, + "step": 16773000 + }, + { + "epoch": 48.55, + "learning_rate": 2.5732532593090036e-05, + "loss": 1.9991, + "step": 16773500 + }, + { + "epoch": 48.55, + "learning_rate": 2.5731808945442758e-05, + "loss": 2.0106, + "step": 16774000 + }, + { + "epoch": 48.56, + "learning_rate": 2.573108529779548e-05, + "loss": 2.0231, + "step": 16774500 + }, + { + "epoch": 48.56, + "learning_rate": 2.5730361650148206e-05, + "loss": 2.0078, + "step": 16775000 + }, + { + "epoch": 48.56, + "learning_rate": 2.572963944979622e-05, + "loss": 2.0067, + "step": 16775500 + }, + { + "epoch": 48.56, + "learning_rate": 2.5728915802148944e-05, + "loss": 1.9822, + "step": 16776000 + }, + { + "epoch": 48.56, + "learning_rate": 2.572819215450167e-05, + "loss": 1.9801, + "step": 16776500 + }, + { + "epoch": 48.56, + "learning_rate": 2.572746850685439e-05, + "loss": 1.9775, + "step": 16777000 + }, + { + "epoch": 48.56, + "learning_rate": 2.5726744859207114e-05, + "loss": 2.0277, + "step": 16777500 + }, + { + "epoch": 48.57, + "learning_rate": 2.5726021211559836e-05, + "loss": 2.0165, + "step": 16778000 + }, + { + "epoch": 48.57, + "learning_rate": 2.5725297563912558e-05, + "loss": 2.0233, + "step": 16778500 + }, + { + "epoch": 48.57, + "learning_rate": 2.572457391626528e-05, + "loss": 2.0123, + "step": 16779000 + }, + { + "epoch": 48.57, + "learning_rate": 2.572385026861801e-05, + "loss": 2.011, + "step": 16779500 + }, + { + "epoch": 48.57, + "learning_rate": 2.572312806826603e-05, + "loss": 2.0098, + "step": 16780000 + }, + { + "epoch": 48.57, + "learning_rate": 2.572240442061875e-05, + "loss": 1.9799, + "step": 16780500 + }, + { + "epoch": 48.57, + "learning_rate": 2.5721680772971473e-05, + "loss": 2.0018, + "step": 16781000 + }, + { + "epoch": 48.58, + "learning_rate": 2.5720957125324195e-05, + "loss": 2.0031, + "step": 16781500 + }, + { + "epoch": 48.58, + "learning_rate": 2.5720234924972214e-05, + "loss": 1.9881, + "step": 16782000 + }, + { + "epoch": 48.58, + "learning_rate": 2.5719511277324936e-05, + "loss": 2.0033, + "step": 16782500 + }, + { + "epoch": 48.58, + "learning_rate": 2.571878762967766e-05, + "loss": 2.0, + "step": 16783000 + }, + { + "epoch": 48.58, + "learning_rate": 2.5718063982030384e-05, + "loss": 2.0015, + "step": 16783500 + }, + { + "epoch": 48.58, + "learning_rate": 2.5717340334383106e-05, + "loss": 2.0114, + "step": 16784000 + }, + { + "epoch": 48.58, + "learning_rate": 2.571661668673583e-05, + "loss": 1.9899, + "step": 16784500 + }, + { + "epoch": 48.59, + "learning_rate": 2.5715894486383844e-05, + "loss": 2.0012, + "step": 16785000 + }, + { + "epoch": 48.59, + "learning_rate": 2.5715172286031863e-05, + "loss": 2.0242, + "step": 16785500 + }, + { + "epoch": 48.59, + "learning_rate": 2.5714448638384585e-05, + "loss": 2.0145, + "step": 16786000 + }, + { + "epoch": 48.59, + "learning_rate": 2.5713724990737308e-05, + "loss": 2.0088, + "step": 16786500 + }, + { + "epoch": 48.59, + "learning_rate": 2.5713001343090033e-05, + "loss": 1.9923, + "step": 16787000 + }, + { + "epoch": 48.59, + "learning_rate": 2.571227769544276e-05, + "loss": 2.0, + "step": 16787500 + }, + { + "epoch": 48.59, + "learning_rate": 2.5711554047795485e-05, + "loss": 2.0125, + "step": 16788000 + }, + { + "epoch": 48.6, + "learning_rate": 2.5710830400148207e-05, + "loss": 1.9762, + "step": 16788500 + }, + { + "epoch": 48.6, + "learning_rate": 2.571010675250093e-05, + "loss": 1.9761, + "step": 16789000 + }, + { + "epoch": 48.6, + "learning_rate": 2.570938310485365e-05, + "loss": 1.996, + "step": 16789500 + }, + { + "epoch": 48.6, + "learning_rate": 2.5708659457206373e-05, + "loss": 2.0195, + "step": 16790000 + }, + { + "epoch": 48.6, + "learning_rate": 2.5707935809559096e-05, + "loss": 2.0096, + "step": 16790500 + }, + { + "epoch": 48.6, + "learning_rate": 2.5707213609207115e-05, + "loss": 2.0252, + "step": 16791000 + }, + { + "epoch": 48.6, + "learning_rate": 2.5706489961559837e-05, + "loss": 1.9999, + "step": 16791500 + }, + { + "epoch": 48.61, + "learning_rate": 2.5705767761207856e-05, + "loss": 2.0091, + "step": 16792000 + }, + { + "epoch": 48.61, + "learning_rate": 2.5705044113560578e-05, + "loss": 2.0097, + "step": 16792500 + }, + { + "epoch": 48.61, + "learning_rate": 2.57043204659133e-05, + "loss": 2.0297, + "step": 16793000 + }, + { + "epoch": 48.61, + "learning_rate": 2.5703596818266023e-05, + "loss": 2.0132, + "step": 16793500 + }, + { + "epoch": 48.61, + "learning_rate": 2.5702873170618748e-05, + "loss": 2.0052, + "step": 16794000 + }, + { + "epoch": 48.61, + "learning_rate": 2.5702149522971474e-05, + "loss": 1.9871, + "step": 16794500 + }, + { + "epoch": 48.61, + "learning_rate": 2.5701425875324196e-05, + "loss": 2.0176, + "step": 16795000 + }, + { + "epoch": 48.62, + "learning_rate": 2.5700703674972215e-05, + "loss": 2.0051, + "step": 16795500 + }, + { + "epoch": 48.62, + "learning_rate": 2.5699980027324937e-05, + "loss": 2.0092, + "step": 16796000 + }, + { + "epoch": 48.62, + "learning_rate": 2.569925637967766e-05, + "loss": 2.0238, + "step": 16796500 + }, + { + "epoch": 48.62, + "learning_rate": 2.5698532732030385e-05, + "loss": 2.0, + "step": 16797000 + }, + { + "epoch": 48.62, + "learning_rate": 2.5697809084383107e-05, + "loss": 2.0012, + "step": 16797500 + }, + { + "epoch": 48.62, + "learning_rate": 2.569708543673583e-05, + "loss": 2.0019, + "step": 16798000 + }, + { + "epoch": 48.62, + "learning_rate": 2.5696361789088552e-05, + "loss": 2.0118, + "step": 16798500 + }, + { + "epoch": 48.63, + "learning_rate": 2.5695638141441274e-05, + "loss": 2.0287, + "step": 16799000 + }, + { + "epoch": 48.63, + "learning_rate": 2.5694914493793996e-05, + "loss": 1.9933, + "step": 16799500 + }, + { + "epoch": 48.63, + "learning_rate": 2.5694190846146722e-05, + "loss": 2.031, + "step": 16800000 + }, + { + "epoch": 48.63, + "learning_rate": 2.5693468645794737e-05, + "loss": 1.9776, + "step": 16800500 + }, + { + "epoch": 48.63, + "learning_rate": 2.569274499814746e-05, + "loss": 2.0006, + "step": 16801000 + }, + { + "epoch": 48.63, + "learning_rate": 2.5692021350500185e-05, + "loss": 1.9855, + "step": 16801500 + }, + { + "epoch": 48.63, + "learning_rate": 2.569129770285291e-05, + "loss": 2.0015, + "step": 16802000 + }, + { + "epoch": 48.64, + "learning_rate": 2.5690574055205636e-05, + "loss": 2.0005, + "step": 16802500 + }, + { + "epoch": 48.64, + "learning_rate": 2.568985040755836e-05, + "loss": 2.0133, + "step": 16803000 + }, + { + "epoch": 48.64, + "learning_rate": 2.568912675991108e-05, + "loss": 2.0298, + "step": 16803500 + }, + { + "epoch": 48.64, + "learning_rate": 2.5688403112263803e-05, + "loss": 2.0079, + "step": 16804000 + }, + { + "epoch": 48.64, + "learning_rate": 2.5687679464616525e-05, + "loss": 2.0175, + "step": 16804500 + }, + { + "epoch": 48.64, + "learning_rate": 2.5686955816969248e-05, + "loss": 2.0043, + "step": 16805000 + }, + { + "epoch": 48.65, + "learning_rate": 2.5686233616617267e-05, + "loss": 2.0022, + "step": 16805500 + }, + { + "epoch": 48.65, + "learning_rate": 2.568550996896999e-05, + "loss": 1.9834, + "step": 16806000 + }, + { + "epoch": 48.65, + "learning_rate": 2.568478632132271e-05, + "loss": 2.0137, + "step": 16806500 + }, + { + "epoch": 48.65, + "learning_rate": 2.5684062673675437e-05, + "loss": 2.0126, + "step": 16807000 + }, + { + "epoch": 48.65, + "learning_rate": 2.5683340473323452e-05, + "loss": 2.0101, + "step": 16807500 + }, + { + "epoch": 48.65, + "learning_rate": 2.5682616825676174e-05, + "loss": 1.9949, + "step": 16808000 + }, + { + "epoch": 48.65, + "learning_rate": 2.56818931780289e-05, + "loss": 1.9983, + "step": 16808500 + }, + { + "epoch": 48.66, + "learning_rate": 2.5681169530381622e-05, + "loss": 2.0048, + "step": 16809000 + }, + { + "epoch": 48.66, + "learning_rate": 2.568044588273435e-05, + "loss": 2.0114, + "step": 16809500 + }, + { + "epoch": 48.66, + "learning_rate": 2.5679723682382367e-05, + "loss": 2.0291, + "step": 16810000 + }, + { + "epoch": 48.66, + "learning_rate": 2.567900003473509e-05, + "loss": 2.0099, + "step": 16810500 + }, + { + "epoch": 48.66, + "learning_rate": 2.567827638708781e-05, + "loss": 2.0072, + "step": 16811000 + }, + { + "epoch": 48.66, + "learning_rate": 2.5677552739440537e-05, + "loss": 1.9863, + "step": 16811500 + }, + { + "epoch": 48.66, + "learning_rate": 2.567683198638385e-05, + "loss": 2.0041, + "step": 16812000 + }, + { + "epoch": 48.67, + "learning_rate": 2.567610833873657e-05, + "loss": 2.0274, + "step": 16812500 + }, + { + "epoch": 48.67, + "learning_rate": 2.5675384691089294e-05, + "loss": 2.0356, + "step": 16813000 + }, + { + "epoch": 48.67, + "learning_rate": 2.5674661043442016e-05, + "loss": 2.0194, + "step": 16813500 + }, + { + "epoch": 48.67, + "learning_rate": 2.5673937395794738e-05, + "loss": 1.9996, + "step": 16814000 + }, + { + "epoch": 48.67, + "learning_rate": 2.5673213748147464e-05, + "loss": 2.0075, + "step": 16814500 + }, + { + "epoch": 48.67, + "learning_rate": 2.5672490100500186e-05, + "loss": 1.9908, + "step": 16815000 + }, + { + "epoch": 48.67, + "learning_rate": 2.5671766452852908e-05, + "loss": 2.0157, + "step": 16815500 + }, + { + "epoch": 48.68, + "learning_rate": 2.567104280520563e-05, + "loss": 2.0048, + "step": 16816000 + }, + { + "epoch": 48.68, + "learning_rate": 2.5670319157558353e-05, + "loss": 2.0133, + "step": 16816500 + }, + { + "epoch": 48.68, + "learning_rate": 2.5669596957206375e-05, + "loss": 1.9784, + "step": 16817000 + }, + { + "epoch": 48.68, + "learning_rate": 2.56688733095591e-05, + "loss": 2.0334, + "step": 16817500 + }, + { + "epoch": 48.68, + "learning_rate": 2.5668149661911823e-05, + "loss": 2.0061, + "step": 16818000 + }, + { + "epoch": 48.68, + "learning_rate": 2.5667426014264545e-05, + "loss": 2.0, + "step": 16818500 + }, + { + "epoch": 48.68, + "learning_rate": 2.5666703813912564e-05, + "loss": 2.0135, + "step": 16819000 + }, + { + "epoch": 48.69, + "learning_rate": 2.5665980166265286e-05, + "loss": 2.0175, + "step": 16819500 + }, + { + "epoch": 48.69, + "learning_rate": 2.566525651861801e-05, + "loss": 2.0022, + "step": 16820000 + }, + { + "epoch": 48.69, + "learning_rate": 2.566453287097073e-05, + "loss": 2.0018, + "step": 16820500 + }, + { + "epoch": 48.69, + "learning_rate": 2.5663809223323453e-05, + "loss": 2.0024, + "step": 16821000 + }, + { + "epoch": 48.69, + "learning_rate": 2.5663085575676175e-05, + "loss": 2.0269, + "step": 16821500 + }, + { + "epoch": 48.69, + "learning_rate": 2.56623619280289e-05, + "loss": 2.0171, + "step": 16822000 + }, + { + "epoch": 48.69, + "learning_rate": 2.5661638280381623e-05, + "loss": 2.0256, + "step": 16822500 + }, + { + "epoch": 48.7, + "learning_rate": 2.5660914632734345e-05, + "loss": 2.003, + "step": 16823000 + }, + { + "epoch": 48.7, + "learning_rate": 2.5660190985087068e-05, + "loss": 2.0353, + "step": 16823500 + }, + { + "epoch": 48.7, + "learning_rate": 2.5659468784735087e-05, + "loss": 2.0252, + "step": 16824000 + }, + { + "epoch": 48.7, + "learning_rate": 2.565874658438311e-05, + "loss": 2.0003, + "step": 16824500 + }, + { + "epoch": 48.7, + "learning_rate": 2.565802293673583e-05, + "loss": 2.0194, + "step": 16825000 + }, + { + "epoch": 48.7, + "learning_rate": 2.5657299289088553e-05, + "loss": 2.0106, + "step": 16825500 + }, + { + "epoch": 48.7, + "learning_rate": 2.5656575641441276e-05, + "loss": 1.9922, + "step": 16826000 + }, + { + "epoch": 48.71, + "learning_rate": 2.5655851993794e-05, + "loss": 2.0035, + "step": 16826500 + }, + { + "epoch": 48.71, + "learning_rate": 2.5655128346146723e-05, + "loss": 2.0022, + "step": 16827000 + }, + { + "epoch": 48.71, + "learning_rate": 2.5654404698499446e-05, + "loss": 1.9765, + "step": 16827500 + }, + { + "epoch": 48.71, + "learning_rate": 2.5653681050852168e-05, + "loss": 1.9938, + "step": 16828000 + }, + { + "epoch": 48.71, + "learning_rate": 2.565295740320489e-05, + "loss": 2.033, + "step": 16828500 + }, + { + "epoch": 48.71, + "learning_rate": 2.5652233755557616e-05, + "loss": 2.0159, + "step": 16829000 + }, + { + "epoch": 48.71, + "learning_rate": 2.5651510107910338e-05, + "loss": 2.0112, + "step": 16829500 + }, + { + "epoch": 48.72, + "learning_rate": 2.565078646026306e-05, + "loss": 1.996, + "step": 16830000 + }, + { + "epoch": 48.72, + "learning_rate": 2.5650064259911076e-05, + "loss": 2.0182, + "step": 16830500 + }, + { + "epoch": 48.72, + "learning_rate": 2.56493406122638e-05, + "loss": 2.009, + "step": 16831000 + }, + { + "epoch": 48.72, + "learning_rate": 2.5648616964616527e-05, + "loss": 2.0171, + "step": 16831500 + }, + { + "epoch": 48.72, + "learning_rate": 2.5647893316969253e-05, + "loss": 2.017, + "step": 16832000 + }, + { + "epoch": 48.72, + "learning_rate": 2.5647169669321975e-05, + "loss": 2.008, + "step": 16832500 + }, + { + "epoch": 48.72, + "learning_rate": 2.5646446021674697e-05, + "loss": 2.028, + "step": 16833000 + }, + { + "epoch": 48.73, + "learning_rate": 2.564572237402742e-05, + "loss": 1.9953, + "step": 16833500 + }, + { + "epoch": 48.73, + "learning_rate": 2.5645000173675438e-05, + "loss": 2.0202, + "step": 16834000 + }, + { + "epoch": 48.73, + "learning_rate": 2.5644277973323454e-05, + "loss": 1.9846, + "step": 16834500 + }, + { + "epoch": 48.73, + "learning_rate": 2.564355432567618e-05, + "loss": 2.0013, + "step": 16835000 + }, + { + "epoch": 48.73, + "learning_rate": 2.56428306780289e-05, + "loss": 2.0106, + "step": 16835500 + }, + { + "epoch": 48.73, + "learning_rate": 2.5642107030381624e-05, + "loss": 2.0306, + "step": 16836000 + }, + { + "epoch": 48.73, + "learning_rate": 2.5641383382734346e-05, + "loss": 1.9933, + "step": 16836500 + }, + { + "epoch": 48.74, + "learning_rate": 2.5640661182382365e-05, + "loss": 2.0074, + "step": 16837000 + }, + { + "epoch": 48.74, + "learning_rate": 2.5639937534735087e-05, + "loss": 1.9811, + "step": 16837500 + }, + { + "epoch": 48.74, + "learning_rate": 2.563921388708781e-05, + "loss": 2.0128, + "step": 16838000 + }, + { + "epoch": 48.74, + "learning_rate": 2.5638490239440532e-05, + "loss": 1.9941, + "step": 16838500 + }, + { + "epoch": 48.74, + "learning_rate": 2.5637766591793254e-05, + "loss": 2.0116, + "step": 16839000 + }, + { + "epoch": 48.74, + "learning_rate": 2.563704439144128e-05, + "loss": 1.994, + "step": 16839500 + }, + { + "epoch": 48.74, + "learning_rate": 2.5636320743794002e-05, + "loss": 2.018, + "step": 16840000 + }, + { + "epoch": 48.75, + "learning_rate": 2.5635597096146724e-05, + "loss": 2.0006, + "step": 16840500 + }, + { + "epoch": 48.75, + "learning_rate": 2.5634873448499446e-05, + "loss": 2.0145, + "step": 16841000 + }, + { + "epoch": 48.75, + "learning_rate": 2.563414980085217e-05, + "loss": 1.9948, + "step": 16841500 + }, + { + "epoch": 48.75, + "learning_rate": 2.563342615320489e-05, + "loss": 1.9876, + "step": 16842000 + }, + { + "epoch": 48.75, + "learning_rate": 2.5632702505557617e-05, + "loss": 1.9881, + "step": 16842500 + }, + { + "epoch": 48.75, + "learning_rate": 2.563197885791034e-05, + "loss": 1.9876, + "step": 16843000 + }, + { + "epoch": 48.76, + "learning_rate": 2.563125521026306e-05, + "loss": 1.9932, + "step": 16843500 + }, + { + "epoch": 48.76, + "learning_rate": 2.563053300991108e-05, + "loss": 2.0133, + "step": 16844000 + }, + { + "epoch": 48.76, + "learning_rate": 2.5629810809559095e-05, + "loss": 1.9963, + "step": 16844500 + }, + { + "epoch": 48.76, + "learning_rate": 2.5629087161911818e-05, + "loss": 2.0256, + "step": 16845000 + }, + { + "epoch": 48.76, + "learning_rate": 2.5628363514264543e-05, + "loss": 1.9973, + "step": 16845500 + }, + { + "epoch": 48.76, + "learning_rate": 2.5627639866617266e-05, + "loss": 1.9948, + "step": 16846000 + }, + { + "epoch": 48.76, + "learning_rate": 2.5626916218969988e-05, + "loss": 2.0122, + "step": 16846500 + }, + { + "epoch": 48.77, + "learning_rate": 2.5626192571322717e-05, + "loss": 2.0, + "step": 16847000 + }, + { + "epoch": 48.77, + "learning_rate": 2.562546892367544e-05, + "loss": 1.9917, + "step": 16847500 + }, + { + "epoch": 48.77, + "learning_rate": 2.562474527602816e-05, + "loss": 1.9957, + "step": 16848000 + }, + { + "epoch": 48.77, + "learning_rate": 2.562402307567618e-05, + "loss": 2.0301, + "step": 16848500 + }, + { + "epoch": 48.77, + "learning_rate": 2.5623299428028902e-05, + "loss": 2.0051, + "step": 16849000 + }, + { + "epoch": 48.77, + "learning_rate": 2.5622575780381625e-05, + "loss": 2.0151, + "step": 16849500 + }, + { + "epoch": 48.77, + "learning_rate": 2.5621852132734347e-05, + "loss": 2.0145, + "step": 16850000 + }, + { + "epoch": 48.78, + "learning_rate": 2.562112848508707e-05, + "loss": 1.9969, + "step": 16850500 + }, + { + "epoch": 48.78, + "learning_rate": 2.5620404837439795e-05, + "loss": 2.046, + "step": 16851000 + }, + { + "epoch": 48.78, + "learning_rate": 2.5619681189792517e-05, + "loss": 1.9962, + "step": 16851500 + }, + { + "epoch": 48.78, + "learning_rate": 2.561895754214524e-05, + "loss": 2.0181, + "step": 16852000 + }, + { + "epoch": 48.78, + "learning_rate": 2.561823389449796e-05, + "loss": 1.9811, + "step": 16852500 + }, + { + "epoch": 48.78, + "learning_rate": 2.561751169414598e-05, + "loss": 2.0166, + "step": 16853000 + }, + { + "epoch": 48.78, + "learning_rate": 2.5616788046498703e-05, + "loss": 2.0103, + "step": 16853500 + }, + { + "epoch": 48.79, + "learning_rate": 2.561606439885143e-05, + "loss": 2.0077, + "step": 16854000 + }, + { + "epoch": 48.79, + "learning_rate": 2.5615340751204154e-05, + "loss": 2.0167, + "step": 16854500 + }, + { + "epoch": 48.79, + "learning_rate": 2.5614617103556876e-05, + "loss": 2.0144, + "step": 16855000 + }, + { + "epoch": 48.79, + "learning_rate": 2.56138934559096e-05, + "loss": 2.0124, + "step": 16855500 + }, + { + "epoch": 48.79, + "learning_rate": 2.561316980826232e-05, + "loss": 2.0032, + "step": 16856000 + }, + { + "epoch": 48.79, + "learning_rate": 2.5612446160615046e-05, + "loss": 2.0143, + "step": 16856500 + }, + { + "epoch": 48.79, + "learning_rate": 2.561172251296777e-05, + "loss": 1.9995, + "step": 16857000 + }, + { + "epoch": 48.8, + "learning_rate": 2.561099886532049e-05, + "loss": 1.9902, + "step": 16857500 + }, + { + "epoch": 48.8, + "learning_rate": 2.5610275217673213e-05, + "loss": 2.0225, + "step": 16858000 + }, + { + "epoch": 48.8, + "learning_rate": 2.5609553017321232e-05, + "loss": 2.0195, + "step": 16858500 + }, + { + "epoch": 48.8, + "learning_rate": 2.5608829369673954e-05, + "loss": 2.0143, + "step": 16859000 + }, + { + "epoch": 48.8, + "learning_rate": 2.5608105722026676e-05, + "loss": 2.0249, + "step": 16859500 + }, + { + "epoch": 48.8, + "learning_rate": 2.56073820743794e-05, + "loss": 1.9892, + "step": 16860000 + }, + { + "epoch": 48.8, + "learning_rate": 2.560665842673212e-05, + "loss": 2.0274, + "step": 16860500 + }, + { + "epoch": 48.81, + "learning_rate": 2.560593622638014e-05, + "loss": 2.0126, + "step": 16861000 + }, + { + "epoch": 48.81, + "learning_rate": 2.560521257873287e-05, + "loss": 2.0107, + "step": 16861500 + }, + { + "epoch": 48.81, + "learning_rate": 2.5604490378380884e-05, + "loss": 2.0158, + "step": 16862000 + }, + { + "epoch": 48.81, + "learning_rate": 2.5603766730733607e-05, + "loss": 2.0168, + "step": 16862500 + }, + { + "epoch": 48.81, + "learning_rate": 2.5603043083086332e-05, + "loss": 2.0173, + "step": 16863000 + }, + { + "epoch": 48.81, + "learning_rate": 2.5602319435439054e-05, + "loss": 2.0022, + "step": 16863500 + }, + { + "epoch": 48.81, + "learning_rate": 2.5601595787791777e-05, + "loss": 2.0068, + "step": 16864000 + }, + { + "epoch": 48.82, + "learning_rate": 2.56008721401445e-05, + "loss": 1.9868, + "step": 16864500 + }, + { + "epoch": 48.82, + "learning_rate": 2.560014849249722e-05, + "loss": 2.0371, + "step": 16865000 + }, + { + "epoch": 48.82, + "learning_rate": 2.5599424844849947e-05, + "loss": 1.9955, + "step": 16865500 + }, + { + "epoch": 48.82, + "learning_rate": 2.5598702644497962e-05, + "loss": 2.0107, + "step": 16866000 + }, + { + "epoch": 48.82, + "learning_rate": 2.5597978996850685e-05, + "loss": 1.9992, + "step": 16866500 + }, + { + "epoch": 48.82, + "learning_rate": 2.5597256796498703e-05, + "loss": 1.9925, + "step": 16867000 + }, + { + "epoch": 48.82, + "learning_rate": 2.5596533148851426e-05, + "loss": 2.0232, + "step": 16867500 + }, + { + "epoch": 48.83, + "learning_rate": 2.5595812395794738e-05, + "loss": 1.9998, + "step": 16868000 + }, + { + "epoch": 48.83, + "learning_rate": 2.559508874814746e-05, + "loss": 1.9897, + "step": 16868500 + }, + { + "epoch": 48.83, + "learning_rate": 2.5594365100500182e-05, + "loss": 2.0227, + "step": 16869000 + }, + { + "epoch": 48.83, + "learning_rate": 2.559364145285291e-05, + "loss": 2.0256, + "step": 16869500 + }, + { + "epoch": 48.83, + "learning_rate": 2.5592917805205634e-05, + "loss": 1.9978, + "step": 16870000 + }, + { + "epoch": 48.83, + "learning_rate": 2.559219415755836e-05, + "loss": 1.9822, + "step": 16870500 + }, + { + "epoch": 48.83, + "learning_rate": 2.559147050991108e-05, + "loss": 1.9981, + "step": 16871000 + }, + { + "epoch": 48.84, + "learning_rate": 2.5590746862263804e-05, + "loss": 2.0265, + "step": 16871500 + }, + { + "epoch": 48.84, + "learning_rate": 2.5590023214616526e-05, + "loss": 2.0046, + "step": 16872000 + }, + { + "epoch": 48.84, + "learning_rate": 2.5589299566969248e-05, + "loss": 2.0273, + "step": 16872500 + }, + { + "epoch": 48.84, + "learning_rate": 2.558857591932197e-05, + "loss": 1.9811, + "step": 16873000 + }, + { + "epoch": 48.84, + "learning_rate": 2.558785371896999e-05, + "loss": 2.0019, + "step": 16873500 + }, + { + "epoch": 48.84, + "learning_rate": 2.558713007132271e-05, + "loss": 2.0083, + "step": 16874000 + }, + { + "epoch": 48.84, + "learning_rate": 2.5586406423675434e-05, + "loss": 2.0176, + "step": 16874500 + }, + { + "epoch": 48.85, + "learning_rate": 2.558568277602816e-05, + "loss": 1.9994, + "step": 16875000 + }, + { + "epoch": 48.85, + "learning_rate": 2.5584959128380882e-05, + "loss": 2.0093, + "step": 16875500 + }, + { + "epoch": 48.85, + "learning_rate": 2.5584235480733604e-05, + "loss": 2.0042, + "step": 16876000 + }, + { + "epoch": 48.85, + "learning_rate": 2.5583513280381623e-05, + "loss": 2.0035, + "step": 16876500 + }, + { + "epoch": 48.85, + "learning_rate": 2.558278963273435e-05, + "loss": 2.0205, + "step": 16877000 + }, + { + "epoch": 48.85, + "learning_rate": 2.5582067432382367e-05, + "loss": 1.9956, + "step": 16877500 + }, + { + "epoch": 48.85, + "learning_rate": 2.558134378473509e-05, + "loss": 2.0141, + "step": 16878000 + }, + { + "epoch": 48.86, + "learning_rate": 2.5580620137087812e-05, + "loss": 1.9835, + "step": 16878500 + }, + { + "epoch": 48.86, + "learning_rate": 2.5579896489440534e-05, + "loss": 2.0065, + "step": 16879000 + }, + { + "epoch": 48.86, + "learning_rate": 2.557917284179326e-05, + "loss": 2.0093, + "step": 16879500 + }, + { + "epoch": 48.86, + "learning_rate": 2.5578449194145982e-05, + "loss": 1.9926, + "step": 16880000 + }, + { + "epoch": 48.86, + "learning_rate": 2.5577725546498704e-05, + "loss": 2.0302, + "step": 16880500 + }, + { + "epoch": 48.86, + "learning_rate": 2.5577001898851426e-05, + "loss": 2.0076, + "step": 16881000 + }, + { + "epoch": 48.87, + "learning_rate": 2.557627825120415e-05, + "loss": 2.0017, + "step": 16881500 + }, + { + "epoch": 48.87, + "learning_rate": 2.5575554603556874e-05, + "loss": 2.0053, + "step": 16882000 + }, + { + "epoch": 48.87, + "learning_rate": 2.5574830955909597e-05, + "loss": 2.0146, + "step": 16882500 + }, + { + "epoch": 48.87, + "learning_rate": 2.557410730826232e-05, + "loss": 2.0007, + "step": 16883000 + }, + { + "epoch": 48.87, + "learning_rate": 2.557338366061504e-05, + "loss": 2.0123, + "step": 16883500 + }, + { + "epoch": 48.87, + "learning_rate": 2.557266001296777e-05, + "loss": 2.0226, + "step": 16884000 + }, + { + "epoch": 48.87, + "learning_rate": 2.5571936365320492e-05, + "loss": 2.021, + "step": 16884500 + }, + { + "epoch": 48.88, + "learning_rate": 2.557121416496851e-05, + "loss": 2.0155, + "step": 16885000 + }, + { + "epoch": 48.88, + "learning_rate": 2.5570490517321233e-05, + "loss": 2.0114, + "step": 16885500 + }, + { + "epoch": 48.88, + "learning_rate": 2.5569766869673956e-05, + "loss": 2.0014, + "step": 16886000 + }, + { + "epoch": 48.88, + "learning_rate": 2.5569043222026678e-05, + "loss": 2.0481, + "step": 16886500 + }, + { + "epoch": 48.88, + "learning_rate": 2.55683195743794e-05, + "loss": 2.0068, + "step": 16887000 + }, + { + "epoch": 48.88, + "learning_rate": 2.5567595926732126e-05, + "loss": 2.0048, + "step": 16887500 + }, + { + "epoch": 48.88, + "learning_rate": 2.5566872279084848e-05, + "loss": 2.0309, + "step": 16888000 + }, + { + "epoch": 48.89, + "learning_rate": 2.556614863143757e-05, + "loss": 2.0193, + "step": 16888500 + }, + { + "epoch": 48.89, + "learning_rate": 2.5565424983790292e-05, + "loss": 1.9915, + "step": 16889000 + }, + { + "epoch": 48.89, + "learning_rate": 2.5564701336143015e-05, + "loss": 2.003, + "step": 16889500 + }, + { + "epoch": 48.89, + "learning_rate": 2.5563977688495737e-05, + "loss": 2.0065, + "step": 16890000 + }, + { + "epoch": 48.89, + "learning_rate": 2.5563255488143756e-05, + "loss": 2.0234, + "step": 16890500 + }, + { + "epoch": 48.89, + "learning_rate": 2.5562533287791775e-05, + "loss": 2.0213, + "step": 16891000 + }, + { + "epoch": 48.89, + "learning_rate": 2.55618096401445e-05, + "loss": 2.0038, + "step": 16891500 + }, + { + "epoch": 48.9, + "learning_rate": 2.5561085992497226e-05, + "loss": 1.9897, + "step": 16892000 + }, + { + "epoch": 48.9, + "learning_rate": 2.556036234484995e-05, + "loss": 2.0247, + "step": 16892500 + }, + { + "epoch": 48.9, + "learning_rate": 2.555963869720267e-05, + "loss": 2.0249, + "step": 16893000 + }, + { + "epoch": 48.9, + "learning_rate": 2.5558915049555393e-05, + "loss": 2.0164, + "step": 16893500 + }, + { + "epoch": 48.9, + "learning_rate": 2.5558191401908115e-05, + "loss": 2.0373, + "step": 16894000 + }, + { + "epoch": 48.9, + "learning_rate": 2.5557467754260837e-05, + "loss": 1.9702, + "step": 16894500 + }, + { + "epoch": 48.9, + "learning_rate": 2.5556744106613563e-05, + "loss": 2.0071, + "step": 16895000 + }, + { + "epoch": 48.91, + "learning_rate": 2.5556020458966285e-05, + "loss": 2.0097, + "step": 16895500 + }, + { + "epoch": 48.91, + "learning_rate": 2.55552982586143e-05, + "loss": 2.0178, + "step": 16896000 + }, + { + "epoch": 48.91, + "learning_rate": 2.5554574610967026e-05, + "loss": 2.0201, + "step": 16896500 + }, + { + "epoch": 48.91, + "learning_rate": 2.555385096331975e-05, + "loss": 2.0243, + "step": 16897000 + }, + { + "epoch": 48.91, + "learning_rate": 2.5553128762967764e-05, + "loss": 1.9959, + "step": 16897500 + }, + { + "epoch": 48.91, + "learning_rate": 2.5552405115320486e-05, + "loss": 1.9942, + "step": 16898000 + }, + { + "epoch": 48.91, + "learning_rate": 2.5551681467673212e-05, + "loss": 1.9882, + "step": 16898500 + }, + { + "epoch": 48.92, + "learning_rate": 2.5550957820025938e-05, + "loss": 2.0003, + "step": 16899000 + }, + { + "epoch": 48.92, + "learning_rate": 2.5550234172378663e-05, + "loss": 2.0188, + "step": 16899500 + }, + { + "epoch": 48.92, + "learning_rate": 2.5549510524731385e-05, + "loss": 2.0184, + "step": 16900000 + }, + { + "epoch": 48.92, + "learning_rate": 2.5548786877084108e-05, + "loss": 2.0209, + "step": 16900500 + }, + { + "epoch": 48.92, + "learning_rate": 2.5548064676732127e-05, + "loss": 2.0243, + "step": 16901000 + }, + { + "epoch": 48.92, + "learning_rate": 2.554734102908485e-05, + "loss": 2.0065, + "step": 16901500 + }, + { + "epoch": 48.92, + "learning_rate": 2.554661738143757e-05, + "loss": 1.9999, + "step": 16902000 + }, + { + "epoch": 48.93, + "learning_rate": 2.5545893733790293e-05, + "loss": 1.9929, + "step": 16902500 + }, + { + "epoch": 48.93, + "learning_rate": 2.5545170086143016e-05, + "loss": 1.9811, + "step": 16903000 + }, + { + "epoch": 48.93, + "learning_rate": 2.5544446438495738e-05, + "loss": 2.0097, + "step": 16903500 + }, + { + "epoch": 48.93, + "learning_rate": 2.5543722790848463e-05, + "loss": 1.9956, + "step": 16904000 + }, + { + "epoch": 48.93, + "learning_rate": 2.5542999143201186e-05, + "loss": 2.0033, + "step": 16904500 + }, + { + "epoch": 48.93, + "learning_rate": 2.5542275495553908e-05, + "loss": 1.9942, + "step": 16905000 + }, + { + "epoch": 48.93, + "learning_rate": 2.5541553295201927e-05, + "loss": 2.0237, + "step": 16905500 + }, + { + "epoch": 48.94, + "learning_rate": 2.5540829647554652e-05, + "loss": 2.0123, + "step": 16906000 + }, + { + "epoch": 48.94, + "learning_rate": 2.5540105999907378e-05, + "loss": 2.0101, + "step": 16906500 + }, + { + "epoch": 48.94, + "learning_rate": 2.55393823522601e-05, + "loss": 1.9978, + "step": 16907000 + }, + { + "epoch": 48.94, + "learning_rate": 2.5538658704612822e-05, + "loss": 2.0109, + "step": 16907500 + }, + { + "epoch": 48.94, + "learning_rate": 2.553793650426084e-05, + "loss": 2.0134, + "step": 16908000 + }, + { + "epoch": 48.94, + "learning_rate": 2.5537212856613564e-05, + "loss": 2.0114, + "step": 16908500 + }, + { + "epoch": 48.94, + "learning_rate": 2.5536489208966286e-05, + "loss": 2.0242, + "step": 16909000 + }, + { + "epoch": 48.95, + "learning_rate": 2.5535765561319008e-05, + "loss": 2.001, + "step": 16909500 + }, + { + "epoch": 48.95, + "learning_rate": 2.553504191367173e-05, + "loss": 1.9845, + "step": 16910000 + }, + { + "epoch": 48.95, + "learning_rate": 2.5534318266024453e-05, + "loss": 1.9943, + "step": 16910500 + }, + { + "epoch": 48.95, + "learning_rate": 2.5533594618377178e-05, + "loss": 2.0097, + "step": 16911000 + }, + { + "epoch": 48.95, + "learning_rate": 2.5532872418025194e-05, + "loss": 1.9663, + "step": 16911500 + }, + { + "epoch": 48.95, + "learning_rate": 2.5532148770377916e-05, + "loss": 2.0319, + "step": 16912000 + }, + { + "epoch": 48.95, + "learning_rate": 2.553142512273064e-05, + "loss": 2.0267, + "step": 16912500 + }, + { + "epoch": 48.96, + "learning_rate": 2.5530701475083364e-05, + "loss": 2.0273, + "step": 16913000 + }, + { + "epoch": 48.96, + "learning_rate": 2.5529977827436093e-05, + "loss": 2.0224, + "step": 16913500 + }, + { + "epoch": 48.96, + "learning_rate": 2.5529254179788815e-05, + "loss": 2.006, + "step": 16914000 + }, + { + "epoch": 48.96, + "learning_rate": 2.5528530532141537e-05, + "loss": 2.0354, + "step": 16914500 + }, + { + "epoch": 48.96, + "learning_rate": 2.552780688449426e-05, + "loss": 2.0276, + "step": 16915000 + }, + { + "epoch": 48.96, + "learning_rate": 2.552708468414228e-05, + "loss": 2.014, + "step": 16915500 + }, + { + "epoch": 48.96, + "learning_rate": 2.5526365378380884e-05, + "loss": 2.0149, + "step": 16916000 + }, + { + "epoch": 48.97, + "learning_rate": 2.5525641730733606e-05, + "loss": 2.0238, + "step": 16916500 + }, + { + "epoch": 48.97, + "learning_rate": 2.552491808308633e-05, + "loss": 2.0276, + "step": 16917000 + }, + { + "epoch": 48.97, + "learning_rate": 2.5524194435439054e-05, + "loss": 2.0197, + "step": 16917500 + }, + { + "epoch": 48.97, + "learning_rate": 2.552347223508707e-05, + "loss": 2.0218, + "step": 16918000 + }, + { + "epoch": 48.97, + "learning_rate": 2.5522748587439792e-05, + "loss": 1.9908, + "step": 16918500 + }, + { + "epoch": 48.97, + "learning_rate": 2.5522024939792518e-05, + "loss": 2.013, + "step": 16919000 + }, + { + "epoch": 48.98, + "learning_rate": 2.552130129214524e-05, + "loss": 2.0257, + "step": 16919500 + }, + { + "epoch": 48.98, + "learning_rate": 2.5520577644497962e-05, + "loss": 2.0084, + "step": 16920000 + }, + { + "epoch": 48.98, + "learning_rate": 2.5519853996850684e-05, + "loss": 1.9764, + "step": 16920500 + }, + { + "epoch": 48.98, + "learning_rate": 2.5519131796498703e-05, + "loss": 1.9922, + "step": 16921000 + }, + { + "epoch": 48.98, + "learning_rate": 2.551840814885143e-05, + "loss": 2.0049, + "step": 16921500 + }, + { + "epoch": 48.98, + "learning_rate": 2.5517684501204154e-05, + "loss": 2.0233, + "step": 16922000 + }, + { + "epoch": 48.98, + "learning_rate": 2.5516960853556877e-05, + "loss": 1.9986, + "step": 16922500 + }, + { + "epoch": 48.99, + "learning_rate": 2.55162372059096e-05, + "loss": 1.9991, + "step": 16923000 + }, + { + "epoch": 48.99, + "learning_rate": 2.551551355826232e-05, + "loss": 2.0083, + "step": 16923500 + }, + { + "epoch": 48.99, + "learning_rate": 2.5514789910615043e-05, + "loss": 1.9907, + "step": 16924000 + }, + { + "epoch": 48.99, + "learning_rate": 2.551406626296777e-05, + "loss": 1.9852, + "step": 16924500 + }, + { + "epoch": 48.99, + "learning_rate": 2.551334261532049e-05, + "loss": 2.0225, + "step": 16925000 + }, + { + "epoch": 48.99, + "learning_rate": 2.5512618967673214e-05, + "loss": 2.0156, + "step": 16925500 + }, + { + "epoch": 48.99, + "learning_rate": 2.5511895320025936e-05, + "loss": 2.0281, + "step": 16926000 + }, + { + "epoch": 49.0, + "learning_rate": 2.5511173119673955e-05, + "loss": 2.0107, + "step": 16926500 + }, + { + "epoch": 49.0, + "learning_rate": 2.5510449472026677e-05, + "loss": 1.9908, + "step": 16927000 + }, + { + "epoch": 49.0, + "learning_rate": 2.55097258243794e-05, + "loss": 1.989, + "step": 16927500 + }, + { + "epoch": 49.0, + "learning_rate": 2.5509003624027418e-05, + "loss": 2.0041, + "step": 16928000 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.6736439941361908, + "eval_accuracy_mlm": 0.6394893188323623, + "eval_accuracy_nsp": 0.8566796368352788, + "eval_loss": 2.1714398860931396, + "eval_runtime": 331.7519, + "eval_samples_per_second": 1315.399, + "eval_steps_per_second": 54.809, + "step": 16928128 + }, + { + "epoch": 49.0, + "learning_rate": 2.550827997638014e-05, + "loss": 1.9782, + "step": 16928500 + }, + { + "epoch": 49.0, + "learning_rate": 2.550755632873287e-05, + "loss": 1.994, + "step": 16929000 + }, + { + "epoch": 49.0, + "learning_rate": 2.550683268108559e-05, + "loss": 1.9906, + "step": 16929500 + }, + { + "epoch": 49.01, + "learning_rate": 2.5506109033438314e-05, + "loss": 2.0034, + "step": 16930000 + }, + { + "epoch": 49.01, + "learning_rate": 2.5505385385791036e-05, + "loss": 2.0016, + "step": 16930500 + }, + { + "epoch": 49.01, + "learning_rate": 2.5504661738143758e-05, + "loss": 1.9844, + "step": 16931000 + }, + { + "epoch": 49.01, + "learning_rate": 2.550393809049648e-05, + "loss": 1.965, + "step": 16931500 + }, + { + "epoch": 49.01, + "learning_rate": 2.5503214442849206e-05, + "loss": 2.0007, + "step": 16932000 + }, + { + "epoch": 49.01, + "learning_rate": 2.550249079520193e-05, + "loss": 1.9915, + "step": 16932500 + }, + { + "epoch": 49.01, + "learning_rate": 2.550176714755465e-05, + "loss": 1.9724, + "step": 16933000 + }, + { + "epoch": 49.02, + "learning_rate": 2.550104494720267e-05, + "loss": 2.0185, + "step": 16933500 + }, + { + "epoch": 49.02, + "learning_rate": 2.5500321299555392e-05, + "loss": 1.975, + "step": 16934000 + }, + { + "epoch": 49.02, + "learning_rate": 2.5499597651908114e-05, + "loss": 1.9651, + "step": 16934500 + }, + { + "epoch": 49.02, + "learning_rate": 2.5498874004260836e-05, + "loss": 2.0111, + "step": 16935000 + }, + { + "epoch": 49.02, + "learning_rate": 2.549815035661356e-05, + "loss": 2.013, + "step": 16935500 + }, + { + "epoch": 49.02, + "learning_rate": 2.5497426708966287e-05, + "loss": 1.9855, + "step": 16936000 + }, + { + "epoch": 49.02, + "learning_rate": 2.549670306131901e-05, + "loss": 1.9776, + "step": 16936500 + }, + { + "epoch": 49.03, + "learning_rate": 2.5495979413671732e-05, + "loss": 1.981, + "step": 16937000 + }, + { + "epoch": 49.03, + "learning_rate": 2.5495255766024458e-05, + "loss": 1.9619, + "step": 16937500 + }, + { + "epoch": 49.03, + "learning_rate": 2.5494533565672473e-05, + "loss": 1.9775, + "step": 16938000 + }, + { + "epoch": 49.03, + "learning_rate": 2.5493809918025195e-05, + "loss": 1.9593, + "step": 16938500 + }, + { + "epoch": 49.03, + "learning_rate": 2.549308627037792e-05, + "loss": 1.9754, + "step": 16939000 + }, + { + "epoch": 49.03, + "learning_rate": 2.5492362622730643e-05, + "loss": 1.9623, + "step": 16939500 + }, + { + "epoch": 49.03, + "learning_rate": 2.5491638975083365e-05, + "loss": 1.9801, + "step": 16940000 + }, + { + "epoch": 49.04, + "learning_rate": 2.549091677473138e-05, + "loss": 1.9867, + "step": 16940500 + }, + { + "epoch": 49.04, + "learning_rate": 2.5490193127084107e-05, + "loss": 1.9734, + "step": 16941000 + }, + { + "epoch": 49.04, + "learning_rate": 2.548946947943683e-05, + "loss": 1.9963, + "step": 16941500 + }, + { + "epoch": 49.04, + "learning_rate": 2.548874583178955e-05, + "loss": 1.9766, + "step": 16942000 + }, + { + "epoch": 49.04, + "learning_rate": 2.5488022184142273e-05, + "loss": 2.0071, + "step": 16942500 + }, + { + "epoch": 49.04, + "learning_rate": 2.5487299983790292e-05, + "loss": 1.9846, + "step": 16943000 + }, + { + "epoch": 49.04, + "learning_rate": 2.548657633614302e-05, + "loss": 2.032, + "step": 16943500 + }, + { + "epoch": 49.05, + "learning_rate": 2.5485852688495744e-05, + "loss": 2.0034, + "step": 16944000 + }, + { + "epoch": 49.05, + "learning_rate": 2.5485129040848466e-05, + "loss": 1.9925, + "step": 16944500 + }, + { + "epoch": 49.05, + "learning_rate": 2.5484405393201188e-05, + "loss": 1.9841, + "step": 16945000 + }, + { + "epoch": 49.05, + "learning_rate": 2.548368174555391e-05, + "loss": 1.9762, + "step": 16945500 + }, + { + "epoch": 49.05, + "learning_rate": 2.5482958097906632e-05, + "loss": 1.998, + "step": 16946000 + }, + { + "epoch": 49.05, + "learning_rate": 2.548223589755465e-05, + "loss": 1.9723, + "step": 16946500 + }, + { + "epoch": 49.05, + "learning_rate": 2.5481512249907374e-05, + "loss": 2.0047, + "step": 16947000 + }, + { + "epoch": 49.06, + "learning_rate": 2.5480788602260096e-05, + "loss": 1.9964, + "step": 16947500 + }, + { + "epoch": 49.06, + "learning_rate": 2.548006495461282e-05, + "loss": 2.0056, + "step": 16948000 + }, + { + "epoch": 49.06, + "learning_rate": 2.5479341306965544e-05, + "loss": 2.0033, + "step": 16948500 + }, + { + "epoch": 49.06, + "learning_rate": 2.5478617659318266e-05, + "loss": 2.0076, + "step": 16949000 + }, + { + "epoch": 49.06, + "learning_rate": 2.5477894011670988e-05, + "loss": 2.0378, + "step": 16949500 + }, + { + "epoch": 49.06, + "learning_rate": 2.5477171811319007e-05, + "loss": 1.9785, + "step": 16950000 + }, + { + "epoch": 49.06, + "learning_rate": 2.547644816367173e-05, + "loss": 1.9846, + "step": 16950500 + }, + { + "epoch": 49.07, + "learning_rate": 2.547572451602446e-05, + "loss": 2.0016, + "step": 16951000 + }, + { + "epoch": 49.07, + "learning_rate": 2.5475002315672474e-05, + "loss": 2.0012, + "step": 16951500 + }, + { + "epoch": 49.07, + "learning_rate": 2.5474278668025196e-05, + "loss": 1.9876, + "step": 16952000 + }, + { + "epoch": 49.07, + "learning_rate": 2.5473556467673215e-05, + "loss": 1.9862, + "step": 16952500 + }, + { + "epoch": 49.07, + "learning_rate": 2.5472832820025937e-05, + "loss": 1.9668, + "step": 16953000 + }, + { + "epoch": 49.07, + "learning_rate": 2.547210917237866e-05, + "loss": 1.9762, + "step": 16953500 + }, + { + "epoch": 49.07, + "learning_rate": 2.5471385524731385e-05, + "loss": 1.9725, + "step": 16954000 + }, + { + "epoch": 49.08, + "learning_rate": 2.5470661877084107e-05, + "loss": 1.9848, + "step": 16954500 + }, + { + "epoch": 49.08, + "learning_rate": 2.546993822943683e-05, + "loss": 2.0021, + "step": 16955000 + }, + { + "epoch": 49.08, + "learning_rate": 2.546921602908485e-05, + "loss": 2.0018, + "step": 16955500 + }, + { + "epoch": 49.08, + "learning_rate": 2.546849238143757e-05, + "loss": 2.01, + "step": 16956000 + }, + { + "epoch": 49.08, + "learning_rate": 2.5467768733790293e-05, + "loss": 1.992, + "step": 16956500 + }, + { + "epoch": 49.08, + "learning_rate": 2.5467045086143015e-05, + "loss": 1.9692, + "step": 16957000 + }, + { + "epoch": 49.09, + "learning_rate": 2.5466321438495738e-05, + "loss": 1.9939, + "step": 16957500 + }, + { + "epoch": 49.09, + "learning_rate": 2.546559779084846e-05, + "loss": 2.0019, + "step": 16958000 + }, + { + "epoch": 49.09, + "learning_rate": 2.546487414320119e-05, + "loss": 1.9474, + "step": 16958500 + }, + { + "epoch": 49.09, + "learning_rate": 2.546415049555391e-05, + "loss": 2.003, + "step": 16959000 + }, + { + "epoch": 49.09, + "learning_rate": 2.5463426847906637e-05, + "loss": 1.9765, + "step": 16959500 + }, + { + "epoch": 49.09, + "learning_rate": 2.546270320025936e-05, + "loss": 2.0026, + "step": 16960000 + }, + { + "epoch": 49.09, + "learning_rate": 2.546197955261208e-05, + "loss": 1.9805, + "step": 16960500 + }, + { + "epoch": 49.1, + "learning_rate": 2.5461255904964803e-05, + "loss": 1.9997, + "step": 16961000 + }, + { + "epoch": 49.1, + "learning_rate": 2.5460532257317526e-05, + "loss": 2.001, + "step": 16961500 + }, + { + "epoch": 49.1, + "learning_rate": 2.5459808609670248e-05, + "loss": 2.0146, + "step": 16962000 + }, + { + "epoch": 49.1, + "learning_rate": 2.5459084962022973e-05, + "loss": 2.0055, + "step": 16962500 + }, + { + "epoch": 49.1, + "learning_rate": 2.545836276167099e-05, + "loss": 2.0152, + "step": 16963000 + }, + { + "epoch": 49.1, + "learning_rate": 2.545763911402371e-05, + "loss": 2.0095, + "step": 16963500 + }, + { + "epoch": 49.1, + "learning_rate": 2.5456915466376437e-05, + "loss": 1.9861, + "step": 16964000 + }, + { + "epoch": 49.11, + "learning_rate": 2.545619181872916e-05, + "loss": 2.0048, + "step": 16964500 + }, + { + "epoch": 49.11, + "learning_rate": 2.545546817108188e-05, + "loss": 1.9813, + "step": 16965000 + }, + { + "epoch": 49.11, + "learning_rate": 2.545474452343461e-05, + "loss": 2.0109, + "step": 16965500 + }, + { + "epoch": 49.11, + "learning_rate": 2.5454020875787333e-05, + "loss": 2.012, + "step": 16966000 + }, + { + "epoch": 49.11, + "learning_rate": 2.5453298675435348e-05, + "loss": 1.9951, + "step": 16966500 + }, + { + "epoch": 49.11, + "learning_rate": 2.5452575027788074e-05, + "loss": 1.9888, + "step": 16967000 + }, + { + "epoch": 49.11, + "learning_rate": 2.5451851380140796e-05, + "loss": 1.9866, + "step": 16967500 + }, + { + "epoch": 49.12, + "learning_rate": 2.5451127732493518e-05, + "loss": 1.9844, + "step": 16968000 + }, + { + "epoch": 49.12, + "learning_rate": 2.545040408484624e-05, + "loss": 1.9843, + "step": 16968500 + }, + { + "epoch": 49.12, + "learning_rate": 2.5449680437198963e-05, + "loss": 2.0029, + "step": 16969000 + }, + { + "epoch": 49.12, + "learning_rate": 2.5448956789551688e-05, + "loss": 1.9713, + "step": 16969500 + }, + { + "epoch": 49.12, + "learning_rate": 2.5448234589199704e-05, + "loss": 2.0069, + "step": 16970000 + }, + { + "epoch": 49.12, + "learning_rate": 2.5447510941552426e-05, + "loss": 2.0149, + "step": 16970500 + }, + { + "epoch": 49.12, + "learning_rate": 2.5446787293905148e-05, + "loss": 2.0192, + "step": 16971000 + }, + { + "epoch": 49.13, + "learning_rate": 2.5446063646257874e-05, + "loss": 2.0014, + "step": 16971500 + }, + { + "epoch": 49.13, + "learning_rate": 2.544534144590589e-05, + "loss": 1.9724, + "step": 16972000 + }, + { + "epoch": 49.13, + "learning_rate": 2.5444617798258612e-05, + "loss": 2.0048, + "step": 16972500 + }, + { + "epoch": 49.13, + "learning_rate": 2.544389559790663e-05, + "loss": 1.9768, + "step": 16973000 + }, + { + "epoch": 49.13, + "learning_rate": 2.544317195025936e-05, + "loss": 1.9847, + "step": 16973500 + }, + { + "epoch": 49.13, + "learning_rate": 2.5442448302612082e-05, + "loss": 2.001, + "step": 16974000 + }, + { + "epoch": 49.13, + "learning_rate": 2.5441724654964804e-05, + "loss": 1.9997, + "step": 16974500 + }, + { + "epoch": 49.14, + "learning_rate": 2.5441001007317526e-05, + "loss": 1.9746, + "step": 16975000 + }, + { + "epoch": 49.14, + "learning_rate": 2.5440277359670252e-05, + "loss": 1.9913, + "step": 16975500 + }, + { + "epoch": 49.14, + "learning_rate": 2.5439553712022974e-05, + "loss": 1.9872, + "step": 16976000 + }, + { + "epoch": 49.14, + "learning_rate": 2.5438830064375696e-05, + "loss": 2.0002, + "step": 16976500 + }, + { + "epoch": 49.14, + "learning_rate": 2.543810641672842e-05, + "loss": 1.9737, + "step": 16977000 + }, + { + "epoch": 49.14, + "learning_rate": 2.543738276908114e-05, + "loss": 1.9835, + "step": 16977500 + }, + { + "epoch": 49.14, + "learning_rate": 2.5436659121433863e-05, + "loss": 2.0001, + "step": 16978000 + }, + { + "epoch": 49.15, + "learning_rate": 2.5435938368377175e-05, + "loss": 1.9786, + "step": 16978500 + }, + { + "epoch": 49.15, + "learning_rate": 2.54352147207299e-05, + "loss": 1.9967, + "step": 16979000 + }, + { + "epoch": 49.15, + "learning_rate": 2.5434491073082623e-05, + "loss": 1.9991, + "step": 16979500 + }, + { + "epoch": 49.15, + "learning_rate": 2.5433767425435346e-05, + "loss": 2.0106, + "step": 16980000 + }, + { + "epoch": 49.15, + "learning_rate": 2.5433043777788068e-05, + "loss": 1.9797, + "step": 16980500 + }, + { + "epoch": 49.15, + "learning_rate": 2.5432320130140797e-05, + "loss": 1.9991, + "step": 16981000 + }, + { + "epoch": 49.15, + "learning_rate": 2.5431597929788816e-05, + "loss": 1.9766, + "step": 16981500 + }, + { + "epoch": 49.16, + "learning_rate": 2.5430874282141538e-05, + "loss": 2.002, + "step": 16982000 + }, + { + "epoch": 49.16, + "learning_rate": 2.543015063449426e-05, + "loss": 2.0145, + "step": 16982500 + }, + { + "epoch": 49.16, + "learning_rate": 2.5429426986846982e-05, + "loss": 1.9925, + "step": 16983000 + }, + { + "epoch": 49.16, + "learning_rate": 2.5428703339199705e-05, + "loss": 2.0015, + "step": 16983500 + }, + { + "epoch": 49.16, + "learning_rate": 2.5427979691552427e-05, + "loss": 2.001, + "step": 16984000 + }, + { + "epoch": 49.16, + "learning_rate": 2.5427256043905152e-05, + "loss": 2.0143, + "step": 16984500 + }, + { + "epoch": 49.16, + "learning_rate": 2.5426533843553168e-05, + "loss": 1.9803, + "step": 16985000 + }, + { + "epoch": 49.17, + "learning_rate": 2.542581019590589e-05, + "loss": 1.9949, + "step": 16985500 + }, + { + "epoch": 49.17, + "learning_rate": 2.5425086548258616e-05, + "loss": 1.9897, + "step": 16986000 + }, + { + "epoch": 49.17, + "learning_rate": 2.5424362900611338e-05, + "loss": 1.9959, + "step": 16986500 + }, + { + "epoch": 49.17, + "learning_rate": 2.542363925296406e-05, + "loss": 1.9987, + "step": 16987000 + }, + { + "epoch": 49.17, + "learning_rate": 2.5422915605316783e-05, + "loss": 2.0033, + "step": 16987500 + }, + { + "epoch": 49.17, + "learning_rate": 2.54221934049648e-05, + "loss": 1.9775, + "step": 16988000 + }, + { + "epoch": 49.17, + "learning_rate": 2.5421469757317527e-05, + "loss": 1.9811, + "step": 16988500 + }, + { + "epoch": 49.18, + "learning_rate": 2.5420746109670253e-05, + "loss": 1.9894, + "step": 16989000 + }, + { + "epoch": 49.18, + "learning_rate": 2.5420022462022975e-05, + "loss": 1.9761, + "step": 16989500 + }, + { + "epoch": 49.18, + "learning_rate": 2.5419298814375697e-05, + "loss": 2.0026, + "step": 16990000 + }, + { + "epoch": 49.18, + "learning_rate": 2.541857516672842e-05, + "loss": 1.9803, + "step": 16990500 + }, + { + "epoch": 49.18, + "learning_rate": 2.5417851519081142e-05, + "loss": 1.9874, + "step": 16991000 + }, + { + "epoch": 49.18, + "learning_rate": 2.5417127871433867e-05, + "loss": 1.9894, + "step": 16991500 + }, + { + "epoch": 49.18, + "learning_rate": 2.541640422378659e-05, + "loss": 1.9988, + "step": 16992000 + }, + { + "epoch": 49.19, + "learning_rate": 2.5415682023434605e-05, + "loss": 2.0012, + "step": 16992500 + }, + { + "epoch": 49.19, + "learning_rate": 2.5414958375787327e-05, + "loss": 2.0146, + "step": 16993000 + }, + { + "epoch": 49.19, + "learning_rate": 2.5414236175435346e-05, + "loss": 2.0129, + "step": 16993500 + }, + { + "epoch": 49.19, + "learning_rate": 2.541351252778807e-05, + "loss": 1.9853, + "step": 16994000 + }, + { + "epoch": 49.19, + "learning_rate": 2.541278888014079e-05, + "loss": 1.9931, + "step": 16994500 + }, + { + "epoch": 49.19, + "learning_rate": 2.5412065232493516e-05, + "loss": 1.98, + "step": 16995000 + }, + { + "epoch": 49.2, + "learning_rate": 2.5411341584846242e-05, + "loss": 1.9911, + "step": 16995500 + }, + { + "epoch": 49.2, + "learning_rate": 2.5410617937198968e-05, + "loss": 1.9718, + "step": 16996000 + }, + { + "epoch": 49.2, + "learning_rate": 2.540989428955169e-05, + "loss": 2.001, + "step": 16996500 + }, + { + "epoch": 49.2, + "learning_rate": 2.5409170641904412e-05, + "loss": 2.0017, + "step": 16997000 + }, + { + "epoch": 49.2, + "learning_rate": 2.5408446994257134e-05, + "loss": 1.9938, + "step": 16997500 + }, + { + "epoch": 49.2, + "learning_rate": 2.5407723346609857e-05, + "loss": 2.0015, + "step": 16998000 + }, + { + "epoch": 49.2, + "learning_rate": 2.5407001146257876e-05, + "loss": 2.0, + "step": 16998500 + }, + { + "epoch": 49.21, + "learning_rate": 2.5406277498610598e-05, + "loss": 2.0171, + "step": 16999000 + }, + { + "epoch": 49.21, + "learning_rate": 2.540555385096332e-05, + "loss": 1.978, + "step": 16999500 + }, + { + "epoch": 49.21, + "learning_rate": 2.5404830203316042e-05, + "loss": 1.9999, + "step": 17000000 + }, + { + "epoch": 49.21, + "learning_rate": 2.5404106555668768e-05, + "loss": 1.987, + "step": 17000500 + }, + { + "epoch": 49.21, + "learning_rate": 2.540338290802149e-05, + "loss": 1.9858, + "step": 17001000 + }, + { + "epoch": 49.21, + "learning_rate": 2.5402660707669506e-05, + "loss": 2.0057, + "step": 17001500 + }, + { + "epoch": 49.21, + "learning_rate": 2.5401937060022228e-05, + "loss": 2.0059, + "step": 17002000 + }, + { + "epoch": 49.22, + "learning_rate": 2.5401213412374953e-05, + "loss": 2.0109, + "step": 17002500 + }, + { + "epoch": 49.22, + "learning_rate": 2.540048976472768e-05, + "loss": 2.0087, + "step": 17003000 + }, + { + "epoch": 49.22, + "learning_rate": 2.5399767564375698e-05, + "loss": 1.9982, + "step": 17003500 + }, + { + "epoch": 49.22, + "learning_rate": 2.539904391672842e-05, + "loss": 1.9856, + "step": 17004000 + }, + { + "epoch": 49.22, + "learning_rate": 2.5398320269081143e-05, + "loss": 1.9869, + "step": 17004500 + }, + { + "epoch": 49.22, + "learning_rate": 2.5397596621433868e-05, + "loss": 1.9751, + "step": 17005000 + }, + { + "epoch": 49.22, + "learning_rate": 2.539687297378659e-05, + "loss": 1.9991, + "step": 17005500 + }, + { + "epoch": 49.23, + "learning_rate": 2.5396149326139313e-05, + "loss": 2.0184, + "step": 17006000 + }, + { + "epoch": 49.23, + "learning_rate": 2.5395425678492035e-05, + "loss": 1.9958, + "step": 17006500 + }, + { + "epoch": 49.23, + "learning_rate": 2.5394702030844757e-05, + "loss": 1.9887, + "step": 17007000 + }, + { + "epoch": 49.23, + "learning_rate": 2.539397838319748e-05, + "loss": 2.016, + "step": 17007500 + }, + { + "epoch": 49.23, + "learning_rate": 2.5393254735550205e-05, + "loss": 1.9994, + "step": 17008000 + }, + { + "epoch": 49.23, + "learning_rate": 2.5392531087902927e-05, + "loss": 2.004, + "step": 17008500 + }, + { + "epoch": 49.23, + "learning_rate": 2.5391808887550943e-05, + "loss": 1.9745, + "step": 17009000 + }, + { + "epoch": 49.24, + "learning_rate": 2.539108668719896e-05, + "loss": 1.991, + "step": 17009500 + }, + { + "epoch": 49.24, + "learning_rate": 2.5390363039551684e-05, + "loss": 1.9807, + "step": 17010000 + }, + { + "epoch": 49.24, + "learning_rate": 2.5389639391904413e-05, + "loss": 2.0207, + "step": 17010500 + }, + { + "epoch": 49.24, + "learning_rate": 2.5388915744257135e-05, + "loss": 2.025, + "step": 17011000 + }, + { + "epoch": 49.24, + "learning_rate": 2.5388192096609857e-05, + "loss": 2.0078, + "step": 17011500 + }, + { + "epoch": 49.24, + "learning_rate": 2.5387468448962583e-05, + "loss": 1.9998, + "step": 17012000 + }, + { + "epoch": 49.24, + "learning_rate": 2.5386744801315305e-05, + "loss": 1.9966, + "step": 17012500 + }, + { + "epoch": 49.25, + "learning_rate": 2.5386021153668027e-05, + "loss": 1.9742, + "step": 17013000 + }, + { + "epoch": 49.25, + "learning_rate": 2.5385298953316043e-05, + "loss": 1.9769, + "step": 17013500 + }, + { + "epoch": 49.25, + "learning_rate": 2.538457530566877e-05, + "loss": 1.9933, + "step": 17014000 + }, + { + "epoch": 49.25, + "learning_rate": 2.538385165802149e-05, + "loss": 1.9838, + "step": 17014500 + }, + { + "epoch": 49.25, + "learning_rate": 2.5383129457669506e-05, + "loss": 2.0115, + "step": 17015000 + }, + { + "epoch": 49.25, + "learning_rate": 2.5382405810022232e-05, + "loss": 1.9943, + "step": 17015500 + }, + { + "epoch": 49.25, + "learning_rate": 2.5381682162374954e-05, + "loss": 1.9974, + "step": 17016000 + }, + { + "epoch": 49.26, + "learning_rate": 2.5380958514727677e-05, + "loss": 2.0184, + "step": 17016500 + }, + { + "epoch": 49.26, + "learning_rate": 2.53802348670804e-05, + "loss": 1.9884, + "step": 17017000 + }, + { + "epoch": 49.26, + "learning_rate": 2.537951121943312e-05, + "loss": 1.9876, + "step": 17017500 + }, + { + "epoch": 49.26, + "learning_rate": 2.537878757178585e-05, + "loss": 2.002, + "step": 17018000 + }, + { + "epoch": 49.26, + "learning_rate": 2.5378063924138572e-05, + "loss": 2.0026, + "step": 17018500 + }, + { + "epoch": 49.26, + "learning_rate": 2.5377340276491294e-05, + "loss": 1.9863, + "step": 17019000 + }, + { + "epoch": 49.26, + "learning_rate": 2.537661662884402e-05, + "loss": 1.9871, + "step": 17019500 + }, + { + "epoch": 49.27, + "learning_rate": 2.5375892981196742e-05, + "loss": 1.9942, + "step": 17020000 + }, + { + "epoch": 49.27, + "learning_rate": 2.5375169333549465e-05, + "loss": 2.0147, + "step": 17020500 + }, + { + "epoch": 49.27, + "learning_rate": 2.5374445685902187e-05, + "loss": 1.9802, + "step": 17021000 + }, + { + "epoch": 49.27, + "learning_rate": 2.537372203825491e-05, + "loss": 1.9684, + "step": 17021500 + }, + { + "epoch": 49.27, + "learning_rate": 2.5372999837902928e-05, + "loss": 1.9884, + "step": 17022000 + }, + { + "epoch": 49.27, + "learning_rate": 2.537227619025565e-05, + "loss": 1.9845, + "step": 17022500 + }, + { + "epoch": 49.27, + "learning_rate": 2.537155398990367e-05, + "loss": 1.9777, + "step": 17023000 + }, + { + "epoch": 49.28, + "learning_rate": 2.537083034225639e-05, + "loss": 1.9924, + "step": 17023500 + }, + { + "epoch": 49.28, + "learning_rate": 2.5370106694609114e-05, + "loss": 2.0218, + "step": 17024000 + }, + { + "epoch": 49.28, + "learning_rate": 2.5369383046961836e-05, + "loss": 2.0135, + "step": 17024500 + }, + { + "epoch": 49.28, + "learning_rate": 2.5368659399314565e-05, + "loss": 2.0057, + "step": 17025000 + }, + { + "epoch": 49.28, + "learning_rate": 2.5367935751667287e-05, + "loss": 2.0018, + "step": 17025500 + }, + { + "epoch": 49.28, + "learning_rate": 2.536721210402001e-05, + "loss": 2.0086, + "step": 17026000 + }, + { + "epoch": 49.28, + "learning_rate": 2.5366488456372735e-05, + "loss": 2.0072, + "step": 17026500 + }, + { + "epoch": 49.29, + "learning_rate": 2.5365764808725457e-05, + "loss": 1.9951, + "step": 17027000 + }, + { + "epoch": 49.29, + "learning_rate": 2.536504116107818e-05, + "loss": 2.0334, + "step": 17027500 + }, + { + "epoch": 49.29, + "learning_rate": 2.5364318960726195e-05, + "loss": 2.0056, + "step": 17028000 + }, + { + "epoch": 49.29, + "learning_rate": 2.536359531307892e-05, + "loss": 1.9953, + "step": 17028500 + }, + { + "epoch": 49.29, + "learning_rate": 2.5362873112726936e-05, + "loss": 2.0111, + "step": 17029000 + }, + { + "epoch": 49.29, + "learning_rate": 2.536214946507966e-05, + "loss": 1.9915, + "step": 17029500 + }, + { + "epoch": 49.29, + "learning_rate": 2.5361425817432384e-05, + "loss": 2.0007, + "step": 17030000 + }, + { + "epoch": 49.3, + "learning_rate": 2.5360702169785106e-05, + "loss": 2.0196, + "step": 17030500 + }, + { + "epoch": 49.3, + "learning_rate": 2.535997852213783e-05, + "loss": 2.0158, + "step": 17031000 + }, + { + "epoch": 49.3, + "learning_rate": 2.535925487449055e-05, + "loss": 1.9766, + "step": 17031500 + }, + { + "epoch": 49.3, + "learning_rate": 2.5358531226843273e-05, + "loss": 1.988, + "step": 17032000 + }, + { + "epoch": 49.3, + "learning_rate": 2.5357809026491292e-05, + "loss": 2.0046, + "step": 17032500 + }, + { + "epoch": 49.3, + "learning_rate": 2.535708537884402e-05, + "loss": 1.9674, + "step": 17033000 + }, + { + "epoch": 49.31, + "learning_rate": 2.5356361731196743e-05, + "loss": 1.9983, + "step": 17033500 + }, + { + "epoch": 49.31, + "learning_rate": 2.5355638083549465e-05, + "loss": 2.0105, + "step": 17034000 + }, + { + "epoch": 49.31, + "learning_rate": 2.5354914435902188e-05, + "loss": 2.011, + "step": 17034500 + }, + { + "epoch": 49.31, + "learning_rate": 2.535419078825491e-05, + "loss": 1.9966, + "step": 17035000 + }, + { + "epoch": 49.31, + "learning_rate": 2.5353467140607635e-05, + "loss": 1.9979, + "step": 17035500 + }, + { + "epoch": 49.31, + "learning_rate": 2.5352743492960358e-05, + "loss": 1.9819, + "step": 17036000 + }, + { + "epoch": 49.31, + "learning_rate": 2.535201984531308e-05, + "loss": 1.9765, + "step": 17036500 + }, + { + "epoch": 49.32, + "learning_rate": 2.5351296197665802e-05, + "loss": 1.9823, + "step": 17037000 + }, + { + "epoch": 49.32, + "learning_rate": 2.535057399731382e-05, + "loss": 1.9992, + "step": 17037500 + }, + { + "epoch": 49.32, + "learning_rate": 2.5349850349666543e-05, + "loss": 1.9907, + "step": 17038000 + }, + { + "epoch": 49.32, + "learning_rate": 2.534912814931456e-05, + "loss": 2.0134, + "step": 17038500 + }, + { + "epoch": 49.32, + "learning_rate": 2.5348404501667284e-05, + "loss": 1.9793, + "step": 17039000 + }, + { + "epoch": 49.32, + "learning_rate": 2.53476823013153e-05, + "loss": 1.9995, + "step": 17039500 + }, + { + "epoch": 49.32, + "learning_rate": 2.5346958653668022e-05, + "loss": 2.0073, + "step": 17040000 + }, + { + "epoch": 49.33, + "learning_rate": 2.534623500602075e-05, + "loss": 1.9914, + "step": 17040500 + }, + { + "epoch": 49.33, + "learning_rate": 2.5345511358373474e-05, + "loss": 1.9896, + "step": 17041000 + }, + { + "epoch": 49.33, + "learning_rate": 2.53447877107262e-05, + "loss": 2.0144, + "step": 17041500 + }, + { + "epoch": 49.33, + "learning_rate": 2.534406406307892e-05, + "loss": 2.0124, + "step": 17042000 + }, + { + "epoch": 49.33, + "learning_rate": 2.5343340415431644e-05, + "loss": 2.0135, + "step": 17042500 + }, + { + "epoch": 49.33, + "learning_rate": 2.5342616767784366e-05, + "loss": 2.0084, + "step": 17043000 + }, + { + "epoch": 49.33, + "learning_rate": 2.5341893120137088e-05, + "loss": 2.0038, + "step": 17043500 + }, + { + "epoch": 49.34, + "learning_rate": 2.5341170919785107e-05, + "loss": 1.9882, + "step": 17044000 + }, + { + "epoch": 49.34, + "learning_rate": 2.534044727213783e-05, + "loss": 1.9871, + "step": 17044500 + }, + { + "epoch": 49.34, + "learning_rate": 2.533972362449055e-05, + "loss": 1.9878, + "step": 17045000 + }, + { + "epoch": 49.34, + "learning_rate": 2.5338999976843274e-05, + "loss": 2.0148, + "step": 17045500 + }, + { + "epoch": 49.34, + "learning_rate": 2.5338276329196e-05, + "loss": 1.9808, + "step": 17046000 + }, + { + "epoch": 49.34, + "learning_rate": 2.533755268154872e-05, + "loss": 1.9965, + "step": 17046500 + }, + { + "epoch": 49.34, + "learning_rate": 2.5336829033901444e-05, + "loss": 1.9995, + "step": 17047000 + }, + { + "epoch": 49.35, + "learning_rate": 2.5336106833549466e-05, + "loss": 2.0137, + "step": 17047500 + }, + { + "epoch": 49.35, + "learning_rate": 2.533538318590219e-05, + "loss": 2.0017, + "step": 17048000 + }, + { + "epoch": 49.35, + "learning_rate": 2.5334659538254914e-05, + "loss": 1.9902, + "step": 17048500 + }, + { + "epoch": 49.35, + "learning_rate": 2.5333935890607636e-05, + "loss": 2.0068, + "step": 17049000 + }, + { + "epoch": 49.35, + "learning_rate": 2.533321224296036e-05, + "loss": 2.0103, + "step": 17049500 + }, + { + "epoch": 49.35, + "learning_rate": 2.533248859531308e-05, + "loss": 2.0107, + "step": 17050000 + }, + { + "epoch": 49.35, + "learning_rate": 2.5331764947665803e-05, + "loss": 2.0139, + "step": 17050500 + }, + { + "epoch": 49.36, + "learning_rate": 2.5331042747313822e-05, + "loss": 2.0124, + "step": 17051000 + }, + { + "epoch": 49.36, + "learning_rate": 2.5330319099666544e-05, + "loss": 2.0031, + "step": 17051500 + }, + { + "epoch": 49.36, + "learning_rate": 2.5329595452019266e-05, + "loss": 2.0019, + "step": 17052000 + }, + { + "epoch": 49.36, + "learning_rate": 2.532887180437199e-05, + "loss": 1.9875, + "step": 17052500 + }, + { + "epoch": 49.36, + "learning_rate": 2.5328148156724714e-05, + "loss": 1.9992, + "step": 17053000 + }, + { + "epoch": 49.36, + "learning_rate": 2.5327424509077436e-05, + "loss": 1.9908, + "step": 17053500 + }, + { + "epoch": 49.36, + "learning_rate": 2.532670086143016e-05, + "loss": 2.0141, + "step": 17054000 + }, + { + "epoch": 49.37, + "learning_rate": 2.532597721378288e-05, + "loss": 2.0042, + "step": 17054500 + }, + { + "epoch": 49.37, + "learning_rate": 2.5325255013430903e-05, + "loss": 1.9969, + "step": 17055000 + }, + { + "epoch": 49.37, + "learning_rate": 2.5324532813078922e-05, + "loss": 2.0081, + "step": 17055500 + }, + { + "epoch": 49.37, + "learning_rate": 2.5323809165431644e-05, + "loss": 1.9859, + "step": 17056000 + }, + { + "epoch": 49.37, + "learning_rate": 2.5323085517784367e-05, + "loss": 2.0096, + "step": 17056500 + }, + { + "epoch": 49.37, + "learning_rate": 2.532236187013709e-05, + "loss": 2.0114, + "step": 17057000 + }, + { + "epoch": 49.37, + "learning_rate": 2.5321638222489814e-05, + "loss": 2.0047, + "step": 17057500 + }, + { + "epoch": 49.38, + "learning_rate": 2.5320914574842537e-05, + "loss": 1.9712, + "step": 17058000 + }, + { + "epoch": 49.38, + "learning_rate": 2.532019092719526e-05, + "loss": 2.0086, + "step": 17058500 + }, + { + "epoch": 49.38, + "learning_rate": 2.531946727954798e-05, + "loss": 1.998, + "step": 17059000 + }, + { + "epoch": 49.38, + "learning_rate": 2.5318745079196e-05, + "loss": 2.0056, + "step": 17059500 + }, + { + "epoch": 49.38, + "learning_rate": 2.5318021431548722e-05, + "loss": 2.0157, + "step": 17060000 + }, + { + "epoch": 49.38, + "learning_rate": 2.5317297783901445e-05, + "loss": 2.0048, + "step": 17060500 + }, + { + "epoch": 49.38, + "learning_rate": 2.5316574136254167e-05, + "loss": 1.9836, + "step": 17061000 + }, + { + "epoch": 49.39, + "learning_rate": 2.531585048860689e-05, + "loss": 1.9898, + "step": 17061500 + }, + { + "epoch": 49.39, + "learning_rate": 2.5315126840959615e-05, + "loss": 2.0078, + "step": 17062000 + }, + { + "epoch": 49.39, + "learning_rate": 2.531440319331234e-05, + "loss": 2.0107, + "step": 17062500 + }, + { + "epoch": 49.39, + "learning_rate": 2.5313679545665066e-05, + "loss": 2.0103, + "step": 17063000 + }, + { + "epoch": 49.39, + "learning_rate": 2.531295734531308e-05, + "loss": 1.9947, + "step": 17063500 + }, + { + "epoch": 49.39, + "learning_rate": 2.53122351449611e-05, + "loss": 1.9724, + "step": 17064000 + }, + { + "epoch": 49.39, + "learning_rate": 2.5311511497313823e-05, + "loss": 1.9911, + "step": 17064500 + }, + { + "epoch": 49.4, + "learning_rate": 2.5310787849666545e-05, + "loss": 1.9932, + "step": 17065000 + }, + { + "epoch": 49.4, + "learning_rate": 2.5310064202019267e-05, + "loss": 1.9863, + "step": 17065500 + }, + { + "epoch": 49.4, + "learning_rate": 2.5309342001667286e-05, + "loss": 2.0376, + "step": 17066000 + }, + { + "epoch": 49.4, + "learning_rate": 2.53086198013153e-05, + "loss": 1.9961, + "step": 17066500 + }, + { + "epoch": 49.4, + "learning_rate": 2.5307896153668027e-05, + "loss": 1.9949, + "step": 17067000 + }, + { + "epoch": 49.4, + "learning_rate": 2.530717250602075e-05, + "loss": 2.0141, + "step": 17067500 + }, + { + "epoch": 49.4, + "learning_rate": 2.5306448858373472e-05, + "loss": 2.029, + "step": 17068000 + }, + { + "epoch": 49.41, + "learning_rate": 2.5305725210726194e-05, + "loss": 1.9913, + "step": 17068500 + }, + { + "epoch": 49.41, + "learning_rate": 2.5305001563078916e-05, + "loss": 1.9645, + "step": 17069000 + }, + { + "epoch": 49.41, + "learning_rate": 2.530427791543164e-05, + "loss": 1.9895, + "step": 17069500 + }, + { + "epoch": 49.41, + "learning_rate": 2.5303554267784367e-05, + "loss": 2.0046, + "step": 17070000 + }, + { + "epoch": 49.41, + "learning_rate": 2.5302832067432386e-05, + "loss": 2.0095, + "step": 17070500 + }, + { + "epoch": 49.41, + "learning_rate": 2.530210841978511e-05, + "loss": 2.0035, + "step": 17071000 + }, + { + "epoch": 49.42, + "learning_rate": 2.530138477213783e-05, + "loss": 1.9779, + "step": 17071500 + }, + { + "epoch": 49.42, + "learning_rate": 2.530066257178585e-05, + "loss": 1.9904, + "step": 17072000 + }, + { + "epoch": 49.42, + "learning_rate": 2.5299938924138572e-05, + "loss": 2.0064, + "step": 17072500 + }, + { + "epoch": 49.42, + "learning_rate": 2.5299215276491294e-05, + "loss": 1.9821, + "step": 17073000 + }, + { + "epoch": 49.42, + "learning_rate": 2.5298491628844016e-05, + "loss": 2.0477, + "step": 17073500 + }, + { + "epoch": 49.42, + "learning_rate": 2.5297767981196742e-05, + "loss": 1.9921, + "step": 17074000 + }, + { + "epoch": 49.42, + "learning_rate": 2.5297044333549464e-05, + "loss": 2.0153, + "step": 17074500 + }, + { + "epoch": 49.43, + "learning_rate": 2.5296320685902187e-05, + "loss": 1.979, + "step": 17075000 + }, + { + "epoch": 49.43, + "learning_rate": 2.529559703825491e-05, + "loss": 1.9973, + "step": 17075500 + }, + { + "epoch": 49.43, + "learning_rate": 2.529487339060763e-05, + "loss": 1.9974, + "step": 17076000 + }, + { + "epoch": 49.43, + "learning_rate": 2.5294149742960353e-05, + "loss": 2.0052, + "step": 17076500 + }, + { + "epoch": 49.43, + "learning_rate": 2.529342609531308e-05, + "loss": 1.9979, + "step": 17077000 + }, + { + "epoch": 49.43, + "learning_rate": 2.5292702447665805e-05, + "loss": 2.0116, + "step": 17077500 + }, + { + "epoch": 49.43, + "learning_rate": 2.5291980247313823e-05, + "loss": 1.9885, + "step": 17078000 + }, + { + "epoch": 49.44, + "learning_rate": 2.5291256599666546e-05, + "loss": 1.9838, + "step": 17078500 + }, + { + "epoch": 49.44, + "learning_rate": 2.5290532952019268e-05, + "loss": 2.0033, + "step": 17079000 + }, + { + "epoch": 49.44, + "learning_rate": 2.5289809304371994e-05, + "loss": 1.9949, + "step": 17079500 + }, + { + "epoch": 49.44, + "learning_rate": 2.5289085656724716e-05, + "loss": 2.0188, + "step": 17080000 + }, + { + "epoch": 49.44, + "learning_rate": 2.5288362009077438e-05, + "loss": 2.0108, + "step": 17080500 + }, + { + "epoch": 49.44, + "learning_rate": 2.528763836143016e-05, + "loss": 2.0071, + "step": 17081000 + }, + { + "epoch": 49.44, + "learning_rate": 2.5286914713782882e-05, + "loss": 1.9814, + "step": 17081500 + }, + { + "epoch": 49.45, + "learning_rate": 2.52861925134309e-05, + "loss": 1.9911, + "step": 17082000 + }, + { + "epoch": 49.45, + "learning_rate": 2.5285470313078917e-05, + "loss": 2.0101, + "step": 17082500 + }, + { + "epoch": 49.45, + "learning_rate": 2.5284746665431643e-05, + "loss": 1.992, + "step": 17083000 + }, + { + "epoch": 49.45, + "learning_rate": 2.5284023017784365e-05, + "loss": 1.9969, + "step": 17083500 + }, + { + "epoch": 49.45, + "learning_rate": 2.5283299370137087e-05, + "loss": 2.0026, + "step": 17084000 + }, + { + "epoch": 49.45, + "learning_rate": 2.528257572248981e-05, + "loss": 2.0298, + "step": 17084500 + }, + { + "epoch": 49.45, + "learning_rate": 2.528185207484254e-05, + "loss": 1.9949, + "step": 17085000 + }, + { + "epoch": 49.46, + "learning_rate": 2.528112842719526e-05, + "loss": 1.9894, + "step": 17085500 + }, + { + "epoch": 49.46, + "learning_rate": 2.5280404779547983e-05, + "loss": 1.9827, + "step": 17086000 + }, + { + "epoch": 49.46, + "learning_rate": 2.5279681131900705e-05, + "loss": 1.9812, + "step": 17086500 + }, + { + "epoch": 49.46, + "learning_rate": 2.527895748425343e-05, + "loss": 2.019, + "step": 17087000 + }, + { + "epoch": 49.46, + "learning_rate": 2.5278233836606153e-05, + "loss": 2.0025, + "step": 17087500 + }, + { + "epoch": 49.46, + "learning_rate": 2.5277510188958875e-05, + "loss": 2.0236, + "step": 17088000 + }, + { + "epoch": 49.46, + "learning_rate": 2.5276787988606894e-05, + "loss": 1.9979, + "step": 17088500 + }, + { + "epoch": 49.47, + "learning_rate": 2.5276064340959616e-05, + "loss": 2.0137, + "step": 17089000 + }, + { + "epoch": 49.47, + "learning_rate": 2.5275342140607632e-05, + "loss": 1.9983, + "step": 17089500 + }, + { + "epoch": 49.47, + "learning_rate": 2.5274618492960357e-05, + "loss": 2.0185, + "step": 17090000 + }, + { + "epoch": 49.47, + "learning_rate": 2.527389484531308e-05, + "loss": 1.9768, + "step": 17090500 + }, + { + "epoch": 49.47, + "learning_rate": 2.5273171197665802e-05, + "loss": 2.0123, + "step": 17091000 + }, + { + "epoch": 49.47, + "learning_rate": 2.5272447550018524e-05, + "loss": 2.0137, + "step": 17091500 + }, + { + "epoch": 49.47, + "learning_rate": 2.5271723902371246e-05, + "loss": 2.0162, + "step": 17092000 + }, + { + "epoch": 49.48, + "learning_rate": 2.5271000254723975e-05, + "loss": 1.9708, + "step": 17092500 + }, + { + "epoch": 49.48, + "learning_rate": 2.5270276607076698e-05, + "loss": 1.9997, + "step": 17093000 + }, + { + "epoch": 49.48, + "learning_rate": 2.526955295942942e-05, + "loss": 2.0134, + "step": 17093500 + }, + { + "epoch": 49.48, + "learning_rate": 2.5268829311782145e-05, + "loss": 1.9853, + "step": 17094000 + }, + { + "epoch": 49.48, + "learning_rate": 2.5268105664134868e-05, + "loss": 1.9868, + "step": 17094500 + }, + { + "epoch": 49.48, + "learning_rate": 2.5267383463782883e-05, + "loss": 1.9923, + "step": 17095000 + }, + { + "epoch": 49.48, + "learning_rate": 2.5266659816135606e-05, + "loss": 2.0049, + "step": 17095500 + }, + { + "epoch": 49.49, + "learning_rate": 2.5265937615783624e-05, + "loss": 1.9896, + "step": 17096000 + }, + { + "epoch": 49.49, + "learning_rate": 2.5265213968136347e-05, + "loss": 1.9926, + "step": 17096500 + }, + { + "epoch": 49.49, + "learning_rate": 2.526449032048907e-05, + "loss": 2.0012, + "step": 17097000 + }, + { + "epoch": 49.49, + "learning_rate": 2.5263766672841795e-05, + "loss": 2.0224, + "step": 17097500 + }, + { + "epoch": 49.49, + "learning_rate": 2.5263045919785107e-05, + "loss": 2.0277, + "step": 17098000 + }, + { + "epoch": 49.49, + "learning_rate": 2.526232227213783e-05, + "loss": 2.0084, + "step": 17098500 + }, + { + "epoch": 49.49, + "learning_rate": 2.5261600071785845e-05, + "loss": 2.0042, + "step": 17099000 + }, + { + "epoch": 49.5, + "learning_rate": 2.526087642413857e-05, + "loss": 1.9889, + "step": 17099500 + }, + { + "epoch": 49.5, + "learning_rate": 2.5260152776491292e-05, + "loss": 2.0, + "step": 17100000 + }, + { + "epoch": 49.5, + "learning_rate": 2.525942912884402e-05, + "loss": 1.9969, + "step": 17100500 + }, + { + "epoch": 49.5, + "learning_rate": 2.5258705481196744e-05, + "loss": 2.0208, + "step": 17101000 + }, + { + "epoch": 49.5, + "learning_rate": 2.5257981833549466e-05, + "loss": 1.9853, + "step": 17101500 + }, + { + "epoch": 49.5, + "learning_rate": 2.5257258185902188e-05, + "loss": 1.9803, + "step": 17102000 + }, + { + "epoch": 49.5, + "learning_rate": 2.525653453825491e-05, + "loss": 1.9966, + "step": 17102500 + }, + { + "epoch": 49.51, + "learning_rate": 2.5255810890607633e-05, + "loss": 2.0139, + "step": 17103000 + }, + { + "epoch": 49.51, + "learning_rate": 2.5255087242960358e-05, + "loss": 2.0193, + "step": 17103500 + }, + { + "epoch": 49.51, + "learning_rate": 2.525436359531308e-05, + "loss": 2.018, + "step": 17104000 + }, + { + "epoch": 49.51, + "learning_rate": 2.5253639947665803e-05, + "loss": 1.9754, + "step": 17104500 + }, + { + "epoch": 49.51, + "learning_rate": 2.5252916300018525e-05, + "loss": 1.975, + "step": 17105000 + }, + { + "epoch": 49.51, + "learning_rate": 2.5252192652371247e-05, + "loss": 2.0106, + "step": 17105500 + }, + { + "epoch": 49.51, + "learning_rate": 2.5251470452019266e-05, + "loss": 2.0064, + "step": 17106000 + }, + { + "epoch": 49.52, + "learning_rate": 2.525074825166728e-05, + "loss": 2.0203, + "step": 17106500 + }, + { + "epoch": 49.52, + "learning_rate": 2.5250024604020007e-05, + "loss": 2.0046, + "step": 17107000 + }, + { + "epoch": 49.52, + "learning_rate": 2.5249300956372733e-05, + "loss": 2.0135, + "step": 17107500 + }, + { + "epoch": 49.52, + "learning_rate": 2.524857730872546e-05, + "loss": 2.0038, + "step": 17108000 + }, + { + "epoch": 49.52, + "learning_rate": 2.524785366107818e-05, + "loss": 1.995, + "step": 17108500 + }, + { + "epoch": 49.52, + "learning_rate": 2.5247131460726196e-05, + "loss": 1.9867, + "step": 17109000 + }, + { + "epoch": 49.52, + "learning_rate": 2.5246409260374215e-05, + "loss": 1.9985, + "step": 17109500 + }, + { + "epoch": 49.53, + "learning_rate": 2.5245685612726938e-05, + "loss": 2.0139, + "step": 17110000 + }, + { + "epoch": 49.53, + "learning_rate": 2.524496196507966e-05, + "loss": 1.9932, + "step": 17110500 + }, + { + "epoch": 49.53, + "learning_rate": 2.5244238317432385e-05, + "loss": 2.0113, + "step": 17111000 + }, + { + "epoch": 49.53, + "learning_rate": 2.5243514669785108e-05, + "loss": 2.0036, + "step": 17111500 + }, + { + "epoch": 49.53, + "learning_rate": 2.524279102213783e-05, + "loss": 2.0059, + "step": 17112000 + }, + { + "epoch": 49.53, + "learning_rate": 2.5242068821785845e-05, + "loss": 2.001, + "step": 17112500 + }, + { + "epoch": 49.54, + "learning_rate": 2.524134517413857e-05, + "loss": 2.0134, + "step": 17113000 + }, + { + "epoch": 49.54, + "learning_rate": 2.5240622973786587e-05, + "loss": 1.9815, + "step": 17113500 + }, + { + "epoch": 49.54, + "learning_rate": 2.523989932613931e-05, + "loss": 2.0046, + "step": 17114000 + }, + { + "epoch": 49.54, + "learning_rate": 2.5239175678492034e-05, + "loss": 1.9996, + "step": 17114500 + }, + { + "epoch": 49.54, + "learning_rate": 2.5238452030844757e-05, + "loss": 1.9902, + "step": 17115000 + }, + { + "epoch": 49.54, + "learning_rate": 2.5237728383197486e-05, + "loss": 2.0029, + "step": 17115500 + }, + { + "epoch": 49.54, + "learning_rate": 2.5237004735550208e-05, + "loss": 2.0206, + "step": 17116000 + }, + { + "epoch": 49.55, + "learning_rate": 2.523628108790293e-05, + "loss": 2.0135, + "step": 17116500 + }, + { + "epoch": 49.55, + "learning_rate": 2.5235557440255652e-05, + "loss": 2.0353, + "step": 17117000 + }, + { + "epoch": 49.55, + "learning_rate": 2.5234833792608375e-05, + "loss": 1.9984, + "step": 17117500 + }, + { + "epoch": 49.55, + "learning_rate": 2.5234110144961097e-05, + "loss": 1.9927, + "step": 17118000 + }, + { + "epoch": 49.55, + "learning_rate": 2.5233386497313822e-05, + "loss": 2.0047, + "step": 17118500 + }, + { + "epoch": 49.55, + "learning_rate": 2.5232662849666545e-05, + "loss": 2.0135, + "step": 17119000 + }, + { + "epoch": 49.55, + "learning_rate": 2.5231939202019267e-05, + "loss": 1.996, + "step": 17119500 + }, + { + "epoch": 49.56, + "learning_rate": 2.523121555437199e-05, + "loss": 2.0119, + "step": 17120000 + }, + { + "epoch": 49.56, + "learning_rate": 2.523049190672471e-05, + "loss": 2.0009, + "step": 17120500 + }, + { + "epoch": 49.56, + "learning_rate": 2.5229768259077437e-05, + "loss": 2.0177, + "step": 17121000 + }, + { + "epoch": 49.56, + "learning_rate": 2.522904461143016e-05, + "loss": 2.0089, + "step": 17121500 + }, + { + "epoch": 49.56, + "learning_rate": 2.522832385837347e-05, + "loss": 1.9958, + "step": 17122000 + }, + { + "epoch": 49.56, + "learning_rate": 2.52276002107262e-05, + "loss": 2.009, + "step": 17122500 + }, + { + "epoch": 49.56, + "learning_rate": 2.5226876563078923e-05, + "loss": 2.007, + "step": 17123000 + }, + { + "epoch": 49.57, + "learning_rate": 2.5226152915431645e-05, + "loss": 2.0198, + "step": 17123500 + }, + { + "epoch": 49.57, + "learning_rate": 2.5225429267784367e-05, + "loss": 2.0102, + "step": 17124000 + }, + { + "epoch": 49.57, + "learning_rate": 2.522470562013709e-05, + "loss": 2.0018, + "step": 17124500 + }, + { + "epoch": 49.57, + "learning_rate": 2.522398197248981e-05, + "loss": 2.0111, + "step": 17125000 + }, + { + "epoch": 49.57, + "learning_rate": 2.5223258324842537e-05, + "loss": 2.0134, + "step": 17125500 + }, + { + "epoch": 49.57, + "learning_rate": 2.522253467719526e-05, + "loss": 1.9996, + "step": 17126000 + }, + { + "epoch": 49.57, + "learning_rate": 2.5221811029547982e-05, + "loss": 1.9889, + "step": 17126500 + }, + { + "epoch": 49.58, + "learning_rate": 2.5221087381900704e-05, + "loss": 1.9895, + "step": 17127000 + }, + { + "epoch": 49.58, + "learning_rate": 2.5220365181548723e-05, + "loss": 2.003, + "step": 17127500 + }, + { + "epoch": 49.58, + "learning_rate": 2.5219641533901445e-05, + "loss": 2.0164, + "step": 17128000 + }, + { + "epoch": 49.58, + "learning_rate": 2.5218917886254167e-05, + "loss": 2.0049, + "step": 17128500 + }, + { + "epoch": 49.58, + "learning_rate": 2.521819423860689e-05, + "loss": 1.9764, + "step": 17129000 + }, + { + "epoch": 49.58, + "learning_rate": 2.5217470590959612e-05, + "loss": 1.9928, + "step": 17129500 + }, + { + "epoch": 49.58, + "learning_rate": 2.521674694331234e-05, + "loss": 1.9901, + "step": 17130000 + }, + { + "epoch": 49.59, + "learning_rate": 2.5216023295665063e-05, + "loss": 2.0011, + "step": 17130500 + }, + { + "epoch": 49.59, + "learning_rate": 2.521529964801779e-05, + "loss": 2.0113, + "step": 17131000 + }, + { + "epoch": 49.59, + "learning_rate": 2.521457600037051e-05, + "loss": 1.9957, + "step": 17131500 + }, + { + "epoch": 49.59, + "learning_rate": 2.5213852352723233e-05, + "loss": 2.0171, + "step": 17132000 + }, + { + "epoch": 49.59, + "learning_rate": 2.5213128705075955e-05, + "loss": 2.0022, + "step": 17132500 + }, + { + "epoch": 49.59, + "learning_rate": 2.5212406504723974e-05, + "loss": 1.9972, + "step": 17133000 + }, + { + "epoch": 49.59, + "learning_rate": 2.5211682857076697e-05, + "loss": 2.0201, + "step": 17133500 + }, + { + "epoch": 49.6, + "learning_rate": 2.5210960656724712e-05, + "loss": 1.9908, + "step": 17134000 + }, + { + "epoch": 49.6, + "learning_rate": 2.5210237009077438e-05, + "loss": 2.0076, + "step": 17134500 + }, + { + "epoch": 49.6, + "learning_rate": 2.520951336143016e-05, + "loss": 2.0088, + "step": 17135000 + }, + { + "epoch": 49.6, + "learning_rate": 2.5208791161078176e-05, + "loss": 1.9947, + "step": 17135500 + }, + { + "epoch": 49.6, + "learning_rate": 2.52080675134309e-05, + "loss": 1.9786, + "step": 17136000 + }, + { + "epoch": 49.6, + "learning_rate": 2.5207343865783623e-05, + "loss": 1.982, + "step": 17136500 + }, + { + "epoch": 49.6, + "learning_rate": 2.5206620218136346e-05, + "loss": 2.0174, + "step": 17137000 + }, + { + "epoch": 49.61, + "learning_rate": 2.5205896570489075e-05, + "loss": 1.992, + "step": 17137500 + }, + { + "epoch": 49.61, + "learning_rate": 2.5205172922841797e-05, + "loss": 2.0168, + "step": 17138000 + }, + { + "epoch": 49.61, + "learning_rate": 2.520444927519452e-05, + "loss": 2.0113, + "step": 17138500 + }, + { + "epoch": 49.61, + "learning_rate": 2.5203727074842538e-05, + "loss": 1.9923, + "step": 17139000 + }, + { + "epoch": 49.61, + "learning_rate": 2.520300342719526e-05, + "loss": 1.9978, + "step": 17139500 + }, + { + "epoch": 49.61, + "learning_rate": 2.5202279779547983e-05, + "loss": 2.0138, + "step": 17140000 + }, + { + "epoch": 49.61, + "learning_rate": 2.5201556131900705e-05, + "loss": 2.0056, + "step": 17140500 + }, + { + "epoch": 49.62, + "learning_rate": 2.5200832484253427e-05, + "loss": 2.0002, + "step": 17141000 + }, + { + "epoch": 49.62, + "learning_rate": 2.5200108836606153e-05, + "loss": 2.0206, + "step": 17141500 + }, + { + "epoch": 49.62, + "learning_rate": 2.5199385188958875e-05, + "loss": 1.9865, + "step": 17142000 + }, + { + "epoch": 49.62, + "learning_rate": 2.519866298860689e-05, + "loss": 2.0136, + "step": 17142500 + }, + { + "epoch": 49.62, + "learning_rate": 2.5197939340959613e-05, + "loss": 1.9972, + "step": 17143000 + }, + { + "epoch": 49.62, + "learning_rate": 2.519721714060763e-05, + "loss": 1.9926, + "step": 17143500 + }, + { + "epoch": 49.62, + "learning_rate": 2.5196493492960354e-05, + "loss": 2.0165, + "step": 17144000 + }, + { + "epoch": 49.63, + "learning_rate": 2.5195769845313076e-05, + "loss": 1.993, + "step": 17144500 + }, + { + "epoch": 49.63, + "learning_rate": 2.5195046197665805e-05, + "loss": 2.0149, + "step": 17145000 + }, + { + "epoch": 49.63, + "learning_rate": 2.5194322550018527e-05, + "loss": 2.006, + "step": 17145500 + }, + { + "epoch": 49.63, + "learning_rate": 2.5193598902371253e-05, + "loss": 2.0373, + "step": 17146000 + }, + { + "epoch": 49.63, + "learning_rate": 2.5192875254723975e-05, + "loss": 2.0059, + "step": 17146500 + }, + { + "epoch": 49.63, + "learning_rate": 2.5192151607076697e-05, + "loss": 2.0046, + "step": 17147000 + }, + { + "epoch": 49.63, + "learning_rate": 2.5191429406724716e-05, + "loss": 2.0093, + "step": 17147500 + }, + { + "epoch": 49.64, + "learning_rate": 2.519070575907744e-05, + "loss": 2.0185, + "step": 17148000 + }, + { + "epoch": 49.64, + "learning_rate": 2.518998211143016e-05, + "loss": 2.0074, + "step": 17148500 + }, + { + "epoch": 49.64, + "learning_rate": 2.5189258463782883e-05, + "loss": 1.9889, + "step": 17149000 + }, + { + "epoch": 49.64, + "learning_rate": 2.5188534816135605e-05, + "loss": 1.9943, + "step": 17149500 + }, + { + "epoch": 49.64, + "learning_rate": 2.5187811168488328e-05, + "loss": 2.0241, + "step": 17150000 + }, + { + "epoch": 49.64, + "learning_rate": 2.5187087520841053e-05, + "loss": 1.9832, + "step": 17150500 + }, + { + "epoch": 49.65, + "learning_rate": 2.5186363873193775e-05, + "loss": 2.0023, + "step": 17151000 + }, + { + "epoch": 49.65, + "learning_rate": 2.5185640225546498e-05, + "loss": 2.0074, + "step": 17151500 + }, + { + "epoch": 49.65, + "learning_rate": 2.5184916577899227e-05, + "loss": 1.9895, + "step": 17152000 + }, + { + "epoch": 49.65, + "learning_rate": 2.518419293025195e-05, + "loss": 2.0149, + "step": 17152500 + }, + { + "epoch": 49.65, + "learning_rate": 2.518346928260467e-05, + "loss": 1.9795, + "step": 17153000 + }, + { + "epoch": 49.65, + "learning_rate": 2.518274708225269e-05, + "loss": 1.9969, + "step": 17153500 + }, + { + "epoch": 49.65, + "learning_rate": 2.5182023434605412e-05, + "loss": 2.0187, + "step": 17154000 + }, + { + "epoch": 49.66, + "learning_rate": 2.5181299786958135e-05, + "loss": 1.9854, + "step": 17154500 + }, + { + "epoch": 49.66, + "learning_rate": 2.5180576139310857e-05, + "loss": 1.9972, + "step": 17155000 + }, + { + "epoch": 49.66, + "learning_rate": 2.5179853938958876e-05, + "loss": 2.0164, + "step": 17155500 + }, + { + "epoch": 49.66, + "learning_rate": 2.5179130291311598e-05, + "loss": 1.9999, + "step": 17156000 + }, + { + "epoch": 49.66, + "learning_rate": 2.5178408090959617e-05, + "loss": 2.0081, + "step": 17156500 + }, + { + "epoch": 49.66, + "learning_rate": 2.517768444331234e-05, + "loss": 2.0098, + "step": 17157000 + }, + { + "epoch": 49.66, + "learning_rate": 2.517696079566506e-05, + "loss": 1.9937, + "step": 17157500 + }, + { + "epoch": 49.67, + "learning_rate": 2.5176237148017784e-05, + "loss": 2.005, + "step": 17158000 + }, + { + "epoch": 49.67, + "learning_rate": 2.5175514947665803e-05, + "loss": 1.995, + "step": 17158500 + }, + { + "epoch": 49.67, + "learning_rate": 2.5174791300018525e-05, + "loss": 2.0228, + "step": 17159000 + }, + { + "epoch": 49.67, + "learning_rate": 2.5174067652371247e-05, + "loss": 2.012, + "step": 17159500 + }, + { + "epoch": 49.67, + "learning_rate": 2.5173344004723976e-05, + "loss": 2.0139, + "step": 17160000 + }, + { + "epoch": 49.67, + "learning_rate": 2.517262180437199e-05, + "loss": 2.012, + "step": 17160500 + }, + { + "epoch": 49.67, + "learning_rate": 2.5171898156724717e-05, + "loss": 2.011, + "step": 17161000 + }, + { + "epoch": 49.68, + "learning_rate": 2.5171175956372733e-05, + "loss": 2.0123, + "step": 17161500 + }, + { + "epoch": 49.68, + "learning_rate": 2.5170452308725455e-05, + "loss": 1.9957, + "step": 17162000 + }, + { + "epoch": 49.68, + "learning_rate": 2.516972866107818e-05, + "loss": 2.0077, + "step": 17162500 + }, + { + "epoch": 49.68, + "learning_rate": 2.5169005013430903e-05, + "loss": 2.0187, + "step": 17163000 + }, + { + "epoch": 49.68, + "learning_rate": 2.516828281307892e-05, + "loss": 2.0028, + "step": 17163500 + }, + { + "epoch": 49.68, + "learning_rate": 2.516755916543164e-05, + "loss": 2.0191, + "step": 17164000 + }, + { + "epoch": 49.68, + "learning_rate": 2.5166835517784366e-05, + "loss": 2.0195, + "step": 17164500 + }, + { + "epoch": 49.69, + "learning_rate": 2.516611187013709e-05, + "loss": 2.0122, + "step": 17165000 + }, + { + "epoch": 49.69, + "learning_rate": 2.516538822248981e-05, + "loss": 1.9988, + "step": 17165500 + }, + { + "epoch": 49.69, + "learning_rate": 2.5164664574842533e-05, + "loss": 2.0053, + "step": 17166000 + }, + { + "epoch": 49.69, + "learning_rate": 2.5163940927195255e-05, + "loss": 2.0224, + "step": 17166500 + }, + { + "epoch": 49.69, + "learning_rate": 2.5163218726843274e-05, + "loss": 2.0096, + "step": 17167000 + }, + { + "epoch": 49.69, + "learning_rate": 2.5162495079196003e-05, + "loss": 2.0325, + "step": 17167500 + }, + { + "epoch": 49.69, + "learning_rate": 2.5161771431548725e-05, + "loss": 2.0172, + "step": 17168000 + }, + { + "epoch": 49.7, + "learning_rate": 2.5161047783901448e-05, + "loss": 1.9684, + "step": 17168500 + }, + { + "epoch": 49.7, + "learning_rate": 2.516032413625417e-05, + "loss": 2.0319, + "step": 17169000 + }, + { + "epoch": 49.7, + "learning_rate": 2.5159600488606892e-05, + "loss": 1.9945, + "step": 17169500 + }, + { + "epoch": 49.7, + "learning_rate": 2.5158876840959618e-05, + "loss": 2.0173, + "step": 17170000 + }, + { + "epoch": 49.7, + "learning_rate": 2.515815319331234e-05, + "loss": 2.0298, + "step": 17170500 + }, + { + "epoch": 49.7, + "learning_rate": 2.5157429545665062e-05, + "loss": 1.9965, + "step": 17171000 + }, + { + "epoch": 49.7, + "learning_rate": 2.5156705898017784e-05, + "loss": 2.0051, + "step": 17171500 + }, + { + "epoch": 49.71, + "learning_rate": 2.5155982250370507e-05, + "loss": 1.9961, + "step": 17172000 + }, + { + "epoch": 49.71, + "learning_rate": 2.5155258602723232e-05, + "loss": 1.997, + "step": 17172500 + }, + { + "epoch": 49.71, + "learning_rate": 2.5154534955075954e-05, + "loss": 2.0052, + "step": 17173000 + }, + { + "epoch": 49.71, + "learning_rate": 2.5153814202019267e-05, + "loss": 2.0135, + "step": 17173500 + }, + { + "epoch": 49.71, + "learning_rate": 2.515309055437199e-05, + "loss": 1.9972, + "step": 17174000 + }, + { + "epoch": 49.71, + "learning_rate": 2.515236690672471e-05, + "loss": 2.0242, + "step": 17174500 + }, + { + "epoch": 49.71, + "learning_rate": 2.515164325907744e-05, + "loss": 1.995, + "step": 17175000 + }, + { + "epoch": 49.72, + "learning_rate": 2.5150919611430162e-05, + "loss": 2.0093, + "step": 17175500 + }, + { + "epoch": 49.72, + "learning_rate": 2.515019741107818e-05, + "loss": 1.9937, + "step": 17176000 + }, + { + "epoch": 49.72, + "learning_rate": 2.5149473763430904e-05, + "loss": 2.0161, + "step": 17176500 + }, + { + "epoch": 49.72, + "learning_rate": 2.5148750115783626e-05, + "loss": 2.0111, + "step": 17177000 + }, + { + "epoch": 49.72, + "learning_rate": 2.5148026468136348e-05, + "loss": 2.0036, + "step": 17177500 + }, + { + "epoch": 49.72, + "learning_rate": 2.514730282048907e-05, + "loss": 2.0191, + "step": 17178000 + }, + { + "epoch": 49.72, + "learning_rate": 2.5146579172841796e-05, + "loss": 2.0152, + "step": 17178500 + }, + { + "epoch": 49.73, + "learning_rate": 2.5145855525194518e-05, + "loss": 1.9918, + "step": 17179000 + }, + { + "epoch": 49.73, + "learning_rate": 2.514513187754724e-05, + "loss": 2.0073, + "step": 17179500 + }, + { + "epoch": 49.73, + "learning_rate": 2.5144408229899963e-05, + "loss": 2.0184, + "step": 17180000 + }, + { + "epoch": 49.73, + "learning_rate": 2.5143684582252685e-05, + "loss": 2.0309, + "step": 17180500 + }, + { + "epoch": 49.73, + "learning_rate": 2.5142960934605407e-05, + "loss": 2.0242, + "step": 17181000 + }, + { + "epoch": 49.73, + "learning_rate": 2.5142237286958133e-05, + "loss": 1.9942, + "step": 17181500 + }, + { + "epoch": 49.73, + "learning_rate": 2.514151363931086e-05, + "loss": 1.9888, + "step": 17182000 + }, + { + "epoch": 49.74, + "learning_rate": 2.5140789991663584e-05, + "loss": 1.9844, + "step": 17182500 + }, + { + "epoch": 49.74, + "learning_rate": 2.5140066344016306e-05, + "loss": 1.9805, + "step": 17183000 + }, + { + "epoch": 49.74, + "learning_rate": 2.5139344143664322e-05, + "loss": 2.0171, + "step": 17183500 + }, + { + "epoch": 49.74, + "learning_rate": 2.5138623390607634e-05, + "loss": 1.9793, + "step": 17184000 + }, + { + "epoch": 49.74, + "learning_rate": 2.513789974296036e-05, + "loss": 1.9992, + "step": 17184500 + }, + { + "epoch": 49.74, + "learning_rate": 2.5137177542608375e-05, + "loss": 1.9955, + "step": 17185000 + }, + { + "epoch": 49.74, + "learning_rate": 2.5136455342256394e-05, + "loss": 2.0044, + "step": 17185500 + }, + { + "epoch": 49.75, + "learning_rate": 2.5135731694609116e-05, + "loss": 1.9941, + "step": 17186000 + }, + { + "epoch": 49.75, + "learning_rate": 2.513500804696184e-05, + "loss": 2.0118, + "step": 17186500 + }, + { + "epoch": 49.75, + "learning_rate": 2.513428439931456e-05, + "loss": 2.017, + "step": 17187000 + }, + { + "epoch": 49.75, + "learning_rate": 2.5133560751667283e-05, + "loss": 1.9888, + "step": 17187500 + }, + { + "epoch": 49.75, + "learning_rate": 2.513283710402001e-05, + "loss": 2.0233, + "step": 17188000 + }, + { + "epoch": 49.75, + "learning_rate": 2.513211345637273e-05, + "loss": 2.008, + "step": 17188500 + }, + { + "epoch": 49.76, + "learning_rate": 2.5131389808725453e-05, + "loss": 2.0155, + "step": 17189000 + }, + { + "epoch": 49.76, + "learning_rate": 2.5130666161078175e-05, + "loss": 1.991, + "step": 17189500 + }, + { + "epoch": 49.76, + "learning_rate": 2.5129942513430904e-05, + "loss": 1.9857, + "step": 17190000 + }, + { + "epoch": 49.76, + "learning_rate": 2.5129218865783627e-05, + "loss": 2.0267, + "step": 17190500 + }, + { + "epoch": 49.76, + "learning_rate": 2.512849521813635e-05, + "loss": 2.006, + "step": 17191000 + }, + { + "epoch": 49.76, + "learning_rate": 2.512777157048907e-05, + "loss": 2.0314, + "step": 17191500 + }, + { + "epoch": 49.76, + "learning_rate": 2.5127047922841797e-05, + "loss": 1.9939, + "step": 17192000 + }, + { + "epoch": 49.77, + "learning_rate": 2.512632427519452e-05, + "loss": 2.0021, + "step": 17192500 + }, + { + "epoch": 49.77, + "learning_rate": 2.5125602074842535e-05, + "loss": 1.9968, + "step": 17193000 + }, + { + "epoch": 49.77, + "learning_rate": 2.512487842719526e-05, + "loss": 2.0245, + "step": 17193500 + }, + { + "epoch": 49.77, + "learning_rate": 2.5124154779547982e-05, + "loss": 2.0078, + "step": 17194000 + }, + { + "epoch": 49.77, + "learning_rate": 2.5123431131900705e-05, + "loss": 1.9716, + "step": 17194500 + }, + { + "epoch": 49.77, + "learning_rate": 2.5122707484253427e-05, + "loss": 2.0013, + "step": 17195000 + }, + { + "epoch": 49.77, + "learning_rate": 2.512198383660615e-05, + "loss": 2.0056, + "step": 17195500 + }, + { + "epoch": 49.78, + "learning_rate": 2.512126018895887e-05, + "loss": 1.9977, + "step": 17196000 + }, + { + "epoch": 49.78, + "learning_rate": 2.5120536541311597e-05, + "loss": 1.9895, + "step": 17196500 + }, + { + "epoch": 49.78, + "learning_rate": 2.5119812893664323e-05, + "loss": 2.0022, + "step": 17197000 + }, + { + "epoch": 49.78, + "learning_rate": 2.511909069331234e-05, + "loss": 1.9957, + "step": 17197500 + }, + { + "epoch": 49.78, + "learning_rate": 2.5118367045665064e-05, + "loss": 2.0019, + "step": 17198000 + }, + { + "epoch": 49.78, + "learning_rate": 2.5117643398017786e-05, + "loss": 2.0101, + "step": 17198500 + }, + { + "epoch": 49.78, + "learning_rate": 2.511691975037051e-05, + "loss": 2.0117, + "step": 17199000 + }, + { + "epoch": 49.79, + "learning_rate": 2.5116197550018527e-05, + "loss": 1.9904, + "step": 17199500 + }, + { + "epoch": 49.79, + "learning_rate": 2.511547390237125e-05, + "loss": 2.0189, + "step": 17200000 + }, + { + "epoch": 49.79, + "learning_rate": 2.511475025472397e-05, + "loss": 1.9909, + "step": 17200500 + }, + { + "epoch": 49.79, + "learning_rate": 2.5114026607076697e-05, + "loss": 1.9984, + "step": 17201000 + }, + { + "epoch": 49.79, + "learning_rate": 2.511330295942942e-05, + "loss": 1.9783, + "step": 17201500 + }, + { + "epoch": 49.79, + "learning_rate": 2.511257931178214e-05, + "loss": 1.9991, + "step": 17202000 + }, + { + "epoch": 49.79, + "learning_rate": 2.5111855664134864e-05, + "loss": 2.0123, + "step": 17202500 + }, + { + "epoch": 49.8, + "learning_rate": 2.5111132016487586e-05, + "loss": 2.0075, + "step": 17203000 + }, + { + "epoch": 49.8, + "learning_rate": 2.5110408368840312e-05, + "loss": 2.01, + "step": 17203500 + }, + { + "epoch": 49.8, + "learning_rate": 2.5109686168488327e-05, + "loss": 1.9995, + "step": 17204000 + }, + { + "epoch": 49.8, + "learning_rate": 2.5108963968136346e-05, + "loss": 1.9837, + "step": 17204500 + }, + { + "epoch": 49.8, + "learning_rate": 2.510824176778437e-05, + "loss": 2.0044, + "step": 17205000 + }, + { + "epoch": 49.8, + "learning_rate": 2.510751812013709e-05, + "loss": 2.0081, + "step": 17205500 + }, + { + "epoch": 49.8, + "learning_rate": 2.5106794472489813e-05, + "loss": 1.9916, + "step": 17206000 + }, + { + "epoch": 49.81, + "learning_rate": 2.5106070824842535e-05, + "loss": 1.9999, + "step": 17206500 + }, + { + "epoch": 49.81, + "learning_rate": 2.5105348624490554e-05, + "loss": 2.0008, + "step": 17207000 + }, + { + "epoch": 49.81, + "learning_rate": 2.5104624976843276e-05, + "loss": 1.9969, + "step": 17207500 + }, + { + "epoch": 49.81, + "learning_rate": 2.5103901329196e-05, + "loss": 2.0141, + "step": 17208000 + }, + { + "epoch": 49.81, + "learning_rate": 2.5103177681548724e-05, + "loss": 2.0204, + "step": 17208500 + }, + { + "epoch": 49.81, + "learning_rate": 2.5102454033901447e-05, + "loss": 2.0074, + "step": 17209000 + }, + { + "epoch": 49.81, + "learning_rate": 2.510173038625417e-05, + "loss": 2.0277, + "step": 17209500 + }, + { + "epoch": 49.82, + "learning_rate": 2.510100673860689e-05, + "loss": 2.0103, + "step": 17210000 + }, + { + "epoch": 49.82, + "learning_rate": 2.5100283090959613e-05, + "loss": 1.989, + "step": 17210500 + }, + { + "epoch": 49.82, + "learning_rate": 2.5099560890607632e-05, + "loss": 1.9838, + "step": 17211000 + }, + { + "epoch": 49.82, + "learning_rate": 2.5098837242960354e-05, + "loss": 1.9962, + "step": 17211500 + }, + { + "epoch": 49.82, + "learning_rate": 2.5098113595313077e-05, + "loss": 2.0084, + "step": 17212000 + }, + { + "epoch": 49.82, + "learning_rate": 2.5097389947665806e-05, + "loss": 2.0097, + "step": 17212500 + }, + { + "epoch": 49.82, + "learning_rate": 2.5096666300018528e-05, + "loss": 2.008, + "step": 17213000 + }, + { + "epoch": 49.83, + "learning_rate": 2.509594265237125e-05, + "loss": 1.9742, + "step": 17213500 + }, + { + "epoch": 49.83, + "learning_rate": 2.5095219004723976e-05, + "loss": 2.0039, + "step": 17214000 + }, + { + "epoch": 49.83, + "learning_rate": 2.5094495357076698e-05, + "loss": 2.0155, + "step": 17214500 + }, + { + "epoch": 49.83, + "learning_rate": 2.509377170942942e-05, + "loss": 1.9983, + "step": 17215000 + }, + { + "epoch": 49.83, + "learning_rate": 2.509304950907744e-05, + "loss": 1.9882, + "step": 17215500 + }, + { + "epoch": 49.83, + "learning_rate": 2.509232586143016e-05, + "loss": 1.9893, + "step": 17216000 + }, + { + "epoch": 49.83, + "learning_rate": 2.5091602213782884e-05, + "loss": 1.9917, + "step": 17216500 + }, + { + "epoch": 49.84, + "learning_rate": 2.5090878566135606e-05, + "loss": 2.0152, + "step": 17217000 + }, + { + "epoch": 49.84, + "learning_rate": 2.5090156365783625e-05, + "loss": 2.0168, + "step": 17217500 + }, + { + "epoch": 49.84, + "learning_rate": 2.508943416543164e-05, + "loss": 2.0151, + "step": 17218000 + }, + { + "epoch": 49.84, + "learning_rate": 2.5088710517784363e-05, + "loss": 2.0269, + "step": 17218500 + }, + { + "epoch": 49.84, + "learning_rate": 2.508798831743238e-05, + "loss": 2.0095, + "step": 17219000 + }, + { + "epoch": 49.84, + "learning_rate": 2.5087264669785104e-05, + "loss": 2.0184, + "step": 17219500 + }, + { + "epoch": 49.84, + "learning_rate": 2.5086541022137833e-05, + "loss": 2.0028, + "step": 17220000 + }, + { + "epoch": 49.85, + "learning_rate": 2.5085817374490555e-05, + "loss": 2.0164, + "step": 17220500 + }, + { + "epoch": 49.85, + "learning_rate": 2.5085093726843277e-05, + "loss": 1.9868, + "step": 17221000 + }, + { + "epoch": 49.85, + "learning_rate": 2.5084371526491296e-05, + "loss": 2.0377, + "step": 17221500 + }, + { + "epoch": 49.85, + "learning_rate": 2.508364787884402e-05, + "loss": 1.9783, + "step": 17222000 + }, + { + "epoch": 49.85, + "learning_rate": 2.508292423119674e-05, + "loss": 2.0146, + "step": 17222500 + }, + { + "epoch": 49.85, + "learning_rate": 2.5082200583549463e-05, + "loss": 2.0298, + "step": 17223000 + }, + { + "epoch": 49.85, + "learning_rate": 2.508147693590219e-05, + "loss": 2.0146, + "step": 17223500 + }, + { + "epoch": 49.86, + "learning_rate": 2.508075328825491e-05, + "loss": 1.9954, + "step": 17224000 + }, + { + "epoch": 49.86, + "learning_rate": 2.5080029640607633e-05, + "loss": 2.0177, + "step": 17224500 + }, + { + "epoch": 49.86, + "learning_rate": 2.5079305992960355e-05, + "loss": 2.0136, + "step": 17225000 + }, + { + "epoch": 49.86, + "learning_rate": 2.5078582345313077e-05, + "loss": 2.0046, + "step": 17225500 + }, + { + "epoch": 49.86, + "learning_rate": 2.5077858697665803e-05, + "loss": 2.0043, + "step": 17226000 + }, + { + "epoch": 49.86, + "learning_rate": 2.5077135050018525e-05, + "loss": 2.0186, + "step": 17226500 + }, + { + "epoch": 49.87, + "learning_rate": 2.5076411402371248e-05, + "loss": 2.006, + "step": 17227000 + }, + { + "epoch": 49.87, + "learning_rate": 2.5075687754723977e-05, + "loss": 1.9879, + "step": 17227500 + }, + { + "epoch": 49.87, + "learning_rate": 2.50749641070767e-05, + "loss": 1.9947, + "step": 17228000 + }, + { + "epoch": 49.87, + "learning_rate": 2.5074241906724714e-05, + "loss": 1.9746, + "step": 17228500 + }, + { + "epoch": 49.87, + "learning_rate": 2.507351825907744e-05, + "loss": 2.0195, + "step": 17229000 + }, + { + "epoch": 49.87, + "learning_rate": 2.5072794611430162e-05, + "loss": 2.023, + "step": 17229500 + }, + { + "epoch": 49.87, + "learning_rate": 2.5072070963782884e-05, + "loss": 2.0226, + "step": 17230000 + }, + { + "epoch": 49.88, + "learning_rate": 2.5071347316135607e-05, + "loss": 2.0182, + "step": 17230500 + }, + { + "epoch": 49.88, + "learning_rate": 2.507062366848833e-05, + "loss": 2.0009, + "step": 17231000 + }, + { + "epoch": 49.88, + "learning_rate": 2.5069901468136348e-05, + "loss": 1.9983, + "step": 17231500 + }, + { + "epoch": 49.88, + "learning_rate": 2.506917782048907e-05, + "loss": 2.0023, + "step": 17232000 + }, + { + "epoch": 49.88, + "learning_rate": 2.5068454172841792e-05, + "loss": 2.0088, + "step": 17232500 + }, + { + "epoch": 49.88, + "learning_rate": 2.5067730525194515e-05, + "loss": 2.0243, + "step": 17233000 + }, + { + "epoch": 49.88, + "learning_rate": 2.506700687754724e-05, + "loss": 2.01, + "step": 17233500 + }, + { + "epoch": 49.89, + "learning_rate": 2.5066284677195256e-05, + "loss": 2.0227, + "step": 17234000 + }, + { + "epoch": 49.89, + "learning_rate": 2.5065561029547978e-05, + "loss": 2.0083, + "step": 17234500 + }, + { + "epoch": 49.89, + "learning_rate": 2.5064837381900707e-05, + "loss": 2.0073, + "step": 17235000 + }, + { + "epoch": 49.89, + "learning_rate": 2.506411373425343e-05, + "loss": 2.0123, + "step": 17235500 + }, + { + "epoch": 49.89, + "learning_rate": 2.5063390086606155e-05, + "loss": 1.985, + "step": 17236000 + }, + { + "epoch": 49.89, + "learning_rate": 2.5062666438958877e-05, + "loss": 2.0157, + "step": 17236500 + }, + { + "epoch": 49.89, + "learning_rate": 2.50619427913116e-05, + "loss": 2.0047, + "step": 17237000 + }, + { + "epoch": 49.9, + "learning_rate": 2.506121914366432e-05, + "loss": 2.0147, + "step": 17237500 + }, + { + "epoch": 49.9, + "learning_rate": 2.5060495496017044e-05, + "loss": 1.9979, + "step": 17238000 + }, + { + "epoch": 49.9, + "learning_rate": 2.5059771848369766e-05, + "loss": 2.0031, + "step": 17238500 + }, + { + "epoch": 49.9, + "learning_rate": 2.505904820072249e-05, + "loss": 1.9938, + "step": 17239000 + }, + { + "epoch": 49.9, + "learning_rate": 2.5058324553075214e-05, + "loss": 1.9909, + "step": 17239500 + }, + { + "epoch": 49.9, + "learning_rate": 2.505760235272323e-05, + "loss": 2.0003, + "step": 17240000 + }, + { + "epoch": 49.9, + "learning_rate": 2.5056878705075955e-05, + "loss": 2.0262, + "step": 17240500 + }, + { + "epoch": 49.91, + "learning_rate": 2.5056155057428677e-05, + "loss": 1.9928, + "step": 17241000 + }, + { + "epoch": 49.91, + "learning_rate": 2.50554314097814e-05, + "loss": 2.0057, + "step": 17241500 + }, + { + "epoch": 49.91, + "learning_rate": 2.505470776213413e-05, + "loss": 2.0238, + "step": 17242000 + }, + { + "epoch": 49.91, + "learning_rate": 2.505398411448685e-05, + "loss": 1.9817, + "step": 17242500 + }, + { + "epoch": 49.91, + "learning_rate": 2.5053260466839573e-05, + "loss": 2.0066, + "step": 17243000 + }, + { + "epoch": 49.91, + "learning_rate": 2.5052536819192295e-05, + "loss": 2.0363, + "step": 17243500 + }, + { + "epoch": 49.91, + "learning_rate": 2.5051814618840314e-05, + "loss": 2.0045, + "step": 17244000 + }, + { + "epoch": 49.92, + "learning_rate": 2.505109241848833e-05, + "loss": 2.0202, + "step": 17244500 + }, + { + "epoch": 49.92, + "learning_rate": 2.5050368770841055e-05, + "loss": 1.9989, + "step": 17245000 + }, + { + "epoch": 49.92, + "learning_rate": 2.5049645123193778e-05, + "loss": 1.9885, + "step": 17245500 + }, + { + "epoch": 49.92, + "learning_rate": 2.50489214755465e-05, + "loss": 2.0192, + "step": 17246000 + }, + { + "epoch": 49.92, + "learning_rate": 2.5048197827899222e-05, + "loss": 1.9959, + "step": 17246500 + }, + { + "epoch": 49.92, + "learning_rate": 2.5047474180251944e-05, + "loss": 1.9762, + "step": 17247000 + }, + { + "epoch": 49.92, + "learning_rate": 2.5046750532604667e-05, + "loss": 2.0121, + "step": 17247500 + }, + { + "epoch": 49.93, + "learning_rate": 2.5046028332252685e-05, + "loss": 1.9769, + "step": 17248000 + }, + { + "epoch": 49.93, + "learning_rate": 2.5045304684605408e-05, + "loss": 2.0101, + "step": 17248500 + }, + { + "epoch": 49.93, + "learning_rate": 2.504458103695813e-05, + "loss": 1.9803, + "step": 17249000 + }, + { + "epoch": 49.93, + "learning_rate": 2.504385738931086e-05, + "loss": 2.022, + "step": 17249500 + }, + { + "epoch": 49.93, + "learning_rate": 2.504313374166358e-05, + "loss": 1.9777, + "step": 17250000 + }, + { + "epoch": 49.93, + "learning_rate": 2.5042410094016307e-05, + "loss": 1.9609, + "step": 17250500 + }, + { + "epoch": 49.93, + "learning_rate": 2.504168644636903e-05, + "loss": 2.0106, + "step": 17251000 + }, + { + "epoch": 49.94, + "learning_rate": 2.5040964246017045e-05, + "loss": 1.9936, + "step": 17251500 + }, + { + "epoch": 49.94, + "learning_rate": 2.504024059836977e-05, + "loss": 1.996, + "step": 17252000 + }, + { + "epoch": 49.94, + "learning_rate": 2.5039516950722492e-05, + "loss": 2.0196, + "step": 17252500 + }, + { + "epoch": 49.94, + "learning_rate": 2.5038793303075215e-05, + "loss": 2.0026, + "step": 17253000 + }, + { + "epoch": 49.94, + "learning_rate": 2.503807110272323e-05, + "loss": 2.0257, + "step": 17253500 + }, + { + "epoch": 49.94, + "learning_rate": 2.5037347455075956e-05, + "loss": 1.9888, + "step": 17254000 + }, + { + "epoch": 49.94, + "learning_rate": 2.5036623807428678e-05, + "loss": 2.017, + "step": 17254500 + }, + { + "epoch": 49.95, + "learning_rate": 2.50359001597814e-05, + "loss": 2.0108, + "step": 17255000 + }, + { + "epoch": 49.95, + "learning_rate": 2.5035176512134123e-05, + "loss": 2.0257, + "step": 17255500 + }, + { + "epoch": 49.95, + "learning_rate": 2.5034452864486845e-05, + "loss": 1.9923, + "step": 17256000 + }, + { + "epoch": 49.95, + "learning_rate": 2.503372921683957e-05, + "loss": 2.015, + "step": 17256500 + }, + { + "epoch": 49.95, + "learning_rate": 2.5033005569192296e-05, + "loss": 1.9953, + "step": 17257000 + }, + { + "epoch": 49.95, + "learning_rate": 2.5032281921545018e-05, + "loss": 1.9949, + "step": 17257500 + }, + { + "epoch": 49.95, + "learning_rate": 2.5031559721193037e-05, + "loss": 1.9776, + "step": 17258000 + }, + { + "epoch": 49.96, + "learning_rate": 2.503083607354576e-05, + "loss": 2.0111, + "step": 17258500 + }, + { + "epoch": 49.96, + "learning_rate": 2.503011242589848e-05, + "loss": 1.9954, + "step": 17259000 + }, + { + "epoch": 49.96, + "learning_rate": 2.5029388778251207e-05, + "loss": 1.9916, + "step": 17259500 + }, + { + "epoch": 49.96, + "learning_rate": 2.502866802519452e-05, + "loss": 2.0306, + "step": 17260000 + }, + { + "epoch": 49.96, + "learning_rate": 2.5027944377547242e-05, + "loss": 2.0086, + "step": 17260500 + }, + { + "epoch": 49.96, + "learning_rate": 2.5027220729899964e-05, + "loss": 2.0263, + "step": 17261000 + }, + { + "epoch": 49.96, + "learning_rate": 2.5026497082252686e-05, + "loss": 2.004, + "step": 17261500 + }, + { + "epoch": 49.97, + "learning_rate": 2.502577343460541e-05, + "loss": 2.0082, + "step": 17262000 + }, + { + "epoch": 49.97, + "learning_rate": 2.502504978695813e-05, + "loss": 2.0068, + "step": 17262500 + }, + { + "epoch": 49.97, + "learning_rate": 2.5024326139310856e-05, + "loss": 2.0072, + "step": 17263000 + }, + { + "epoch": 49.97, + "learning_rate": 2.502360538625417e-05, + "loss": 2.0177, + "step": 17263500 + }, + { + "epoch": 49.97, + "learning_rate": 2.502288173860689e-05, + "loss": 1.9981, + "step": 17264000 + }, + { + "epoch": 49.97, + "learning_rate": 2.5022159538254906e-05, + "loss": 2.0084, + "step": 17264500 + }, + { + "epoch": 49.98, + "learning_rate": 2.5021435890607635e-05, + "loss": 1.9905, + "step": 17265000 + }, + { + "epoch": 49.98, + "learning_rate": 2.5020712242960358e-05, + "loss": 1.9823, + "step": 17265500 + }, + { + "epoch": 49.98, + "learning_rate": 2.5019988595313083e-05, + "loss": 1.9878, + "step": 17266000 + }, + { + "epoch": 49.98, + "learning_rate": 2.5019264947665805e-05, + "loss": 2.0191, + "step": 17266500 + }, + { + "epoch": 49.98, + "learning_rate": 2.5018541300018528e-05, + "loss": 2.0064, + "step": 17267000 + }, + { + "epoch": 49.98, + "learning_rate": 2.501781765237125e-05, + "loss": 2.0192, + "step": 17267500 + }, + { + "epoch": 49.98, + "learning_rate": 2.5017094004723972e-05, + "loss": 1.9878, + "step": 17268000 + }, + { + "epoch": 49.99, + "learning_rate": 2.5016370357076694e-05, + "loss": 1.999, + "step": 17268500 + }, + { + "epoch": 49.99, + "learning_rate": 2.501564670942942e-05, + "loss": 1.9902, + "step": 17269000 + }, + { + "epoch": 49.99, + "learning_rate": 2.5014923061782142e-05, + "loss": 2.0018, + "step": 17269500 + }, + { + "epoch": 49.99, + "learning_rate": 2.5014200861430158e-05, + "loss": 2.0427, + "step": 17270000 + }, + { + "epoch": 49.99, + "learning_rate": 2.5013477213782883e-05, + "loss": 2.0052, + "step": 17270500 + }, + { + "epoch": 49.99, + "learning_rate": 2.5012753566135606e-05, + "loss": 2.0067, + "step": 17271000 + }, + { + "epoch": 49.99, + "learning_rate": 2.5012029918488328e-05, + "loss": 2.0015, + "step": 17271500 + }, + { + "epoch": 50.0, + "learning_rate": 2.5011306270841057e-05, + "loss": 2.0068, + "step": 17272000 + }, + { + "epoch": 50.0, + "learning_rate": 2.501058262319378e-05, + "loss": 1.9937, + "step": 17272500 + }, + { + "epoch": 50.0, + "learning_rate": 2.50098589755465e-05, + "loss": 2.0069, + "step": 17273000 + }, + { + "epoch": 50.0, + "learning_rate": 2.5009135327899224e-05, + "loss": 1.9959, + "step": 17273500 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.6738095040996602, + "eval_accuracy_mlm": 0.6396376624402491, + "eval_accuracy_nsp": 0.8572044016077509, + "eval_loss": 2.1667933464050293, + "eval_runtime": 331.9451, + "eval_samples_per_second": 1314.633, + "eval_steps_per_second": 54.777, + "step": 17273600 } ], "max_steps": 34547200, "num_train_epochs": 100, - "total_flos": 1.893300081164393e+19, + "total_flos": 2.366578797409368e+19, "trial_name": null, "trial_params": null }