{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.005340453938585, "global_step": 33000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 1.2135922330097088e-05, "loss": 2.9902, "step": 1000 }, { "epoch": 0.12, "eval_HasAns_exact": 58.41767881241565, "eval_HasAns_f1": 67.0415484792148, "eval_HasAns_total": 5928, "eval_NoAns_exact": 32.38015138772077, "eval_NoAns_f1": 32.38015138772077, "eval_NoAns_total": 5945, "eval_best_exact": 50.11370336056599, "eval_best_exact_thresh": 0.0, "eval_best_f1": 51.236780700905136, "eval_best_f1_thresh": 0.0, "eval_exact": 45.380274572559586, "eval_f1": 49.686035491012206, "eval_total": 11873, "step": 1000 }, { "epoch": 0.24, "learning_rate": 2.4271844660194176e-05, "loss": 1.5894, "step": 2000 }, { "epoch": 0.24, "eval_HasAns_exact": 69.28137651821862, "eval_HasAns_f1": 77.89405016908765, "eval_HasAns_total": 5928, "eval_NoAns_exact": 46.913372582001685, "eval_NoAns_f1": 46.913372582001685, "eval_NoAns_total": 5945, "eval_best_exact": 58.08978354249137, "eval_best_exact_thresh": 0.0, "eval_best_f1": 62.389954468319395, "eval_best_f1_thresh": 0.0, "eval_exact": 58.08136107133833, "eval_f1": 62.38153199716619, "eval_total": 11873, "step": 2000 }, { "epoch": 0.36, "learning_rate": 2.959094078454665e-05, "loss": 1.3761, "step": 3000 }, { "epoch": 0.36, "eval_HasAns_exact": 71.87921727395411, "eval_HasAns_f1": 80.58982724903632, "eval_HasAns_total": 5928, "eval_NoAns_exact": 51.42136248948697, "eval_NoAns_f1": 51.42136248948697, "eval_NoAns_total": 5945, "eval_best_exact": 61.65248884022572, "eval_best_exact_thresh": 0.0, "eval_best_f1": 66.00155781456188, "eval_best_f1_thresh": 0.0, "eval_exact": 61.63564389791965, "eval_f1": 65.98471287225554, "eval_total": 11873, "step": 3000 }, { "epoch": 0.49, "learning_rate": 2.8816207421945613e-05, "loss": 1.249, "step": 4000 }, { "epoch": 0.49, "eval_HasAns_exact": 68.47165991902834, "eval_HasAns_f1": 75.13575702386423, "eval_HasAns_total": 5928, "eval_NoAns_exact": 74.3313708999159, "eval_NoAns_f1": 74.3313708999159, "eval_NoAns_total": 5945, "eval_best_exact": 71.40571043544176, "eval_best_exact_thresh": 0.0, "eval_best_f1": 74.7329880937815, "eval_best_f1_thresh": 0.0, "eval_exact": 71.40571043544176, "eval_f1": 74.73298809378153, "eval_total": 11873, "step": 4000 }, { "epoch": 0.61, "learning_rate": 2.8041474059344576e-05, "loss": 1.1796, "step": 5000 }, { "epoch": 0.61, "eval_HasAns_exact": 67.62820512820512, "eval_HasAns_f1": 73.71252998774504, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.26745164003364, "eval_NoAns_f1": 78.26745164003364, "eval_NoAns_total": 5945, "eval_best_exact": 72.95544512760044, "eval_best_exact_thresh": 0.0, "eval_best_f1": 75.99325172806816, "eval_best_f1_thresh": 0.0, "eval_exact": 72.95544512760044, "eval_f1": 75.99325172806819, "eval_total": 11873, "step": 5000 }, { "epoch": 0.73, "learning_rate": 2.726674069674354e-05, "loss": 1.1097, "step": 6000 }, { "epoch": 0.73, "eval_HasAns_exact": 73.80229419703105, "eval_HasAns_f1": 80.14574039018072, "eval_HasAns_total": 5928, "eval_NoAns_exact": 70.21026072329688, "eval_NoAns_f1": 70.21026072329688, "eval_NoAns_total": 5945, "eval_best_exact": 72.01212835846037, "eval_best_exact_thresh": 0.0, "eval_best_f1": 75.17931011816671, "eval_best_f1_thresh": 0.0, "eval_exact": 72.00370588730733, "eval_f1": 75.17088764701359, "eval_total": 11873, "step": 6000 }, { "epoch": 0.85, "learning_rate": 2.64920073341425e-05, "loss": 1.0804, "step": 7000 }, { "epoch": 0.85, "eval_HasAns_exact": 75.60728744939271, "eval_HasAns_f1": 82.66989971492067, "eval_HasAns_total": 5928, "eval_NoAns_exact": 68.56181665264928, "eval_NoAns_f1": 68.56181665264928, "eval_NoAns_total": 5945, "eval_best_exact": 72.0879305988377, "eval_best_exact_thresh": 0.0, "eval_best_f1": 75.61418053651563, "eval_best_f1_thresh": 0.0, "eval_exact": 72.07950812768466, "eval_f1": 75.60575806536264, "eval_total": 11873, "step": 7000 }, { "epoch": 0.97, "learning_rate": 2.5717273971541462e-05, "loss": 1.0272, "step": 8000 }, { "epoch": 0.97, "eval_HasAns_exact": 72.85762483130904, "eval_HasAns_f1": 79.23824809473913, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.01345668629101, "eval_NoAns_f1": 76.01345668629101, "eval_NoAns_total": 5945, "eval_best_exact": 74.43780005053483, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.62354372994311, "eval_best_f1_thresh": 0.0, "eval_exact": 74.43780005053483, "eval_f1": 77.62354372994308, "eval_total": 11873, "step": 8000 }, { "epoch": 1.09, "learning_rate": 2.4942540608940426e-05, "loss": 0.8905, "step": 9000 }, { "epoch": 1.09, "eval_HasAns_exact": 76.417004048583, "eval_HasAns_f1": 83.82699462168138, "eval_HasAns_total": 5928, "eval_NoAns_exact": 66.27417998317914, "eval_NoAns_f1": 66.27417998317914, "eval_NoAns_total": 5945, "eval_best_exact": 71.3467531373705, "eval_best_exact_thresh": 0.0, "eval_best_f1": 75.04644353721282, "eval_best_f1_thresh": 0.0, "eval_exact": 71.33833066621747, "eval_f1": 75.03802106605983, "eval_total": 11873, "step": 9000 }, { "epoch": 1.21, "learning_rate": 2.4167807246339385e-05, "loss": 0.8348, "step": 10000 }, { "epoch": 1.21, "eval_HasAns_exact": 74.00472334682861, "eval_HasAns_f1": 80.47538325931156, "eval_HasAns_total": 5928, "eval_NoAns_exact": 75.55929352396973, "eval_NoAns_f1": 75.55929352396973, "eval_NoAns_total": 5945, "eval_best_exact": 74.78312136780931, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.01381891360221, "eval_best_f1_thresh": 0.0, "eval_exact": 74.78312136780931, "eval_f1": 78.01381891360226, "eval_total": 11873, "step": 10000 }, { "epoch": 1.34, "learning_rate": 2.3393073883738345e-05, "loss": 0.8345, "step": 11000 }, { "epoch": 1.34, "eval_HasAns_exact": 73.21187584345479, "eval_HasAns_f1": 79.61538989647931, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.91421362489487, "eval_NoAns_f1": 77.91421362489487, "eval_NoAns_total": 5945, "eval_best_exact": 75.56641118504169, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.76358387150074, "eval_best_f1_thresh": 0.0, "eval_exact": 75.56641118504169, "eval_f1": 78.76358387150083, "eval_total": 11873, "step": 11000 }, { "epoch": 1.46, "learning_rate": 2.2618340521137308e-05, "loss": 0.8372, "step": 12000 }, { "epoch": 1.46, "eval_HasAns_exact": 74.12280701754386, "eval_HasAns_f1": 80.2215790589714, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.6955424726661, "eval_NoAns_f1": 77.6955424726661, "eval_NoAns_total": 5945, "eval_best_exact": 75.91173250231618, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.95675235084504, "eval_best_f1_thresh": 0.0, "eval_exact": 75.91173250231618, "eval_f1": 78.95675235084498, "eval_total": 11873, "step": 12000 }, { "epoch": 1.58, "learning_rate": 2.184360715853627e-05, "loss": 0.8396, "step": 13000 }, { "epoch": 1.58, "eval_HasAns_exact": 77.74966261808368, "eval_HasAns_f1": 84.34526051529392, "eval_HasAns_total": 5928, "eval_NoAns_exact": 70.68124474348191, "eval_NoAns_f1": 70.68124474348191, "eval_NoAns_total": 5945, "eval_best_exact": 74.21881580055589, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.51189289435379, "eval_best_f1_thresh": 0.0, "eval_exact": 74.21039332940285, "eval_f1": 77.50347042320072, "eval_total": 11873, "step": 13000 }, { "epoch": 1.7, "learning_rate": 2.1068873795935234e-05, "loss": 0.8164, "step": 14000 }, { "epoch": 1.7, "eval_HasAns_exact": 75.65789473684211, "eval_HasAns_f1": 82.25660585284308, "eval_HasAns_total": 5928, "eval_NoAns_exact": 73.59125315391086, "eval_NoAns_f1": 73.59125315391086, "eval_NoAns_total": 5945, "eval_best_exact": 74.62309441590162, "eval_best_exact_thresh": 0.0, "eval_best_f1": 77.9177258903103, "eval_best_f1_thresh": 0.0, "eval_exact": 74.62309441590162, "eval_f1": 77.91772589031022, "eval_total": 11873, "step": 14000 }, { "epoch": 1.82, "learning_rate": 2.0294140433334194e-05, "loss": 0.8052, "step": 15000 }, { "epoch": 1.82, "eval_HasAns_exact": 78.71120107962213, "eval_HasAns_f1": 85.01212257884671, "eval_HasAns_total": 5928, "eval_NoAns_exact": 68.78048780487805, "eval_NoAns_f1": 68.78048780487805, "eval_NoAns_total": 5945, "eval_best_exact": 73.74715741598585, "eval_best_exact_thresh": 0.0, "eval_best_f1": 76.89310727258507, "eval_best_f1_thresh": 0.0, "eval_exact": 73.73873494483281, "eval_f1": 76.88468480143192, "eval_total": 11873, "step": 15000 }, { "epoch": 1.94, "learning_rate": 1.9519407070733157e-05, "loss": 0.8066, "step": 16000 }, { "epoch": 1.94, "eval_HasAns_exact": 74.69635627530364, "eval_HasAns_f1": 80.71107846446266, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.89739276703112, "eval_NoAns_f1": 77.89739276703112, "eval_NoAns_total": 5945, "eval_best_exact": 76.29916617535585, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.30222126988423, "eval_best_f1_thresh": 0.0, "eval_exact": 76.29916617535585, "eval_f1": 79.30222126988414, "eval_total": 11873, "step": 16000 }, { "epoch": 2.06, "learning_rate": 1.874467370813212e-05, "loss": 0.7057, "step": 17000 }, { "epoch": 2.06, "eval_HasAns_exact": 77.47975708502024, "eval_HasAns_f1": 84.97990004810745, "eval_HasAns_total": 5928, "eval_NoAns_exact": 68.71320437342304, "eval_NoAns_f1": 68.71320437342304, "eval_NoAns_total": 5945, "eval_best_exact": 73.09020466604902, "eval_best_exact_thresh": 0.0, "eval_best_f1": 76.8349067198839, "eval_best_f1_thresh": 0.0, "eval_exact": 73.09020466604902, "eval_f1": 76.83490671988388, "eval_total": 11873, "step": 17000 }, { "epoch": 2.18, "learning_rate": 1.796994034553108e-05, "loss": 0.6376, "step": 18000 }, { "epoch": 2.18, "eval_HasAns_exact": 74.25775978407557, "eval_HasAns_f1": 81.15876336598458, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.37594617325483, "eval_NoAns_f1": 77.37594617325483, "eval_NoAns_total": 5945, "eval_best_exact": 75.81908531963278, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.26464661278166, "eval_best_f1_thresh": 0.0, "eval_exact": 75.81908531963278, "eval_f1": 79.26464661278159, "eval_total": 11873, "step": 18000 }, { "epoch": 2.31, "learning_rate": 1.719520698293004e-05, "loss": 0.6315, "step": 19000 }, { "epoch": 2.31, "eval_HasAns_exact": 73.3468286099865, "eval_HasAns_f1": 79.65372011100962, "eval_HasAns_total": 5928, "eval_NoAns_exact": 80.43734230445753, "eval_NoAns_f1": 80.43734230445753, "eval_NoAns_total": 5945, "eval_best_exact": 76.89716162722142, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.04609221073564, "eval_best_f1_thresh": 0.0, "eval_exact": 76.89716162722142, "eval_f1": 80.0460922107357, "eval_total": 11873, "step": 19000 }, { "epoch": 2.43, "learning_rate": 1.6420473620329003e-05, "loss": 0.6323, "step": 20000 }, { "epoch": 2.43, "eval_HasAns_exact": 75.21929824561404, "eval_HasAns_f1": 81.79839130722176, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.85449957947856, "eval_NoAns_f1": 76.85449957947856, "eval_NoAns_total": 5945, "eval_best_exact": 76.03806956961172, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.32290606158605, "eval_best_f1_thresh": 0.0, "eval_exact": 76.03806956961172, "eval_f1": 79.322906061586, "eval_total": 11873, "step": 20000 }, { "epoch": 2.55, "learning_rate": 1.5645740257727966e-05, "loss": 0.6125, "step": 21000 }, { "epoch": 2.55, "eval_HasAns_exact": 75.82658569500674, "eval_HasAns_f1": 82.54925380201965, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.36669470142978, "eval_NoAns_f1": 76.36669470142978, "eval_NoAns_total": 5945, "eval_best_exact": 76.09702686768298, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.45354809554222, "eval_best_f1_thresh": 0.0, "eval_exact": 76.09702686768298, "eval_f1": 79.45354809554216, "eval_total": 11873, "step": 21000 }, { "epoch": 2.67, "learning_rate": 1.4871006895126928e-05, "loss": 0.6095, "step": 22000 }, { "epoch": 2.67, "eval_HasAns_exact": 77.36167341430499, "eval_HasAns_f1": 84.06251850374095, "eval_HasAns_total": 5928, "eval_NoAns_exact": 73.92767031118586, "eval_NoAns_f1": 73.92767031118586, "eval_NoAns_total": 5945, "eval_best_exact": 75.65063589657206, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.99626123896034, "eval_best_f1_thresh": 0.0, "eval_exact": 75.64221342541902, "eval_f1": 78.98783876780718, "eval_total": 11873, "step": 22000 }, { "epoch": 2.79, "learning_rate": 1.4096273532525889e-05, "loss": 0.5988, "step": 23000 }, { "epoch": 2.79, "eval_HasAns_exact": 77.17611336032388, "eval_HasAns_f1": 83.48725009021588, "eval_HasAns_total": 5928, "eval_NoAns_exact": 74.95374264087468, "eval_NoAns_f1": 74.95374264087468, "eval_NoAns_total": 5945, "eval_best_exact": 76.07175945422387, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.22280961297066, "eval_best_f1_thresh": 0.0, "eval_exact": 76.06333698307083, "eval_f1": 79.21438714181748, "eval_total": 11873, "step": 23000 }, { "epoch": 2.91, "learning_rate": 1.3321540169924852e-05, "loss": 0.5968, "step": 24000 }, { "epoch": 2.91, "eval_HasAns_exact": 74.13967611336032, "eval_HasAns_f1": 80.3794525038334, "eval_HasAns_total": 5928, "eval_NoAns_exact": 81.22792262405383, "eval_NoAns_f1": 81.22792262405383, "eval_NoAns_total": 5945, "eval_best_exact": 77.68887391560683, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.80429499222815, "eval_best_f1_thresh": 0.0, "eval_exact": 77.68887391560683, "eval_f1": 80.80429499222812, "eval_total": 11873, "step": 24000 }, { "epoch": 3.03, "learning_rate": 1.2546806807323812e-05, "loss": 0.5722, "step": 25000 }, { "epoch": 3.03, "eval_HasAns_exact": 75.2867746288799, "eval_HasAns_f1": 81.508728103177, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.30866274179984, "eval_NoAns_f1": 77.30866274179984, "eval_NoAns_total": 5945, "eval_best_exact": 76.29916617535585, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.40568855349397, "eval_best_f1_thresh": 0.0, "eval_exact": 76.29916617535585, "eval_f1": 79.4056885534939, "eval_total": 11873, "step": 25000 }, { "epoch": 3.16, "learning_rate": 1.1772073444722775e-05, "loss": 0.4844, "step": 26000 }, { "epoch": 3.16, "eval_HasAns_exact": 76.78812415654521, "eval_HasAns_f1": 84.07498836042032, "eval_HasAns_total": 5928, "eval_NoAns_exact": 73.10344827586206, "eval_NoAns_f1": 73.10344827586206, "eval_NoAns_total": 5945, "eval_best_exact": 74.95157079087004, "eval_best_exact_thresh": 0.0, "eval_best_f1": 78.58978615350551, "eval_best_f1_thresh": 0.0, "eval_exact": 74.94314831971701, "eval_f1": 78.58136368235249, "eval_total": 11873, "step": 26000 }, { "epoch": 3.28, "learning_rate": 1.0997340082121736e-05, "loss": 0.4664, "step": 27000 }, { "epoch": 3.28, "eval_HasAns_exact": 76.40013495276654, "eval_HasAns_f1": 83.06591024545988, "eval_HasAns_total": 5928, "eval_NoAns_exact": 78.04878048780488, "eval_NoAns_f1": 78.04878048780488, "eval_NoAns_total": 5945, "eval_best_exact": 77.23406047334288, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.56217602417964, "eval_best_f1_thresh": 0.0, "eval_exact": 77.22563800218984, "eval_f1": 80.55375355302654, "eval_total": 11873, "step": 27000 }, { "epoch": 3.4, "learning_rate": 1.02226067195207e-05, "loss": 0.4914, "step": 28000 }, { "epoch": 3.4, "eval_HasAns_exact": 76.38326585695006, "eval_HasAns_f1": 83.19767154572087, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.01345668629101, "eval_NoAns_f1": 76.01345668629101, "eval_NoAns_total": 5945, "eval_best_exact": 76.20651899267246, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.60884333555407, "eval_best_f1_thresh": 0.0, "eval_exact": 76.19809652151942, "eval_f1": 79.60042086440092, "eval_total": 11873, "step": 28000 }, { "epoch": 3.52, "learning_rate": 9.44787335691966e-06, "loss": 0.4696, "step": 29000 }, { "epoch": 3.52, "eval_HasAns_exact": 74.91565452091768, "eval_HasAns_f1": 81.5931788001518, "eval_HasAns_total": 5928, "eval_NoAns_exact": 79.37762825904122, "eval_NoAns_f1": 79.37762825904122, "eval_NoAns_total": 5945, "eval_best_exact": 77.14983576181251, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.48381739470226, "eval_best_f1_thresh": 0.0, "eval_exact": 77.14983576181251, "eval_f1": 80.48381739470223, "eval_total": 11873, "step": 29000 }, { "epoch": 3.64, "learning_rate": 8.673139994318622e-06, "loss": 0.483, "step": 30000 }, { "epoch": 3.64, "eval_HasAns_exact": 76.6025641025641, "eval_HasAns_f1": 83.549417980174, "eval_HasAns_total": 5928, "eval_NoAns_exact": 76.13120269133726, "eval_NoAns_f1": 76.13120269133726, "eval_NoAns_total": 5945, "eval_best_exact": 76.36654594458014, "eval_best_exact_thresh": 0.0, "eval_best_f1": 79.83499956089204, "eval_best_f1_thresh": 0.0, "eval_exact": 76.36654594458014, "eval_f1": 79.83499956089204, "eval_total": 11873, "step": 30000 }, { "epoch": 3.76, "learning_rate": 7.898406631717584e-06, "loss": 0.4602, "step": 31000 }, { "epoch": 3.76, "eval_HasAns_exact": 73.61673414304994, "eval_HasAns_f1": 79.59590506841275, "eval_HasAns_total": 5928, "eval_NoAns_exact": 81.69890664423886, "eval_NoAns_f1": 81.69890664423886, "eval_NoAns_total": 5945, "eval_best_exact": 77.66360650214773, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.64891141628488, "eval_best_f1_thresh": 0.0, "eval_exact": 77.66360650214773, "eval_f1": 80.64891141628489, "eval_total": 11873, "step": 31000 }, { "epoch": 3.88, "learning_rate": 7.123673269116546e-06, "loss": 0.4706, "step": 32000 }, { "epoch": 3.88, "eval_HasAns_exact": 76.1302294197031, "eval_HasAns_f1": 83.02595617673859, "eval_HasAns_total": 5928, "eval_NoAns_exact": 77.14045416316232, "eval_NoAns_f1": 77.14045416316232, "eval_NoAns_total": 5945, "eval_best_exact": 76.63606502147731, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.07899167992126, "eval_best_f1_thresh": 0.0, "eval_exact": 76.63606502147731, "eval_f1": 80.07899167992143, "eval_total": 11873, "step": 32000 }, { "epoch": 4.01, "learning_rate": 6.348939906515508e-06, "loss": 0.4633, "step": 33000 }, { "epoch": 4.01, "eval_HasAns_exact": 75.38798920377867, "eval_HasAns_f1": 82.20772908314846, "eval_HasAns_total": 5928, "eval_NoAns_exact": 79.42809083263246, "eval_NoAns_f1": 79.42809083263246, "eval_NoAns_total": 5945, "eval_best_exact": 77.41935483870968, "eval_best_exact_thresh": 0.0, "eval_best_f1": 80.82434245809002, "eval_best_f1_thresh": 0.0, "eval_exact": 77.41093236755664, "eval_f1": 80.81591998693705, "eval_total": 11873, "step": 33000 } ], "max_steps": 41195, "num_train_epochs": 5, "total_flos": 1.2072877510811443e+17, "trial_name": null, "trial_params": null }