task,metric,value,err,version anli_r1,acc,0.35,0.015090650341444233,0 anli_r2,acc,0.321,0.014770821817934642,0 anli_r3,acc,0.3458333333333333,0.013736245342311014,0 arc_challenge,acc,0.26023890784982934,0.012821930225112566,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539427,0 arc_easy,acc,0.5778619528619529,0.010134620524592271,0 arc_easy,acc_norm,0.5404040404040404,0.010226230740889027,0 boolq,acc,0.6027522935779817,0.008558401855851161,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3401360544217687,,1 copa,acc,0.74,0.0440844002276808,0 hellaswag,acc,0.43248356901015733,0.004944080605048775,0 hellaswag,acc_norm,0.5641306512646883,0.004948567856373858,0 piqa,acc,0.7372143634385201,0.010269354068140767,0 piqa,acc_norm,0.7334058759521219,0.010316749863541365,0 rte,acc,0.5740072202166066,0.02976495674177765,0 sciq,acc,0.851,0.011266140684632168,0 sciq,acc_norm,0.827,0.01196721413755994,0 storycloze_2016,acc,0.6926777124532336,0.01066944508186666,0 winogrande,acc,0.5580110497237569,0.013957584079108989,0