task,metric,value,err,version anli_r1,acc,0.36,0.015186527932040117,0 anli_r2,acc,0.352,0.015110404505648673,0 anli_r3,acc,0.35,0.013774667009018552,0 arc_challenge,acc,0.2636518771331058,0.012875929151297056,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5972222222222222,0.010063960494989161,0 arc_easy,acc_norm,0.5702861952861953,0.010157908005763676,0 boolq,acc,0.5951070336391437,0.008585393347962307,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3338164251207729,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4314877514439355,0.004942716091996078,0 hellaswag,acc_norm,0.5659231228838877,0.004946221512145289,0 piqa,acc,0.7301414581066377,0.010356595421852209,0 piqa,acc_norm,0.7377584330794341,0.010262502565172443,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.886,0.010055103435823328,0 sciq,acc_norm,0.86,0.010978183844357807,0 storycloze_2016,acc,0.6916087653661144,0.010679734445487801,0 winogrande,acc,0.569060773480663,0.013917796623335964,0