|
task,metric,value,err,version
|
|
anli_r1,acc,0.36,0.015186527932040117,0
|
|
anli_r2,acc,0.352,0.015110404505648673,0
|
|
anli_r3,acc,0.35,0.013774667009018552,0
|
|
arc_challenge,acc,0.2636518771331058,0.012875929151297056,0
|
|
arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0
|
|
arc_easy,acc,0.5972222222222222,0.010063960494989161,0
|
|
arc_easy,acc_norm,0.5702861952861953,0.010157908005763676,0
|
|
boolq,acc,0.5951070336391437,0.008585393347962307,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.3338164251207729,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4314877514439355,0.004942716091996078,0
|
|
hellaswag,acc_norm,0.5659231228838877,0.004946221512145289,0
|
|
piqa,acc,0.7301414581066377,0.010356595421852209,0
|
|
piqa,acc_norm,0.7377584330794341,0.010262502565172443,0
|
|
rte,acc,0.48375451263537905,0.030080573208738064,0
|
|
sciq,acc,0.886,0.010055103435823328,0
|
|
sciq,acc_norm,0.86,0.010978183844357807,0
|
|
storycloze_2016,acc,0.6916087653661144,0.010679734445487801,0
|
|
winogrande,acc,0.569060773480663,0.013917796623335964,0
|
|
|