|
task,metric,value,err,version
|
|
anli_r1,acc,0.359,0.015177264224798592,0
|
|
anli_r2,acc,0.349,0.0150806639915631,0
|
|
anli_r3,acc,0.3383333333333333,0.013664144006618275,0
|
|
arc_challenge,acc,0.2645051194539249,0.012889272949313366,0
|
|
arc_challenge,acc_norm,0.302901023890785,0.013428241573185349,0
|
|
arc_easy,acc,0.5968013468013468,0.01006566857679479,0
|
|
arc_easy,acc_norm,0.5585016835016835,0.010189314382749936,0
|
|
boolq,acc,0.6039755351681957,0.008553881336813413,1
|
|
cb,acc,0.44642857142857145,0.067031892279424,1
|
|
cb,f1,0.3171262699564586,,1
|
|
copa,acc,0.8,0.040201512610368445,0
|
|
hellaswag,acc,0.42869946225851424,0.004938787067611805,0
|
|
hellaswag,acc_norm,0.5677155945030871,0.004943809330692693,0
|
|
piqa,acc,0.7388465723612623,0.010248738649935587,0
|
|
piqa,acc_norm,0.7393906420021763,0.010241826155811632,0
|
|
rte,acc,0.5090252707581228,0.030091559826331334,0
|
|
sciq,acc,0.889,0.009938701010583726,0
|
|
sciq,acc_norm,0.87,0.01064016979249935,0
|
|
storycloze_2016,acc,0.6969535008017104,0.010627613073376717,0
|
|
winogrande,acc,0.5627466456195738,0.013941393310695918,0
|
|
|