task,metric,value,err,version arc_challenge,acc,0.27303754266211605,0.013019332762635734,0 arc_challenge,acc_norm,0.2815699658703072,0.013143376735009024,0 arc_easy,acc,0.5597643097643098,0.010186228624515651,0 arc_easy,acc_norm,0.4978956228956229,0.010259692651537049,0 boolq,acc,0.5892966360856269,0.008604460608471413,1 copa,acc,0.74,0.04408440022768077,0 hellaswag,acc,0.43616809400517825,0.004948952519517524,0 hellaswag,acc_norm,0.563931487751444,0.004948824501355473,0 piqa,acc,0.7366702937976061,0.010276185322196764,0 piqa,acc_norm,0.7383025027203483,0.010255630772708227,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.802,0.012607733934175315,0 sciq,acc_norm,0.713,0.014312087053809961,0 winogrande,acc,0.5485398579321231,0.01398611030101776,0