task,metric,value,err,version anli_r1,acc,0.319,0.014746404865473468,0 anli_r2,acc,0.347,0.01506047203170662,0 anli_r3,acc,0.3333333333333333,0.013613950010225594,0 arc_challenge,acc,0.2568259385665529,0.012766923794116801,0 arc_challenge,acc_norm,0.2977815699658703,0.013363080107244489,0 arc_easy,acc,0.5883838383838383,0.01009821864671491,0 arc_easy,acc_norm,0.563973063973064,0.010175459582759738,0 boolq,acc,0.6045871559633027,0.008551600109082895,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.3536644846617893,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.4311890061740689,0.0049423027680021055,0 hellaswag,acc_norm,0.5632344154550887,0.004949716368890495,0 piqa,acc,0.7442872687704026,0.010178690109459862,0 piqa,acc_norm,0.7524483133841132,0.010069703966857116,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.876,0.010427498872343961,0 sciq,acc_norm,0.855,0.011139977517890132,0 storycloze_2016,acc,0.6996258685195083,0.010600915927985028,0 winogrande,acc,0.5461720599842147,0.013992441563707068,0