task,metric,value,err,version anli_r1,acc,0.354,0.015129868238451773,0 anli_r2,acc,0.334,0.014922019523732965,0 anli_r3,acc,0.3525,0.013797164918918355,0 arc_challenge,acc,0.24744027303754265,0.012610352663292673,0 arc_challenge,acc_norm,0.27559726962457337,0.013057169655761838,0 arc_easy,acc,0.5867003367003367,0.010104361780747513,0 arc_easy,acc_norm,0.5774410774410774,0.010135978222981075,0 boolq,acc,0.6128440366972477,0.008519429207594416,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.34887334887334887,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4297948615813583,0.004940349676769321,0 hellaswag,acc_norm,0.5647281418044214,0.004947793051042662,0 piqa,acc,0.733949945593036,0.010310039263352831,0 piqa,acc_norm,0.7421109902067464,0.010206956662056232,0 rte,acc,0.5523465703971119,0.029931070362939526,0 sciq,acc,0.901,0.009449248027662732,0 sciq,acc_norm,0.912,0.008963053962592085,0 storycloze_2016,acc,0.6905398182789952,0.010689956745189072,0 winogrande,acc,0.5611681136543015,0.013946933444507032,0