task,metric,value,err,version anli_r1,acc,0.323,0.014794927843348633,0 anli_r2,acc,0.357,0.015158521721486769,0 anli_r3,acc,0.34833333333333333,0.013759437498874079,0 arc_challenge,acc,0.2738907849829352,0.013032004972989505,0 arc_challenge,acc_norm,0.28071672354948807,0.013131238126975576,0 arc_easy,acc,0.5921717171717171,0.010083950240041216,0 arc_easy,acc_norm,0.5711279461279462,0.010155440652900152,0 boolq,acc,0.5941896024464832,0.008588486726385772,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.3162578162578163,,1 copa,acc,0.81,0.03942772444036622,0 hellaswag,acc,0.4329814777932683,0.004944755230598382,0 hellaswag,acc_norm,0.5656243776140211,0.004946617138983511,0 piqa,acc,0.735582154515778,0.010289787244767168,0 piqa,acc_norm,0.7334058759521219,0.010316749863541365,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.872,0.010570133761108665,0 sciq,acc_norm,0.829,0.011912216456264613,0 storycloze_2016,acc,0.6900053447354356,0.010695042806212553,0 winogrande,acc,0.5461720599842147,0.013992441563707067,0