{
  "results": {
    "anli_r1": {
      "acc": 0.332,
      "acc_stderr": 0.014899597242811476
    },
    "anli_r2": {
      "acc": 0.321,
      "acc_stderr": 0.014770821817934652
    },
    "anli_r3": {
      "acc": 0.3425,
      "acc_stderr": 0.013704669762934722
    },
    "cb": {
      "acc": 0.5,
      "acc_stderr": 0.06741998624632421,
      "f1": 0.3487553126897389
    },
    "copa": {
      "acc": 0.71,
      "acc_stderr": 0.04560480215720684
    },
    "hellaswag": {
      "acc": 0.4334793865763792,
      "acc_stderr": 0.004945424771611597,
      "acc_norm": 0.5631348336984664,
      "acc_norm_stderr": 0.004949842967331438
    },
    "rte": {
      "acc": 0.5667870036101083,
      "acc_stderr": 0.02982676408213828
    },
    "winogrande": {
      "acc": 0.5509076558800315,
      "acc_stderr": 0.013979459389140848
    },
    "storycloze_2016": {
      "acc": 0.6835916622127205,
      "acc_stderr": 0.01075478009794089
    },
    "boolq": {
      "acc": 0.6012232415902141,
      "acc_stderr": 0.008563973987729913
    },
    "arc_easy": {
      "acc": 0.5698653198653199,
      "acc_stderr": 0.010159130445178492,
      "acc_norm": 0.5429292929292929,
      "acc_norm_stderr": 0.01022189756425604
    },
    "arc_challenge": {
      "acc": 0.24744027303754265,
      "acc_stderr": 0.01261035266329267,
      "acc_norm": 0.2738907849829352,
      "acc_norm_stderr": 0.013032004972989501
    },
    "sciq": {
      "acc": 0.88,
      "acc_stderr": 0.010281328012747394,
      "acc_norm": 0.844,
      "acc_norm_stderr": 0.011480235006122361
    },
    "piqa": {
      "acc": 0.733949945593036,
      "acc_stderr": 0.010310039263352831,
      "acc_norm": 0.7421109902067464,
      "acc_norm_stderr": 0.010206956662056245
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}