lm1-2b8-55b-c4-repetitions
/
evaluation_small
/2b855b9bc4-results_lm-eval_global_step52452_2022-12-23-23-13-03.json
{ | |
"results": { | |
"copa": { | |
"acc": 0.72, | |
"acc_stderr": 0.04512608598542127 | |
}, | |
"boolq": { | |
"acc": 0.5963302752293578, | |
"acc_stderr": 0.008581220435616816 | |
}, | |
"hellaswag": { | |
"acc": 0.4334793865763792, | |
"acc_stderr": 0.004945424771611596, | |
"acc_norm": 0.560744871539534, | |
"acc_norm_stderr": 0.0049528205388318985 | |
}, | |
"arc_challenge": { | |
"acc": 0.25, | |
"acc_stderr": 0.012653835621466646, | |
"acc_norm": 0.28071672354948807, | |
"acc_norm_stderr": 0.013131238126975578 | |
}, | |
"arc_easy": { | |
"acc": 0.5458754208754208, | |
"acc_stderr": 0.010216507710244106, | |
"acc_norm": 0.49074074074074076, | |
"acc_norm_stderr": 0.010258024147860673 | |
}, | |
"sciq": { | |
"acc": 0.814, | |
"acc_stderr": 0.012310790208412803, | |
"acc_norm": 0.711, | |
"acc_norm_stderr": 0.014341711358296177 | |
}, | |
"winogrande": { | |
"acc": 0.5461720599842147, | |
"acc_stderr": 0.013992441563707068 | |
}, | |
"piqa": { | |
"acc": 0.7399347116430903, | |
"acc_stderr": 0.0102348932490613, | |
"acc_norm": 0.7426550598476604, | |
"acc_norm_stderr": 0.01019992106479251 | |
}, | |
"rte": { | |
"acc": 0.5487364620938628, | |
"acc_stderr": 0.029953149241808943 | |
} | |
}, | |
"versions": { | |
"copa": 0, | |
"boolq": 1, | |
"hellaswag": 0, | |
"arc_challenge": 0, | |
"arc_easy": 0, | |
"sciq": 0, | |
"winogrande": 0, | |
"piqa": 0, | |
"rte": 0 | |
} | |
} |