jihoo-kim committed on
Commit
1b269d7
1 Parent(s): e6cfe9b

fix read_evals

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +2 -3
src/leaderboard/read_evals.py CHANGED
@@ -100,15 +100,14 @@ class EvalResult:
100
  ko_ifeval = data["results"]["ko_ifeval"]
101
  accs = np.mean([ko_ifeval["prompt_level_strict_acc,none"], ko_ifeval["inst_level_strict_acc,none"]])
102
  mean_acc = np.mean(accs) * 100.0
103
- results[task.benchmark] = mean_acc
104
-
105
  if task.benchmark in ["ko_winogrande", "ko_gsm8k", "ko_eqbench", "kornat_common", "kornat_social", "kornat_harmless", "kornat_helpful", "ko_gpqa_diamond_zeroshot"]:
106
  accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
107
  if accs.size == 0 or any([acc is None for acc in accs]):
108
  continue
109
-
110
  if task.benchmark not in ["ko_eqbench"]:
111
  mean_acc = accs[0] * 100.0
 
 
112
  results[task.benchmark] = mean_acc
113
 
114
  return self(
 
100
  ko_ifeval = data["results"]["ko_ifeval"]
101
  accs = np.mean([ko_ifeval["prompt_level_strict_acc,none"], ko_ifeval["inst_level_strict_acc,none"]])
102
  mean_acc = np.mean(accs) * 100.0
 
 
103
  if task.benchmark in ["ko_winogrande", "ko_gsm8k", "ko_eqbench", "kornat_common", "kornat_social", "kornat_harmless", "kornat_helpful", "ko_gpqa_diamond_zeroshot"]:
104
  accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
105
  if accs.size == 0 or any([acc is None for acc in accs]):
106
  continue
 
107
  if task.benchmark not in ["ko_eqbench"]:
108
  mean_acc = accs[0] * 100.0
109
+ else:
110
+ mean_acc = accs[0]
111
  results[task.benchmark] = mean_acc
112
 
113
  return self(