Add AlpacaEval base winrate
Browse files
app.py
CHANGED
@@ -64,9 +64,6 @@ def get_leaderboard_df(merge_values: bool = True):
|
|
64 |
# MATH reports qem
|
65 |
elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
|
66 |
value = data["results"]["all"]["qem"]
|
67 |
-
# Report length controlled winrate for AlpacaEval
|
68 |
-
elif task.lower() == "alpaca_eval":
|
69 |
-
value = data["results"][first_result_key]["length_controlled_winrate"] / 100.0
|
70 |
else:
|
71 |
first_metric_key = next(
|
72 |
iter(data["results"][first_result_key])
|
@@ -80,12 +77,18 @@ def get_leaderboard_df(merge_values: bool = True):
|
|
80 |
level = k.split("|")[1].split(":")[-1]
|
81 |
value = v["qem"]
|
82 |
df.loc[model_revision, f"{task}_{level}"] = value
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
else:
|
84 |
df.loc[model_revision, task] = value
|
85 |
|
86 |
# Put IFEval / BBH / AGIEval / AlpacaEval in first columns
|
87 |
-
alpaca_col = df.pop("Alpaca_eval")
|
88 |
-
df.insert(1, "Alpaca_eval", alpaca_col)
|
89 |
ifeval_col = df.pop("Ifeval")
|
90 |
df.insert(2, "Ifeval", ifeval_col)
|
91 |
bbh_col = df.pop("Bbh")
|
|
|
64 |
# MATH reports qem
|
65 |
elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
|
66 |
value = data["results"]["all"]["qem"]
|
|
|
|
|
|
|
67 |
else:
|
68 |
first_metric_key = next(
|
69 |
iter(data["results"][first_result_key])
|
|
|
77 |
level = k.split("|")[1].split(":")[-1]
|
78 |
value = v["qem"]
|
79 |
df.loc[model_revision, f"{task}_{level}"] = value
|
80 |
+
# For AlpacaEval we report both the base win rate and the length-controlled win rate
|
81 |
+
elif task.lower() == "alpaca_eval":
|
82 |
+
value = data["results"][first_result_key]["win_rate"]
|
83 |
+
df.loc[model_revision, "Alpaca_eval"] = value / 100.0
|
84 |
+
value = data["results"][first_result_key]["length_controlled_winrate"]
|
85 |
+
df.loc[model_revision, "Alpaca_eval_lc"] = value / 100.0
|
86 |
else:
|
87 |
df.loc[model_revision, task] = value
|
88 |
|
89 |
# Put IFEval / BBH / AGIEval / AlpacaEval in first columns
|
90 |
+
alpaca_col = df.pop("Alpaca_eval_lc")
|
91 |
+
df.insert(1, "Alpaca_eval_lc", alpaca_col)
|
92 |
ifeval_col = df.pop("Ifeval")
|
93 |
df.insert(2, "Ifeval", ifeval_col)
|
94 |
bbh_col = df.pop("Bbh")
|