lewtun HF staff committed on
Commit
b966f9c
·
1 Parent(s): dbe56f3

Add AlpacaEval base winrate

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -64,9 +64,6 @@ def get_leaderboard_df(merge_values: bool = True):
64
  # MATH reports qem
65
  elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
66
  value = data["results"]["all"]["qem"]
67
- # Report length controlled winrate for AlpacaEval
68
- elif task.lower() == "alpaca_eval":
69
- value = data["results"][first_result_key]["length_controlled_winrate"] / 100.0
70
  else:
71
  first_metric_key = next(
72
  iter(data["results"][first_result_key])
@@ -80,12 +77,18 @@ def get_leaderboard_df(merge_values: bool = True):
80
  level = k.split("|")[1].split(":")[-1]
81
  value = v["qem"]
82
  df.loc[model_revision, f"{task}_{level}"] = value
 
 
 
 
 
 
83
  else:
84
  df.loc[model_revision, task] = value
85
 
86
  # Put IFEval / BBH / AGIEval / AlpacaEval in first columns
87
- alpaca_col = df.pop("Alpaca_eval")
88
- df.insert(1, "Alpaca_eval", alpaca_col)
89
  ifeval_col = df.pop("Ifeval")
90
  df.insert(2, "Ifeval", ifeval_col)
91
  bbh_col = df.pop("Bbh")
 
64
  # MATH reports qem
65
  elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
66
  value = data["results"]["all"]["qem"]
 
 
 
67
  else:
68
  first_metric_key = next(
69
  iter(data["results"][first_result_key])
 
77
  level = k.split("|")[1].split(":")[-1]
78
  value = v["qem"]
79
  df.loc[model_revision, f"{task}_{level}"] = value
80
+ # For AlpacaEval we report both the base winrate and the length-controlled one
81
+ elif task.lower() == "alpaca_eval":
82
+ value = data["results"][first_result_key]["win_rate"]
83
+ df.loc[model_revision, "Alpaca_eval"] = value / 100.0
84
+ value = data["results"][first_result_key]["length_controlled_winrate"]
85
+ df.loc[model_revision, "Alpaca_eval_lc"] = value / 100.0
86
  else:
87
  df.loc[model_revision, task] = value
88
 
89
  # Put IFEval / BBH / AGIEval / AlpacaEval in first columns
90
+ alpaca_col = df.pop("Alpaca_eval_lc")
91
+ df.insert(1, "Alpaca_eval_lc", alpaca_col)
92
  ifeval_col = df.pop("Ifeval")
93
  df.insert(2, "Ifeval", ifeval_col)
94
  bbh_col = df.pop("Bbh")