lewtun HF staff committed on
Commit
cf8d412
·
1 Parent(s): f89c0b9
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -10,7 +10,7 @@ DESCRIPTION = f"""
10
  Evaluation of H4 and community models across a diverse range of benchmarks from [LightEval](https://github.com/huggingface/lighteval). All scores are reported as accuracy.
11
  """
12
 
13
- BENCHMARKS_TO_SKIP = ["math", "mini_math"]
14
 
15
 
16
  def get_leaderboard_df(agg: str = "max"):
@@ -100,6 +100,8 @@ def get_leaderboard_df(agg: str = "max"):
100
  "aimo_kaggle_tora_hard",
101
  "aimo_kaggle_tora_medium_extended",
102
  "aimo_kaggle_tora_hard_extended",
 
 
103
  ]:
104
  for k, v in data["results"].items():
105
  value = float(v["qem"]) / 100.0
@@ -122,7 +124,7 @@ def get_leaderboard_df(agg: str = "max"):
122
  # Trim AIMO column names
123
  df.columns = [c.replace("aimo_", "") for c in df.columns]
124
 
125
- df = df.reset_index().rename(columns={"index": "Model"}).round(2)
126
  # Strip off date from model name
127
  df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
128
 
 
10
  Evaluation of H4 and community models across a diverse range of benchmarks from [LightEval](https://github.com/huggingface/lighteval). All scores are reported as accuracy.
11
  """
12
 
13
+ BENCHMARKS_TO_SKIP = ["math", "mini_math", "aimo_math_integer_lvl4-5"]
14
 
15
 
16
  def get_leaderboard_df(agg: str = "max"):
 
100
  "aimo_kaggle_tora_hard",
101
  "aimo_kaggle_tora_medium_extended",
102
  "aimo_kaggle_tora_hard_extended",
103
+ "aimo_math_integer_lvl4",
104
+ "aimo_math_integer_lvl5",
105
  ]:
106
  for k, v in data["results"].items():
107
  value = float(v["qem"]) / 100.0
 
124
  # Trim AIMO column names
125
  df.columns = [c.replace("aimo_", "") for c in df.columns]
126
 
127
+ df = df.reset_index().rename(columns={"index": "Model"}).round(4)
128
  # Strip off date from model name
129
  df["Model"] = df["Model"].apply(lambda x: x.rsplit("_", 1)[0])
130