albertvillanova committed
Commit 2b1d96b
1 Parent(s): 07db628

Plot with plotly

Files changed (2):
  1. requirements.txt +2 -0
  2. src/results.py +13 -10
requirements.txt ADDED
@@ -0,0 +1,2 @@
+plotly
+
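Note: plotly is added because the updated plotting code below selects it as the pandas plotting backend. A minimal sketch of that mechanism with made-up scores (plotly >= 4.8 registers itself as a pandas backend, so pandas can dispatch to it by name):

import pandas as pd

# Hypothetical scores: one column per model, one row per benchmark.
scores = pd.DataFrame(
    {"model-a": [0.61, 0.48], "model-b": [0.55, 0.52]},
    index=["BBH", "MuSR"],
)
# With backend="plotly", pandas returns a plotly.graph_objects.Figure
# instead of a matplotlib Axes; extra kwargs are forwarded to plotly express.
fig = scores.plot.bar(backend="plotly", barmode="group")
fig.show()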
src/results.py CHANGED
@@ -157,8 +157,11 @@ def plot_results(task, *dfs):
             and (col.endswith("acc,none") or col.endswith("acc_norm,none") or col.endswith("exact_match,none"))
         ]
     ]
+    tasks = {key: tupl[0] for key, tupl in TASKS.items()}
+    tasks["leaderboard_math"] = tasks["leaderboard_math_hard"]
+    subtasks = {tupl[1]: tupl[0] for tupl in constants.SUBTASKS.get(task, [])}
     if task == "All":
-        df = df[[col for col in df.columns if col.split(".")[1] in TASKS]]
+        df = df[[col for col in df.columns if col.split(".")[1] in tasks]]
         # - IFEval: Calculate average of both strict accuracies
         ifeval_mean = df[
             [
@@ -170,19 +173,19 @@ def plot_results(task, *dfs):
         loc = df.columns.get_loc("results.leaderboard_math_hard.exact_match,none")
         df.insert(loc - 1, "results.leaderboard_ifeval", ifeval_mean)
         # Rename
-        df = df.rename(columns=lambda col: TASKS[col.split(".")[1]][0])
+        df = df.rename(columns=lambda col: tasks[col.split(".")[1]])
     else:
         df = df[[col for col in df.columns if col.startswith(f"results.{task}")]]
-        tasks = {key: tupl[0] for key, tupl in TASKS.items()}
-        subtasks = {tupl[1]: tupl[0] for value in constants.SUBTASKS.values() for tupl in value}
-        subtasks = {**tasks, **subtasks}
         # - IFEval: Return 4 accuracies
         if task == "leaderboard_ifeval":
             df = df.rename(columns=lambda col: col.split(".")[2].removesuffix(",none"))
         else:
-            df = df.rename(columns=lambda col: subtasks[col.split(".")[1]])
-    ax = df.T.rename_axis(columns="Models").plot(kind="bar", ylabel="Scores", rot=45, figsize=(18, 6))
-    fig = ax.get_figure()
-    fig.autofmt_xdate(rotation=45)
-    fig.tight_layout()
+            df = df.rename(columns=lambda col: tasks.get(col.split(".")[1], subtasks.get(col.split(".")[1])))
+    fig = df.T.rename_axis(columns="Model").plot.bar(
+        backend="plotly",
+        barmode="group",
+        labels={"index": "Benchmark" if task == "All" else "Subtask", "value": "Score"},
+        color_discrete_sequence=["#FF9D00", "#32343D"],
+    )
+    fig.update_yaxes(range=[0, 1])
     return fig
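For context on the renaming above: TASKS and constants.SUBTASKS are defined elsewhere in the repo, so the shapes below are assumptions inferred from the comprehensions in this diff. The sketch shows why subtasks is now scoped to the current task and how a results column maps to a display name:

# Assumed shapes (not part of this diff): TASKS maps a task key to a tuple
# whose first element is the display name; SUBTASKS maps a task key to
# (display_name, subtask_key) tuples.
TASKS = {"leaderboard_bbh": ("BBH",), "leaderboard_math_hard": ("MATH",)}
SUBTASKS = {"leaderboard_bbh": [("Boolean Expressions", "leaderboard_bbh_boolean_expressions")]}

task = "leaderboard_bbh"
tasks = {key: tupl[0] for key, tupl in TASKS.items()}
tasks["leaderboard_math"] = tasks["leaderboard_math_hard"]
subtasks = {tupl[1]: tupl[0] for tupl in SUBTASKS.get(task, [])}

# A column key resolves through tasks first, then the task's own subtasks.
col = "results.leaderboard_bbh_boolean_expressions.acc_norm,none"
print(tasks.get(col.split(".")[1], subtasks.get(col.split(".")[1])))
# -> Boolean Expressions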
 
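End to end, the new block turns a models-by-benchmarks frame into a grouped plotly bar chart. A self-contained sketch with hypothetical data (the color pair #FF9D00 / #32343D is taken from the diff):

import pandas as pd

# Hypothetical scores: one row per model, one column per benchmark.
df = pd.DataFrame(
    {"BBH": [0.52, 0.47], "MATH": [0.21, 0.34]},
    index=["model-a", "model-b"],
)
# Transpose so benchmarks land on the x-axis and models become the bar
# groups; rename_axis(columns="Model") sets the legend title.
fig = df.T.rename_axis(columns="Model").plot.bar(
    backend="plotly",
    barmode="group",
    labels={"index": "Benchmark", "value": "Score"},
    color_discrete_sequence=["#FF9D00", "#32343D"],
)
fig.update_yaxes(range=[0, 1])  # scores are fractions of 1
fig.show()

Pinning the y-axis to [0, 1] keeps charts visually comparable across tasks instead of rescaling to each task's score range.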