core_leaderboard

Running

benediktstroebl committed on Aug 20, 2024

Commit

9250161

1 Parent(s): f5fc72d

Added sorting to heatmap

Files changed (3) hide show

app.py CHANGED Viewed

@@ -224,8 +224,6 @@ with gr.Blocks() as demo:
         with gr.Tab("USACO"):
             with gr.Row():
                 with gr.Column(scale=2):
-                    print(parse_json_files(os.path.join(abs_path, "evals_live"), 'usaco').columns)
-                    print(parse_json_files(os.path.join(abs_path, "evals_live"), 'mlagentbench').columns)
                     Leaderboard(
                         value=parse_json_files(os.path.join(abs_path, "evals_live"), 'usaco'),
                         select_columns=SelectColumns(

         with gr.Tab("USACO"):
             with gr.Row():
                 with gr.Column(scale=2):
                     Leaderboard(
                         value=parse_json_files(os.path.join(abs_path, "evals_live"), 'usaco'),
                         select_columns=SelectColumns(

utils/db.py CHANGED Viewed

@@ -63,6 +63,7 @@ class TracePreprocessor:
             ''')
     def preprocess_traces(self, processed_dir="evals_live"):
         processed_dir = Path(processed_dir)
         for file in processed_dir.glob('*.json'):
             with open(file, 'r') as f:

             ''')
     def preprocess_traces(self, processed_dir="evals_live"):
+        self.create_tables()
         processed_dir = Path(processed_dir)
         for file in processed_dir.glob('*.json'):
             with open(file, 'r') as f:

utils/viz.py CHANGED Viewed

@@ -5,9 +5,22 @@ import plotly.graph_objects as go
 import textwrap
 def create_task_success_heatmap(df, benchmark_name):
     # Pivot the dataframe to create a matrix of agents vs tasks
     pivot_df = df.pivot(index='Agent Name', columns='Task ID', values='Success')
     # Create the heatmap
     fig = go.Figure(data=go.Heatmap(
         z=pivot_df.values,
@@ -23,7 +36,7 @@ def create_task_success_heatmap(df, benchmark_name):
     # Update the layout
     fig.update_layout(
         xaxis_title='Task ID',
-        height=600,
         width=1300,
         yaxis=dict(
             autorange='reversed',

 import textwrap
 def create_task_success_heatmap(df, benchmark_name):
+    # Calculate agent accuracy
+    agent_accuracy = df.groupby('Agent Name')['Success'].mean().sort_values(ascending=False)
+    # Calculate task success rate
+    task_success_rate = df.groupby('Task ID')['Success'].mean().sort_values(ascending=False)
     # Pivot the dataframe to create a matrix of agents vs tasks
     pivot_df = df.pivot(index='Agent Name', columns='Task ID', values='Success')
+    # Sort the pivot table
+    pivot_df = pivot_df.reindex(index=agent_accuracy.index, columns=task_success_rate.index)
+    num_agents = len(pivot_df.index)
+    row_height = 30  # Fixed height for each row in pixels
+    total_height = num_agents * row_height
     # Create the heatmap
     fig = go.Figure(data=go.Heatmap(
         z=pivot_df.values,
     # Update the layout
     fig.update_layout(
         xaxis_title='Task ID',
+        height=total_height,
         width=1300,
         yaxis=dict(
             autorange='reversed',