core_leaderboard

Running

App Files Files Community

benediktstroebl committed on Aug 9, 2024

Commit

575c750

1 Parent(s): 6c02e00

added task flow plot

Browse files

Files changed (2) hide show

app.py +19 -7
utils.py +132 -2

app.py CHANGED Viewed

@@ -6,10 +6,12 @@ from pathlib import Path
 import pandas as pd
 import os
 import json
-from utils import parse_json_files, create_scatter_plot
 from huggingface_hub import snapshot_download
 from apscheduler.schedulers.background import BackgroundScheduler
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
@@ -36,7 +38,7 @@ with open(os.path.join(abs_path, "evals", "usaco_traces", "task_analyses.json"),
 def update_task_analysis(task_id):
     if task_id not in analyzed_traces:
-        return "No analysis available for this task.", [], ""
     analysis = analyzed_traces[task_id]
     summary = analysis['summary']
@@ -45,9 +47,9 @@ def update_task_analysis(task_id):
         try:
             summary = json.loads(summary)
         except json.JSONDecodeError:
-            return "Error: Unable to parse summary data.", [], ""
     elif not isinstance(summary, dict):
-        return "Error: Summary data is in an unexpected format.", [], ""
     overview = f"# Task Overview\n\n{summary.get('overview', 'No overview available.')}\n\n"
     overview += f"## Successes\n{summary.get('successes', 'No successes listed.')}\n\n"
@@ -55,7 +57,9 @@ def update_task_analysis(task_id):
     steps = [(f"Step {i+1}", i) for i in range(len(analysis['steps']))]
-    return overview, gr.Dropdown(choices=steps, label="Agent Steps"), ""
 def update_step_details(task_id, step_index):
     if task_id not in analyzed_traces:
@@ -91,6 +95,7 @@ def update_step_details(task_id, step_index):
     return details
 with gr.Blocks() as demo:
     gr.Markdown("""
     # 🥇 Agent Leaderboard
@@ -139,9 +144,16 @@ with gr.Blocks() as demo:
                 with gr.Column(scale=1):
                     steps_dropdown = gr.Dropdown(label="Agent Steps")
                     step_details = gr.Markdown()
-            task_dropdown.change(update_task_analysis, inputs=[task_dropdown], outputs=[task_overview, steps_dropdown, step_details])
-            steps_dropdown.change(update_step_details, inputs=[task_dropdown, steps_dropdown], outputs=[step_details])
         with gr.Tab("About"):
             gr.Markdown((Path(__file__).parent / "about.md").read_text())

 import pandas as pd
 import os
 import json
+from utils import parse_json_files, create_scatter_plot, create_flow_chart
 from huggingface_hub import snapshot_download
 from apscheduler.schedulers.background import BackgroundScheduler
 def restart_space():
     API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
 def update_task_analysis(task_id):
     if task_id not in analyzed_traces:
+        return "No analysis available for this task.", None, [], ""
     analysis = analyzed_traces[task_id]
     summary = analysis['summary']
         try:
             summary = json.loads(summary)
         except json.JSONDecodeError:
+            return "Error: Unable to parse summary data.", None, [], ""
     elif not isinstance(summary, dict):
+        return "Error: Summary data is in an unexpected format.", None, [], ""
     overview = f"# Task Overview\n\n{summary.get('overview', 'No overview available.')}\n\n"
     overview += f"## Successes\n{summary.get('successes', 'No successes listed.')}\n\n"
     steps = [(f"Step {i+1}", i) for i in range(len(analysis['steps']))]
+    flow_chart = create_flow_chart(analysis['steps'])
+    return overview, flow_chart, gr.Dropdown(choices=steps, label="Agent Steps"), ""
 def update_step_details(task_id, step_index):
     if task_id not in analyzed_traces:
     return details
 with gr.Blocks() as demo:
     gr.Markdown("""
     # 🥇 Agent Leaderboard
                 with gr.Column(scale=1):
                     steps_dropdown = gr.Dropdown(label="Agent Steps")
                     step_details = gr.Markdown()
+            with gr.Row():
+                flow_chart = gr.Plot(label="Task Flow")
+            task_dropdown.change(update_task_analysis,
+                                inputs=[task_dropdown],
+                                outputs=[task_overview, flow_chart, steps_dropdown, step_details])
+            steps_dropdown.change(update_step_details,
+                                inputs=[task_dropdown, steps_dropdown],
+                                outputs=[step_details])
         with gr.Tab("About"):
             gr.Markdown((Path(__file__).parent / "about.md").read_text())

utils.py CHANGED Viewed

@@ -4,8 +4,7 @@ import pandas as pd
 import plotly.express as px
 from pareto_utils import Agent, compute_pareto_frontier
 import plotly.graph_objects as go
 def parse_json_files(folder_path, benchmark_name):
     # Convert folder path to Path object
@@ -103,4 +102,135 @@ def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str =
         bgcolor="rgba(255, 255, 255, 0.5)"  # semi-transparent white background
         )
     )
     return fig

 import plotly.express as px
 from pareto_utils import Agent, compute_pareto_frontier
 import plotly.graph_objects as go
+import textwrap
 def parse_json_files(folder_path, benchmark_name):
     # Convert folder path to Path object
         bgcolor="rgba(255, 255, 255, 0.5)"  # semi-transparent white background
         )
     )
+    return fig
+import plotly.graph_objects as go
+import textwrap
def _load_step_analysis(step):
    """Return the step's ``'analysis'`` entry as a dict, or ``{}`` if unusable.

    The entry may already be a dict or may be a JSON-encoded string. Anything
    that is missing, fails to decode, or does not decode to a JSON object is
    replaced by an empty dict so the caller can rely on ``.get``.
    """
    analysis = step.get('analysis', {}) if isinstance(step, dict) else {}
    if isinstance(analysis, str):
        try:
            analysis = json.loads(analysis)
        except json.JSONDecodeError:
            return {}
    return analysis if isinstance(analysis, dict) else {}


def _as_success_flag(value):
    """Coerce a loosely-typed ``success`` value to a real ``bool``.

    ``None`` is treated like a missing key (success), and the strings
    ``'false'``, ``'no'``, ``'0'`` and ``''`` (case-insensitive) count as
    failure. Everything else falls back to normal truthiness.
    """
    if value is None:
        return True
    if isinstance(value, str):
        return value.strip().lower() not in ('false', 'no', '0', '')
    return bool(value)


def _as_text(value, default):
    """Return ``value`` as a string, or ``default`` when it is ``None``."""
    return default if value is None else str(value)


def create_flow_chart(steps):
    """Build a Plotly figure showing agent steps as a left-to-right chain.

    Each step becomes one marker at ``(index, 0)``, connected to its
    predecessor by a grey line. Marker colour encodes the step's ``success``
    flag (green / red) and marker symbol encodes its ``action_type``.
    Hovering a marker shows the step's description and assessment.

    Args:
        steps: Sequence of step records. Each record is expected to be a dict
            whose ``'analysis'`` value is either a dict or a JSON string with
            the optional keys ``description``, ``assessment``, ``success``,
            ``action_type`` and ``step_outline``. Missing or malformed
            analyses fall back to defaults instead of raising.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the edge trace, the node
        trace, and legend-only traces explaining the colours and symbols.
    """
    # Define color and shape mappings
    color_map = {True: 'green', False: 'red'}  # True for success, False for challenges
    shape_map = {
        'plan': 'octagon',
        'tool': 'square',
        'retrieve': 'diamond',
        'other': 'circle'
    }

    node_x = []
    node_y = []
    edge_x = []
    edge_y = []
    node_text = []
    hover_text = []
    node_colors = []
    node_shapes = []

    for i, step in enumerate(steps):
        node_x.append(i)
        node_y.append(0)

        analysis = _load_step_analysis(step)
        description = _as_text(analysis.get('description'), 'No description available.')
        assessment = _as_text(analysis.get('assessment'), 'No assessment available.')
        # Coerce to a real bool: color_map is keyed by True/False only, so a
        # null or string value would otherwise raise KeyError.
        success = _as_success_flag(analysis.get('success', True))
        action_type = _as_text(analysis.get('action_type'), 'other')
        step_outline = _as_text(analysis.get('step_outline'), '')

        # Set node color and shape based on attributes; unknown action types
        # get a distinct 'star' symbol.
        node_colors.append(color_map[success])
        node_shapes.append(shape_map.get(action_type, 'star'))

        # Wrap text to improve readability
        wrapped_description = '<br>'.join(textwrap.wrap(description, width=50))
        wrapped_assessment = '<br>'.join(textwrap.wrap(assessment, width=50))
        # The shortened outline is reused in both the node label and the
        # hover title, so compute it once.
        short_outline = textwrap.shorten(step_outline, width=30, placeholder='')
        hover_suffix = f": {short_outline}" if short_outline else ''
        label_suffix = f":<br>{short_outline}" if short_outline else ''

        node_text.append(f"Step {i+1}{label_suffix}")

        # Create formatted hover text without indentation
        hover_info = f"<b>Step {i+1}{hover_suffix}</b><br><br>" \
                     f"<b>Description:</b><br>" \
                     f"{wrapped_description}<br><br>" \
                     f"<b>Assessment:</b><br>" \
                     f"{wrapped_assessment}<br><br>" \
                     f"<b>Successful:</b> {'Yes' if success else 'No'}<br>" \
                     f"<b>Action Type:</b> {action_type.capitalize()}"
        hover_text.append(hover_info)

        if i > 0:
            # A trailing None breaks the line so segments are drawn separately.
            edge_x.extend([i-1, i, None])
            edge_y.extend([0, 0, None])

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="top center",
        hovertext=hover_text,
        hoverinfo='text',
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        showlegend=False,  # data trace; only the key traces below belong in the legend
        marker=dict(
            color=node_colors,
            size=30,
            line_width=2,
            symbol=node_shapes
        ))

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=2, color='#888'),
        hoverinfo='none',
        showlegend=False,  # data trace; keep it out of the legend
        mode='lines')

    layout = go.Layout(
        # The legend must be enabled at layout level: a layout-level
        # showlegend=False hides every legend entry, including the colour and
        # shape keys added below.
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',  # Set plot background color to white
        paper_bgcolor='white'  # Set paper background color to white
    )

    fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

    # Add a legend for colors and shapes using point-less placeholder traces
    for success, color in color_map.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, color=color),
            showlegend=True,
            name=f"{'Success' if success else 'Challenge'}"
        ))

    for action, shape in shape_map.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, symbol=shape, color='gray'),
            showlegend=True,
            name=f"Action: {action.capitalize()}"
        ))

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        bgcolor='rgba(255,255,255,0.8)',  # Set legend background to slightly transparent white
        bordercolor='rgba(0,0,0,0.1)',  # Add a light border to the legend
        borderwidth=1
    ))

    return fig