Spaces:
Running
Running
Commit
·
575c750
1
Parent(s):
6c02e00
added task flow plot
Browse files
app.py
CHANGED
@@ -6,10 +6,12 @@ from pathlib import Path
|
|
6 |
import pandas as pd
|
7 |
import os
|
8 |
import json
|
9 |
-
from utils import parse_json_files, create_scatter_plot
|
10 |
from huggingface_hub import snapshot_download
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
|
|
|
|
|
13 |
def restart_space():
|
14 |
API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
|
15 |
|
@@ -36,7 +38,7 @@ with open(os.path.join(abs_path, "evals", "usaco_traces", "task_analyses.json"),
|
|
36 |
|
37 |
def update_task_analysis(task_id):
|
38 |
if task_id not in analyzed_traces:
|
39 |
-
return "No analysis available for this task.", [], ""
|
40 |
|
41 |
analysis = analyzed_traces[task_id]
|
42 |
summary = analysis['summary']
|
@@ -45,9 +47,9 @@ def update_task_analysis(task_id):
|
|
45 |
try:
|
46 |
summary = json.loads(summary)
|
47 |
except json.JSONDecodeError:
|
48 |
-
return "Error: Unable to parse summary data.", [], ""
|
49 |
elif not isinstance(summary, dict):
|
50 |
-
return "Error: Summary data is in an unexpected format.", [], ""
|
51 |
|
52 |
overview = f"# Task Overview\n\n{summary.get('overview', 'No overview available.')}\n\n"
|
53 |
overview += f"## Successes\n{summary.get('successes', 'No successes listed.')}\n\n"
|
@@ -55,7 +57,9 @@ def update_task_analysis(task_id):
|
|
55 |
|
56 |
steps = [(f"Step {i+1}", i) for i in range(len(analysis['steps']))]
|
57 |
|
58 |
-
|
|
|
|
|
59 |
|
60 |
def update_step_details(task_id, step_index):
|
61 |
if task_id not in analyzed_traces:
|
@@ -91,6 +95,7 @@ def update_step_details(task_id, step_index):
|
|
91 |
|
92 |
return details
|
93 |
|
|
|
94 |
with gr.Blocks() as demo:
|
95 |
gr.Markdown("""
|
96 |
# 🥇 Agent Leaderboard
|
@@ -139,9 +144,16 @@ with gr.Blocks() as demo:
|
|
139 |
with gr.Column(scale=1):
|
140 |
steps_dropdown = gr.Dropdown(label="Agent Steps")
|
141 |
step_details = gr.Markdown()
|
|
|
|
|
142 |
|
143 |
-
task_dropdown.change(update_task_analysis,
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
with gr.Tab("About"):
|
147 |
gr.Markdown((Path(__file__).parent / "about.md").read_text())
|
|
|
6 |
import pandas as pd
|
7 |
import os
|
8 |
import json
|
9 |
+
from utils import parse_json_files, create_scatter_plot, create_flow_chart
|
10 |
from huggingface_hub import snapshot_download
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
|
13 |
+
|
14 |
+
|
15 |
def restart_space():
    """Ask ``API`` to restart the Space ``REPO_ID``, authenticating with ``HF_TOKEN``."""
    API.restart_space(token=HF_TOKEN, repo_id=REPO_ID)
|
17 |
|
|
|
38 |
|
39 |
def update_task_analysis(task_id):
|
40 |
if task_id not in analyzed_traces:
|
41 |
+
return "No analysis available for this task.", None, [], ""
|
42 |
|
43 |
analysis = analyzed_traces[task_id]
|
44 |
summary = analysis['summary']
|
|
|
47 |
try:
|
48 |
summary = json.loads(summary)
|
49 |
except json.JSONDecodeError:
|
50 |
+
return "Error: Unable to parse summary data.", None, [], ""
|
51 |
elif not isinstance(summary, dict):
|
52 |
+
return "Error: Summary data is in an unexpected format.", None, [], ""
|
53 |
|
54 |
overview = f"# Task Overview\n\n{summary.get('overview', 'No overview available.')}\n\n"
|
55 |
overview += f"## Successes\n{summary.get('successes', 'No successes listed.')}\n\n"
|
|
|
57 |
|
58 |
steps = [(f"Step {i+1}", i) for i in range(len(analysis['steps']))]
|
59 |
|
60 |
+
flow_chart = create_flow_chart(analysis['steps'])
|
61 |
+
|
62 |
+
return overview, flow_chart, gr.Dropdown(choices=steps, label="Agent Steps"), ""
|
63 |
|
64 |
def update_step_details(task_id, step_index):
|
65 |
if task_id not in analyzed_traces:
|
|
|
95 |
|
96 |
return details
|
97 |
|
98 |
+
|
99 |
with gr.Blocks() as demo:
|
100 |
gr.Markdown("""
|
101 |
# 🥇 Agent Leaderboard
|
|
|
144 |
with gr.Column(scale=1):
|
145 |
steps_dropdown = gr.Dropdown(label="Agent Steps")
|
146 |
step_details = gr.Markdown()
|
147 |
+
with gr.Row():
|
148 |
+
flow_chart = gr.Plot(label="Task Flow")
|
149 |
|
150 |
+
task_dropdown.change(update_task_analysis,
|
151 |
+
inputs=[task_dropdown],
|
152 |
+
outputs=[task_overview, flow_chart, steps_dropdown, step_details])
|
153 |
+
steps_dropdown.change(update_step_details,
|
154 |
+
inputs=[task_dropdown, steps_dropdown],
|
155 |
+
outputs=[step_details])
|
156 |
+
|
157 |
|
158 |
with gr.Tab("About"):
|
159 |
gr.Markdown((Path(__file__).parent / "about.md").read_text())
|
utils.py
CHANGED
@@ -4,8 +4,7 @@ import pandas as pd
|
|
4 |
import plotly.express as px
|
5 |
from pareto_utils import Agent, compute_pareto_frontier
|
6 |
import plotly.graph_objects as go
|
7 |
-
|
8 |
-
|
9 |
|
10 |
def parse_json_files(folder_path, benchmark_name):
|
11 |
# Convert folder path to Path object
|
@@ -103,4 +102,135 @@ def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str =
|
|
103 |
bgcolor="rgba(255, 255, 255, 0.5)" # semi-transparent white background
|
104 |
)
|
105 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
return fig
|
|
|
4 |
import plotly.express as px
|
5 |
from pareto_utils import Agent, compute_pareto_frontier
|
6 |
import plotly.graph_objects as go
|
7 |
+
import textwrap
|
|
|
8 |
|
9 |
def parse_json_files(folder_path, benchmark_name):
|
10 |
# Convert folder path to Path object
|
|
|
102 |
bgcolor="rgba(255, 255, 255, 0.5)" # semi-transparent white background
|
103 |
)
|
104 |
)
|
105 |
+
return fig
|
106 |
+
|
107 |
+
|
108 |
+
import plotly.graph_objects as go
|
109 |
+
import textwrap
|
110 |
+
|
111 |
+
def _step_analysis_dict(step):
    """Return a step's ``'analysis'`` entry as a dict, or ``{}`` if unusable.

    The entry may already be a dict or may be a JSON-encoded string. Anything
    else (missing key, invalid JSON, JSON that is not an object) yields ``{}``
    so the caller can rely on ``.get`` with defaults.
    """
    # Local import: no module-level ``import json`` is visible near this code.
    import json

    analysis = step.get('analysis') if isinstance(step, dict) else None
    if isinstance(analysis, str):
        try:
            analysis = json.loads(analysis)
        except json.JSONDecodeError:
            return {}
    return analysis if isinstance(analysis, dict) else {}


def _analysis_text(analysis, key, default):
    """Read ``analysis[key]`` as a string, using ``default`` if absent or None."""
    value = analysis.get(key, default)
    return default if value is None else str(value)


def _analysis_success(analysis):
    """Interpret the ``'success'`` field as a strict bool.

    A missing or ``None`` value counts as success (the original default).
    Strings are compared against common "false" spellings, since a non-empty
    string such as ``"false"`` would otherwise be truthy.
    """
    value = analysis.get('success', True)
    if value is None:
        return True
    if isinstance(value, str):
        return value.strip().lower() not in ('false', 'no', '0', '')
    return bool(value)


def create_flow_chart(steps):
    """Build a Plotly figure showing agent steps as a left-to-right chain.

    Each step becomes one marker at ``(index, 0)``; consecutive markers are
    joined by a grey line. Marker colour encodes the step's ``success`` flag
    (green/red) and marker symbol encodes its ``action_type`` (unknown types
    fall back to ``'star'``). Hovering a marker shows the wrapped description,
    assessment, success flag and action type.

    Args:
        steps: Iterable of step records. Each record is read through its
            ``'analysis'`` entry, which may be a dict or a JSON string with
            the optional keys ``description``, ``assessment``, ``success``,
            ``action_type`` and ``step_outline``. ``None`` is treated as an
            empty sequence.

    Returns:
        A ``plotly.graph_objects.Figure`` with the edge trace, the node
        trace, and legend-only traces for the colour and shape keys.
    """
    # Colour by outcome, symbol by action type.
    color_map = {True: 'green', False: 'red'}  # True for success, False for challenges
    shape_map = {
        'plan': 'octagon',
        'tool': 'square',
        'retrieve': 'diamond',
        'other': 'circle'
    }

    node_x = []
    node_y = []
    edge_x = []
    edge_y = []
    node_text = []
    hover_text = []
    node_colors = []
    node_shapes = []

    for i, step in enumerate(steps or []):
        node_x.append(i)
        node_y.append(0)

        analysis = _step_analysis_dict(step)
        description = _analysis_text(analysis, 'description', 'No description available.')
        assessment = _analysis_text(analysis, 'assessment', 'No assessment available.')
        action_type = _analysis_text(analysis, 'action_type', 'other')
        step_outline = _analysis_text(analysis, 'step_outline', '')
        success = _analysis_success(analysis)

        # ``success`` is a strict bool here, so this lookup cannot raise.
        node_colors.append(color_map[success])
        node_shapes.append(shape_map.get(action_type, 'star'))

        # Plotly renders "<br>" as a line break in labels and hover boxes.
        wrapped_description = '<br>'.join(textwrap.wrap(description, width=50))
        wrapped_assessment = '<br>'.join(textwrap.wrap(assessment, width=50))

        # With an empty placeholder, ``shorten`` drops whole words that do not
        # fit in 30 characters; the result may be empty.
        outline = textwrap.shorten(step_outline, width=30, placeholder='')
        hover_outline = f": {outline}" if outline else ''
        label_outline = f":<br>{outline}" if outline else ''

        node_text.append(f"Step {i+1}{label_outline}")

        hover_text.append(
            f"<b>Step {i+1}{hover_outline}</b><br><br>"
            f"<b>Description:</b><br>"
            f"{wrapped_description}<br><br>"
            f"<b>Assessment:</b><br>"
            f"{wrapped_assessment}<br><br>"
            f"<b>Successful:</b> {'Yes' if success else 'No'}<br>"
            f"<b>Action Type:</b> {action_type.capitalize()}"
        )

        if i > 0:
            # The ``None`` entry ends the segment so edges are drawn
            # independently of each other.
            edge_x.extend([i-1, i, None])
            edge_y.extend([0, 0, None])

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="top center",
        hovertext=hover_text,
        hoverinfo='text',
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        showlegend=False,  # only the dedicated key traces belong in the legend
        marker=dict(
            color=node_colors,
            size=30,
            line_width=2,
            symbol=node_shapes
        ))

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=2, color='#888'),
        hoverinfo='none',
        showlegend=False,
        mode='lines')

    layout = go.Layout(
        # The legend must be enabled at layout level; otherwise the key
        # traces added below are never drawn.
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',  # Set plot background color to white
        paper_bgcolor='white'  # Set paper background color to white
    )

    fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

    # Legend-only traces: no data points, just a marker for the key.
    for outcome, color in color_map.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, color=color),
            showlegend=True,
            name=f"{'Success' if outcome else 'Challenge'}"
        ))

    for action, shape in shape_map.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, symbol=shape, color='gray'),
            showlegend=True,
            name=f"Action: {action.capitalize()}"
        ))

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        bgcolor='rgba(255,255,255,0.8)',  # Set legend background to slightly transparent white
        bordercolor='rgba(0,0,0,0.1)',  # Add a light border to the legend
        borderwidth=1
    ))

    return fig
|