benediktstroebl committed on
Commit
575c750
·
1 Parent(s): 6c02e00

added task flow plot

Browse files
Files changed (2) hide show
  1. app.py +19 -7
  2. utils.py +132 -2
app.py CHANGED
@@ -6,10 +6,12 @@ from pathlib import Path
6
  import pandas as pd
7
  import os
8
  import json
9
- from utils import parse_json_files, create_scatter_plot
10
  from huggingface_hub import snapshot_download
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
 
 
 
13
  def restart_space():
14
  API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
15
 
@@ -36,7 +38,7 @@ with open(os.path.join(abs_path, "evals", "usaco_traces", "task_analyses.json"),
36
 
37
  def update_task_analysis(task_id):
38
  if task_id not in analyzed_traces:
39
- return "No analysis available for this task.", [], ""
40
 
41
  analysis = analyzed_traces[task_id]
42
  summary = analysis['summary']
@@ -45,9 +47,9 @@ def update_task_analysis(task_id):
45
  try:
46
  summary = json.loads(summary)
47
  except json.JSONDecodeError:
48
- return "Error: Unable to parse summary data.", [], ""
49
  elif not isinstance(summary, dict):
50
- return "Error: Summary data is in an unexpected format.", [], ""
51
 
52
  overview = f"# Task Overview\n\n{summary.get('overview', 'No overview available.')}\n\n"
53
  overview += f"## Successes\n{summary.get('successes', 'No successes listed.')}\n\n"
@@ -55,7 +57,9 @@ def update_task_analysis(task_id):
55
 
56
  steps = [(f"Step {i+1}", i) for i in range(len(analysis['steps']))]
57
 
58
- return overview, gr.Dropdown(choices=steps, label="Agent Steps"), ""
 
 
59
 
60
  def update_step_details(task_id, step_index):
61
  if task_id not in analyzed_traces:
@@ -91,6 +95,7 @@ def update_step_details(task_id, step_index):
91
 
92
  return details
93
 
 
94
  with gr.Blocks() as demo:
95
  gr.Markdown("""
96
  # 🥇 Agent Leaderboard
@@ -139,9 +144,16 @@ with gr.Blocks() as demo:
139
  with gr.Column(scale=1):
140
  steps_dropdown = gr.Dropdown(label="Agent Steps")
141
  step_details = gr.Markdown()
 
 
142
 
143
- task_dropdown.change(update_task_analysis, inputs=[task_dropdown], outputs=[task_overview, steps_dropdown, step_details])
144
- steps_dropdown.change(update_step_details, inputs=[task_dropdown, steps_dropdown], outputs=[step_details])
 
 
 
 
 
145
 
146
  with gr.Tab("About"):
147
  gr.Markdown((Path(__file__).parent / "about.md").read_text())
 
6
  import pandas as pd
7
  import os
8
  import json
9
+ from utils import parse_json_files, create_scatter_plot, create_flow_chart
10
  from huggingface_hub import snapshot_download
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
 
13
+
14
+
15
  def restart_space():
16
  API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
17
 
 
38
 
39
  def update_task_analysis(task_id):
40
  if task_id not in analyzed_traces:
41
+ return "No analysis available for this task.", None, [], ""
42
 
43
  analysis = analyzed_traces[task_id]
44
  summary = analysis['summary']
 
47
  try:
48
  summary = json.loads(summary)
49
  except json.JSONDecodeError:
50
+ return "Error: Unable to parse summary data.", None, [], ""
51
  elif not isinstance(summary, dict):
52
+ return "Error: Summary data is in an unexpected format.", None, [], ""
53
 
54
  overview = f"# Task Overview\n\n{summary.get('overview', 'No overview available.')}\n\n"
55
  overview += f"## Successes\n{summary.get('successes', 'No successes listed.')}\n\n"
 
57
 
58
  steps = [(f"Step {i+1}", i) for i in range(len(analysis['steps']))]
59
 
60
+ flow_chart = create_flow_chart(analysis['steps'])
61
+
62
+ return overview, flow_chart, gr.Dropdown(choices=steps, label="Agent Steps"), ""
63
 
64
  def update_step_details(task_id, step_index):
65
  if task_id not in analyzed_traces:
 
95
 
96
  return details
97
 
98
+
99
  with gr.Blocks() as demo:
100
  gr.Markdown("""
101
  # 🥇 Agent Leaderboard
 
144
  with gr.Column(scale=1):
145
  steps_dropdown = gr.Dropdown(label="Agent Steps")
146
  step_details = gr.Markdown()
147
+ with gr.Row():
148
+ flow_chart = gr.Plot(label="Task Flow")
149
 
150
+ task_dropdown.change(update_task_analysis,
151
+ inputs=[task_dropdown],
152
+ outputs=[task_overview, flow_chart, steps_dropdown, step_details])
153
+ steps_dropdown.change(update_step_details,
154
+ inputs=[task_dropdown, steps_dropdown],
155
+ outputs=[step_details])
156
+
157
 
158
  with gr.Tab("About"):
159
  gr.Markdown((Path(__file__).parent / "about.md").read_text())
utils.py CHANGED
@@ -4,8 +4,7 @@ import pandas as pd
4
  import plotly.express as px
5
  from pareto_utils import Agent, compute_pareto_frontier
6
  import plotly.graph_objects as go
7
-
8
-
9
 
10
  def parse_json_files(folder_path, benchmark_name):
11
  # Convert folder path to Path object
@@ -103,4 +102,135 @@ def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str =
103
  bgcolor="rgba(255, 255, 255, 0.5)" # semi-transparent white background
104
  )
105
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  return fig
 
4
  import plotly.express as px
5
  from pareto_utils import Agent, compute_pareto_frontier
6
  import plotly.graph_objects as go
7
+ import textwrap
 
8
 
9
  def parse_json_files(folder_path, benchmark_name):
10
  # Convert folder path to Path object
 
102
  bgcolor="rgba(255, 255, 255, 0.5)" # semi-transparent white background
103
  )
104
  )
105
+ return fig
106
+
107
+
108
+ import plotly.graph_objects as go
109
+ import textwrap
110
+
111
def _normalize_step_analysis(step):
    """Return one step's analysis as a dict of display-ready values.

    The ``analysis`` entry of *step* may be a dict or a JSON-encoded string.
    Anything that is missing, unparsable, or not a JSON object falls back to
    defaults so that a single malformed step cannot break the whole chart.
    """
    # Function-scope import keeps this block self-contained even if the
    # module header (not fully visible from here) does not import json.
    import json

    analysis = step.get('analysis', {}) if isinstance(step, dict) else {}
    if isinstance(analysis, str):
        try:
            analysis = json.loads(analysis)
        except json.JSONDecodeError:
            analysis = {}
    # json.loads may legitimately yield a list/number/None; only a dict
    # supports the key lookups below.
    if not isinstance(analysis, dict):
        analysis = {}

    def _text(key, default):
        value = analysis.get(key)
        return default if value is None else str(value)

    success = analysis.get('success', True)
    if success is None:
        # Treat an explicit null like a missing value: assume success.
        success = True
    elif isinstance(success, str):
        # Avoid bool("false") being truthy.
        success = success.strip().lower() not in ('false', 'no', '0', '')
    else:
        success = bool(success)

    return {
        'description': _text('description', 'No description available.'),
        'assessment': _text('assessment', 'No assessment available.'),
        'success': success,
        'action_type': _text('action_type', 'other'),
        'step_outline': _text('step_outline', ''),
    }


def create_flow_chart(steps):
    """Build a horizontal Plotly flow chart with one node per agent step.

    Args:
        steps: Sequence of step dicts. Each step's ``'analysis'`` entry is a
            dict (or a JSON string encoding one) from which the keys
            ``description``, ``assessment``, ``success``, ``action_type`` and
            ``step_outline`` are read. Missing or malformed values fall back
            to defaults. ``None`` or an empty sequence yields an empty chart.

    Returns:
        A ``plotly.graph_objects.Figure`` in which nodes are coloured by
        success (green) or challenge (red), shaped by action type, connected
        left to right, and accompanied by a legend for colours and shapes.
    """
    # True marks a successful step, False marks a step with challenges.
    color_map = {True: 'green', False: 'red'}
    shape_map = {
        'plan': 'octagon',
        'tool': 'square',
        'retrieve': 'diamond',
        'other': 'circle',
    }

    node_x = []
    node_y = []
    edge_x = []
    edge_y = []
    node_text = []
    hover_text = []
    node_colors = []
    node_shapes = []

    for i, step in enumerate(steps or []):
        info = _normalize_step_analysis(step)
        action_type = info['action_type']

        node_x.append(i)
        node_y.append(0)
        node_colors.append(color_map[info['success']])
        # Action types outside the known set are drawn as a star.
        node_shapes.append(shape_map.get(action_type, 'star'))

        # Wrap long text so the hover box stays readable.
        wrapped_description = '<br>'.join(
            textwrap.wrap(info['description'], width=50))
        wrapped_assessment = '<br>'.join(
            textwrap.wrap(info['assessment'], width=50))
        outline = textwrap.shorten(
            info['step_outline'], width=30, placeholder='')

        label_suffix = f":<br>{outline}" if outline else ''
        title_suffix = f": {outline}" if outline else ''
        node_text.append(f"Step {i+1}{label_suffix}")

        hover_text.append(
            f"<b>Step {i+1}{title_suffix}</b><br><br>"
            f"<b>Description:</b><br>"
            f"{wrapped_description}<br><br>"
            f"<b>Assessment:</b><br>"
            f"{wrapped_assessment}<br><br>"
            f"<b>Successful:</b> {'Yes' if info['success'] else 'No'}<br>"
            f"<b>Action Type:</b> {action_type.capitalize()}"
        )

        if i > 0:
            # The None entry breaks the line so each edge is its own segment.
            edge_x.extend([i - 1, i, None])
            edge_y.extend([0, 0, None])

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="top center",
        hovertext=hover_text,
        hoverinfo='text',
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        showlegend=False,  # only the dedicated legend traces get entries
        marker=dict(
            color=node_colors,
            size=30,
            line_width=2,
            symbol=node_shapes,
        ))

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=2, color='#888'),
        hoverinfo='none',
        showlegend=False,
        mode='lines')

    layout = go.Layout(
        # The legend must be enabled at layout level; with showlegend=False
        # here the legend traces added below would never be displayed.
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',
        paper_bgcolor='white',
    )

    fig = go.Figure(data=[edge_trace, node_trace], layout=layout)

    # Legend-only traces (no data points) explaining colours and shapes.
    for success, color in color_map.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, color=color),
            showlegend=True,
            name=f"{'Success' if success else 'Challenge'}"
        ))

    for action, shape in shape_map.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, symbol=shape, color='gray'),
            showlegend=True,
            name=f"Action: {action.capitalize()}"
        ))

    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        bgcolor='rgba(255,255,255,0.8)',  # slightly transparent white
        bordercolor='rgba(0,0,0,0.1)',  # light border around the legend
        borderwidth=1
    ))

    return fig