Spaces:
Running
Running
Commit
·
cb163b3
1
Parent(s):
f9c6a2b
Added default to only restructure and not run llm task monitor inference calls
Browse files- agent_monitor/monitor.py +39 -13
agent_monitor/monitor.py
CHANGED
@@ -64,7 +64,7 @@ class AsyncOpenAIClient(AsyncLLMClient):
|
|
64 |
|
65 |
return response.choices[0].message.content
|
66 |
|
67 |
-
async def analyze_agent_steps(processed_calls, llm_client):
|
68 |
task_calls = defaultdict(list)
|
69 |
for call in processed_calls:
|
70 |
task_calls[call['weave_task_id']].append(call)
|
@@ -72,30 +72,56 @@ async def analyze_agent_steps(processed_calls, llm_client):
|
|
72 |
for task_id in task_calls:
|
73 |
task_calls[task_id].sort(key=lambda x: x['created_timestamp'])
|
74 |
|
75 |
-
tasks = [analyze_task(calls, llm_client) for task_id, calls in task_calls.items()]
|
76 |
task_analyses = await asyncio.gather(*tasks)
|
77 |
|
78 |
return dict(zip(task_calls.keys(), task_analyses))
|
79 |
|
80 |
-
async def analyze_task(calls, llm_client):
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
try:
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
'steps': steps,
|
89 |
'task_analysis': task_analysis
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
except Exception as e:
|
92 |
print(f"Error in task summarization: {str(e)}")
|
93 |
-
return TaskSummary(
|
94 |
overview="Not available",
|
95 |
key_successes='Not available',
|
96 |
main_challenges='Not available',
|
97 |
overall_assessment="Not available"
|
98 |
-
)
|
99 |
|
100 |
async def analyze_step(call, step_number, total_steps, llm_client):
|
101 |
prompt = f"""
|
@@ -128,12 +154,12 @@ async def analyze_step(call, step_number, total_steps, llm_client):
|
|
128 |
except json.JSONDecodeError:
|
129 |
print(f"Error parsing analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}. Using default values.")
|
130 |
analysis = print(f"Error in analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}: {str(e)}")
|
131 |
-
analysis = StepAnalysis(
|
132 |
description="Analysis failed",
|
133 |
category='other',
|
134 |
success=False,
|
135 |
assessment="Unable to assess due to error"
|
136 |
-
)
|
137 |
|
138 |
return {
|
139 |
'call_data': call,
|
|
|
64 |
|
65 |
return response.choices[0].message.content
|
66 |
|
67 |
+
async def analyze_agent_steps(processed_calls, llm_client, llm_eval=False):
|
68 |
task_calls = defaultdict(list)
|
69 |
for call in processed_calls:
|
70 |
task_calls[call['weave_task_id']].append(call)
|
|
|
72 |
for task_id in task_calls:
|
73 |
task_calls[task_id].sort(key=lambda x: x['created_timestamp'])
|
74 |
|
75 |
+
tasks = [analyze_task(calls, llm_client, llm_eval) for task_id, calls in task_calls.items()]
|
76 |
task_analyses = await asyncio.gather(*tasks)
|
77 |
|
78 |
return dict(zip(task_calls.keys(), task_analyses))
|
79 |
|
80 |
+
async def analyze_task(calls, llm_client, llm_eval=False):
|
81 |
+
if llm_eval:
|
82 |
+
step_tasks = [analyze_step(call, i+1, len(calls), llm_client) for i, call in enumerate(calls)]
|
83 |
+
steps = await asyncio.gather(*step_tasks)
|
84 |
+
|
85 |
+
else:
|
86 |
+
steps = []
|
87 |
+
for i, call in enumerate(calls):
|
88 |
+
steps.append({
|
89 |
+
'call_data': call,
|
90 |
+
'analysis': dict(StepAnalysis(
|
91 |
+
description="Not available",
|
92 |
+
action_type='other',
|
93 |
+
success=False,
|
94 |
+
assessment="Not available",
|
95 |
+
headline="Not available"
|
96 |
+
))
|
97 |
+
})
|
98 |
|
99 |
try:
|
100 |
+
if llm_eval:
|
101 |
+
task_analysis = await summarize_task(steps, llm_client)
|
102 |
+
return {
|
103 |
'steps': steps,
|
104 |
'task_analysis': task_analysis
|
105 |
+
}
|
106 |
+
else:
|
107 |
+
return {
|
108 |
+
'steps': steps,
|
109 |
+
'task_analysis': dict(TaskSummary(
|
110 |
+
overview="Not available",
|
111 |
+
key_successes='Not available',
|
112 |
+
main_challenges='Not available',
|
113 |
+
overall_assessment="Not available"
|
114 |
+
))
|
115 |
+
}
|
116 |
+
|
117 |
except Exception as e:
|
118 |
print(f"Error in task summarization: {str(e)}")
|
119 |
+
return dict(TaskSummary(
|
120 |
overview="Not available",
|
121 |
key_successes='Not available',
|
122 |
main_challenges='Not available',
|
123 |
overall_assessment="Not available"
|
124 |
+
))
|
125 |
|
126 |
async def analyze_step(call, step_number, total_steps, llm_client):
|
127 |
prompt = f"""
|
|
|
154 |
except json.JSONDecodeError:
|
155 |
print(f"Error parsing analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}. Using default values.")
|
156 |
analysis = print(f"Error in analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}: {str(e)}")
|
157 |
+
analysis = dict(StepAnalysis(
|
158 |
description="Analysis failed",
|
159 |
category='other',
|
160 |
success=False,
|
161 |
assessment="Unable to assess due to error"
|
162 |
+
))
|
163 |
|
164 |
return {
|
165 |
'call_data': call,
|