benediktstroebl commited on
Commit
cb163b3
·
1 Parent(s): f9c6a2b

Added default to only restructure and not run llm task monitor inference calls

Browse files
Files changed (1) hide show
  1. agent_monitor/monitor.py +39 -13
agent_monitor/monitor.py CHANGED
@@ -64,7 +64,7 @@ class AsyncOpenAIClient(AsyncLLMClient):
64
 
65
  return response.choices[0].message.content
66
 
67
- async def analyze_agent_steps(processed_calls, llm_client):
68
  task_calls = defaultdict(list)
69
  for call in processed_calls:
70
  task_calls[call['weave_task_id']].append(call)
@@ -72,30 +72,56 @@ async def analyze_agent_steps(processed_calls, llm_client):
72
  for task_id in task_calls:
73
  task_calls[task_id].sort(key=lambda x: x['created_timestamp'])
74
 
75
- tasks = [analyze_task(calls, llm_client) for task_id, calls in task_calls.items()]
76
  task_analyses = await asyncio.gather(*tasks)
77
 
78
  return dict(zip(task_calls.keys(), task_analyses))
79
 
80
- async def analyze_task(calls, llm_client):
81
- step_tasks = [analyze_step(call, i+1, len(calls), llm_client) for i, call in enumerate(calls)]
82
- steps = await asyncio.gather(*step_tasks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  try:
85
- task_analysis = await summarize_task(steps, llm_client)
86
-
87
- return {
88
  'steps': steps,
89
  'task_analysis': task_analysis
90
- }
 
 
 
 
 
 
 
 
 
 
 
91
  except Exception as e:
92
  print(f"Error in task summarization: {str(e)}")
93
- return TaskSummary(
94
  overview="Not available",
95
  key_successes='Not available',
96
  main_challenges='Not available',
97
  overall_assessment="Not available"
98
- )
99
 
100
  async def analyze_step(call, step_number, total_steps, llm_client):
101
  prompt = f"""
@@ -128,12 +154,12 @@ async def analyze_step(call, step_number, total_steps, llm_client):
128
  except json.JSONDecodeError:
129
  print(f"Error parsing analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}. Using default values.")
130
  analysis = print(f"Error in analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}: {str(e)}")
131
- analysis = StepAnalysis(
132
  description="Analysis failed",
133
  category='other',
134
  success=False,
135
  assessment="Unable to assess due to error"
136
- )
137
 
138
  return {
139
  'call_data': call,
 
64
 
65
  return response.choices[0].message.content
66
 
67
+ async def analyze_agent_steps(processed_calls, llm_client, llm_eval=False):
68
  task_calls = defaultdict(list)
69
  for call in processed_calls:
70
  task_calls[call['weave_task_id']].append(call)
 
72
  for task_id in task_calls:
73
  task_calls[task_id].sort(key=lambda x: x['created_timestamp'])
74
 
75
+ tasks = [analyze_task(calls, llm_client, llm_eval) for task_id, calls in task_calls.items()]
76
  task_analyses = await asyncio.gather(*tasks)
77
 
78
  return dict(zip(task_calls.keys(), task_analyses))
79
 
80
+ async def analyze_task(calls, llm_client, llm_eval=False):
81
+ if llm_eval:
82
+ step_tasks = [analyze_step(call, i+1, len(calls), llm_client) for i, call in enumerate(calls)]
83
+ steps = await asyncio.gather(*step_tasks)
84
+
85
+ else:
86
+ steps = []
87
+ for i, call in enumerate(calls):
88
+ steps.append({
89
+ 'call_data': call,
90
+ 'analysis': dict(StepAnalysis(
91
+ description="Not available",
92
+ action_type='other',
93
+ success=False,
94
+ assessment="Not available",
95
+ headline="Not available"
96
+ ))
97
+ })
98
 
99
  try:
100
+ if llm_eval:
101
+ task_analysis = await summarize_task(steps, llm_client)
102
+ return {
103
  'steps': steps,
104
  'task_analysis': task_analysis
105
+ }
106
+ else:
107
+ return {
108
+ 'steps': steps,
109
+ 'task_analysis': dict(TaskSummary(
110
+ overview="Not available",
111
+ key_successes='Not available',
112
+ main_challenges='Not available',
113
+ overall_assessment="Not available"
114
+ ))
115
+ }
116
+
117
  except Exception as e:
118
  print(f"Error in task summarization: {str(e)}")
119
+ return dict(TaskSummary(
120
  overview="Not available",
121
  key_successes='Not available',
122
  main_challenges='Not available',
123
  overall_assessment="Not available"
124
+ ))
125
 
126
  async def analyze_step(call, step_number, total_steps, llm_client):
127
  prompt = f"""
 
154
  except json.JSONDecodeError:
155
  print(f"Error parsing analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}. Using default values.")
156
  analysis = print(f"Error in analysis for step {step_number} of {total_steps} in task {call['weave_task_id']}: {str(e)}")
157
+ analysis = dict(StepAnalysis(
158
  description="Analysis failed",
159
  category='other',
160
  success=False,
161
  assessment="Unable to assess due to error"
162
+ ))
163
 
164
  return {
165
  'call_data': call,