Spaces:
Running
Running
Commit
·
9f1f7b2
1
Parent(s):
1d04811
updated prompts
Browse files- agent_monitor/monitor.py +32 -27
agent_monitor/monitor.py
CHANGED
@@ -99,23 +99,28 @@ async def analyze_task(calls, llm_client):
|
|
99 |
|
100 |
async def analyze_step(call, step_number, total_steps, llm_client):
|
101 |
prompt = f"""
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
119 |
analysis = await llm_client.generate_text(prompt, system_message, response_format=StepAnalysis)
|
120 |
|
121 |
try:
|
@@ -138,20 +143,20 @@ async def summarize_task(steps, llm_client):
|
|
138 |
steps_summary = "\n".join([f"Step {i+1}: {step['analysis']}" for i, step in enumerate(steps)])
|
139 |
|
140 |
prompt = f"""
|
141 |
-
|
142 |
|
143 |
-
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
|
151 |
-
|
152 |
-
|
153 |
|
154 |
-
system_message = "You are an expert AI performance analyst, skilled in evaluating and summarizing AI agent task execution."
|
155 |
analysis = await llm_client.generate_text(prompt, system_message, response_format=TaskSummary)
|
156 |
return json.loads(analysis)
|
157 |
|
|
|
99 |
|
100 |
async def analyze_step(call, step_number, total_steps, llm_client):
|
101 |
prompt = f"""
|
102 |
+
Analyze Step {step_number}/{total_steps} of the AI agent's USACO task solution:
|
103 |
+
Input: {call['inputs']}
|
104 |
+
Output: {call['outputs']}
|
105 |
+
Exception: {call['exception']}
|
106 |
+
Summary: {call['summary']}
|
107 |
+
|
108 |
+
Provide a detailed, technical analysis with the following:
|
109 |
+
1. Specific Description: Describe precisely what the agent did in this step, including any algorithms, data structures, or problem-solving techniques employed.
|
110 |
+
2. Action Classification: Categorize the action as one of:
|
111 |
+
- 'plan': Strategizing or outlining an approach
|
112 |
+
- 'tool': Using a specific programming construct or algorithm
|
113 |
+
- 'retrieve': Accessing or utilizing external information
|
114 |
+
- 'other': Any action that doesn't fit the above categories
|
115 |
+
3. Technical Evaluation: Assess the technical merit of the agent's approach. Comment on efficiency, correctness, and adherence to USACO problem-solving best practices.
|
116 |
+
4. Success: Determine if the agent successfully completed its intended action.
|
117 |
+
5. Concise Headline: Write a technically precise headline (max 7 words) that captures the essence of this step.
|
118 |
+
|
119 |
+
Your analysis should be highly specific to this task. Avoid generalities and focus on the technical details of the agent's approach to this particular problem.
|
120 |
+
"""
|
121 |
+
|
122 |
+
system_message = "You are an expert in AI agent design and evaluation. Analyze the AI agent's actions with the depth and specificity expected in a detailed expert review. Focus on providing insights that would be valuable to an AI researcher specializing in AI agent development."
|
123 |
+
|
124 |
analysis = await llm_client.generate_text(prompt, system_message, response_format=StepAnalysis)
|
125 |
|
126 |
try:
|
|
|
143 |
steps_summary = "\n".join([f"Step {i+1}: {step['analysis']}" for i, step in enumerate(steps)])
|
144 |
|
145 |
prompt = f"""
|
146 |
+
Provide a comprehensive analysis of the AI agent's approach to solving this USACO task:
|
147 |
|
148 |
+
{steps_summary}
|
149 |
|
150 |
+
Your analysis should include:
|
151 |
+
1. Technical Overview: Describe the agent's overall problem-solving strategy, highlighting specific actions and techniques used throughout the task.
|
152 |
+
2. Key Achievements: Identify and explain the most significant breakthroughs or efficient implementations demonstrated by the agent.
|
153 |
+
3. Technical Challenges: Analyze the primary obstacles encountered, focusing on difficulties or conceptual misunderstandings in the context of the task.
|
154 |
+
4. Performance Evaluation: Assess the agent's overall performance, considering factors such as time complexity, space efficiency, code quality, and adherence to competitive programming best practices.
|
155 |
|
156 |
+
Your summary should be highly technical and specific to this task. Assume the reader is an expert as well and familiar with the task context. Focus on providing insights that would be valuable to an AI researcher specializing in AI agent development.
|
157 |
+
"""
|
158 |
|
159 |
+
system_message = "You are an expert AI performance analyst, skilled in evaluating and summarizing AI agent task execution. You are specialized in providing analyses to support AI researchers to develop AI agents."
|
160 |
analysis = await llm_client.generate_text(prompt, system_message, response_format=TaskSummary)
|
161 |
return json.loads(analysis)
|
162 |
|