benediktstroebl committed on
Commit
9f1f7b2
·
1 Parent(s): 1d04811

updated prompts

Browse files
Files changed (1) hide show
  1. agent_monitor/monitor.py +32 -27
agent_monitor/monitor.py CHANGED
@@ -99,23 +99,28 @@ async def analyze_task(calls, llm_client):
99
 
100
  async def analyze_step(call, step_number, total_steps, llm_client):
101
  prompt = f"""
102
- Analyze Step {step_number}/{total_steps} of AI agent task:
103
- Input: {call['inputs']}
104
- Output: {call['outputs']}
105
- Exception: {call['exception']}
106
- Summary: {call['summary']}
107
-
108
- Provide an analysis with:
109
- 1. A brief description of the agent's action.
110
- 2. Classify the action as one of: 'plan', 'tool', 'retrieve', or 'other'.
111
- 3. Give a brief evaluation of progress, obstacles, or errors.
112
- 4. Indicate if the agent successfully completed its intended action.
113
- 5. Write a concise headline summarizing the agent's action that is ideally less than 7 words long.
114
-
115
- Ensure accuracy and conciseness. Be specific and avoid too high-level descriptions.
116
- """
117
-
118
- system_message = "You are an expert AI system analyst, skilled in categorizing and evaluating AI agent actions."
 
 
 
 
 
119
  analysis = await llm_client.generate_text(prompt, system_message, response_format=StepAnalysis)
120
 
121
  try:
@@ -138,20 +143,20 @@ async def summarize_task(steps, llm_client):
138
  steps_summary = "\n".join([f"Step {i+1}: {step['analysis']}" for i, step in enumerate(steps)])
139
 
140
  prompt = f"""
141
- Analyze the following AI agent task steps:
142
 
143
- {steps_summary}
144
 
145
- Provide a summary with:
146
- 1. A concise overview of the agent's approach.
147
- 2. Main achievements or breakthroughs.
148
- 3. Primary obstacles or errors encountered.
149
- 4. A brief evaluation of the agent's overall performance.
150
 
151
- Focus on patterns in the agent's approach and effectiveness. Be concise and insightful.
152
- """
153
 
154
- system_message = "You are an expert AI performance analyst, skilled in evaluating and summarizing AI agent task execution."
155
  analysis = await llm_client.generate_text(prompt, system_message, response_format=TaskSummary)
156
  return json.loads(analysis)
157
 
 
99
 
100
  async def analyze_step(call, step_number, total_steps, llm_client):
101
  prompt = f"""
102
+ Analyze Step {step_number}/{total_steps} of the AI agent's USACO task solution:
103
+ Input: {call['inputs']}
104
+ Output: {call['outputs']}
105
+ Exception: {call['exception']}
106
+ Summary: {call['summary']}
107
+
108
+ Provide a detailed, technical analysis with the following:
109
+ 1. Specific Description: Describe precisely what the agent did in this step, including any algorithms, data structures, or problem-solving techniques employed.
110
+ 2. Action Classification: Categorize the action as one of:
111
+ - 'plan': Strategizing or outlining an approach
112
+ - 'tool': Using a specific programming construct or algorithm
113
+ - 'retrieve': Accessing or utilizing external information
114
+ - 'other': Any action that doesn't fit the above categories
115
+ 3. Technical Evaluation: Assess the technical merit of the agent's approach. Comment on efficiency, correctness, and adherence to USACO problem-solving best practices.
116
+ 4. Success: Determine if the agent successfully completed its intended action.
117
+ 5. Concise Headline: Write a technically precise headline (max 7 words) that captures the essence of this step.
118
+
119
+ Your analysis should be highly specific to this task. Avoid generalities and focus on the technical details of the agent's approach to this particular problem.
120
+ """
121
+
122
+ system_message = "You are an expert in AI agent design and evaluation. Analyze the AI agent's actions with the depth and specificity expected in a detailed expert review. Focus on providing insights that would be valuable to an AI researcher specializing in AI agent development."
123
+
124
  analysis = await llm_client.generate_text(prompt, system_message, response_format=StepAnalysis)
125
 
126
  try:
 
143
  steps_summary = "\n".join([f"Step {i+1}: {step['analysis']}" for i, step in enumerate(steps)])
144
 
145
  prompt = f"""
146
+ Provide a comprehensive analysis of the AI agent's approach to solving this USACO task:
147
 
148
+ {steps_summary}
149
 
150
+ Your analysis should include:
151
+ 1. Technical Overview: Describe the agent's overall problem-solving strategy, highlighting specific actions and techniques used throughout the task.
152
+ 2. Key Achievements: Identify and explain the most significant breakthroughs or efficient implementations demonstrated by the agent.
153
+ 3. Technical Challenges: Analyze the primary obstacles encountered, focusing on difficulties or conceptual misunderstandings in the context of the task.
154
+ 4. Performance Evaluation: Assess the agent's overall performance, considering factors such as time complexity, space efficiency, code quality, and adherence to competitive programming best practices.
155
 
156
+ Your summary should be highly technical and specific to this task. Assume the reader is an expert as well and familiar with the task context. Focus on providing insights that would be valuable to an AI researcher specializing in AI agent development.
157
+ """
158
 
159
+ system_message = "You are an expert AI performance analyst, skilled in evaluating and summarizing AI agent task execution. You are specialized in providing analyses to support AI researchers to develop AI agents."
160
  analysis = await llm_client.generate_text(prompt, system_message, response_format=TaskSummary)
161
  return json.loads(analysis)
162