Spaces:
Running
Running
Commit
·
19bb306
1
Parent(s):
3427022
Added a timestamp to the task summary prompt for the failure report and fixed the failure report Gradio issue
Browse files
- agent_monitor/failure_report.py +2 -0
- app.py +5 -5
agent_monitor/failure_report.py
CHANGED
@@ -6,6 +6,7 @@ from pydantic import BaseModel
|
|
6 |
from abc import ABC, abstractmethod
|
7 |
import json
|
8 |
from typing import Dict, List
|
|
|
9 |
|
10 |
class FailureCategory(BaseModel):
|
11 |
category_id: int
|
@@ -105,6 +106,7 @@ async def summarize_task(task_id, calls, llm_client):
|
|
105 |
Step {i}:
|
106 |
Input: {call['inputs']}
|
107 |
Output: {call['outputs']}
|
|
|
108 |
"""
|
109 |
|
110 |
prompt = f"""
|
|
|
6 |
from abc import ABC, abstractmethod
|
7 |
import json
|
8 |
from typing import Dict, List
|
9 |
+
from datetime import datetime
|
10 |
|
11 |
class FailureCategory(BaseModel):
|
12 |
category_id: int
|
|
|
106 |
Step {i}:
|
107 |
Input: {call['inputs']}
|
108 |
Output: {call['outputs']}
|
109 |
+
Timestamp: {datetime.fromtimestamp(call_data['created_timestamp'])}
|
110 |
"""
|
111 |
|
112 |
prompt = f"""
|
app.py
CHANGED
@@ -216,8 +216,8 @@ def format_call_info(step, step_index):
|
|
216 |
return formatted_info
|
217 |
|
218 |
|
219 |
-
def update_failure_report(agent_name):
|
220 |
-
failure_report = get_failure_report(agent_name,
|
221 |
if not failure_report:
|
222 |
return "No failure report available for this agent.", None
|
223 |
|
@@ -382,7 +382,7 @@ with gr.Blocks() as demo:
|
|
382 |
|
383 |
# Update failure report when agent is selected
|
384 |
failure_report_agent_dropdown.change(update_failure_report,
|
385 |
-
inputs=[failure_report_agent_dropdown],
|
386 |
outputs=[failure_categories_overview, failure_categories_chart])
|
387 |
|
388 |
gr.Markdown("# Raw Predictions")
|
@@ -480,7 +480,7 @@ with gr.Blocks() as demo:
|
|
480 |
|
481 |
# Update failure report when agent is selected
|
482 |
failure_report_agent_dropdown.change(update_failure_report,
|
483 |
-
inputs=[failure_report_agent_dropdown],
|
484 |
outputs=[failure_categories_overview, failure_categories_chart])
|
485 |
|
486 |
gr.Markdown("# Raw Predictions")
|
@@ -553,7 +553,7 @@ async def main():
|
|
553 |
preprocess_traces()
|
554 |
|
555 |
# # Download the results from the Hugging Face Hub
|
556 |
-
|
557 |
|
558 |
# Check for new uploads and process them
|
559 |
await check_and_process_uploads()
|
|
|
216 |
return formatted_info
|
217 |
|
218 |
|
219 |
+
def update_failure_report(agent_name, benchmark_name):
|
220 |
+
failure_report = get_failure_report(agent_name, benchmark_name)
|
221 |
if not failure_report:
|
222 |
return "No failure report available for this agent.", None
|
223 |
|
|
|
382 |
|
383 |
# Update failure report when agent is selected
|
384 |
failure_report_agent_dropdown.change(update_failure_report,
|
385 |
+
inputs=[failure_report_agent_dropdown, gr.Textbox(value="swebench_lite", visible=False)],
|
386 |
outputs=[failure_categories_overview, failure_categories_chart])
|
387 |
|
388 |
gr.Markdown("# Raw Predictions")
|
|
|
480 |
|
481 |
# Update failure report when agent is selected
|
482 |
failure_report_agent_dropdown.change(update_failure_report,
|
483 |
+
inputs=[failure_report_agent_dropdown, gr.Textbox(value="swebench_verified", visible=False)],
|
484 |
outputs=[failure_categories_overview, failure_categories_chart])
|
485 |
|
486 |
gr.Markdown("# Raw Predictions")
|
|
|
553 |
preprocess_traces()
|
554 |
|
555 |
# # Download the results from the Hugging Face Hub
|
556 |
+
await asyncio.to_thread(download_latest_results)
|
557 |
|
558 |
# Check for new uploads and process them
|
559 |
await check_and_process_uploads()
|