---
# This file contains information about verified agent results for different benchmarks.
# Format:
# benchmark_name:
#   - agent_name: "Name of the agent"
#     verification_date: YYYY-MM-DD
# Verified agent results for the corebench_easy benchmark.
# Each entry pairs an agent name with the date its result was verified.
corebench_easy:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
# Verified agent results for the corebench_medium benchmark.
# Each entry pairs an agent name with the date its result was verified.
corebench_medium:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
# Verified agent results for the corebench_hard benchmark.
# Each entry pairs an agent name with the date its result was verified.
corebench_hard:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (claude-3.5-sonnet)"
    # NOTE(review): every other entry in this file is dated 2024-11-26;
    # confirm 2024-11-16 is intended and not a typo.
    verification_date: 2024-11-16
  - agent_name: "CORE-Agent (o1-mini)"
    verification_date: 2024-11-26