---
# Verified agent results, grouped by benchmark.
#
# Each top-level key is a benchmark name. Its value is a list of agents,
# one mapping per agent.
#
# Format:
# benchmark_name:
#   - agent_name: "Name of the agent"
#     verification_date: YYYY-MM-DD
#
# NOTE(review): verification_date values are unquoted, so YAML 1.1 loaders
# (e.g. PyYAML) resolve them to date objects rather than strings. Quote them
# only if the consumer expects strings -- confirm against the loader in use.

corebench_easy:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26

corebench_medium:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26

corebench_hard:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
  # NOTE(review): this date is 2024-11-16 while every other entry in the file
  # is 2024-11-26 -- confirm it is intentional and not a typo.
  - agent_name: "CORE-Agent (claude-3.5-sonnet)"
    verification_date: 2024-11-16
  - agent_name: "CORE-Agent (o1-mini) (cost limit $10)"
    verification_date: 2024-11-26