# This file contains information about verified agent results for different benchmarks.
# Format:
#   benchmark_name:
#     - agent_name: "Name of the agent"
#       verification_date: YYYY-MM-DD

# Verified agents for the corebench_easy benchmark.
# Each entry pairs an agent's name with the date its result was verified.
corebench_easy:
  - agent_name: 'AutoGPT (gpt-4o)'
    verification_date: 2024-11-26
  - agent_name: 'AutoGPT (gpt-4o-mini)'
    verification_date: 2024-11-26
  - agent_name: 'CORE-Agent (gpt-4o)'
    verification_date: 2024-11-26
  - agent_name: 'CORE-Agent (gpt-4o-mini)'
    verification_date: 2024-11-26

# Verified agents for the corebench_medium benchmark.
# Each entry pairs an agent's name with the date its result was verified.
corebench_medium:
  - agent_name: 'AutoGPT (gpt-4o)'
    verification_date: 2024-11-26
  - agent_name: 'AutoGPT (gpt-4o-mini)'
    verification_date: 2024-11-26
  - agent_name: 'CORE-Agent (gpt-4o)'
    verification_date: 2024-11-26
  - agent_name: 'CORE-Agent (gpt-4o-mini)'
    verification_date: 2024-11-26

# Verified agents for the corebench_hard benchmark.
# Each entry pairs an agent's name with the date its result was verified.
corebench_hard:
  - agent_name: 'AutoGPT (gpt-4o)'
    verification_date: 2024-11-26
  - agent_name: 'AutoGPT (gpt-4o-mini)'
    verification_date: 2024-11-26
  - agent_name: 'CORE-Agent (gpt-4o)'
    verification_date: 2024-11-26
  - agent_name: 'CORE-Agent (gpt-4o-mini)'
    verification_date: 2024-11-26
  # NOTE(review): this date (2024-11-16) differs from every other entry
  # visible here (2024-11-26) -- confirm it is intentional and not a typo.
  - agent_name: 'CORE-Agent (claude-3.5-sonnet)'
    verification_date: 2024-11-16
  - agent_name: 'CORE-Agent (o1-mini) (cost limit $10)'
    verification_date: 2024-11-26