File size: 1,298 Bytes
b7d1f08
 
 
 
 
 
abf78cc
ffe8812
8b95f64
ffe8812
8b95f64
ffe8812
8b95f64
ffe8812
8b95f64
e92240d
abf78cc
ffe8812
8b95f64
ffe8812
8b95f64
ffe8812
8b95f64
ffe8812
8b95f64
b7d1f08
abf78cc
ffe8812
8b95f64
ffe8812
8b95f64
ffe8812
8b95f64
ffe8812
8b95f64
67c84a0
 
56eb4b8
67c84a0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
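# This file lists agent results that have been verified, grouped by benchmark.
# Each top-level key is a benchmark name that maps to a list of entries.
# Format:
#   benchmark_name:
#     - agent_name: "Name of the agent"
#       verification_date: YYYY-MM-DD
# NOTE(review): verification_date is written unquoted, so YAML 1.1 loaders
# (e.g. PyYAML) resolve it to a date object rather than a string; confirm
# what the consumer expects before quoting or reformatting the dates.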

# Verified agent results for the corebench_easy benchmark.
# One list entry per agent: its name and the date its result was verified.
corebench_easy:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26

# Verified agent results for the corebench_medium benchmark.
# One list entry per agent: its name and the date its result was verified.
corebench_medium:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26

# Verified agent results for the corebench_hard benchmark.
# One list entry per agent: its name and the date its result was verified.
# This list has two CORE-Agent entries (claude-3.5-sonnet and o1-mini) that
# the corebench_easy and corebench_medium lists do not.
corebench_hard:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (claude-3.5-sonnet)"
    # NOTE(review): every other entry in this file is dated 2024-11-26;
    # confirm that 2024-11-16 is intended and not a typo.
    verification_date: 2024-11-16
  # The cost-limit qualifier is part of the agent name string itself.
  - agent_name: "CORE-Agent (o1-mini) (cost limit $10)"
    verification_date: 2024-11-26