Zachary Siegel committed on
Commit
abf78cc
·
1 Parent(s): 8de3f0a

verify the agents

Browse files
Files changed (1) hide show
  1. verified_agents.yaml +27 -52
verified_agents.yaml CHANGED
@@ -4,57 +4,32 @@
4
  # - agent_name: "Name of the agent"
5
  # verification_date: YYYY-MM-DD
6
 
7
- usaco:
8
- - agent_name: "USACO Reflexion + Episodic (gpt-4o-mini-2024-07-18)"
9
- verification_date: 2024-08-20
10
- - agent_name: "USACO Reflexion + Episodic + Semantic (gpt-4o-mini-2024-07-18)"
11
- verification_date: 2024-08-20
12
- - agent_name: "USACO Reflexion (gpt-4o-mini-2024-07-18)"
13
- verification_date: 2024-08-20
14
- - agent_name: "USACO Episodic (gpt-4o-mini-2024-07-18)"
15
- verification_date: 2024-08-12
16
- - agent_name: "USACO Reflexion + Semantic (gpt-4o-mini-2024-07-18)"
17
- verification_date: 2024-08-20
18
- - agent_name: "USACO Zero-shot (gpt-4o-mini-2024-07-18)"
19
- verification_date: 2024-08-11
20
- - agent_name: "USACO Semantic (gpt-4o-mini-2024-07-18)"
21
- verification_date: 2024-08-12
22
- - agent_name: USACO Reflexion + Episodic + Semantic (gpt-4o-2024-05-13)
23
- verification_date: 2024-08-25
24
- - agent_name: USACO Reflexion + Episodic (gpt-4o-2024-05-13)
25
- verification_date: 2024-08-25
26
- - agent_name: USACO Reflexion + Semantic (gpt-4o-2024-05-13)
27
- verification_date: 2024-08-25
28
- - agent_name: Episodic Retrial (2x) (gpt-4o-2024-05-13)
29
- verification_date: 2024-08-25
30
- - agent_name: Episodic Retrial (3x) (gpt-4o-mini-2024-07-18)
31
- verification_date: 2024-08-25
32
- - agent_name: Episodic Retrial (2x) (gpt-4o-mini-2024-07-18)
33
- verification_date: 2024-08-25
34
- - agent_name: Episodic Retrial (5x) (gpt-4o-mini-2024-07-18)
35
- verification_date: 2024-08-25
36
- - agent_name: Episodic Warming (3 Steps) (gpt-4o-mini-2024-07-18)
37
- verification_date: 2024-08-24
38
- - agent_name: USACO Episodic (gpt-4o-2024-05-13)
39
- verification_date: 2024-08-24
40
- - agent_name: USACO Semantic (gpt-4o-2024-05-13)
41
- verification_date: 2024-08-24
42
- - agent_name: Zero-shot Retrial (2x) (gpt-4o-mini-2024-07-18)
43
- verification_date: 2024-08-24
44
- - agent_name: Zero-shot Retrial (3x) (gpt-4o-mini-2024-07-18)
45
- verification_date: 2024-08-24
46
- - agent_name: Zero-shot Retrial (5x) (gpt-4o-mini-2024-07-18)
47
- verification_date: 2024-08-24
48
- - agent_name: USACO Zero-shot (gpt-4o-2024-05-13)
49
- verification_date: 2024-08-24
50
 
 
 
 
 
 
 
 
 
 
51
 
52
- swebench_verified:
53
- - agent_name: "Agentless (gpt-4o-mini-2024-07-18) (50 Instances)"
54
- verification_date: 2024-08-17
55
- - agent_name: "SWE-agent (gpt-4o-mini-2024-07-18) (Cost Limit: $1) (50 Instances)"
56
- verification_date: 2024-08-19
57
-
58
- mlagentbench:
59
- - agent_name: "MLAgentBench ResearchAgent (gpt-4o-mini-2024-07-18)"
60
- verification_date: 2024-08-19
 
4
  # - agent_name: "Name of the agent"
5
  # verification_date: YYYY-MM-DD
6
 
7
+ corebench_easy:
8
+ - agent_name: "AutoGPT (GPT-4o)"
9
+ verification_date: 2024-09-28
10
+ - agent_name: "AutoGPT (GPT-4o-mini)"
11
+ verification_date: 2024-09-28
12
+ - agent_name: "CORE-Agent (GPT-4o)"
13
+ verification_date: 2024-09-28
14
+ - agent_name: "CORE-Agent (GPT-4o-mini)"
15
+ verification_date: 2024-09-28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ corebench_medium:
18
+ - agent_name: "AutoGPT (GPT-4o)"
19
+ verification_date: 2024-09-28
20
+ - agent_name: "AutoGPT (GPT-4o-mini)"
21
+ verification_date: 2024-09-28
22
+ - agent_name: "CORE-Agent (GPT-4o)"
23
+ verification_date: 2024-09-28
24
+ - agent_name: "CORE-Agent (GPT-4o-mini)"
25
+ verification_date: 2024-09-28
26
 
27
+ corebench_hard:
28
+ - agent_name: "AutoGPT (GPT-4o)"
29
+ verification_date: 2024-09-28
30
+ - agent_name: "AutoGPT (GPT-4o-mini)"
31
+ verification_date: 2024-09-28
32
+ - agent_name: "CORE-Agent (GPT-4o)"
33
+ verification_date: 2024-09-28
34
+ - agent_name: "CORE-Agent (GPT-4o-mini)"
35
+ verification_date: 2024-09-28