---
# This file contains information about verified agent results for different benchmarks.
# Format:
# benchmark_name:
#   - agent_name: "Name of the agent"
#     verification_date: YYYY-MM-DD
# Verified agents for the corebench_easy benchmark.
# Each list item pairs an agent_name with the date its result was verified.
corebench_easy:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
# Verified agents for the corebench_medium benchmark.
# Each list item pairs an agent_name with the date its result was verified.
corebench_medium:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
# Verified agents for the corebench_hard benchmark.
# Each list item pairs an agent_name with the date its result was verified.
corebench_hard:
  - agent_name: "AutoGPT (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "AutoGPT (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (gpt-4o-mini)"
    verification_date: 2024-11-26
  - agent_name: "CORE-Agent (claude-3.5-sonnet)"
    # NOTE(review): every other entry in this file is dated 2024-11-26;
    # confirm that 2024-11-16 is intended and not a typo.
    verification_date: 2024-11-16
  - agent_name: "CORE-Agent (o1-mini) (cost limit $10)"
    verification_date: 2024-11-26