// Leaderboard page: a small ScoreBar presentational component plus the App
// component that fetches benchmark results and renders them as a sortable,
// searchable listing.
//
// NOTE(review): in this copy the JSX element tags appear to have been stripped
// (each `return (` is followed by bare `{...}` expressions and text), and
// several statements are collapsed onto one physical line so that the inline
// `//` comments swallow the code that follows them. Restore the markup and
// line breaks from version control before building; the notes below describe
// only the logic that is still visible.
//
// ScoreBar({ score, isVanilla = false })
//   - Returns null when `score` is undefined or null.
//   - Values <= 1 are treated as fractions and multiplied by 100; larger
//     values are used as percentages unchanged.
//     NOTE(review): a score that is genuinely <= 1 on a 0-100 scale would be
//     rescaled too - confirm the API's score range.
//   - `hue` is percentage * 1.2 capped at 120 and feeds an
//     `hsl(hue, 80%, 50%)` background colour.
//   - Uses the "vanilla-bar" class when `isVanilla` is true, "score-bar"
//     otherwise; the one-decimal percentage label is only rendered for
//     non-vanilla bars.
//
// App()
//   State:
//     allData          - rows returned by the results API (initially []).
//     loading / error  - fetch status; `error` holds the error message string.
//     sortConfig       - { key, direction }, initially Average / 'desc'.
//     searchQuery      - text used to filter rows by model_id.
//     showVanilla      - initially true; gates the vanilla comparison output.
//     showToolCalling  - initially false; selects which agent_action_type
//                        ('tool-calling' vs 'code') is listed.
//   Effect (empty dependency array): fetches the results endpoint, throws on a
//     non-OK HTTP status, stores the parsed JSON in `allData`, logs and records
//     any error message, and always clears `loading` in `finally`.
//     NOTE(review): the request is not cancelled on unmount - confirm whether
//     an AbortController is wanted here.
//   handleSort(key): switches to 'asc' only when `key` is already the active
//     sort key and the direction is 'desc'; every other case yields 'desc'.
//   getFilteredData(): rows of `allData` whose agent_action_type matches the
//     selected action type.
//   getVanillaScore(modelId, metric): scores[metric] of the first row with the
//     same model_id and agent_action_type 'vanilla', or undefined when no such
//     row exists.
//     NOTE(review): this is a linear scan of `allData` and is called per row
//     and per metric during render; it also assumes the row has `scores`.
//   filteredAndSortedData: case-insensitive substring match of `searchQuery`
//     against model_id, then lodash orderBy on model_id (key 'model') or on
//     scores[sortConfig.key], with falsy scores sorted as 0.
//   Rendering: returns a loading message while `loading` is true, an error
//     message when `error` is set, otherwise the leaderboard.
import React, { useState, useEffect } from 'react'; import { chain } from 'lodash'; import './App.css'; const ScoreBar = ({ score, isVanilla = false }) => { if (score === undefined || score === null) return null; const percentage = score <= 1 ? score * 100 : score; const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green) const backgroundColor = `hsl(${hue}, 80%, 50%)`; const className = isVanilla ? "vanilla-bar" : "score-bar"; return (
{!isVanilla && ( {percentage.toFixed(1)}% )}
); }; const App = () => { const [allData, setAllData] = useState([]); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' }); const [searchQuery, setSearchQuery] = useState(''); const [showVanilla, setShowVanilla] = useState(true); const [showToolCalling, setShowToolCalling] = useState(false); useEffect(() => { const fetchData = async () => { try { setLoading(true); // Fetch all data from API const response = await fetch('https://smolagents-smolagents-leaderboard.hf.space/api/results'); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const jsonData = await response.json(); setAllData(jsonData); } catch (err) { console.error('Error fetching data:', err); setError(err.message); } finally { setLoading(false); } }; fetchData(); }, []); const handleSort = (key) => { const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc'; setSortConfig({ key, direction }); }; // Filter data based on selected action type const getFilteredData = () => { const actionType = showToolCalling ? 'tool-calling' : 'code'; return allData.filter(item => item.agent_action_type === actionType); }; // Get vanilla score for a model const getVanillaScore = (modelId, metric) => { const vanillaEntry = allData.find(item => item.model_id === modelId && item.agent_action_type === 'vanilla' ); return vanillaEntry?.scores[metric]; }; const filteredAndSortedData = chain(getFilteredData()) .filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase())) .orderBy( [item => { if (sortConfig.key === 'model') { return item.model_id; } return item.scores[sortConfig.key] || 0; }], [sortConfig.direction] ) .value(); if (loading) return
Loading benchmark results...
; if (error) return
Error: {error}
; return (

Smolagents Leaderboard

How do different LLMs compare for powering agents?

Uses smolagents with smolagents benchmark.

setSearchQuery(e.target.value)} />
{["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => ( ))} {filteredAndSortedData.map((item, index) => ( {["Average", "GAIA", "MATH", "SimpleQA"].map(metric => ( ))} ))}
handleSort('model')}> Model {sortConfig.key === 'model' && ( sortConfig.direction === 'desc' ? '↓' : '↑' )} handleSort(benchmark)}> {benchmark === "Average" ? benchmark : benchmark + ` subset`} {sortConfig.key === benchmark && ( sortConfig.direction === 'desc' ? '↓' : '↑' )}
{item.model_id}
{showVanilla && (
Vanilla score below
)}
{showVanilla && getVanillaScore(item.model_id, metric) !== undefined && ( )}

Agent type: {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}

); }; export default App;