|
import React, { useState, useEffect } from 'react'; |
|
import { chain } from 'lodash'; |
|
import './App.css'; |
|
|
|
const ScoreBar = ({ score, isVanilla = false }) => { |
|
if (score === undefined || score === null) return null; |
|
|
|
const percentage = score <= 1 ? score * 100 : score; |
|
const hue = Math.min(percentage * 1.2, 120); |
|
const backgroundColor = `hsl(${hue}, 80%, 50%)`; |
|
const className = isVanilla ? "vanilla-bar" : "score-bar"; |
|
|
|
return ( |
|
<div className={className}> |
|
<div |
|
className="score-fill" |
|
style={{ |
|
width: `${percentage}%`, |
|
backgroundColor, |
|
height: `100%`, |
|
}} |
|
/> |
|
{!isVanilla && ( |
|
<span className="score-text"> |
|
{percentage.toFixed(1)}% |
|
</span> |
|
)} |
|
</div> |
|
); |
|
}; |
|
|
|
const App = () => { |
|
const [allData, setAllData] = useState([]); |
|
const [loading, setLoading] = useState(true); |
|
const [error, setError] = useState(null); |
|
const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' }); |
|
const [searchQuery, setSearchQuery] = useState(''); |
|
const [showVanilla, setShowVanilla] = useState(true); |
|
const [showToolCalling, setShowToolCalling] = useState(false); |
|
|
|
useEffect(() => { |
|
const fetchData = async () => { |
|
try { |
|
setLoading(true); |
|
|
|
|
|
const response = await fetch('https://smolagents-smolagents-leaderboard.hf.space/api/results'); |
|
if (!response.ok) { |
|
throw new Error(`HTTP error! status: ${response.status}`); |
|
} |
|
const jsonData = await response.json(); |
|
setAllData(jsonData); |
|
} catch (err) { |
|
console.error('Error fetching data:', err); |
|
setError(err.message); |
|
} finally { |
|
setLoading(false); |
|
} |
|
}; |
|
|
|
fetchData(); |
|
}, []); |
|
|
|
const handleSort = (key) => { |
|
const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc'; |
|
setSortConfig({ key, direction }); |
|
}; |
|
|
|
|
|
const getFilteredData = () => { |
|
const actionType = showToolCalling ? 'tool-calling' : 'code'; |
|
return allData.filter(item => item.agent_action_type === actionType); |
|
}; |
|
|
|
|
|
const getVanillaScore = (modelId, metric) => { |
|
const vanillaEntry = allData.find(item => |
|
item.model_id === modelId && item.agent_action_type === 'vanilla' |
|
); |
|
return vanillaEntry?.scores[metric]; |
|
}; |
|
|
|
const filteredAndSortedData = chain(getFilteredData()) |
|
.filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase())) |
|
.orderBy( |
|
[item => { |
|
if (sortConfig.key === 'model') { |
|
return item.model_id; |
|
} |
|
return item.scores[sortConfig.key] || 0; |
|
}], |
|
[sortConfig.direction] |
|
) |
|
.value(); |
|
|
|
if (loading) return <div className="container">Loading benchmark results...</div>; |
|
if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>; |
|
|
|
return ( |
|
<div className="container"> |
|
<div className="header"> |
|
<h1 className="title">Smolagents Leaderboard</h1> |
|
<p className="subtitle">How do different LLMs compare for powering agents?</p> |
|
<p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co./datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p> |
|
</div> |
|
|
|
<div className="search-container"> |
|
<div className="search-with-options"> |
|
<input |
|
type="text" |
|
className="search-input" |
|
placeholder="Search models..." |
|
value={searchQuery} |
|
onChange={(e) => setSearchQuery(e.target.value)} |
|
/> |
|
|
|
<div className="options-container"> |
|
<label className="option-label"> |
|
<input |
|
type="checkbox" |
|
checked={showVanilla} |
|
onChange={() => setShowVanilla(!showVanilla)} |
|
/> |
|
Show Vanilla Scores |
|
</label> |
|
|
|
<label className="option-label"> |
|
<input |
|
type="checkbox" |
|
checked={showToolCalling} |
|
onChange={() => setShowToolCalling(!showToolCalling)} |
|
/> |
|
Show Tool-Calling Scores |
|
</label> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
<div className="table-container"> |
|
<table> |
|
<thead> |
|
<tr> |
|
<th onClick={() => handleSort('model')}> |
|
Model {sortConfig.key === 'model' && ( |
|
sortConfig.direction === 'desc' ? 'β' : 'β' |
|
)} |
|
</th> |
|
{["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => ( |
|
<th key={benchmark} onClick={() => handleSort(benchmark)}> |
|
{benchmark === "Average" ? benchmark : benchmark + ` subset`} {sortConfig.key === benchmark && ( |
|
sortConfig.direction === 'desc' ? 'β' : 'β' |
|
)} |
|
</th> |
|
))} |
|
</tr> |
|
</thead> |
|
<tbody> |
|
{filteredAndSortedData.map((item, index) => ( |
|
<tr key={index}> |
|
<td className="model-cell"> |
|
<div className="model-name">{item.model_id}</div> |
|
{showVanilla && ( |
|
<div className="vanilla-text"><i>Vanilla score below</i></div> |
|
)} |
|
</td> |
|
{["Average", "GAIA", "MATH", "SimpleQA"].map(metric => ( |
|
<td key={metric}> |
|
<ScoreBar score={item.scores[metric]} isVanilla={false}/> |
|
{showVanilla && getVanillaScore(item.model_id, metric) !== undefined && ( |
|
<ScoreBar score={getVanillaScore(item.model_id, metric)} isVanilla={true}/> |
|
)} |
|
</td> |
|
))} |
|
</tr> |
|
))} |
|
</tbody> |
|
</table> |
|
</div> |
|
|
|
<div className="legend"> |
|
<p><strong>Agent type:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p> |
|
</div> |
|
</div> |
|
); |
|
}; |
|
|
|
export default App; |