blogpost-scaling-test-time-compute / app /src /leaderboard_results.js
hynky's picture
hynky HF staff
tmp
e4890d1
raw
history blame
9.32 kB
import Papa from 'papaparse';
import { DataTable } from 'simple-datatables';
/**
 * Language dropdown label -> CSV file name holding the results for that
 * selection. The file name is appended to `data/os_models/` when fetched.
 * Key order is the order in which the options appear in the dropdown.
 */
const languageMap = {
  'All Languages': 'final_rankings.csv',
  Arabic: 'results_ar.csv',
  Turkish: 'results_tr.csv',
  Swahili: 'results_sw.csv',
  Russian: 'results_ru.csv',
  Telugu: 'results_te.csv',
  Thai: 'results_th.csv',
  Chinese: 'results_zh.csv',
  French: 'results_fr.csv',
  Hindi: 'results_hi.csv',
};
/**
 * CSV column key -> label shown in the table header.
 * Columns without an entry here fall back to a label derived from the
 * column key itself (see the header construction in createResultsTable).
 */
const columnNameMap = {
  runname: 'Model',
  agg_score_macro: 'Macro Score',
  agg_score_RES: 'RES Score',
  agg_score_RC: 'RC Score',
  agg_score_GK: 'GK Score',
  agg_score_NLU: 'NLU Score',
  avg_rank_macro: 'Average Rank',
  rank: 'Rank',
};
/**
 * Builds a `<select>` element with one `<option>` per entry of `options`.
 * Each entry is used both as the option's value and as its visible text.
 *
 * @param {string[]} options - Option labels, in display order.
 * @param {Function} onChange - Listener registered for the `change` event.
 * @returns {HTMLSelectElement} The populated, not-yet-attached select element.
 */
function createDropdown(options, onChange) {
  const dropdown = document.createElement('select');

  for (const label of options) {
    const entry = document.createElement('option');
    entry.value = label;
    entry.textContent = label;
    dropdown.appendChild(entry);
  }

  dropdown.addEventListener('change', onChange);
  return dropdown;
}
/**
 * Derives a short display name from a raw task/column identifier.
 *
 * When the identifier contains `|`, only the second `|`-separated segment is
 * kept. Everything from the first `_mcf` onwards is then dropped, followed by
 * everything from the first `_cf` onwards.
 *
 * @param {string} taskName - Raw identifier, e.g. `suite|task_mcf|0`.
 * @returns {string} The shortened name.
 */
function processTaskName(taskName) {
  const segments = taskName.split('|');
  const base = segments.length > 1 ? segments[1] : taskName;
  const [withoutMcf] = base.split('_mcf');
  const [withoutCf] = withoutMcf.split('_cf');
  return withoutCf;
}
/**
 * Makes a column key usable as part of a CSS class name by replacing every
 * character other than ASCII letters, digits, `-` and `_` with `_`.
 *
 * @param {string} name - Raw column key.
 * @returns {string} The key with disallowed characters replaced one-for-one.
 */
function sanitizeColumnName(name) {
  // Without the `u`/`i` flags, `\w` is exactly [A-Za-z0-9_].
  const disallowedCharacter = /[^\w-]/g;
  return name.replace(disallowedCharacter, '_');
}
/**
 * Builds the leaderboard `<table>` (wrapped in a `<div>`) for the given rows.
 *
 * Columns shown:
 *  - `extraColumn === 'All Languages'`: rank, model, average rank;
 *  - otherwise: rank, model, macro score, plus `extraColumn` when it is truthy.
 *
 * Rows without a `runname` are not displayed. The rank cell is the 1-based
 * position of the row among the displayed rows, in the order given by `data`.
 *
 * @param {Object[]} data - Parsed CSV rows keyed by column name, already sorted.
 * @param {?string} extraColumn - `'All Languages'`, a column key, or a falsy
 *   value for "no extra column".
 * @returns {HTMLDivElement} Wrapper element containing the table.
 */
function createResultsTable(data, extraColumn) {
  // Models whose name contains one of these identifiers get the chat marker.
  const chatModels = [
    'CohereForAI/c4ai-command-r-plus-08-2024',
    'openai/gpt-4o-mini',
    'silma-ai/SILMA-9B-Instruct-v1.0',
    'microsoft/Phi-3.5-mini-instruct',
    'TURKCELL/Turkcell-LLM-7b-v1'
  ];

  const tableWrapper = document.createElement('div');
  tableWrapper.className = 'table-wrapper leaderboard-table-wrapper';
  const table = document.createElement('table');
  table.className = 'results-table leaderboard-results-table';

  const columns = extraColumn === 'All Languages'
    ? ['rank', 'runname', 'avg_rank_macro']
    : ['rank', 'runname', 'agg_score_macro', extraColumn].filter(Boolean);

  const headerRow = table.createTHead().insertRow();
  columns.forEach(column => {
    const th = document.createElement('th');
    th.textContent = columnNameMap[column] || processTaskName(column);
    // Sanitized so the column key is safe to use as a CSS class.
    th.className = `column-${sanitizeColumnName(column)}`;
    headerRow.appendChild(th);
  });

  const body = table.createTBody();
  // Drop rows without a model name *before* numbering, so that skipped rows
  // (e.g. a blank trailing CSV line) do not leave gaps in the rank column.
  const visibleRows = (data || []).filter(row => row && row.runname);
  visibleRows.forEach((row, index) => {
    const tr = body.insertRow();
    columns.forEach(column => {
      const td = tr.insertCell();
      td.className = `column-${sanitizeColumnName(column)}`;
      if (column === 'rank') {
        td.textContent = index + 1;
      } else if (column === 'runname') {
        // Coerce to string: dynamic CSV typing may yield a non-string value.
        const modelName = String(row[column]);
        const isChatModel = chatModels.some(chatModel => modelName.includes(chatModel));
        const displayName = `${isChatModel ? '💬' : '🟢'} ${modelName}`;
        if (modelName.split('/')[0] !== 'openai') {
          // Build the link as DOM nodes (not an HTML string) so a model name
          // coming from the CSV can never be interpreted as markup.
          const link = document.createElement('a');
          link.href = `https://huggingface.co/${modelName}`;
          link.textContent = displayName;
          td.appendChild(link);
        } else {
          // Names under the `openai/` prefix are shown as plain text, unlinked.
          td.textContent = displayName;
        }
        td.title = modelName; // Full model name as tooltip
        td.style.cursor = 'help'; // Cursor hints at the hover tooltip
      } else {
        const value = row[column];
        td.textContent = typeof value === 'number' ? value.toFixed(2) : value;
      }
    });
  });

  tableWrapper.appendChild(table);
  return tableWrapper;
}
/**
 * Mounts the FineTasks leaderboard inside the element with id `containerId`.
 *
 * Appends, in order: a title, the table container, a caption (text taken from
 * the container's `data-caption` attribute) and the controls (a language
 * dropdown and a task dropdown). Selecting a language fetches the matching CSV
 * from `data/os_models/`, sorts the rows (ascending average rank for
 * "All Languages", descending macro score otherwise) and re-renders the table.
 * Does nothing when the container element does not exist.
 *
 * @param {string} containerId - Id of the host element.
 * @returns {void}
 */
export function initLeaderboardResults(containerId) {
  const container = document.getElementById(containerId);
  if (!container) return;

  const titleElement = document.createElement('h3');
  titleElement.textContent = 'FineTasks Leaderboard';
  titleElement.className = 'leaderboard-title';

  const tableContainer = document.createElement('div');
  tableContainer.className = 'table-container';

  const languageLabel = document.createElement('label');
  languageLabel.textContent = 'Language: ';
  const languageDropdown = createDropdown(Object.keys(languageMap), updateLanguageTable);

  const extraColumnLabel = document.createElement('label');
  extraColumnLabel.textContent = 'Task: ';
  const extraColumnDropdown = createDropdown(['None'], updateTable);

  let leaderboardDataTable;
  let currentData = [];
  // Identifies the most recent language request so that a slower, older
  // response cannot overwrite the table of a newer selection.
  let latestRequestId = 0;

  // Create caption element
  const captionElement = document.createElement('figcaption');
  captionElement.className = 'table-caption';
  captionElement.textContent = container.dataset.caption || '';

  // Releases the current DataTable instance, if any. The reference is cleared
  // first so a second call is a no-op even if destroy() throws.
  function destroyDataTable() {
    const staleTable = leaderboardDataTable;
    leaderboardDataTable = undefined;
    if (staleTable) {
      staleTable.destroy();
    }
  }

  // Fetches and parses the CSV for the selected language, refreshes the task
  // dropdown and re-renders the table. Failures are shown in the table area.
  async function updateLanguageTable() {
    latestRequestId += 1;
    const requestId = latestRequestId;
    const selectedLanguage = languageDropdown.value;
    const csvFile = languageMap[selectedLanguage];
    try {
      const response = await fetch(`data/os_models/${csvFile}`);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const csvText = await response.text();
      // A newer selection was made while this request was in flight.
      if (requestId !== latestRequestId) return;

      // Rows without a model name (e.g. a blank trailing line) are never
      // displayed; dropping them here keeps the sort comparisons numeric and
      // guarantees the first row is a real one.
      const results = Papa.parse(csvText, { header: true, dynamicTyping: true }).data
        .filter(row => row && row.runname);
      currentData = selectedLanguage === 'All Languages'
        ? results.sort((a, b) => a.avg_rank_macro - b.avg_rank_macro)
        : results.sort((a, b) => b.agg_score_macro - a.agg_score_macro);

      // Update extra column dropdown options
      if (selectedLanguage !== 'All Languages') {
        const nonTaskColumns = ['runname', 'seed', 'steps', 'agg_score_micro', 'rank', 'avg_rank_macro', ''];
        // An empty CSV simply yields no task options instead of an exception.
        const firstRow = currentData[0] || {};
        const columnOptions = ['None'].concat(
          Object.keys(firstRow).filter(key => !nonTaskColumns.includes(key))
        );
        extraColumnDropdown.innerHTML = '';
        columnOptions.forEach(option => {
          const optionElement = document.createElement('option');
          optionElement.value = option;
          optionElement.textContent = option === 'None' ? 'None' : processTaskName(option);
          extraColumnDropdown.appendChild(optionElement);
        });
        extraColumnDropdown.value = 'None';
        extraColumnLabel.style.display = 'inline';
        extraColumnDropdown.style.display = 'inline';
      } else {
        extraColumnLabel.style.display = 'none';
        extraColumnDropdown.style.display = 'none';
      }
      updateTable();
    } catch (error) {
      console.error('Error fetching CSV:', error);
      // Do not replace the table of a newer selection with a stale error.
      if (requestId !== latestRequestId) return;
      destroyDataTable();
      tableContainer.innerHTML = '';
      // textContent: the message is plain text and must not be parsed as HTML.
      const errorElement = document.createElement('p');
      errorElement.textContent = `Error loading data: ${error.message}`;
      tableContainer.appendChild(errorElement);
    }
  }

  // Rebuilds the table from `currentData` and the current dropdown values.
  function updateTable() {
    const extraColumn = languageDropdown.value === 'All Languages' ? 'All Languages' :
      (extraColumnDropdown.value === 'None' ? null : extraColumnDropdown.value);

    // Release the previous DataTable while its DOM is still attached.
    destroyDataTable();
    tableContainer.innerHTML = '';
    const tableWrapper = createResultsTable(currentData, extraColumn);
    tableContainer.appendChild(tableWrapper);

    // Bind to this container's own table rather than a document-wide selector,
    // which would resolve to the first matching table on the page.
    const tableElement = tableWrapper.querySelector('.leaderboard-results-table');
    leaderboardDataTable = new DataTable(tableElement, {
      perPage: 10,
      perPageSelect: false,
      searchable: false,
      sortable: true,
      fixedHeight: true,
      labels: {
        info: '' // This removes the "Showing 1 to X of Y entries" text
      }
    });

    // Adjust column widths after the table is created
    setTimeout(adjustColumnWidths, 0);
  }

  const controls = document.createElement('div');
  controls.className = 'controls leaderboard-controls fine-tasks-controls';

  const languageControlGroup = document.createElement('div');
  languageControlGroup.className = 'control-group';
  languageControlGroup.appendChild(languageLabel);
  languageControlGroup.appendChild(languageDropdown);

  const extraColumnControlGroup = document.createElement('div');
  extraColumnControlGroup.className = 'control-group';
  extraColumnControlGroup.appendChild(extraColumnLabel);
  extraColumnControlGroup.appendChild(extraColumnDropdown);

  controls.appendChild(languageControlGroup);
  controls.appendChild(extraColumnControlGroup);

  container.appendChild(titleElement);
  container.appendChild(tableContainer);
  container.appendChild(captionElement); // Add caption below the table
  container.appendChild(controls);

  // Initialize with All Languages data
  languageDropdown.value = 'All Languages';
  updateLanguageTable();
}
/**
 * Applies explicit pixel widths to the columns of the first
 * `.leaderboard-results-table` in the document.
 *
 * The first three columns get fixed widths (50px, 200px, 100px). Every other
 * column gets the widest measured width among its header and cells, capped at
 * 150px. Cells are matched to a header through the header's `column-*` class.
 * Does nothing when no such table exists.
 *
 * @returns {void}
 */
function adjustColumnWidths() {
  const table = document.querySelector('.leaderboard-results-table');
  if (!table) return;

  const headers = table.querySelectorAll('th');
  headers.forEach((header, index) => {
    // Build the selector from a single class token. Using the raw className
    // would produce an invalid selector (`td.`) for a class-less header and a
    // descendant selector matching nothing when the header has extra classes.
    const classTokens = header.className.split(/\s+/).filter(Boolean);
    const columnClass = classTokens.find(token => token.startsWith('column-')) || classTokens[0];
    const cells = columnClass ? table.querySelectorAll(`td.${columnClass}`) : [];

    let adjustedWidth;
    if (index === 0) { // Rank column
      adjustedWidth = 50;
    } else if (index === 1) { // Model name column
      adjustedWidth = 200;
    } else if (index === 2) { // Third column (macro score or average rank)
      adjustedWidth = 100;
    } else { // Extra column or any other column
      // Only these columns depend on measured widths, so measure only here.
      let maxWidth = header.offsetWidth;
      cells.forEach(cell => {
        maxWidth = Math.max(maxWidth, cell.offsetWidth);
      });
      adjustedWidth = Math.min(maxWidth, 150); // Maximum width of 150px
    }

    header.style.width = `${adjustedWidth}px`;
    cells.forEach(cell => {
      cell.style.width = `${adjustedWidth}px`;
    });
  });
}