hynky HF staff committed on
Commit
97efd89
·
1 Parent(s): b3d863b
analysis/data/os_models/{final_rankings.csv → v1/final_rankings.csv} RENAMED
File without changes
analysis/data/os_models/{results_all.csv → v1/results_all.csv} RENAMED
File without changes
analysis/data/os_models/{results_ar.csv → v1/results_ar.csv} RENAMED
File without changes
analysis/data/os_models/{results_fr.csv → v1/results_fr.csv} RENAMED
File without changes
analysis/data/os_models/{results_hi.csv → v1/results_hi.csv} RENAMED
File without changes
analysis/data/os_models/{results_ru.csv → v1/results_ru.csv} RENAMED
File without changes
analysis/data/os_models/{results_sw.csv → v1/results_sw.csv} RENAMED
File without changes
analysis/data/os_models/{results_te.csv → v1/results_te.csv} RENAMED
File without changes
analysis/data/os_models/{results_th.csv → v1/results_th.csv} RENAMED
File without changes
analysis/data/os_models/{results_tr.csv → v1/results_tr.csv} RENAMED
File without changes
analysis/data/os_models/{results_zh.csv → v1/results_zh.csv} RENAMED
File without changes
analysis/data/os_models/v2/final_rankings.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc908af138b83e2b2760975929742886c2fc8b5095ff87ae95a2a10a7b9626b3
3
+ size 872
analysis/data/os_models/v2/results_ar.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40fb9d04723ff5ffd5189222d0496a0bb1b4b5d62fcb43f0b3bd1f52fee8ad43
3
+ size 12363
analysis/data/os_models/v2/results_fr.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86837a26cd20191cfb6e321a47ecb6bc50cf541a6313cbd7a69e8bb7778c32da
3
+ size 7235
analysis/data/os_models/v2/results_hi.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f6d09787fa06e86d7407e418d15cd83d5a32a5ac947257fbf7bff251e93fda
3
+ size 7433
analysis/data/os_models/v2/results_ru.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ab46c8317084966648116ac11e3d739dbd492013fcce48d2ec0283b1b50a74f
3
+ size 8586
analysis/data/os_models/v2/results_sw.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caec2333bd61ec53164555ff8629b11d79fb1d4a1272e3d31b0731a380d2748f
3
+ size 7352
analysis/data/os_models/v2/results_te.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db36b5f9a229368749d0ed9196155eb7a2bd2ffe5604e73912e72d98533429c1
3
+ size 6394
analysis/data/os_models/v2/results_th.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808f72a53b1de3ec5a8839db3cc4393e0e4cfcf5e52f13a11ee8988fdeb03432
3
+ size 7582
analysis/data/os_models/v2/results_tr.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffd3d95b5a8d9d4c4db2706cc254c098ad017d98a81b8288ac64b8c2c716038
3
+ size 7202
analysis/data/os_models/v2/results_zh.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86c959789f6425ec5e4e5dc4d05140e15502a7c37c2de65493ea18bb474599c4
3
+ size 10203
app/src/leaderboard_results.js CHANGED
@@ -14,6 +14,20 @@ const languageMap = {
14
  'Hindi': 'results_hi.csv',
15
  };
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  const columnNameMap = {
18
  'runname': 'Model',
19
  'agg_score_macro': 'Score',
@@ -25,12 +39,15 @@ const columnNameMap = {
25
  'rank': 'Rank'
26
  };
27
 
28
- function createDropdown(options, onChange) {
29
  const select = document.createElement('select');
30
  options.forEach(option => {
31
  const optionElement = document.createElement('option');
32
  optionElement.value = option;
33
  optionElement.textContent = option;
 
 
 
34
  select.appendChild(optionElement);
35
  });
36
  select.addEventListener('change', onChange);
@@ -72,11 +89,35 @@ function createResultsTable(data, extraColumn) {
72
  data.forEach((row, index) => {
73
  if (!row.runname) return; // Skip rows without a model name
74
  const tr = body.insertRow();
 
 
 
 
 
 
 
 
75
  columns.forEach(column => {
76
  const td = tr.insertCell();
77
- td.className = `column-${sanitizeColumnName(column)}`; // Sanitize the column name
 
78
  if (column === 'rank') {
79
  td.textContent = index + 1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  } else if (column === 'runname') {
81
  const modelName = row[column];
82
  let displayName;
@@ -112,6 +153,40 @@ function createResultsTable(data, extraColumn) {
112
  return tableWrapper;
113
  }
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  export function initLeaderboardResults(containerId) {
116
  const container = document.getElementById(containerId);
117
  if (!container) return;
@@ -120,17 +195,12 @@ export function initLeaderboardResults(containerId) {
120
  titleElement.textContent = 'FineTasks Leaderboard';
121
  titleElement.className = 'leaderboard-title';
122
 
 
 
 
123
  const tableContainer = document.createElement('div');
124
  tableContainer.className = 'table-container';
125
 
126
- const languageLabel = document.createElement('label');
127
- languageLabel.textContent = 'Language: ';
128
- const languageDropdown = createDropdown(Object.keys(languageMap), updateLanguageTable);
129
-
130
- const extraColumnLabel = document.createElement('label');
131
- extraColumnLabel.textContent = 'Task: ';
132
- const extraColumnDropdown = createDropdown(['None'], updateTable);
133
-
134
  let leaderboardDataTable;
135
  let currentData = [];
136
 
@@ -139,12 +209,14 @@ export function initLeaderboardResults(containerId) {
139
  captionElement.className = 'table-caption';
140
  captionElement.textContent = container.dataset.caption || '';
141
 
 
142
  async function updateLanguageTable() {
 
143
  const selectedLanguage = languageDropdown.value;
144
  const csvFile = languageMap[selectedLanguage];
145
 
146
  try {
147
- const response = await fetch(`data/os_models/${csvFile}`);
148
  if (!response.ok) {
149
  throw new Error(`HTTP error! status: ${response.status}`);
150
  }
@@ -154,7 +226,6 @@ export function initLeaderboardResults(containerId) {
154
  ? results.sort((a, b) => a.avg_rank_macro - b.avg_rank_macro)
155
  : results.sort((a, b) => b.agg_score_macro - a.agg_score_macro);
156
 
157
- // Update extra column dropdown options
158
  if (selectedLanguage !== 'All Languages') {
159
  const columnOptions = ['None'].concat(Object.keys(currentData[0]).filter(key =>
160
  !['runname', 'seed', 'steps', 'agg_score_micro', 'rank', 'avg_rank_macro', ''].includes(key)
@@ -176,6 +247,7 @@ export function initLeaderboardResults(containerId) {
176
  }
177
 
178
  updateTable();
 
179
  } catch (error) {
180
  console.error('Error fetching CSV:', error);
181
  tableContainer.innerHTML = `<p>Error loading data: ${error.message}</p>`;
@@ -184,7 +256,7 @@ export function initLeaderboardResults(containerId) {
184
 
185
  function updateTable() {
186
  const extraColumn = languageDropdown.value === 'All Languages' ? 'All Languages' :
187
- (extraColumnDropdown.value === 'None' ? null : extraColumnDropdown.value);
188
 
189
  tableContainer.innerHTML = '';
190
  const tableWrapper = createResultsTable(currentData, extraColumn);
@@ -201,17 +273,71 @@ export function initLeaderboardResults(containerId) {
201
  sortable: true,
202
  fixedHeight: true,
203
  labels: {
204
- info: '' // This removes the "Showing 1 to X of Y entries" text
205
  }
206
  });
207
 
208
- // Adjust column widths after the table is created
209
  setTimeout(adjustColumnWidths, 0);
210
  }
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  const controls = document.createElement('div');
213
  controls.className = 'controls leaderboard-controls fine-tasks-controls';
214
 
 
 
 
 
 
215
  const languageControlGroup = document.createElement('div');
216
  languageControlGroup.className = 'control-group';
217
  languageControlGroup.appendChild(languageLabel);
@@ -222,16 +348,20 @@ export function initLeaderboardResults(containerId) {
222
  extraColumnControlGroup.appendChild(extraColumnLabel);
223
  extraColumnControlGroup.appendChild(extraColumnDropdown);
224
 
 
225
  controls.appendChild(languageControlGroup);
226
  controls.appendChild(extraColumnControlGroup);
227
 
 
228
  container.appendChild(titleElement);
229
  container.appendChild(tableContainer);
230
- container.appendChild(captionElement); // Add caption below the table
231
  container.appendChild(controls);
 
232
 
233
- // Initialize with All Languages data
234
- languageDropdown.value = 'All Languages';
 
235
  updateLanguageTable();
236
  }
237
 
 
14
  'Hindi': 'results_hi.csv',
15
  };
16
 
17
+ const versionMap = {
18
+ 'v1': 'v1',
19
+ 'v2': 'v2'
20
+ };
21
+
22
+ const versionChangelog = {
23
+ 'v1': 'Initial release of FineTasks Leaderboard',
24
+ 'v2': 'Changes in v2:\n' +
25
+ '• Fixed a bug in the rescaling of scores\n' +
26
+ '• Switched to using Native choice prefixes for Thai/Telugu/Hindi/Arabics\n' +
27
+ '• Added Options: anchors before showing options for continuation tasks (e.g Hellawag) - consistent improvement in scores\n' +
28
+ '• Removed openai/gpt-4o-mini'
29
+ };
30
+
31
  const columnNameMap = {
32
  'runname': 'Model',
33
  'agg_score_macro': 'Score',
 
39
  'rank': 'Rank'
40
  };
41
 
42
+ function createDropdown(options, onChange, initialValue = null) {
43
  const select = document.createElement('select');
44
  options.forEach(option => {
45
  const optionElement = document.createElement('option');
46
  optionElement.value = option;
47
  optionElement.textContent = option;
48
+ if (initialValue && option === initialValue) {
49
+ optionElement.selected = true;
50
+ }
51
  select.appendChild(optionElement);
52
  });
53
  select.addEventListener('change', onChange);
 
89
  data.forEach((row, index) => {
90
  if (!row.runname) return; // Skip rows without a model name
91
  const tr = body.insertRow();
92
+
93
+ // Add gradient background for top 3 positions
94
+ if (index < 3) {
95
+ const opacity = 1 - (index * 0.25); // Creates a fading effect: 1, 0.75, 0.5
96
+ tr.style.backgroundColor = `rgba(255, 165, 0, ${opacity * 0.2})`; // Light orange with fading opacity
97
+ tr.style.fontWeight = 600; // Make text slightly bolder for top 3
98
+ }
99
+
100
  columns.forEach(column => {
101
  const td = tr.insertCell();
102
+ td.className = `column-${sanitizeColumnName(column)}`;
103
+
104
  if (column === 'rank') {
105
  td.textContent = index + 1;
106
+ // Add special styling for top 3 ranks
107
+ if (index < 3) {
108
+ td.style.fontWeight = 'bold';
109
+ switch(index) {
110
+ case 0:
111
+ td.style.color = '#FFB800'; // Gold
112
+ break;
113
+ case 1:
114
+ td.style.color = '#C0C0C0'; // Silver
115
+ break;
116
+ case 2:
117
+ td.style.color = '#CD7F32'; // Bronze
118
+ break;
119
+ }
120
+ }
121
  } else if (column === 'runname') {
122
  const modelName = row[column];
123
  let displayName;
 
153
  return tableWrapper;
154
  }
155
 
156
+ function createChangelog() {
157
+ const changelogContainer = document.createElement('div');
158
+ changelogContainer.className = 'changelog-container';
159
+
160
+ const changelogHeader = document.createElement('div');
161
+ changelogHeader.className = 'changelog-header';
162
+
163
+ const arrow = document.createElement('span');
164
+ arrow.className = 'changelog-arrow';
165
+ arrow.textContent = '▶';
166
+
167
+ const label = document.createElement('span');
168
+ label.textContent = 'Changelog';
169
+ label.className = 'changelog-label';
170
+
171
+ const content = document.createElement('div');
172
+ content.className = 'changelog-content';
173
+ content.style.display = 'none';
174
+
175
+ changelogHeader.appendChild(arrow);
176
+ changelogHeader.appendChild(label);
177
+ changelogContainer.appendChild(changelogHeader);
178
+ changelogContainer.appendChild(content);
179
+
180
+ // Toggle changelog visibility
181
+ changelogHeader.addEventListener('click', () => {
182
+ const isVisible = content.style.display !== 'none';
183
+ content.style.display = isVisible ? 'none' : 'block';
184
+ arrow.textContent = isVisible ? '▶' : '▼';
185
+ });
186
+
187
+ return { container: changelogContainer, content };
188
+ }
189
+
190
  export function initLeaderboardResults(containerId) {
191
  const container = document.getElementById(containerId);
192
  if (!container) return;
 
195
  titleElement.textContent = 'FineTasks Leaderboard';
196
  titleElement.className = 'leaderboard-title';
197
 
198
+ // Create changelog
199
+ const { container: changelogContainer, content: changelogContent } = createChangelog();
200
+
201
  const tableContainer = document.createElement('div');
202
  tableContainer.className = 'table-container';
203
 
 
 
 
 
 
 
 
 
204
  let leaderboardDataTable;
205
  let currentData = [];
206
 
 
209
  captionElement.className = 'table-caption';
210
  captionElement.textContent = container.dataset.caption || '';
211
 
212
+ // Define update functions first
213
  async function updateLanguageTable() {
214
+ const selectedVersion = versionDropdown.value;
215
  const selectedLanguage = languageDropdown.value;
216
  const csvFile = languageMap[selectedLanguage];
217
 
218
  try {
219
+ const response = await fetch(`data/os_models/${selectedVersion}/${csvFile}`);
220
  if (!response.ok) {
221
  throw new Error(`HTTP error! status: ${response.status}`);
222
  }
 
226
  ? results.sort((a, b) => a.avg_rank_macro - b.avg_rank_macro)
227
  : results.sort((a, b) => b.agg_score_macro - a.agg_score_macro);
228
 
 
229
  if (selectedLanguage !== 'All Languages') {
230
  const columnOptions = ['None'].concat(Object.keys(currentData[0]).filter(key =>
231
  !['runname', 'seed', 'steps', 'agg_score_micro', 'rank', 'avg_rank_macro', ''].includes(key)
 
247
  }
248
 
249
  updateTable();
250
+ updateChangelog();
251
  } catch (error) {
252
  console.error('Error fetching CSV:', error);
253
  tableContainer.innerHTML = `<p>Error loading data: ${error.message}</p>`;
 
256
 
257
  function updateTable() {
258
  const extraColumn = languageDropdown.value === 'All Languages' ? 'All Languages' :
259
+ (extraColumnDropdown.value === 'None' ? null : extraColumnDropdown.value);
260
 
261
  tableContainer.innerHTML = '';
262
  const tableWrapper = createResultsTable(currentData, extraColumn);
 
273
  sortable: true,
274
  fixedHeight: true,
275
  labels: {
276
+ info: ''
277
  }
278
  });
279
 
 
280
  setTimeout(adjustColumnWidths, 0);
281
  }
282
 
283
+ function updateChangelog() {
284
+ const selectedVersion = versionDropdown.value;
285
+ changelogContent.textContent = versionChangelog[selectedVersion];
286
+ }
287
+
288
+ // Add this function to get URL parameters
289
+ function getUrlParameter(name) {
290
+ const urlParams = new URLSearchParams(window.location.search);
291
+ return urlParams.get(name);
292
+ }
293
+
294
+ // Add this function to set URL parameters
295
+ function updateUrlParameter(key, value) {
296
+ const urlParams = new URLSearchParams(window.location.search);
297
+ if (value) {
298
+ urlParams.set(key, value);
299
+ } else {
300
+ urlParams.delete(key);
301
+ }
302
+ const newUrl = `${window.location.pathname}${urlParams.toString() ? '?' + urlParams.toString() : ''}`;
303
+ window.history.pushState({ path: newUrl }, '', newUrl);
304
+ }
305
+
306
+ // Get initial language from URL
307
+ const urlLanguage = getUrlParameter('language');
308
+ const initialLanguage = urlLanguage && Object.keys(languageMap).includes(urlLanguage)
309
+ ? urlLanguage
310
+ : 'All Languages';
311
+
312
+ // Create dropdowns with initial values
313
+ const languageLabel = document.createElement('label');
314
+ languageLabel.textContent = 'Language: ';
315
+ const languageDropdown = createDropdown(
316
+ Object.keys(languageMap),
317
+ (e) => {
318
+ updateLanguageTable();
319
+ updateUrlParameter('language', e.target.value === 'All Languages' ? null : e.target.value);
320
+ },
321
+ initialLanguage
322
+ );
323
+
324
+ const extraColumnLabel = document.createElement('label');
325
+ extraColumnLabel.textContent = 'Task: ';
326
+ const extraColumnDropdown = createDropdown(['None'], updateTable);
327
+
328
+ const versionLabel = document.createElement('label');
329
+ versionLabel.textContent = 'Version: ';
330
+ const versionDropdown = createDropdown(Object.keys(versionMap), updateLanguageTable);
331
+
332
+ // Create controls
333
  const controls = document.createElement('div');
334
  controls.className = 'controls leaderboard-controls fine-tasks-controls';
335
 
336
+ const versionControlGroup = document.createElement('div');
337
+ versionControlGroup.className = 'control-group';
338
+ versionControlGroup.appendChild(versionLabel);
339
+ versionControlGroup.appendChild(versionDropdown);
340
+
341
  const languageControlGroup = document.createElement('div');
342
  languageControlGroup.className = 'control-group';
343
  languageControlGroup.appendChild(languageLabel);
 
348
  extraColumnControlGroup.appendChild(extraColumnLabel);
349
  extraColumnControlGroup.appendChild(extraColumnDropdown);
350
 
351
+ controls.appendChild(versionControlGroup);
352
  controls.appendChild(languageControlGroup);
353
  controls.appendChild(extraColumnControlGroup);
354
 
355
+ // Add elements to container in new order
356
  container.appendChild(titleElement);
357
  container.appendChild(tableContainer);
358
+ container.appendChild(captionElement);
359
  container.appendChild(controls);
360
+ container.appendChild(changelogContainer);
361
 
362
+ // Initialize with URL language if present
363
+ versionDropdown.value = 'v2';
364
+ languageDropdown.value = initialLanguage;
365
  updateLanguageTable();
366
  }
367
 
app/src/style.css CHANGED
@@ -385,3 +385,43 @@ html, body {
385
  body {
386
  -webkit-text-size-adjust: 100%;
387
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  body {
386
  -webkit-text-size-adjust: 100%;
387
  }
388
+
389
+ /* Changelog styles */
390
+
391
+ .changelog-container {
392
+ margin-top: 20px;
393
+ font-size: 14px;
394
+ }
395
+
396
+ .changelog-header {
397
+ cursor: pointer;
398
+ display: flex;
399
+ align-items: center;
400
+ gap: 12px; /* Increased space between arrow and label */
401
+ padding: 8px;
402
+ background-color: #f5f5f5;
403
+ border-radius: 4px;
404
+ }
405
+
406
+ .changelog-header:hover {
407
+ background-color: #ebebeb;
408
+ }
409
+
410
+ .changelog-arrow {
411
+ font-size: 12px;
412
+ color: #666;
413
+ }
414
+
415
+ .changelog-label {
416
+ font-weight: 600;
417
+ color: #333;
418
+ }
419
+
420
+ .changelog-content {
421
+ margin-top: 8px;
422
+ padding: 12px;
423
+ background-color: #fff;
424
+ border: 1px solid #eee;
425
+ border-radius: 4px;
426
+ line-height: 1.4;
427
+ }