Pratik Bhavsar committed on
Commit
e2809a3
·
1 Parent(s): 91b3c5d

updated text

Browse files
Files changed (1) hide show
  1. data_loader.py +3 -13
data_loader.py CHANGED
@@ -354,7 +354,7 @@ DESCRIPTION_HTML = """
354
  color: var(--text-secondary);
355
  ">
356
  <div style="display: flex; gap: 8px; align-items: center;">
357
- ✅ Accuracy Performance
358
  </div>
359
  <div style="display: flex; gap: 8px; align-items: center;">
360
  💰 Open Vs Closed Source
@@ -363,16 +363,6 @@ DESCRIPTION_HTML = """
363
  ⚖️ Overall Effectiveness
364
  </div>
365
  </div>
366
-
367
- <div style="
368
- border-left: 4px solid var(--accent-color, #4F46E5);
369
- padding-left: 12px;
370
- margin-top: 8px;
371
- color: var(--text-secondary);
372
- font-style: italic;
373
- ">
374
- 💡 Use the filters below to explore different aspects of the evaluation and compare model performance across various dimensions.
375
- </div>
376
  </div>
377
  </div>
378
  """
@@ -726,8 +716,8 @@ METHODOLOGY = """
726
 
727
  <h2 class="methodology-subtitle">Overview</h2>
728
  <p class="methodology-text">
729
- The Berkeley Function Calling Leaderboard (BFCL) evaluates language models' ability to effectively use tools
730
- and maintain coherent multi-turn conversations. Our evaluation focuses on both basic functionality and edge
731
  cases that challenge real-world applicability.
732
  </p>
733
 
 
354
  color: var(--text-secondary);
355
  ">
356
  <div style="display: flex; gap: 8px; align-items: center;">
357
+ ✅ Tool Selection Quality
358
  </div>
359
  <div style="display: flex; gap: 8px; align-items: center;">
360
  💰 Open Vs Closed Source
 
363
  ⚖️ Overall Effectiveness
364
  </div>
365
  </div>
 
 
 
 
 
 
 
 
 
 
366
  </div>
367
  </div>
368
  """
 
716
 
717
  <h2 class="methodology-subtitle">Overview</h2>
718
  <p class="methodology-text">
719
+ We evaluate language models' ability to effectively use tools
720
+ in single and multi-turn conversations. Our evaluation focuses on both basic functionality and edge
721
  cases that challenge real-world applicability.
722
  </p>
723