Spaces:

galileo-ai
/

agent-leaderboard

Running on CPU Upgrade

App Files Files Community

Pratik Bhavsar committed 21 days ago

Commit

fe118de

1 Parent(s): 2f40f5f

main table

Browse files

Files changed (3) hide show

app.py +3 -0
data_loader.py +0 -3
utils.py +258 -31

app.py CHANGED Viewed

@@ -44,6 +44,7 @@ def create_app():
                     )
             with gr.Tab("Model Performance"):
                 with gr.Row():
                     with gr.Column(scale=1):
                         model_selector = gr.Dropdown(
@@ -65,9 +66,11 @@ def create_app():
                 )
             with gr.Tab("Methodology"):
                 gr.Markdown(METHODOLOGY)
             with gr.Tab("Insights"):
                 gr.Markdown(INSIGHTS)
         app.load(

                     )
             with gr.Tab("Model Performance"):
+                gr.Markdown(TITLE)
                 with gr.Row():
                     with gr.Column(scale=1):
                         model_selector = gr.Dropdown(
                 )
             with gr.Tab("Methodology"):
+                gr.Markdown(TITLE)
                 gr.Markdown(METHODOLOGY)
             with gr.Tab("Insights"):
+                gr.Markdown(TITLE)
                 gr.Markdown(INSIGHTS)
         app.load(

data_loader.py CHANGED Viewed

@@ -87,9 +87,6 @@ TITLE = """
             <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
                 GitHub&nbsp;⭐
             </a>
-            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
-                Paper&nbsp;📄
-            </a>
             <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
                 Dataset&nbsp;📊
             </a>

             <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
                 GitHub&nbsp;⭐
             </a>
             <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
                 Dataset&nbsp;📊
             </a>

utils.py CHANGED Viewed

@@ -12,20 +12,136 @@ def model_info_tab(df, model_names=None):
     filtered_df = df[df["Model"].isin(model_names)]
     radar_chart = create_radar_plot(df, model_names)
-    info_html = filtered_df[
-        [
-            "Model",
-            "Model Type",
-            "Model Avg",
-            "IO Cost",
-            "single turn perf",
-            "multi turn perf",
-        ]
-    ].to_html(index=False)
     return info_html, radar_chart
 def filter_leaderboard(df, model_type, category, sort_by):
     filtered_df = df.copy()
     if model_type != "All":
@@ -41,30 +157,141 @@ def filter_leaderboard(df, model_type, category, sort_by):
         filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)
     filtered_df["Rank"] = range(1, len(filtered_df) + 1)
     perf_chart = get_performance_chart(filtered_df, category)
     cost_chart = get_performance_cost_chart(filtered_df, category)
-    filtered_df["Cost (Input/Output)"] = filtered_df.apply(
-        lambda x: f"${x['Input cost per million token']:.2f}/${x['Output cost per million token']:.2f}",
-        axis=1,
-    )
-    display_columns = [
-        "Rank",
-        "Model",
-        "Model Type",
-        "Cost (Input/Output)",
-        "Category Score",
-    ]
-    table_html = filtered_df[display_columns].to_html(index=False, escape=False)
-    note_html = """
-    <div style='margin-top: 20px; padding: 10px; background-color: #f3f4f6; border-radius: 4px;'>
-        <p style='margin: 0; font-size: 0.9em; color: #4b5563;'>
-            Note: Cost for sorting is calculated using 3:1 ratio on I/O. Cost of Gemini 2.0 is assumed to be same as that of Gemini 1.5.
-        </p>
     </div>
     """
-    table_html += note_html
     return table_html, perf_chart, cost_chart

     filtered_df = df[df["Model"].isin(model_names)]
     radar_chart = create_radar_plot(df, model_names)
+    # Create styled table for model info
+    info_html = f"""
+    <div class="dark-table-container">
+        <table class="dark-styled-table">
+            <thead>
+                <tr>
+                    <th>Model</th>
+                    <th>Type</th>
+                    <th>Average</th>
+                    <th>I/O Cost</th>
+                    <th>Single Turn</th>
+                    <th>Multi Turn</th>
+                </tr>
+            </thead>
+            <tbody>
+    """
+    for _, row in filtered_df.iterrows():
+        info_html += f"""
+            <tr>
+                <td>{row['Model']}</td>
+                <td>{row['Model Type']}</td>
+                <td>{row['Model Avg']:.3f}</td>
+                <td>${row['IO Cost']:.2f}</td>
+                <td>{row['single turn perf']:.3f}</td>
+                <td>{row['multi turn perf']:.3f}</td>
+            </tr>
+        """
+    info_html += """
+            </tbody>
+        </table>
+    </div>
+    """
     return info_html, radar_chart
# Podium styling, keyed by rank: (label, CSS background, text colour).
# Built once at import time instead of on every per-row call.
_RANK_BADGE_STYLES = {
    1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
    2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
    3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
}


def get_rank_badge(rank):
    """Return an HTML snippet that displays a leaderboard rank.

    Ranks 1, 2 and 3 are rendered as a filled badge labelled "1st", "2nd"
    and "3rd"; any other value is rendered as plain muted text.

    Args:
        rank: The position to display. Whole-number floats (for example
            ``4.0``) are treated as the corresponding integer so the output
            reads "4" rather than "4.0".

    Returns:
        A string containing a single inline-styled ``<div>`` element.
    """
    # Row-wise iteration over a DataFrame does not preserve dtypes, so an
    # integer rank can arrive as a float; normalise it before display.
    if isinstance(rank, float) and rank.is_integer():
        rank = int(rank)

    podium_style = _RANK_BADGE_STYLES.get(rank)
    if podium_style is not None:
        label, gradient, text_color = podium_style
        return f"""
            <div style="
                display: inline-flex;
                align-items: center;
                justify-content: center;
                min-width: 48px;
                padding: 4px 12px;
                background: {gradient};
                color: {text_color};
                border-radius: 6px;
                font-weight: 600;
                font-size: 0.9em;
                box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
            ">
                {label}
            </div>
        """

    # Everything outside the podium gets an unadorned, muted number.
    return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            justify-content: center;
            min-width: 28px;
            color: #a1a1aa;
            font-weight: 500;
        ">
            {rank}
        </div>
    """
def get_type_badge(model_type):
    """Return an HTML snippet that displays a model type as a coloured pill.

    "Private" and "Open source" each have their own background colour; any
    other value falls back to the "Private" colour.

    Args:
        model_type: The model type label to display. It is converted to text
            and HTML-escaped before being placed in the markup.

    Returns:
        A string containing a single inline-styled ``<div>`` element.
    """
    import html

    colors = {"Private": "#4F46E5", "Open source": "#16A34A"}
    bg_color = colors.get(model_type, "#4F46E5")
    # The label comes from tabular data, so escape it: a stray "<" or "&"
    # would otherwise corrupt (or inject into) the generated table markup.
    label = html.escape(str(model_type))
    return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            padding: 4px 8px;
            background: {bg_color};
            color: white;
            border-radius: 4px;
            font-size: 0.85em;
            font-weight: 500;
        ">
            {label}
        </div>
    """
def get_score_bar(score):
    """Return an HTML snippet showing a score as a bar plus a numeric label.

    The bar's fill width is ``score * 100`` percent, limited to the range
    0-100. The label shows the score itself formatted to three decimals.

    Args:
        score: Numeric score; values from 0 to 1 map onto an empty-to-full
            bar. Values outside that range, and NaN, still produce valid
            markup (see the clamping below).

    Returns:
        A string containing the bar and label wrapped in a flex ``<div>``.
    """
    import math

    width = score * 100
    # A negative or NaN percentage is not a valid CSS width; the browser
    # would drop the declaration and draw the fill at full width. Clamp so
    # missing or out-of-range scores render as an empty or full bar instead.
    if math.isnan(width):
        width = 0.0
    width = min(max(width, 0.0), 100.0)
    return f"""
        <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
            <div style="
                flex-grow: 1;
                height: 6px;
                background: rgba(255, 255, 255, 0.1);
                border-radius: 3px;
                overflow: hidden;
                max-width: 200px;
            ">
                <div style="
                    width: {width:.2f}%;
                    height: 100%;
                    background: #4F46E5;
                    border-radius: 3px;
                "></div>
            </div>
            <span style="
                font-family: 'SF Mono', monospace;
                font-weight: 600;
                color: #ffffff;
                min-width: 60px;
            ">{score:.3f}</span>
        </div>
    """
 def filter_leaderboard(df, model_type, category, sort_by):
     filtered_df = df.copy()
     if model_type != "All":
         filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)
     filtered_df["Rank"] = range(1, len(filtered_df) + 1)
     perf_chart = get_performance_chart(filtered_df, category)
     cost_chart = get_performance_cost_chart(filtered_df, category)
+    table_html = f"""
+    <style>
+        .dark-table-container {{
+            max-height: 600px;
+            overflow-y: auto;
+            background: linear-gradient(145deg, #1a1b1e, #1f2023);
+            border-radius: 16px;
+            padding: 1px;
+            margin: 20px 0;
+            box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
+                        0 2px 4px -1px rgba(0, 0, 0, 0.06);
+        }}
+        .dark-styled-table {{
+            width: 100%;
+            border-collapse: separate;
+            border-spacing: 0;
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
+            background: transparent;
+            color: #ffffff;
+        }}
+        .dark-styled-table thead {{
+            position: sticky;
+            top: 0;
+            background: linear-gradient(180deg, #1a1b1e, #1d1e22);
+            z-index: 1;
+        }}
+        .dark-styled-table th {{
+            padding: 12px 20px;
+            text-align: left;
+            font-weight: 600;
+            color: #ffffff;
+            text-transform: uppercase;
+            font-size: 0.75em;
+            background: #1a1b1e;
+            letter-spacing: 0.05em;
+            border-bottom: 1px solid #2d2e32;
+        }}
+        .dark-styled-table td {{
+            padding: 16px 20px;
+            border-bottom: 1px solid rgba(45, 46, 50, 0.5);
+            color: #ffffff;
+            font-size: 0.95em;
+        }}
+        .dark-styled-table tbody tr {{
+            transition: all 0.2s ease;
+            background: transparent;
+        }}
+        .dark-styled-table tbody tr:hover {{
+            background: rgba(45, 46, 50, 0.5);
+        }}
+        .model-cell {{
+            font-weight: 500;
+            color: #e2e8f0;
+        }}
+        .cost-cell {{
+            font-family: 'SF Mono', monospace;
+            color: #94a3b8;
+        }}
+        .note-box {{
+            margin: 20px 0;
+            padding: 16px 20px;
+            background: rgba(45, 46, 50, 0.5);
+            border-radius: 12px;
+            color: #94a3b8;
+            font-size: 0.9em;
+            border-left: 4px solid #4f46e5;
+        }}
+        /* Custom scrollbar */
+        .dark-table-container::-webkit-scrollbar {{
+            width: 8px;
+        }}
+        .dark-table-container::-webkit-scrollbar-track {{
+            background: #1a1b1e;
+            border-radius: 4px;
+        }}
+        .dark-table-container::-webkit-scrollbar-thumb {{
+            background: #2d2e32;
+            border-radius: 4px;
+        }}
+        .dark-table-container::-webkit-scrollbar-thumb:hover {{
+            background: #3d3e42;
+        }}
+    </style>
+    <div class="dark-table-container">
+        <table class="dark-styled-table">
+            <thead>
+                <tr>
+                    <th>RANK</th>
+                    <th>MODEL</th>
+                    <th>TYPE</th>
+                    <th>COST (I/O)</th>
+                    <th>SCORE</th>
+                </tr>
+            </thead>
+            <tbody>
+    """
+    for _, row in filtered_df.iterrows():
+        rank_display = get_rank_badge(row["Rank"])
+        type_badge = get_type_badge(row["Model Type"])
+        score_bar = get_score_bar(row["Category Score"])
+        table_html += f"""
+            <tr>
+                <td>{rank_display}</td>
+                <td class="model-cell">{row['Model']}</td>
+                <td>{type_badge}</td>
+                <td class="cost-cell">${row['Input cost per million token']:.2f}/${row['Output cost per million token']:.2f}</td>
+                <td>{score_bar}</td>
+            </tr>
+        """
+    table_html += """
+            </tbody>
+        </table>
+    </div>
+    <div class="note-box">
+        Note: Cost for sorting is calculated using 3:1 ratio on I/O. Cost of Gemini 2.0 is assumed to be same as that of Gemini 1.5.
     </div>
     """
     return table_html, perf_chart, cost_chart