Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Pratik Bhavsar
committed on
Commit
·
fe118de
1
Parent(s):
2f40f5f
main table
Browse files
- app.py +3 -0
- data_loader.py +0 -3
- utils.py +258 -31
app.py
CHANGED
@@ -44,6 +44,7 @@ def create_app():
|
|
44 |
)
|
45 |
|
46 |
with gr.Tab("Model Performance"):
|
|
|
47 |
with gr.Row():
|
48 |
with gr.Column(scale=1):
|
49 |
model_selector = gr.Dropdown(
|
@@ -65,9 +66,11 @@ def create_app():
|
|
65 |
)
|
66 |
|
67 |
with gr.Tab("Methodology"):
|
|
|
68 |
gr.Markdown(METHODOLOGY)
|
69 |
|
70 |
with gr.Tab("Insights"):
|
|
|
71 |
gr.Markdown(INSIGHTS)
|
72 |
|
73 |
app.load(
|
|
|
44 |
)
|
45 |
|
46 |
with gr.Tab("Model Performance"):
|
47 |
+
gr.Markdown(TITLE)
|
48 |
with gr.Row():
|
49 |
with gr.Column(scale=1):
|
50 |
model_selector = gr.Dropdown(
|
|
|
66 |
)
|
67 |
|
68 |
with gr.Tab("Methodology"):
|
69 |
+
gr.Markdown(TITLE)
|
70 |
gr.Markdown(METHODOLOGY)
|
71 |
|
72 |
with gr.Tab("Insights"):
|
73 |
+
gr.Markdown(TITLE)
|
74 |
gr.Markdown(INSIGHTS)
|
75 |
|
76 |
app.load(
|
data_loader.py
CHANGED
@@ -87,9 +87,6 @@ TITLE = """
|
|
87 |
<a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
|
88 |
GitHub β
|
89 |
</a>
|
90 |
-
<a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
|
91 |
-
Paper π
|
92 |
-
</a>
|
93 |
<a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
|
94 |
Dataset π
|
95 |
</a>
|
|
|
87 |
<a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
|
88 |
GitHub β
|
89 |
</a>
|
|
|
|
|
|
|
90 |
<a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
|
91 |
Dataset π
|
92 |
</a>
|
utils.py
CHANGED
@@ -12,20 +12,136 @@ def model_info_tab(df, model_names=None):
|
|
12 |
|
13 |
filtered_df = df[df["Model"].isin(model_names)]
|
14 |
radar_chart = create_radar_plot(df, model_names)
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
return info_html, radar_chart
|
27 |
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def filter_leaderboard(df, model_type, category, sort_by):
|
30 |
filtered_df = df.copy()
|
31 |
if model_type != "All":
|
@@ -41,30 +157,141 @@ def filter_leaderboard(df, model_type, category, sort_by):
|
|
41 |
filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)
|
42 |
|
43 |
filtered_df["Rank"] = range(1, len(filtered_df) + 1)
|
44 |
-
|
45 |
perf_chart = get_performance_chart(filtered_df, category)
|
46 |
cost_chart = get_performance_cost_chart(filtered_df, category)
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
</div>
|
68 |
"""
|
69 |
-
|
70 |
return table_html, perf_chart, cost_chart
|
|
|
12 |
|
13 |
filtered_df = df[df["Model"].isin(model_names)]
|
14 |
radar_chart = create_radar_plot(df, model_names)
|
15 |
+
|
16 |
+
# Create styled table for model info
|
17 |
+
info_html = f"""
|
18 |
+
<div class="dark-table-container">
|
19 |
+
<table class="dark-styled-table">
|
20 |
+
<thead>
|
21 |
+
<tr>
|
22 |
+
<th>Model</th>
|
23 |
+
<th>Type</th>
|
24 |
+
<th>Average</th>
|
25 |
+
<th>I/O Cost</th>
|
26 |
+
<th>Single Turn</th>
|
27 |
+
<th>Multi Turn</th>
|
28 |
+
</tr>
|
29 |
+
</thead>
|
30 |
+
<tbody>
|
31 |
+
"""
|
32 |
+
|
33 |
+
for _, row in filtered_df.iterrows():
|
34 |
+
info_html += f"""
|
35 |
+
<tr>
|
36 |
+
<td>{row['Model']}</td>
|
37 |
+
<td>{row['Model Type']}</td>
|
38 |
+
<td>{row['Model Avg']:.3f}</td>
|
39 |
+
<td>${row['IO Cost']:.2f}</td>
|
40 |
+
<td>{row['single turn perf']:.3f}</td>
|
41 |
+
<td>{row['multi turn perf']:.3f}</td>
|
42 |
+
</tr>
|
43 |
+
"""
|
44 |
+
|
45 |
+
info_html += """
|
46 |
+
</tbody>
|
47 |
+
</table>
|
48 |
+
</div>
|
49 |
+
"""
|
50 |
|
51 |
return info_html, radar_chart
|
52 |
|
53 |
|
54 |
+
def get_rank_badge(rank):
|
55 |
+
"""Generate HTML for rank badge with appropriate styling"""
|
56 |
+
badge_styles = {
|
57 |
+
1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
|
58 |
+
2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
|
59 |
+
3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
|
60 |
+
}
|
61 |
+
|
62 |
+
if rank in badge_styles:
|
63 |
+
label, gradient, text_color = badge_styles[rank]
|
64 |
+
return f"""
|
65 |
+
<div style="
|
66 |
+
display: inline-flex;
|
67 |
+
align-items: center;
|
68 |
+
justify-content: center;
|
69 |
+
min-width: 48px;
|
70 |
+
padding: 4px 12px;
|
71 |
+
background: {gradient};
|
72 |
+
color: {text_color};
|
73 |
+
border-radius: 6px;
|
74 |
+
font-weight: 600;
|
75 |
+
font-size: 0.9em;
|
76 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
|
77 |
+
">
|
78 |
+
{label}
|
79 |
+
</div>
|
80 |
+
"""
|
81 |
+
return f"""
|
82 |
+
<div style="
|
83 |
+
display: inline-flex;
|
84 |
+
align-items: center;
|
85 |
+
justify-content: center;
|
86 |
+
min-width: 28px;
|
87 |
+
color: #a1a1aa;
|
88 |
+
font-weight: 500;
|
89 |
+
">
|
90 |
+
{rank}
|
91 |
+
</div>
|
92 |
+
"""
|
93 |
+
|
94 |
+
|
95 |
+
def get_type_badge(model_type):
|
96 |
+
"""Generate HTML for model type badge"""
|
97 |
+
colors = {"Private": "#4F46E5", "Open source": "#16A34A"}
|
98 |
+
bg_color = colors.get(model_type, "#4F46E5")
|
99 |
+
return f"""
|
100 |
+
<div style="
|
101 |
+
display: inline-flex;
|
102 |
+
align-items: center;
|
103 |
+
padding: 4px 8px;
|
104 |
+
background: {bg_color};
|
105 |
+
color: white;
|
106 |
+
border-radius: 4px;
|
107 |
+
font-size: 0.85em;
|
108 |
+
font-weight: 500;
|
109 |
+
">
|
110 |
+
{model_type}
|
111 |
+
</div>
|
112 |
+
"""
|
113 |
+
|
114 |
+
|
115 |
+
def get_score_bar(score):
|
116 |
+
"""Generate HTML for score bar"""
|
117 |
+
width = score * 100
|
118 |
+
return f"""
|
119 |
+
<div style="display: flex; align-items: center; gap: 12px; width: 100%;">
|
120 |
+
<div style="
|
121 |
+
flex-grow: 1;
|
122 |
+
height: 6px;
|
123 |
+
background: rgba(255, 255, 255, 0.1);
|
124 |
+
border-radius: 3px;
|
125 |
+
overflow: hidden;
|
126 |
+
max-width: 200px;
|
127 |
+
">
|
128 |
+
<div style="
|
129 |
+
width: {width}%;
|
130 |
+
height: 100%;
|
131 |
+
background: #4F46E5;
|
132 |
+
border-radius: 3px;
|
133 |
+
"></div>
|
134 |
+
</div>
|
135 |
+
<span style="
|
136 |
+
font-family: 'SF Mono', monospace;
|
137 |
+
font-weight: 600;
|
138 |
+
color: #ffffff;
|
139 |
+
min-width: 60px;
|
140 |
+
">{score:.3f}</span>
|
141 |
+
</div>
|
142 |
+
"""
|
143 |
+
|
144 |
+
|
145 |
def filter_leaderboard(df, model_type, category, sort_by):
|
146 |
filtered_df = df.copy()
|
147 |
if model_type != "All":
|
|
|
157 |
filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)
|
158 |
|
159 |
filtered_df["Rank"] = range(1, len(filtered_df) + 1)
|
|
|
160 |
perf_chart = get_performance_chart(filtered_df, category)
|
161 |
cost_chart = get_performance_cost_chart(filtered_df, category)
|
162 |
|
163 |
+
table_html = f"""
|
164 |
+
<style>
|
165 |
+
.dark-table-container {{
|
166 |
+
max-height: 600px;
|
167 |
+
overflow-y: auto;
|
168 |
+
background: linear-gradient(145deg, #1a1b1e, #1f2023);
|
169 |
+
border-radius: 16px;
|
170 |
+
padding: 1px;
|
171 |
+
margin: 20px 0;
|
172 |
+
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
|
173 |
+
0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
174 |
+
}}
|
175 |
+
|
176 |
+
.dark-styled-table {{
|
177 |
+
width: 100%;
|
178 |
+
border-collapse: separate;
|
179 |
+
border-spacing: 0;
|
180 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
|
181 |
+
background: transparent;
|
182 |
+
color: #ffffff;
|
183 |
+
}}
|
184 |
+
|
185 |
+
.dark-styled-table thead {{
|
186 |
+
position: sticky;
|
187 |
+
top: 0;
|
188 |
+
background: linear-gradient(180deg, #1a1b1e, #1d1e22);
|
189 |
+
z-index: 1;
|
190 |
+
}}
|
191 |
+
|
192 |
+
.dark-styled-table th {{
|
193 |
+
padding: 12px 20px;
|
194 |
+
text-align: left;
|
195 |
+
font-weight: 600;
|
196 |
+
color: #ffffff;
|
197 |
+
text-transform: uppercase;
|
198 |
+
font-size: 0.75em;
|
199 |
+
background: #1a1b1e;
|
200 |
+
letter-spacing: 0.05em;
|
201 |
+
border-bottom: 1px solid #2d2e32;
|
202 |
+
}}
|
203 |
+
|
204 |
+
.dark-styled-table td {{
|
205 |
+
padding: 16px 20px;
|
206 |
+
border-bottom: 1px solid rgba(45, 46, 50, 0.5);
|
207 |
+
color: #ffffff;
|
208 |
+
font-size: 0.95em;
|
209 |
+
}}
|
210 |
+
|
211 |
+
.dark-styled-table tbody tr {{
|
212 |
+
transition: all 0.2s ease;
|
213 |
+
background: transparent;
|
214 |
+
}}
|
215 |
+
|
216 |
+
.dark-styled-table tbody tr:hover {{
|
217 |
+
background: rgba(45, 46, 50, 0.5);
|
218 |
+
}}
|
219 |
+
|
220 |
+
.model-cell {{
|
221 |
+
font-weight: 500;
|
222 |
+
color: #e2e8f0;
|
223 |
+
}}
|
224 |
+
|
225 |
+
.cost-cell {{
|
226 |
+
font-family: 'SF Mono', monospace;
|
227 |
+
color: #94a3b8;
|
228 |
+
}}
|
229 |
+
|
230 |
+
.note-box {{
|
231 |
+
margin: 20px 0;
|
232 |
+
padding: 16px 20px;
|
233 |
+
background: rgba(45, 46, 50, 0.5);
|
234 |
+
border-radius: 12px;
|
235 |
+
color: #94a3b8;
|
236 |
+
font-size: 0.9em;
|
237 |
+
border-left: 4px solid #4f46e5;
|
238 |
+
}}
|
239 |
+
|
240 |
+
/* Custom scrollbar */
|
241 |
+
.dark-table-container::-webkit-scrollbar {{
|
242 |
+
width: 8px;
|
243 |
+
}}
|
244 |
+
|
245 |
+
.dark-table-container::-webkit-scrollbar-track {{
|
246 |
+
background: #1a1b1e;
|
247 |
+
border-radius: 4px;
|
248 |
+
}}
|
249 |
+
|
250 |
+
.dark-table-container::-webkit-scrollbar-thumb {{
|
251 |
+
background: #2d2e32;
|
252 |
+
border-radius: 4px;
|
253 |
+
}}
|
254 |
+
|
255 |
+
.dark-table-container::-webkit-scrollbar-thumb:hover {{
|
256 |
+
background: #3d3e42;
|
257 |
+
}}
|
258 |
+
</style>
|
259 |
+
<div class="dark-table-container">
|
260 |
+
<table class="dark-styled-table">
|
261 |
+
<thead>
|
262 |
+
<tr>
|
263 |
+
<th>RANK</th>
|
264 |
+
<th>MODEL</th>
|
265 |
+
<th>TYPE</th>
|
266 |
+
<th>COST (I/O)</th>
|
267 |
+
<th>SCORE</th>
|
268 |
+
</tr>
|
269 |
+
</thead>
|
270 |
+
<tbody>
|
271 |
+
"""
|
272 |
+
|
273 |
+
for _, row in filtered_df.iterrows():
|
274 |
+
rank_display = get_rank_badge(row["Rank"])
|
275 |
+
type_badge = get_type_badge(row["Model Type"])
|
276 |
+
score_bar = get_score_bar(row["Category Score"])
|
277 |
+
|
278 |
+
table_html += f"""
|
279 |
+
<tr>
|
280 |
+
<td>{rank_display}</td>
|
281 |
+
<td class="model-cell">{row['Model']}</td>
|
282 |
+
<td>{type_badge}</td>
|
283 |
+
<td class="cost-cell">${row['Input cost per million token']:.2f}/${row['Output cost per million token']:.2f}</td>
|
284 |
+
<td>{score_bar}</td>
|
285 |
+
</tr>
|
286 |
+
"""
|
287 |
+
|
288 |
+
table_html += """
|
289 |
+
</tbody>
|
290 |
+
</table>
|
291 |
+
</div>
|
292 |
+
<div class="note-box">
|
293 |
+
Note: Cost for sorting is calculated using 3:1 ratio on I/O. Cost of Gemini 2.0 is assumed to be same as that of Gemini 1.5.
|
294 |
</div>
|
295 |
"""
|
296 |
+
|
297 |
return table_html, perf_chart, cost_chart
|