Pratik Bhavsar committed on
Commit
fe118de
·
1 Parent(s): 2f40f5f

main table

Browse files
Files changed (3) hide show
  1. app.py +3 -0
  2. data_loader.py +0 -3
  3. utils.py +258 -31
app.py CHANGED
@@ -44,6 +44,7 @@ def create_app():
44
  )
45
 
46
  with gr.Tab("Model Performance"):
 
47
  with gr.Row():
48
  with gr.Column(scale=1):
49
  model_selector = gr.Dropdown(
@@ -65,9 +66,11 @@ def create_app():
65
  )
66
 
67
  with gr.Tab("Methodology"):
 
68
  gr.Markdown(METHODOLOGY)
69
 
70
  with gr.Tab("Insights"):
 
71
  gr.Markdown(INSIGHTS)
72
 
73
  app.load(
 
44
  )
45
 
46
  with gr.Tab("Model Performance"):
47
+ gr.Markdown(TITLE)
48
  with gr.Row():
49
  with gr.Column(scale=1):
50
  model_selector = gr.Dropdown(
 
66
  )
67
 
68
  with gr.Tab("Methodology"):
69
+ gr.Markdown(TITLE)
70
  gr.Markdown(METHODOLOGY)
71
 
72
  with gr.Tab("Insights"):
73
+ gr.Markdown(TITLE)
74
  gr.Markdown(INSIGHTS)
75
 
76
  app.load(
data_loader.py CHANGED
@@ -87,9 +87,6 @@ TITLE = """
87
  <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
88
  GitHub&nbsp;⭐
89
  </a>
90
- <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
91
- Paper&nbsp;📄
92
- </a>
93
  <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
94
  Dataset&nbsp;📊
95
  </a>
 
87
  <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
88
  GitHub&nbsp;⭐
89
  </a>
 
 
 
90
  <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #ffffff; font-weight: 500; padding: 0.5rem;">
91
  Dataset&nbsp;📊
92
  </a>
utils.py CHANGED
@@ -12,20 +12,136 @@ def model_info_tab(df, model_names=None):
12
 
13
  filtered_df = df[df["Model"].isin(model_names)]
14
  radar_chart = create_radar_plot(df, model_names)
15
- info_html = filtered_df[
16
- [
17
- "Model",
18
- "Model Type",
19
- "Model Avg",
20
- "IO Cost",
21
- "single turn perf",
22
- "multi turn perf",
23
- ]
24
- ].to_html(index=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  return info_html, radar_chart
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def filter_leaderboard(df, model_type, category, sort_by):
30
  filtered_df = df.copy()
31
  if model_type != "All":
@@ -41,30 +157,141 @@ def filter_leaderboard(df, model_type, category, sort_by):
41
  filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)
42
 
43
  filtered_df["Rank"] = range(1, len(filtered_df) + 1)
44
-
45
  perf_chart = get_performance_chart(filtered_df, category)
46
  cost_chart = get_performance_cost_chart(filtered_df, category)
47
 
48
- filtered_df["Cost (Input/Output)"] = filtered_df.apply(
49
- lambda x: f"${x['Input cost per million token']:.2f}/${x['Output cost per million token']:.2f}",
50
- axis=1,
51
- )
52
-
53
- display_columns = [
54
- "Rank",
55
- "Model",
56
- "Model Type",
57
- "Cost (Input/Output)",
58
- "Category Score",
59
- ]
60
-
61
- table_html = filtered_df[display_columns].to_html(index=False, escape=False)
62
- note_html = """
63
- <div style='margin-top: 20px; padding: 10px; background-color: #f3f4f6; border-radius: 4px;'>
64
- <p style='margin: 0; font-size: 0.9em; color: #4b5563;'>
65
- Note: Cost for sorting is calculated using 3:1 ratio on I/O. Cost of Gemini 2.0 is assumed to be same as that of Gemini 1.5.
66
- </p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  </div>
68
  """
69
- table_html += note_html
70
  return table_html, perf_chart, cost_chart
 
12
 
13
  filtered_df = df[df["Model"].isin(model_names)]
14
  radar_chart = create_radar_plot(df, model_names)
15
+
16
+ # Create styled table for model info
17
+ info_html = f"""
18
+ <div class="dark-table-container">
19
+ <table class="dark-styled-table">
20
+ <thead>
21
+ <tr>
22
+ <th>Model</th>
23
+ <th>Type</th>
24
+ <th>Average</th>
25
+ <th>I/O Cost</th>
26
+ <th>Single Turn</th>
27
+ <th>Multi Turn</th>
28
+ </tr>
29
+ </thead>
30
+ <tbody>
31
+ """
32
+
33
+ for _, row in filtered_df.iterrows():
34
+ info_html += f"""
35
+ <tr>
36
+ <td>{row['Model']}</td>
37
+ <td>{row['Model Type']}</td>
38
+ <td>{row['Model Avg']:.3f}</td>
39
+ <td>${row['IO Cost']:.2f}</td>
40
+ <td>{row['single turn perf']:.3f}</td>
41
+ <td>{row['multi turn perf']:.3f}</td>
42
+ </tr>
43
+ """
44
+
45
+ info_html += """
46
+ </tbody>
47
+ </table>
48
+ </div>
49
+ """
50
 
51
  return info_html, radar_chart
52
 
53
 
54
def get_rank_badge(rank):
    """Return an HTML snippet that displays a leaderboard rank.

    Ranks 1, 2 and 3 are rendered as gradient-filled badges labelled
    "1st", "2nd" and "3rd". Every other rank is rendered as plain muted
    text showing the rank itself.

    Args:
        rank: Position to display. Integral values that arrive as floats
            or NumPy scalars (for example ``1.0``) are treated as the
            matching ``int``; anything else is shown unchanged.

    Returns:
        str: A ``<div>`` element carrying its own inline styles.
    """
    # rank -> (label, background gradient, text colour)
    badge_styles = {
        1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
        2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
        3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
    }

    # Rows pulled out of a DataFrame can be upcast, so a rank may show up as
    # 4.0 or a NumPy integer. Collapse integral values to a plain int so the
    # text reads "4" rather than "4.0"; leave non-integral input untouched.
    try:
        as_int = int(rank)
    except (TypeError, ValueError, OverflowError):
        as_int = None
    if as_int is not None and as_int == rank:
        rank = as_int

    if rank in badge_styles:
        label, gradient, text_color = badge_styles[rank]
        return f"""
        <div style="
            display: inline-flex;
            align-items: center;
            justify-content: center;
            min-width: 48px;
            padding: 4px 12px;
            background: {gradient};
            color: {text_color};
            border-radius: 6px;
            font-weight: 600;
            font-size: 0.9em;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2);
        ">
            {label}
        </div>
        """
    return f"""
    <div style="
        display: inline-flex;
        align-items: center;
        justify-content: center;
        min-width: 28px;
        color: #a1a1aa;
        font-weight: 500;
    ">
        {rank}
    </div>
    """
93
+
94
+
95
def get_type_badge(model_type):
    """Return an HTML badge for a model-type label.

    "Private" and "Open source" each have their own background colour;
    any other label falls back to the "Private" colour.

    Args:
        model_type: Label to show inside the badge. It is converted to
            text and HTML-escaped before being placed in the markup.

    Returns:
        str: A ``<div>`` element carrying its own inline styles.
    """
    from html import escape  # local import keeps the block self-contained

    colors = {"Private": "#4F46E5", "Open source": "#16A34A"}
    bg_color = colors.get(model_type, "#4F46E5")
    # The label comes from data, so escape it: a stray "<" or "&" must not
    # be interpreted as markup inside the table cell.
    label = escape(str(model_type))
    return f"""
    <div style="
        display: inline-flex;
        align-items: center;
        padding: 4px 8px;
        background: {bg_color};
        color: white;
        border-radius: 4px;
        font-size: 0.85em;
        font-weight: 500;
    ">
        {label}
    </div>
    """
113
+
114
+
115
def get_score_bar(score):
    """Return HTML for a horizontal score bar with a numeric label.

    The filled part of the bar is ``score * 100`` percent wide, limited to
    the range 0–100. The label next to it shows ``score`` to three
    decimals, unclamped.

    Args:
        score: Numeric score. Presumably a fraction between 0 and 1, since
            it is multiplied by 100 to get a percentage; confirm against
            the caller's data.

    Returns:
        str: A flex-row ``<div>`` holding the bar and the label.
    """
    width = score * 100
    # Keep the CSS valid: cap overshoot at 100 and send negatives to 0.
    # NaN fails both comparisons, so "not width >= 0" also catches it.
    if width > 100:
        width = 100
    elif not width >= 0:
        width = 0
    return f"""
    <div style="display: flex; align-items: center; gap: 12px; width: 100%;">
        <div style="
            flex-grow: 1;
            height: 6px;
            background: rgba(255, 255, 255, 0.1);
            border-radius: 3px;
            overflow: hidden;
            max-width: 200px;
        ">
            <div style="
                width: {width:.1f}%;
                height: 100%;
                background: #4F46E5;
                border-radius: 3px;
            "></div>
        </div>
        <span style="
            font-family: 'SF Mono', monospace;
            font-weight: 600;
            color: #ffffff;
            min-width: 60px;
        ">{score:.3f}</span>
    </div>
    """
143
+
144
+
145
  def filter_leaderboard(df, model_type, category, sort_by):
146
  filtered_df = df.copy()
147
  if model_type != "All":
 
157
  filtered_df = filtered_df.sort_values(by="IO Cost", ascending=True)
158
 
159
  filtered_df["Rank"] = range(1, len(filtered_df) + 1)
 
160
  perf_chart = get_performance_chart(filtered_df, category)
161
  cost_chart = get_performance_cost_chart(filtered_df, category)
162
 
163
+ table_html = f"""
164
+ <style>
165
+ .dark-table-container {{
166
+ max-height: 600px;
167
+ overflow-y: auto;
168
+ background: linear-gradient(145deg, #1a1b1e, #1f2023);
169
+ border-radius: 16px;
170
+ padding: 1px;
171
+ margin: 20px 0;
172
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
173
+ 0 2px 4px -1px rgba(0, 0, 0, 0.06);
174
+ }}
175
+
176
+ .dark-styled-table {{
177
+ width: 100%;
178
+ border-collapse: separate;
179
+ border-spacing: 0;
180
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
181
+ background: transparent;
182
+ color: #ffffff;
183
+ }}
184
+
185
+ .dark-styled-table thead {{
186
+ position: sticky;
187
+ top: 0;
188
+ background: linear-gradient(180deg, #1a1b1e, #1d1e22);
189
+ z-index: 1;
190
+ }}
191
+
192
+ .dark-styled-table th {{
193
+ padding: 12px 20px;
194
+ text-align: left;
195
+ font-weight: 600;
196
+ color: #ffffff;
197
+ text-transform: uppercase;
198
+ font-size: 0.75em;
199
+ background: #1a1b1e;
200
+ letter-spacing: 0.05em;
201
+ border-bottom: 1px solid #2d2e32;
202
+ }}
203
+
204
+ .dark-styled-table td {{
205
+ padding: 16px 20px;
206
+ border-bottom: 1px solid rgba(45, 46, 50, 0.5);
207
+ color: #ffffff;
208
+ font-size: 0.95em;
209
+ }}
210
+
211
+ .dark-styled-table tbody tr {{
212
+ transition: all 0.2s ease;
213
+ background: transparent;
214
+ }}
215
+
216
+ .dark-styled-table tbody tr:hover {{
217
+ background: rgba(45, 46, 50, 0.5);
218
+ }}
219
+
220
+ .model-cell {{
221
+ font-weight: 500;
222
+ color: #e2e8f0;
223
+ }}
224
+
225
+ .cost-cell {{
226
+ font-family: 'SF Mono', monospace;
227
+ color: #94a3b8;
228
+ }}
229
+
230
+ .note-box {{
231
+ margin: 20px 0;
232
+ padding: 16px 20px;
233
+ background: rgba(45, 46, 50, 0.5);
234
+ border-radius: 12px;
235
+ color: #94a3b8;
236
+ font-size: 0.9em;
237
+ border-left: 4px solid #4f46e5;
238
+ }}
239
+
240
+ /* Custom scrollbar */
241
+ .dark-table-container::-webkit-scrollbar {{
242
+ width: 8px;
243
+ }}
244
+
245
+ .dark-table-container::-webkit-scrollbar-track {{
246
+ background: #1a1b1e;
247
+ border-radius: 4px;
248
+ }}
249
+
250
+ .dark-table-container::-webkit-scrollbar-thumb {{
251
+ background: #2d2e32;
252
+ border-radius: 4px;
253
+ }}
254
+
255
+ .dark-table-container::-webkit-scrollbar-thumb:hover {{
256
+ background: #3d3e42;
257
+ }}
258
+ </style>
259
+ <div class="dark-table-container">
260
+ <table class="dark-styled-table">
261
+ <thead>
262
+ <tr>
263
+ <th>RANK</th>
264
+ <th>MODEL</th>
265
+ <th>TYPE</th>
266
+ <th>COST (I/O)</th>
267
+ <th>SCORE</th>
268
+ </tr>
269
+ </thead>
270
+ <tbody>
271
+ """
272
+
273
+ for _, row in filtered_df.iterrows():
274
+ rank_display = get_rank_badge(row["Rank"])
275
+ type_badge = get_type_badge(row["Model Type"])
276
+ score_bar = get_score_bar(row["Category Score"])
277
+
278
+ table_html += f"""
279
+ <tr>
280
+ <td>{rank_display}</td>
281
+ <td class="model-cell">{row['Model']}</td>
282
+ <td>{type_badge}</td>
283
+ <td class="cost-cell">${row['Input cost per million token']:.2f}/${row['Output cost per million token']:.2f}</td>
284
+ <td>{score_bar}</td>
285
+ </tr>
286
+ """
287
+
288
+ table_html += """
289
+ </tbody>
290
+ </table>
291
+ </div>
292
+ <div class="note-box">
293
+ Note: Cost for sorting is calculated using 3:1 ratio on I/O. Cost of Gemini 2.0 is assumed to be same as that of Gemini 1.5.
294
  </div>
295
  """
296
+
297
  return table_html, perf_chart, cost_chart