Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
1ac2307
·
1 Parent(s): dccb8fe

refactor: refactoring the dashboard code

Browse files
Files changed (3) hide show
  1. app.py +30 -136
  2. src/display/gradio_formatting.py +92 -0
  3. src/display/utils.py +1 -1
app.py CHANGED
@@ -15,13 +15,13 @@ from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME
15
  from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
16
  from src.read_evals import get_raw_eval_results, get_leaderboard_df
17
  from src.utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results, clear_reranking_selections
 
18
 
19
 
20
  def restart_space():
21
  API.restart_space(repo_id=REPO_ID)
22
 
23
 
24
-
25
  try:
26
  snapshot_download(
27
  repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
@@ -54,6 +54,9 @@ shown_columns_long_doc, types_long_doc = get_default_cols(
54
  leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
55
  leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
56
 
 
 
 
57
 
58
  def update_metric_qa(
59
  metric: str,
@@ -90,90 +93,33 @@ with demo:
90
  with gr.Column():
91
  # search retrieval models
92
  with gr.Row():
93
- selected_version = gr.Dropdown(
94
- choices=["AIR-Bench_24.04",],
95
- value="AIR-Bench_24.04",
96
- label="Select the version of AIR-Bench",
97
- interactive = True
98
- )
99
  with gr.Row():
100
- search_bar = gr.Textbox(
101
- placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`) and press ENTER...",
102
- show_label=False,
103
- elem_id="search-bar",
104
- info="Search the retrieval methods"
105
- )
106
- # select reranking model
107
- reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in raw_data])))
108
  with gr.Row():
109
- selected_rerankings = gr.Dropdown(
110
- choices=reranking_models,
111
- # value=reranking_models,
112
- label="Select the reranking models",
113
- elem_id="reranking-select",
114
- interactive=True,
115
- multiselect=True
116
- )
117
  with gr.Row():
118
- select_noreranker_only_btn = gr.Button(
119
- value="Only show results without ranking models",
120
- )
121
 
122
  with gr.Column(min_width=320):
123
  # select the metric
124
- selected_metric = gr.Dropdown(
125
- choices=METRIC_LIST,
126
- value=DEFAULT_METRIC,
127
- label="Select the metric",
128
- interactive=True,
129
- elem_id="metric-select",
130
- )
131
  # select domain
132
  with gr.Row():
133
- selected_domains = gr.CheckboxGroup(
134
- choices=DOMAIN_COLS_QA,
135
- value=DOMAIN_COLS_QA,
136
- label="Select the domains",
137
- elem_id="domain-column-select",
138
- interactive=True,
139
- )
140
  # select language
141
  with gr.Row():
142
- selected_langs = gr.Dropdown(
143
- choices=LANG_COLS_QA,
144
- value=LANG_COLS_QA,
145
- label="Select the languages",
146
- elem_id="language-column-select",
147
- multiselect=True,
148
- interactive=True
149
- )
150
  with gr.Row():
151
- show_anonymous = gr.Checkbox(
152
- label="Show anonymous submissions",
153
- value=False,
154
- info="The anonymous submissions might have invalid model information."
155
- )
156
  with gr.Row():
157
- show_revision_and_timestamp = gr.Checkbox(
158
- label="Show submission details",
159
- value=False,
160
- info="Show the revision and timestamp information of submissions"
161
- )
162
 
163
- leaderboard_table = gr.components.Dataframe(
164
- value=leaderboard_df_qa,
165
- datatype=types_qa,
166
- elem_id="leaderboard-table",
167
- interactive=False,
168
- visible=True,
169
- )
170
 
171
  # Dummy leaderboard for handling the case when the user uses backspace key
172
- hidden_leaderboard_table_for_search = gr.components.Dataframe(
173
- value=original_df_qa,
174
- datatype=types_qa,
175
- visible=False,
176
- )
177
 
178
  # Set search_bar listener
179
  search_bar.submit(
@@ -232,90 +178,38 @@ with demo:
232
  with gr.Row():
233
  with gr.Column():
234
  with gr.Row():
235
- selected_version = gr.Dropdown(
236
- choices=["AIR-Bench_24.04",],
237
- value="AIR-Bench_24.04",
238
- label="Select the version of AIR-Bench",
239
- interactive=True
240
- )
241
  with gr.Row():
242
- search_bar = gr.Textbox(
243
- info="Search the retrieval methods",
244
- placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`)"
245
- " and press ENTER...",
246
- show_label=False,
247
- elem_id="search-bar-long-doc",
248
- )
249
  # select reranking model
250
- reranking_models = list(frozenset([eval_result.reranking_model for eval_result in raw_data]))
251
  with gr.Row():
252
- selected_rerankings = gr.Dropdown(
253
- choices=reranking_models,
254
- # value=reranking_models,
255
- label="Select the reranking models",
256
- elem_id="reranking-select-long-doc",
257
- interactive=True,
258
- multiselect=True,
259
- )
260
  with gr.Row():
261
- select_noreranker_only_btn = gr.Button(
262
- value="Only show results without ranking models",
263
- )
264
  with gr.Column(min_width=320):
265
  # select the metric
266
  with gr.Row():
267
- selected_metric = gr.Dropdown(
268
- choices=METRIC_LIST,
269
- value=DEFAULT_METRIC,
270
- label="Select the metric",
271
- interactive=True,
272
- elem_id="metric-select-long-doc",
273
- )
274
  # select domain
275
  with gr.Row():
276
- selected_domains = gr.CheckboxGroup(
277
- choices=DOMAIN_COLS_LONG_DOC,
278
- value=DOMAIN_COLS_LONG_DOC,
279
- label="Select the domains",
280
- elem_id="domain-column-select-long-doc",
281
- interactive=True,
282
- )
283
  # select language
284
  with gr.Row():
285
- selected_langs = gr.Dropdown(
286
- choices=LANG_COLS_LONG_DOC,
287
- value=LANG_COLS_LONG_DOC,
288
- label="Select the languages",
289
- elem_id="language-column-select-long-doc",
290
- multiselect=True,
291
- interactive=True
292
  )
293
  with gr.Row():
294
- show_anonymous = gr.Checkbox(
295
- label="Show anonymous submissions",
296
- value=False,
297
- info="The anonymous submissions might have invalid model information."
298
- )
299
  with gr.Row():
300
- show_revision_and_timestamp = gr.Checkbox(
301
- label="Show submission details",
302
- value=False,
303
- info="Show the revision and timestamp information of submissions"
304
- )
305
 
306
- leaderboard_table_long_doc = gr.components.Dataframe(
307
- value=leaderboard_df_long_doc,
308
- datatype=types_long_doc,
309
- elem_id="leaderboard-table-long-doc",
310
- interactive=False,
311
- visible=True,
312
  )
313
 
314
  # Dummy leaderboard for handling the case when the user uses backspace key
315
- hidden_leaderboard_table_for_search = gr.components.Dataframe(
316
- value=original_df_long_doc,
317
- datatype=types_long_doc,
318
- visible=False,
319
  )
320
 
321
  # Set search_bar listener
 
15
  from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
16
  from src.read_evals import get_raw_eval_results, get_leaderboard_df
17
  from src.utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results, clear_reranking_selections
18
+ from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, get_noreranker_button, get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table
19
 
20
 
21
  def restart_space():
22
  API.restart_space(repo_id=REPO_ID)
23
 
24
 
 
25
  try:
26
  snapshot_download(
27
  repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
 
54
  leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
55
  leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
56
 
57
+ # select reranking model
58
+ reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in raw_data])))
59
+
60
 
61
  def update_metric_qa(
62
  metric: str,
 
93
  with gr.Column():
94
  # search retrieval models
95
  with gr.Row():
96
+ selected_version = get_version_dropdown()
 
 
 
 
 
97
  with gr.Row():
98
+ search_bar = get_search_bar()
 
 
 
 
 
 
 
99
  with gr.Row():
100
+ selected_rerankings = get_reranking_dropdown(reranking_models)
 
 
 
 
 
 
 
101
  with gr.Row():
102
+ select_noreranker_only_btn = get_noreranker_button()
 
 
103
 
104
  with gr.Column(min_width=320):
105
  # select the metric
106
+ selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC)
 
 
 
 
 
 
107
  # select domain
108
  with gr.Row():
109
+ selected_domains = get_domain_dropdown(DOMAIN_COLS_QA, DOMAIN_COLS_QA)
 
 
 
 
 
 
110
  # select language
111
  with gr.Row():
112
+ selected_langs = get_language_dropdown(LANG_COLS_QA, LANG_COLS_QA)
 
 
 
 
 
 
 
113
  with gr.Row():
114
+ show_anonymous = get_anonymous_checkbox()
 
 
 
 
115
  with gr.Row():
116
+ show_revision_and_timestamp = get_revision_and_ts_checkbox()
 
 
 
 
117
 
118
+
119
+ leaderboard_table = get_leaderboard_table(leaderboard_df_qa, types_qa)
 
 
 
 
 
120
 
121
  # Dummy leaderboard for handling the case when the user uses backspace key
122
+ hidden_leaderboard_table_for_search = get_leaderboard_table(original_df_qa, types_qa, visible=False)
 
 
 
 
123
 
124
  # Set search_bar listener
125
  search_bar.submit(
 
178
  with gr.Row():
179
  with gr.Column():
180
  with gr.Row():
181
+ selected_version = get_version_dropdown()
 
 
 
 
 
182
  with gr.Row():
183
+ search_bar = get_search_bar()
 
 
 
 
 
 
184
  # select reranking model
 
185
  with gr.Row():
186
+ selected_rerankings = get_reranking_dropdown(reranking_models)
 
 
 
 
 
 
 
187
  with gr.Row():
188
+ select_noreranker_only_btn = get_noreranker_button()
 
 
189
  with gr.Column(min_width=320):
190
  # select the metric
191
  with gr.Row():
192
+ selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC)
 
 
 
 
 
 
193
  # select domain
194
  with gr.Row():
195
+ selected_domains = get_domain_dropdown(DOMAIN_COLS_LONG_DOC, DOMAIN_COLS_LONG_DOC)
 
 
 
 
 
 
196
  # select language
197
  with gr.Row():
198
+ selected_langs = get_language_dropdown(
199
+ LANG_COLS_LONG_DOC, LANG_COLS_LONG_DOC
 
 
 
 
 
200
  )
201
  with gr.Row():
202
+ show_anonymous = get_anonymous_checkbox()
 
 
 
 
203
  with gr.Row():
204
+ show_revision_and_timestamp = get_revision_and_ts_checkbox()
 
 
 
 
205
 
206
+ leaderboard_table_long_doc = get_leaderboard_table(
207
+ leaderboard_df_long_doc, types_long_doc
 
 
 
 
208
  )
209
 
210
  # Dummy leaderboard for handling the case when the user uses backspace key
211
+ hidden_leaderboard_table_for_search =get_leaderboard_table(
212
+ original_df_long_doc, types_long_doc, visible=False
 
 
213
  )
214
 
215
  # Set search_bar listener
src/display/gradio_formatting.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+
4
+ def get_version_dropdown():
5
+ return gr.Dropdown(
6
+ choices=["AIR-Bench_24.04", ],
7
+ value="AIR-Bench_24.04",
8
+ label="Select the version of AIR-Bench",
9
+ interactive=True
10
+ )
11
+
12
+
13
+ def get_search_bar():
14
+ return gr.Textbox(
15
+ placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`) and press ENTER...",
16
+ show_label=False,
17
+ # elem_id="search-bar",
18
+ info="Search the retrieval methods"
19
+ )
20
+
21
+
22
+ def get_reranking_dropdown(model_list):
23
+ return gr.Dropdown(
24
+ choices=model_list,
25
+ label="Select the reranking models",
26
+ # elem_id="reranking-select",
27
+ interactive=True,
28
+ multiselect=True
29
+ )
30
+
31
+
32
+ def get_noreranker_button():
33
+ return gr.Button(
34
+ value="Only show results without ranking models",
35
+ )
36
+
37
+
38
+ def get_metric_dropdown(metric_list, default_metrics):
39
+ return gr.Dropdown(
40
+ choices=metric_list,
41
+ value=default_metrics,
42
+ label="Select the metric",
43
+ interactive=True,
44
+ # elem_id="metric-select-long-doc",
45
+ )
46
+
47
+
48
+ def get_domain_dropdown(domain_list, default_domains):
49
+ return gr.CheckboxGroup(
50
+ choices=domain_list,
51
+ value=default_domains,
52
+ label="Select the domains",
53
+ # elem_id="domain-column-select",
54
+ interactive=True,
55
+ )
56
+
57
+
58
+ def get_language_dropdown(language_list, default_languages):
59
+ return gr.Dropdown(
60
+ choices=language_list,
61
+ value=language_list,
62
+ label="Select the languages",
63
+ # elem_id="language-column-select",
64
+ multiselect=True,
65
+ interactive=True
66
+ )
67
+
68
+
69
+ def get_anonymous_checkbox():
70
+ return gr.Checkbox(
71
+ label="Show anonymous submissions",
72
+ value=False,
73
+ info="The anonymous submissions might have invalid model information."
74
+ )
75
+
76
+
77
+ def get_revision_and_ts_checkbox():
78
+ return gr.Checkbox(
79
+ label="Show submission details",
80
+ value=False,
81
+ info="Show the revision and timestamp information of submissions"
82
+ )
83
+
84
+
85
+ def get_leaderboard_table(df, datatype, visible=True):
86
+ return gr.components.Dataframe(
87
+ value=df,
88
+ datatype=datatype,
89
+ elem_id="leaderboard-table",
90
+ interactive=False,
91
+ visible=visible,
92
+ )
src/display/utils.py CHANGED
@@ -90,4 +90,4 @@ COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default
90
 
91
  QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
92
 
93
- LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]
 
90
 
91
  QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
92
 
93
+ LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]