Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
refactor: refactor the naming
Browse files
- app.py +58 -58
- src/loaders.py +2 -2
- src/models.py +1 -2
app.py
CHANGED
@@ -57,13 +57,13 @@ except Exception:
|
|
57 |
print("failed to download")
|
58 |
restart_space()
|
59 |
|
60 |
-
global
|
61 |
-
|
62 |
global datastore
|
63 |
-
datastore =
|
64 |
|
65 |
|
66 |
-
def update_metric_qa(
|
67 |
metric: str,
|
68 |
domains: list,
|
69 |
langs: list,
|
@@ -72,6 +72,7 @@ def update_metric_qa(
|
|
72 |
show_anonymous: bool,
|
73 |
show_revision_and_timestamp: bool,
|
74 |
):
|
|
|
75 |
return update_metric(
|
76 |
datastore,
|
77 |
"qa",
|
@@ -85,7 +86,7 @@ def update_metric_qa(
|
|
85 |
)
|
86 |
|
87 |
|
88 |
-
def update_metric_long_doc(
|
89 |
metric: str,
|
90 |
domains: list,
|
91 |
langs: list,
|
@@ -94,6 +95,7 @@ def update_metric_long_doc(
|
|
94 |
show_anonymous: bool,
|
95 |
show_revision_and_timestamp,
|
96 |
):
|
|
|
97 |
return update_metric(
|
98 |
datastore,
|
99 |
"long-doc",
|
@@ -107,30 +109,28 @@ def update_metric_long_doc(
|
|
107 |
)
|
108 |
|
109 |
|
110 |
-
def
|
111 |
global datastore
|
112 |
-
global
|
113 |
-
datastore =
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
def update_datastore_long_doc(version):
|
124 |
global datastore
|
125 |
-
global
|
126 |
-
datastore =
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
|
134 |
|
135 |
|
136 |
demo = gr.Blocks(css=custom_css)
|
@@ -142,7 +142,7 @@ with demo:
|
|
142 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
143 |
with gr.TabItem("Results", elem_id="results-tab-table"):
|
144 |
with gr.Row():
|
145 |
-
|
146 |
|
147 |
with gr.TabItem("QA", elem_id="qa-benchmark-tab-table", id=0):
|
148 |
with gr.Row():
|
@@ -174,10 +174,10 @@ with demo:
|
|
174 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
175 |
hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
176 |
|
177 |
-
|
178 |
-
|
179 |
[
|
180 |
-
|
181 |
],
|
182 |
[selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table],
|
183 |
)
|
@@ -187,7 +187,7 @@ with demo:
|
|
187 |
lb_table,
|
188 |
hidden_lb_table,
|
189 |
search_bar,
|
190 |
-
|
191 |
selected_domains,
|
192 |
selected_langs,
|
193 |
selected_rerankings,
|
@@ -197,7 +197,7 @@ with demo:
|
|
197 |
|
198 |
# set metric listener
|
199 |
selected_metric.change(
|
200 |
-
|
201 |
[
|
202 |
selected_metric,
|
203 |
selected_domains,
|
@@ -233,10 +233,10 @@ with demo:
|
|
233 |
hidden_lb_df_retriever, datastore.qa_types, visible=False
|
234 |
)
|
235 |
|
236 |
-
|
237 |
-
|
238 |
[
|
239 |
-
|
240 |
],
|
241 |
[
|
242 |
selected_domains,
|
@@ -252,7 +252,7 @@ with demo:
|
|
252 |
lb_table_retriever,
|
253 |
hidden_lb_table_retriever,
|
254 |
search_bar_retriever,
|
255 |
-
|
256 |
selected_domains,
|
257 |
selected_langs,
|
258 |
selected_noreranker,
|
@@ -262,7 +262,7 @@ with demo:
|
|
262 |
|
263 |
# set metric listener
|
264 |
selected_metric.change(
|
265 |
-
|
266 |
[
|
267 |
selected_metric,
|
268 |
selected_domains,
|
@@ -298,10 +298,10 @@ with demo:
|
|
298 |
hidden_lb_df_reranker, datastore.qa_types, visible=False
|
299 |
)
|
300 |
|
301 |
-
|
302 |
-
|
303 |
[
|
304 |
-
|
305 |
],
|
306 |
[
|
307 |
selected_domains,
|
@@ -317,7 +317,7 @@ with demo:
|
|
317 |
lb_table_reranker,
|
318 |
hidden_lb_table_reranker,
|
319 |
search_bar_reranker,
|
320 |
-
|
321 |
selected_domains,
|
322 |
selected_langs,
|
323 |
selected_rerankings_reranker,
|
@@ -326,7 +326,7 @@ with demo:
|
|
326 |
)
|
327 |
# set metric listener
|
328 |
selected_metric.change(
|
329 |
-
|
330 |
[
|
331 |
selected_metric,
|
332 |
selected_domains,
|
@@ -373,10 +373,10 @@ with demo:
|
|
373 |
datastore.doc_raw_df, datastore.doc_types, visible=False
|
374 |
)
|
375 |
|
376 |
-
|
377 |
-
|
378 |
[
|
379 |
-
|
380 |
],
|
381 |
[
|
382 |
selected_domains,
|
@@ -392,7 +392,7 @@ with demo:
|
|
392 |
lb_table_long_doc,
|
393 |
hidden_lb_table_long_doc,
|
394 |
search_bar,
|
395 |
-
|
396 |
selected_domains,
|
397 |
selected_langs,
|
398 |
selected_rerankings,
|
@@ -402,7 +402,7 @@ with demo:
|
|
402 |
|
403 |
# set metric listener
|
404 |
selected_metric.change(
|
405 |
-
|
406 |
[
|
407 |
selected_metric,
|
408 |
selected_domains,
|
@@ -437,10 +437,10 @@ with demo:
|
|
437 |
hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
|
438 |
)
|
439 |
|
440 |
-
|
441 |
-
|
442 |
[
|
443 |
-
|
444 |
],
|
445 |
[
|
446 |
selected_domains,
|
@@ -456,7 +456,7 @@ with demo:
|
|
456 |
lb_table_retriever_long_doc,
|
457 |
hidden_lb_table_retriever_long_doc,
|
458 |
search_bar_retriever,
|
459 |
-
|
460 |
selected_domains,
|
461 |
selected_langs,
|
462 |
selected_noreranker,
|
@@ -465,7 +465,7 @@ with demo:
|
|
465 |
)
|
466 |
|
467 |
selected_metric.change(
|
468 |
-
|
469 |
[
|
470 |
selected_metric,
|
471 |
selected_domains,
|
@@ -502,10 +502,10 @@ with demo:
|
|
502 |
hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
|
503 |
)
|
504 |
|
505 |
-
|
506 |
-
|
507 |
[
|
508 |
-
|
509 |
],
|
510 |
[
|
511 |
selected_domains,
|
@@ -521,7 +521,7 @@ with demo:
|
|
521 |
lb_table_reranker_ldoc,
|
522 |
hidden_lb_table_reranker_ldoc,
|
523 |
search_bar_reranker_ldoc,
|
524 |
-
|
525 |
selected_domains,
|
526 |
selected_langs,
|
527 |
selected_rerankings_reranker_ldoc,
|
@@ -529,7 +529,7 @@ with demo:
|
|
529 |
show_revision_and_timestamp,
|
530 |
)
|
531 |
selected_metric.change(
|
532 |
-
|
533 |
[
|
534 |
selected_metric,
|
535 |
selected_domains,
|
|
|
57 |
print("failed to download")
|
58 |
restart_space()
|
59 |
|
60 |
+
global ds_dict
|
61 |
+
ds_dict = load_eval_results(EVAL_RESULTS_PATH)
|
62 |
global datastore
|
63 |
+
datastore = ds_dict[LATEST_BENCHMARK_VERSION]
|
64 |
|
65 |
|
66 |
+
def update_qa_metric(
|
67 |
metric: str,
|
68 |
domains: list,
|
69 |
langs: list,
|
|
|
72 |
show_anonymous: bool,
|
73 |
show_revision_and_timestamp: bool,
|
74 |
):
|
75 |
+
global datastore
|
76 |
return update_metric(
|
77 |
datastore,
|
78 |
"qa",
|
|
|
86 |
)
|
87 |
|
88 |
|
89 |
+
def update_doc_metric(
|
90 |
metric: str,
|
91 |
domains: list,
|
92 |
langs: list,
|
|
|
95 |
show_anonymous: bool,
|
96 |
show_revision_and_timestamp,
|
97 |
):
|
98 |
+
global datastore
|
99 |
return update_metric(
|
100 |
datastore,
|
101 |
"long-doc",
|
|
|
109 |
)
|
110 |
|
111 |
|
112 |
+
def update_qa_version(version):
|
113 |
global datastore
|
114 |
+
global ds_dict
|
115 |
+
datastore = ds_dict[version]
|
116 |
+
domain_elem = get_domain_dropdown(QABenchmarks[datastore.slug])
|
117 |
+
lang_elem = get_language_dropdown(QABenchmarks[datastore.slug])
|
118 |
+
model_elem = get_reranking_dropdown(datastore.reranking_models)
|
119 |
+
df_elem = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
|
120 |
+
hidden_df_elem = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
121 |
+
return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
|
122 |
+
|
123 |
+
|
124 |
+
def update_doc_version(version):
|
|
|
125 |
global datastore
|
126 |
+
global ds_dict
|
127 |
+
datastore = ds_dict[version]
|
128 |
+
domain_elem = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
|
129 |
+
lang_elem = get_language_dropdown(LongDocBenchmarks[datastore.slug])
|
130 |
+
model_elem = get_reranking_dropdown(datastore.reranking_models)
|
131 |
+
df_elem = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
|
132 |
+
hidden_df_elem = get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
|
133 |
+
return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
|
|
|
134 |
|
135 |
|
136 |
demo = gr.Blocks(css=custom_css)
|
|
|
142 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
143 |
with gr.TabItem("Results", elem_id="results-tab-table"):
|
144 |
with gr.Row():
|
145 |
+
version = get_version_dropdown()
|
146 |
|
147 |
with gr.TabItem("QA", elem_id="qa-benchmark-tab-table", id=0):
|
148 |
with gr.Row():
|
|
|
174 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
175 |
hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
176 |
|
177 |
+
version.change(
|
178 |
+
update_qa_version,
|
179 |
[
|
180 |
+
version,
|
181 |
],
|
182 |
[selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table],
|
183 |
)
|
|
|
187 |
lb_table,
|
188 |
hidden_lb_table,
|
189 |
search_bar,
|
190 |
+
version,
|
191 |
selected_domains,
|
192 |
selected_langs,
|
193 |
selected_rerankings,
|
|
|
197 |
|
198 |
# set metric listener
|
199 |
selected_metric.change(
|
200 |
+
update_qa_metric,
|
201 |
[
|
202 |
selected_metric,
|
203 |
selected_domains,
|
|
|
233 |
hidden_lb_df_retriever, datastore.qa_types, visible=False
|
234 |
)
|
235 |
|
236 |
+
version.change(
|
237 |
+
update_qa_version,
|
238 |
[
|
239 |
+
version,
|
240 |
],
|
241 |
[
|
242 |
selected_domains,
|
|
|
252 |
lb_table_retriever,
|
253 |
hidden_lb_table_retriever,
|
254 |
search_bar_retriever,
|
255 |
+
version,
|
256 |
selected_domains,
|
257 |
selected_langs,
|
258 |
selected_noreranker,
|
|
|
262 |
|
263 |
# set metric listener
|
264 |
selected_metric.change(
|
265 |
+
update_qa_metric,
|
266 |
[
|
267 |
selected_metric,
|
268 |
selected_domains,
|
|
|
298 |
hidden_lb_df_reranker, datastore.qa_types, visible=False
|
299 |
)
|
300 |
|
301 |
+
version.change(
|
302 |
+
update_qa_version,
|
303 |
[
|
304 |
+
version,
|
305 |
],
|
306 |
[
|
307 |
selected_domains,
|
|
|
317 |
lb_table_reranker,
|
318 |
hidden_lb_table_reranker,
|
319 |
search_bar_reranker,
|
320 |
+
version,
|
321 |
selected_domains,
|
322 |
selected_langs,
|
323 |
selected_rerankings_reranker,
|
|
|
326 |
)
|
327 |
# set metric listener
|
328 |
selected_metric.change(
|
329 |
+
update_qa_metric,
|
330 |
[
|
331 |
selected_metric,
|
332 |
selected_domains,
|
|
|
373 |
datastore.doc_raw_df, datastore.doc_types, visible=False
|
374 |
)
|
375 |
|
376 |
+
version.change(
|
377 |
+
update_doc_version,
|
378 |
[
|
379 |
+
version,
|
380 |
],
|
381 |
[
|
382 |
selected_domains,
|
|
|
392 |
lb_table_long_doc,
|
393 |
hidden_lb_table_long_doc,
|
394 |
search_bar,
|
395 |
+
version,
|
396 |
selected_domains,
|
397 |
selected_langs,
|
398 |
selected_rerankings,
|
|
|
402 |
|
403 |
# set metric listener
|
404 |
selected_metric.change(
|
405 |
+
update_doc_metric,
|
406 |
[
|
407 |
selected_metric,
|
408 |
selected_domains,
|
|
|
437 |
hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
|
438 |
)
|
439 |
|
440 |
+
version.change(
|
441 |
+
update_doc_version,
|
442 |
[
|
443 |
+
version,
|
444 |
],
|
445 |
[
|
446 |
selected_domains,
|
|
|
456 |
lb_table_retriever_long_doc,
|
457 |
hidden_lb_table_retriever_long_doc,
|
458 |
search_bar_retriever,
|
459 |
+
version,
|
460 |
selected_domains,
|
461 |
selected_langs,
|
462 |
selected_noreranker,
|
|
|
465 |
)
|
466 |
|
467 |
selected_metric.change(
|
468 |
+
update_doc_metric,
|
469 |
[
|
470 |
selected_metric,
|
471 |
selected_domains,
|
|
|
502 |
hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
|
503 |
)
|
504 |
|
505 |
+
version.change(
|
506 |
+
update_doc_version,
|
507 |
[
|
508 |
+
version,
|
509 |
],
|
510 |
[
|
511 |
selected_domains,
|
|
|
521 |
lb_table_reranker_ldoc,
|
522 |
hidden_lb_table_reranker_ldoc,
|
523 |
search_bar_reranker_ldoc,
|
524 |
+
version,
|
525 |
selected_domains,
|
526 |
selected_langs,
|
527 |
selected_rerankings_reranker_ldoc,
|
|
|
529 |
show_revision_and_timestamp,
|
530 |
)
|
531 |
selected_metric.change(
|
532 |
+
update_doc_metric,
|
533 |
[
|
534 |
selected_metric,
|
535 |
selected_domains,
|
src/loaders.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import os.path
|
2 |
-
from typing import List
|
3 |
|
4 |
import pandas as pd
|
5 |
|
@@ -94,7 +94,7 @@ def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
|
|
94 |
return lb_data_store
|
95 |
|
96 |
|
97 |
-
def load_eval_results(file_path: str):
|
98 |
output = {}
|
99 |
for version in BENCHMARK_VERSION_LIST:
|
100 |
fn = f"{file_path}/{version}"
|
|
|
1 |
import os.path
|
2 |
+
from typing import List, Dict
|
3 |
|
4 |
import pandas as pd
|
5 |
|
|
|
94 |
return lb_data_store
|
95 |
|
96 |
|
97 |
+
def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
|
98 |
output = {}
|
99 |
for version in BENCHMARK_VERSION_LIST:
|
100 |
fn = f"{file_path}/{version}"
|
src/models.py
CHANGED
@@ -147,5 +147,4 @@ class LeaderboardDataStore:
|
|
147 |
doc_fmt_df: Optional[pd.DataFrame]
|
148 |
reranking_models: Optional[list]
|
149 |
qa_types: Optional[list]
|
150 |
-
doc_types: Optional[list]
|
151 |
-
# qa_raw_df, docs_raw_df, qa_fmt_df, docs_fmt_df,
|
|
|
147 |
doc_fmt_df: Optional[pd.DataFrame]
|
148 |
reranking_models: Optional[list]
|
149 |
qa_types: Optional[list]
|
150 |
+
doc_types: Optional[list]
|
|