Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
5e11615
·
1 Parent(s): 26e4c47

refactor: refactor the naming

Browse files
Files changed (3) hide show
  1. app.py +58 -58
  2. src/loaders.py +2 -2
  3. src/models.py +1 -2
app.py CHANGED
@@ -57,13 +57,13 @@ except Exception:
57
  print("failed to download")
58
  restart_space()
59
 
60
- global data
61
- data = load_eval_results(EVAL_RESULTS_PATH)
62
  global datastore
63
- datastore = data[LATEST_BENCHMARK_VERSION]
64
 
65
 
66
- def update_metric_qa(
67
  metric: str,
68
  domains: list,
69
  langs: list,
@@ -72,6 +72,7 @@ def update_metric_qa(
72
  show_anonymous: bool,
73
  show_revision_and_timestamp: bool,
74
  ):
 
75
  return update_metric(
76
  datastore,
77
  "qa",
@@ -85,7 +86,7 @@ def update_metric_qa(
85
  )
86
 
87
 
88
- def update_metric_long_doc(
89
  metric: str,
90
  domains: list,
91
  langs: list,
@@ -94,6 +95,7 @@ def update_metric_long_doc(
94
  show_anonymous: bool,
95
  show_revision_and_timestamp,
96
  ):
 
97
  return update_metric(
98
  datastore,
99
  "long-doc",
@@ -107,30 +109,28 @@ def update_metric_long_doc(
107
  )
108
 
109
 
110
- def update_datastore(version):
111
  global datastore
112
- global data
113
- datastore = data[version]
114
- selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
115
- selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
116
- selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
117
- leaderboard_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
118
- hidden_leaderboard_table = \
119
- get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
120
- return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
121
-
122
-
123
- def update_datastore_long_doc(version):
124
  global datastore
125
- global data
126
- datastore = data[version]
127
- selected_domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
128
- selected_langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
129
- selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
130
- leaderboard_table = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
131
- hidden_leaderboard_table = \
132
- get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
133
- return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
134
 
135
 
136
  demo = gr.Blocks(css=custom_css)
@@ -142,7 +142,7 @@ with demo:
142
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
143
  with gr.TabItem("Results", elem_id="results-tab-table"):
144
  with gr.Row():
145
- selected_version = get_version_dropdown()
146
 
147
  with gr.TabItem("QA", elem_id="qa-benchmark-tab-table", id=0):
148
  with gr.Row():
@@ -174,10 +174,10 @@ with demo:
174
  # Dummy leaderboard for handling the case when the user uses backspace key
175
  hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
176
 
177
- selected_version.change(
178
- update_datastore,
179
  [
180
- selected_version,
181
  ],
182
  [selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table],
183
  )
@@ -187,7 +187,7 @@ with demo:
187
  lb_table,
188
  hidden_lb_table,
189
  search_bar,
190
- selected_version,
191
  selected_domains,
192
  selected_langs,
193
  selected_rerankings,
@@ -197,7 +197,7 @@ with demo:
197
 
198
  # set metric listener
199
  selected_metric.change(
200
- update_metric_qa,
201
  [
202
  selected_metric,
203
  selected_domains,
@@ -233,10 +233,10 @@ with demo:
233
  hidden_lb_df_retriever, datastore.qa_types, visible=False
234
  )
235
 
236
- selected_version.change(
237
- update_datastore,
238
  [
239
- selected_version,
240
  ],
241
  [
242
  selected_domains,
@@ -252,7 +252,7 @@ with demo:
252
  lb_table_retriever,
253
  hidden_lb_table_retriever,
254
  search_bar_retriever,
255
- selected_version,
256
  selected_domains,
257
  selected_langs,
258
  selected_noreranker,
@@ -262,7 +262,7 @@ with demo:
262
 
263
  # set metric listener
264
  selected_metric.change(
265
- update_metric_qa,
266
  [
267
  selected_metric,
268
  selected_domains,
@@ -298,10 +298,10 @@ with demo:
298
  hidden_lb_df_reranker, datastore.qa_types, visible=False
299
  )
300
 
301
- selected_version.change(
302
- update_datastore,
303
  [
304
- selected_version,
305
  ],
306
  [
307
  selected_domains,
@@ -317,7 +317,7 @@ with demo:
317
  lb_table_reranker,
318
  hidden_lb_table_reranker,
319
  search_bar_reranker,
320
- selected_version,
321
  selected_domains,
322
  selected_langs,
323
  selected_rerankings_reranker,
@@ -326,7 +326,7 @@ with demo:
326
  )
327
  # set metric listener
328
  selected_metric.change(
329
- update_metric_qa,
330
  [
331
  selected_metric,
332
  selected_domains,
@@ -373,10 +373,10 @@ with demo:
373
  datastore.doc_raw_df, datastore.doc_types, visible=False
374
  )
375
 
376
- selected_version.change(
377
- update_datastore_long_doc,
378
  [
379
- selected_version,
380
  ],
381
  [
382
  selected_domains,
@@ -392,7 +392,7 @@ with demo:
392
  lb_table_long_doc,
393
  hidden_lb_table_long_doc,
394
  search_bar,
395
- selected_version,
396
  selected_domains,
397
  selected_langs,
398
  selected_rerankings,
@@ -402,7 +402,7 @@ with demo:
402
 
403
  # set metric listener
404
  selected_metric.change(
405
- update_metric_long_doc,
406
  [
407
  selected_metric,
408
  selected_domains,
@@ -437,10 +437,10 @@ with demo:
437
  hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
438
  )
439
 
440
- selected_version.change(
441
- update_datastore_long_doc,
442
  [
443
- selected_version,
444
  ],
445
  [
446
  selected_domains,
@@ -456,7 +456,7 @@ with demo:
456
  lb_table_retriever_long_doc,
457
  hidden_lb_table_retriever_long_doc,
458
  search_bar_retriever,
459
- selected_version,
460
  selected_domains,
461
  selected_langs,
462
  selected_noreranker,
@@ -465,7 +465,7 @@ with demo:
465
  )
466
 
467
  selected_metric.change(
468
- update_metric_long_doc,
469
  [
470
  selected_metric,
471
  selected_domains,
@@ -502,10 +502,10 @@ with demo:
502
  hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
503
  )
504
 
505
- selected_version.change(
506
- update_datastore_long_doc,
507
  [
508
- selected_version,
509
  ],
510
  [
511
  selected_domains,
@@ -521,7 +521,7 @@ with demo:
521
  lb_table_reranker_ldoc,
522
  hidden_lb_table_reranker_ldoc,
523
  search_bar_reranker_ldoc,
524
- selected_version,
525
  selected_domains,
526
  selected_langs,
527
  selected_rerankings_reranker_ldoc,
@@ -529,7 +529,7 @@ with demo:
529
  show_revision_and_timestamp,
530
  )
531
  selected_metric.change(
532
- update_metric_long_doc,
533
  [
534
  selected_metric,
535
  selected_domains,
 
57
  print("failed to download")
58
  restart_space()
59
 
60
+ global ds_dict
61
+ ds_dict = load_eval_results(EVAL_RESULTS_PATH)
62
  global datastore
63
+ datastore = ds_dict[LATEST_BENCHMARK_VERSION]
64
 
65
 
66
+ def update_qa_metric(
67
  metric: str,
68
  domains: list,
69
  langs: list,
 
72
  show_anonymous: bool,
73
  show_revision_and_timestamp: bool,
74
  ):
75
+ global datastore
76
  return update_metric(
77
  datastore,
78
  "qa",
 
86
  )
87
 
88
 
89
+ def update_doc_metric(
90
  metric: str,
91
  domains: list,
92
  langs: list,
 
95
  show_anonymous: bool,
96
  show_revision_and_timestamp,
97
  ):
98
+ global datastore
99
  return update_metric(
100
  datastore,
101
  "long-doc",
 
109
  )
110
 
111
 
112
+ def update_qa_version(version):
113
  global datastore
114
+ global ds_dict
115
+ datastore = ds_dict[version]
116
+ domain_elem = get_domain_dropdown(QABenchmarks[datastore.slug])
117
+ lang_elem = get_language_dropdown(QABenchmarks[datastore.slug])
118
+ model_elem = get_reranking_dropdown(datastore.reranking_models)
119
+ df_elem = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
120
+ hidden_df_elem = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
121
+ return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
122
+
123
+
124
+ def update_doc_version(version):
 
125
  global datastore
126
+ global ds_dict
127
+ datastore = ds_dict[version]
128
+ domain_elem = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
129
+ lang_elem = get_language_dropdown(LongDocBenchmarks[datastore.slug])
130
+ model_elem = get_reranking_dropdown(datastore.reranking_models)
131
+ df_elem = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
132
+ hidden_df_elem = get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
133
+ return domain_elem, lang_elem, model_elem, df_elem, hidden_df_elem
 
134
 
135
 
136
  demo = gr.Blocks(css=custom_css)
 
142
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
143
  with gr.TabItem("Results", elem_id="results-tab-table"):
144
  with gr.Row():
145
+ version = get_version_dropdown()
146
 
147
  with gr.TabItem("QA", elem_id="qa-benchmark-tab-table", id=0):
148
  with gr.Row():
 
174
  # Dummy leaderboard for handling the case when the user uses backspace key
175
  hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
176
 
177
+ version.change(
178
+ update_qa_version,
179
  [
180
+ version,
181
  ],
182
  [selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table],
183
  )
 
187
  lb_table,
188
  hidden_lb_table,
189
  search_bar,
190
+ version,
191
  selected_domains,
192
  selected_langs,
193
  selected_rerankings,
 
197
 
198
  # set metric listener
199
  selected_metric.change(
200
+ update_qa_metric,
201
  [
202
  selected_metric,
203
  selected_domains,
 
233
  hidden_lb_df_retriever, datastore.qa_types, visible=False
234
  )
235
 
236
+ version.change(
237
+ update_qa_version,
238
  [
239
+ version,
240
  ],
241
  [
242
  selected_domains,
 
252
  lb_table_retriever,
253
  hidden_lb_table_retriever,
254
  search_bar_retriever,
255
+ version,
256
  selected_domains,
257
  selected_langs,
258
  selected_noreranker,
 
262
 
263
  # set metric listener
264
  selected_metric.change(
265
+ update_qa_metric,
266
  [
267
  selected_metric,
268
  selected_domains,
 
298
  hidden_lb_df_reranker, datastore.qa_types, visible=False
299
  )
300
 
301
+ version.change(
302
+ update_qa_version,
303
  [
304
+ version,
305
  ],
306
  [
307
  selected_domains,
 
317
  lb_table_reranker,
318
  hidden_lb_table_reranker,
319
  search_bar_reranker,
320
+ version,
321
  selected_domains,
322
  selected_langs,
323
  selected_rerankings_reranker,
 
326
  )
327
  # set metric listener
328
  selected_metric.change(
329
+ update_qa_metric,
330
  [
331
  selected_metric,
332
  selected_domains,
 
373
  datastore.doc_raw_df, datastore.doc_types, visible=False
374
  )
375
 
376
+ version.change(
377
+ update_doc_version,
378
  [
379
+ version,
380
  ],
381
  [
382
  selected_domains,
 
392
  lb_table_long_doc,
393
  hidden_lb_table_long_doc,
394
  search_bar,
395
+ version,
396
  selected_domains,
397
  selected_langs,
398
  selected_rerankings,
 
402
 
403
  # set metric listener
404
  selected_metric.change(
405
+ update_doc_metric,
406
  [
407
  selected_metric,
408
  selected_domains,
 
437
  hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
438
  )
439
 
440
+ version.change(
441
+ update_doc_version,
442
  [
443
+ version,
444
  ],
445
  [
446
  selected_domains,
 
456
  lb_table_retriever_long_doc,
457
  hidden_lb_table_retriever_long_doc,
458
  search_bar_retriever,
459
+ version,
460
  selected_domains,
461
  selected_langs,
462
  selected_noreranker,
 
465
  )
466
 
467
  selected_metric.change(
468
+ update_doc_metric,
469
  [
470
  selected_metric,
471
  selected_domains,
 
502
  hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
503
  )
504
 
505
+ version.change(
506
+ update_doc_version,
507
  [
508
+ version,
509
  ],
510
  [
511
  selected_domains,
 
521
  lb_table_reranker_ldoc,
522
  hidden_lb_table_reranker_ldoc,
523
  search_bar_reranker_ldoc,
524
+ version,
525
  selected_domains,
526
  selected_langs,
527
  selected_rerankings_reranker_ldoc,
 
529
  show_revision_and_timestamp,
530
  )
531
  selected_metric.change(
532
+ update_doc_metric,
533
  [
534
  selected_metric,
535
  selected_domains,
src/loaders.py CHANGED
@@ -1,5 +1,5 @@
1
  import os.path
2
- from typing import List
3
 
4
  import pandas as pd
5
 
@@ -94,7 +94,7 @@ def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
94
  return lb_data_store
95
 
96
 
97
- def load_eval_results(file_path: str):
98
  output = {}
99
  for version in BENCHMARK_VERSION_LIST:
100
  fn = f"{file_path}/{version}"
 
1
  import os.path
2
+ from typing import List, Dict
3
 
4
  import pandas as pd
5
 
 
94
  return lb_data_store
95
 
96
 
97
+ def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
98
  output = {}
99
  for version in BENCHMARK_VERSION_LIST:
100
  fn = f"{file_path}/{version}"
src/models.py CHANGED
@@ -147,5 +147,4 @@ class LeaderboardDataStore:
147
  doc_fmt_df: Optional[pd.DataFrame]
148
  reranking_models: Optional[list]
149
  qa_types: Optional[list]
150
- doc_types: Optional[list]
151
- # qa_raw_df, docs_raw_df, qa_fmt_df, docs_fmt_df,
 
147
  doc_fmt_df: Optional[pd.DataFrame]
148
  reranking_models: Optional[list]
149
  qa_types: Optional[list]
150
+ doc_types: Optional[list]