sh1gechan committed on
Commit
a59b982
·
verified ·
1 Parent(s): 6df04ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -105
app.py CHANGED
@@ -4,7 +4,6 @@ import gradio as gr
4
  import pandas as pd
5
  from apscheduler.schedulers.background import BackgroundScheduler
6
  from huggingface_hub import snapshot_download
7
- from decimal import Decimal
8
 
9
  from src.about import (
10
  CITATION_BUTTON_LABEL,
@@ -54,13 +53,7 @@ except Exception:
54
  restart_space()
55
 
56
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
57
- print(LEADERBOARD_DF.head())
58
  original_df = LEADERBOARD_DF
59
- print("Initial LEADERBOARD_DF:")
60
- print(LEADERBOARD_DF.head())
61
- print(f"LEADERBOARD_DF shape: {LEADERBOARD_DF.shape}")
62
- print("LEADERBOARD_DF columns:")
63
- print(LEADERBOARD_DF.columns.tolist())
64
  leaderboard_df = original_df.copy()
65
  (
66
  finished_eval_queue_df,
@@ -83,10 +76,10 @@ def update_table(
83
  show_flagged: bool,
84
  query: str,
85
  ):
86
-
87
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, add_special_tokens_query, num_few_shots_query, show_deleted, show_merges, show_flagged)
88
-
89
  filtered_df = filter_queries(query, filtered_df)
 
 
90
 
91
  df = select_columns(filtered_df, columns)
92
  return df
@@ -136,75 +129,40 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
136
  def filter_models(
137
  df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, add_special_tokens_query: list, num_few_shots_query: list, show_deleted: bool, show_merges: bool, show_flagged: bool
138
  ) -> pd.DataFrame:
139
- print(f"filter_models called with: type_query={type_query}, size_query={size_query}, precision_query={precision_query}")
140
- print(f"Initial df shape: {df.shape}")
141
- print("Initial df columns:")
142
- print(df.columns.tolist())
143
-
144
- filtered_df = df.copy() # Create a copy to avoid modifying the original dataframe
145
 
146
- # Show all models (comment out for debugging)
147
- # if not show_deleted:
148
- # filtered_df = filtered_df[filtered_df[AutoEvalColumn.still_on_hub.name] == True]
149
 
150
- print(f"After deletion filter: {filtered_df.shape}")
 
151
 
152
- # Type filter
153
  type_emoji = [t[0] for t in type_query]
154
- if 'T' in filtered_df.columns:
155
- filtered_df = filtered_df[filtered_df['T'].isin(type_emoji + ['?'])] # Include '?' as a valid type
156
- elif 'Type_Symbol' in filtered_df.columns:
157
- filtered_df = filtered_df[filtered_df['Type_Symbol'].isin(type_emoji + ['?'])]
158
- else:
159
- print("Warning: Neither 'T' nor 'Type_Symbol' column found in the dataframe")
 
160
 
161
- print(f"After type filter: {filtered_df.shape}")
162
-
163
- # Precision filter
164
- precision_query = precision_query + ['Unknown', '?']
165
- filtered_df = filtered_df[filtered_df[AutoEvalColumn.precision.name].isin(precision_query)]
166
- print(f"After precision filter: {filtered_df.shape}")
167
-
168
- # Add Special Tokens filter
169
- add_special_tokens_query = add_special_tokens_query + ["Unknown"]
170
- filtered_df = filtered_df[filtered_df[AutoEvalColumn.add_special_tokens.name].isin(add_special_tokens_query)]
171
- print(f"After add_special_tokens filter: {filtered_df.shape}")
172
-
173
- # Num Few Shots filter
174
- num_few_shots_query = num_few_shots_query + ["Unknown"]
175
- filtered_df = filtered_df[filtered_df[AutoEvalColumn.num_few_shots.name].isin(num_few_shots_query)]
176
- print(f"After num_few_shots filter: {filtered_df.shape}")
177
 
178
- # Size filter
179
  numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
180
- params_column = pd.to_numeric(filtered_df[AutoEvalColumn.params.name], errors="coerce")
181
- mask = params_column.apply(lambda x: pd.isna(x) or any(numeric_interval.contains(x)))
182
- filtered_df = filtered_df[mask]
183
- print(f"After size filter: {filtered_df.shape}")
184
-
185
- if filtered_df.empty:
186
- print("Warning: Filtered dataframe is empty!")
187
- return pd.DataFrame(columns=df.columns) # Return an empty dataframe with the same columns
188
-
189
- print("Filtered dataframe head:")
190
- print(filtered_df.head())
191
- print("Column names:")
192
- print(filtered_df.columns.tolist())
193
- print("Column data types:")
194
- print(filtered_df.dtypes)
195
- print("Final filtered dataframe sample:")
196
- print(filtered_df.head().to_dict('records'))
197
-
198
- print("Filtered DataFrame sample:")
199
- print(filtered_df.head().to_dict('records'))
200
-
201
- filtered_df = filtered_df.astype(str)
202
  return filtered_df
203
 
204
- def convert_decimal_to_str(item):
205
- return {k: str(v) if isinstance(v, Decimal) else v for k, v in item.items()}
 
206
 
207
- leaderboard_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], [i.value.name for i in AddSpecialTokens], [i.value.name for i in NumFewShots], False, False, False)
 
208
 
209
  demo = gr.Blocks(css=custom_css)
210
  with demo:
@@ -285,48 +243,15 @@ with demo:
285
  elem_id="filter-columns-num-few-shots",
286
  )
287
 
288
- # leaderboard_table = gr.components.Dataframe(
289
- # value=leaderboard_df[
290
- # [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
291
- # + shown_columns.value
292
- # # + [AutoEvalColumn.dummy.name]
293
- # ],
294
- # headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
295
- # datatype=TYPES,
296
- # elem_id="leaderboard-table",
297
- # interactive=False,
298
- # visible=True,
299
- # #column_widths=["2%", "33%"]
300
- # )
301
- filtered_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], [i.value.name for i in AddSpecialTokens], [i.value.name for i in NumFewShots], False, False, False)
302
- print("After filter_models:")
303
- print(f"filtered_df shape: {filtered_df.shape}")
304
- print("filtered_df columns:")
305
- print(filtered_df.columns.tolist())
306
-
307
- initial_data = [convert_decimal_to_str(item) for item in filtered_df.to_dict('records')]
308
- headers = filtered_df.columns.tolist()
309
- print("Filtered DataFrame contents:")
310
- print(filtered_df.head().to_dict('records'))
311
- print("Filtered DataFrame columns:")
312
- print(filtered_df.columns.tolist())
313
- filtered_df_without_T = filtered_df.drop('T', axis=1)
314
  leaderboard_table = gr.components.Dataframe(
315
- value=filtered_df_without_T.to_dict('records'),
316
- headers=filtered_df_without_T.columns.tolist(),
317
- datatype={col: "str" for col in filtered_df_without_T.columns},
318
- row_count=(len(filtered_df_without_T), "dynamic"),
319
- col_count=(len(filtered_df_without_T.columns), "fixed"),
320
- wrap=True,
321
  elem_id="leaderboard-table",
322
  interactive=False,
323
  visible=True,
324
  )
325
- print("Leaderboard table initial value:")
326
- print(initial_data[:5] if initial_data else "Empty")
327
- print("Headers:")
328
- print(headers)
329
- print("After Dataframe initialization")
330
 
331
  # Dummy leaderboard for handling the case when the user uses backspace key
332
  hidden_leaderboard_table_for_search = gr.components.Dataframe(
 
4
  import pandas as pd
5
  from apscheduler.schedulers.background import BackgroundScheduler
6
  from huggingface_hub import snapshot_download
 
7
 
8
  from src.about import (
9
  CITATION_BUTTON_LABEL,
 
53
  restart_space()
54
 
55
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
56
  original_df = LEADERBOARD_DF
 
 
 
 
 
57
  leaderboard_df = original_df.copy()
58
  (
59
  finished_eval_queue_df,
 
76
  show_flagged: bool,
77
  query: str,
78
  ):
 
79
  filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, add_special_tokens_query, num_few_shots_query, show_deleted, show_merges, show_flagged)
 
80
  filtered_df = filter_queries(query, filtered_df)
81
+ print(f"Filter applied: query={query}, columns={columns}, type_query={type_query}, precision_query={precision_query}")
82
+ print(filtered_df.head()) # フィルタ後のデータを確認
83
 
84
  df = select_columns(filtered_df, columns)
85
  return df
 
129
  def filter_models(
130
  df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, add_special_tokens_query: list, num_few_shots_query: list, show_deleted: bool, show_merges: bool, show_flagged: bool
131
  ) -> pd.DataFrame:
132
+ # Show all models
133
+ if show_deleted:
134
+ filtered_df = df
135
+ else: # Show only still on the hub models
136
+ filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
 
137
 
138
+ #if not show_merges:
139
+ # filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
 
140
 
141
+ #if not show_flagged:
142
+ # filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
143
 
 
144
  type_emoji = [t[0] for t in type_query]
145
+ filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
146
+ filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
147
+ filtered_df = filtered_df.loc[df[AutoEvalColumn.add_special_tokens.name].isin(add_special_tokens_query)]
148
+ filtered_df = filtered_df.loc[df[AutoEvalColumn.num_few_shots.name].isin(num_few_shots_query)]
149
+ print("Filtered DataFrame shape:", filtered_df.shape)
150
+ print("Filtered DataFrame columns:", filtered_df.columns.tolist())
151
+ print("Filtered DataFrame sample:", filtered_df.head().to_dict('records'))
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
 
154
  numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
155
+ params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
156
+ mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
157
+ filtered_df = filtered_df.loc[mask]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  return filtered_df
159
 
160
+ filtered_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], [i.value.name for i in AddSpecialTokens], [i.value.name for i in NumFewShots], False, False, False)
161
+ display_columns = [c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value
162
+ display_data = filtered_df[display_columns].to_dict('records')
163
 
164
+ print("Display columns:", display_columns)
165
+ print("Display data sample:", display_data[:1])
166
 
167
  demo = gr.Blocks(css=custom_css)
168
  with demo:
 
243
  elem_id="filter-columns-num-few-shots",
244
  )
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  leaderboard_table = gr.components.Dataframe(
247
+ value=display_data,
248
+ headers=display_columns,
249
+ datatype={col: str(TYPES.get(col, "str")) for col in display_columns},
 
 
 
250
  elem_id="leaderboard-table",
251
  interactive=False,
252
  visible=True,
253
  )
254
+ print(leaderboard_df.head()) # リーダーボードテーブルに渡される前のデータを確認
 
 
 
 
255
 
256
  # Dummy leaderboard for handling the case when the user uses backspace key
257
  hidden_leaderboard_table_for_search = gr.components.Dataframe(