Spaces:

open-llm-leaderboard
/

comparator

Running on CPU Upgrade

albertvillanova HF staff committed on Oct 11, 2024

Commit

8e404a5

verified ·

1 Parent(s): 1c1cb58

Fix missing results by reading all files

Files changed (2) hide show

app.py CHANGED Viewed

@@ -7,12 +7,12 @@ from src.details import update_subtasks_component, update_load_details_component
     display_details, update_sample_idx_component, clear_details
 from src.results import update_load_results_component, \
     load_results_dataframes, display_results, update_tasks_component, clear_results, \
-    filter_latest_result_path_per_model, fetch_result_paths
 # if __name__ == "__main__":
-latest_result_path_per_model = filter_latest_result_path_per_model(fetch_result_paths())
-load_results_dataframes = partial(load_results_dataframes, result_path_per_model=latest_result_path_per_model)
 with gr.Blocks(fill_height=True) as demo:
     gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
@@ -20,10 +20,10 @@ with gr.Blocks(fill_height=True) as demo:
     with gr.Row():
         with gr.Column():
-            model_id_1 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Models")
             dataframe_1 = gr.Dataframe(visible=False)
         with gr.Column():
-            model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Models")
             dataframe_2 = gr.Dataframe(visible=False)
     with gr.Row():

     display_details, update_sample_idx_component, clear_details
 from src.results import update_load_results_component, \
     load_results_dataframes, display_results, update_tasks_component, clear_results, \
+    sort_result_paths_per_model, fetch_result_paths
 # if __name__ == "__main__":
+result_paths_per_model = sort_result_paths_per_model(fetch_result_paths())
+load_results_dataframes = partial(load_results_dataframes, result_paths_per_model=result_paths_per_model)
 with gr.Blocks(fill_height=True) as demo:
     gr.HTML("<h1 style='text-align: center;'>Compare Results of the 🤗 Open LLM Leaderboard</h1>")
     with gr.Row():
         with gr.Column():
+            model_id_1 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
             dataframe_1 = gr.Dataframe(visible=False)
         with gr.Column():
+            model_id_2 = gr.Dropdown(choices=list(result_paths_per_model.keys()), label="Models")
             dataframe_2 = gr.Dataframe(visible=False)
     with gr.Row():

src/results.py CHANGED Viewed

@@ -13,35 +13,39 @@ def fetch_result_paths():
     return paths
-def filter_latest_result_path_per_model(paths):
     from collections import defaultdict
     d = defaultdict(list)
     for path in paths:
         model_id, _ = path[len(RESULTS_DATASET_ID) + 1:].rsplit("/", 1)
         d[model_id].append(path)
-    return {model_id: max(paths) for model_id, paths in d.items()}
 def update_load_results_component():
     return (gr.Button("Load", interactive=True), ) * 2
-def load_results_dataframe(model_id, result_path_per_model=None):
-    if not model_id or not result_path_per_model:
         return
-    result_path = result_path_per_model[model_id]
     fs = HfFileSystem()
-    with fs.open(result_path, "r") as f:
-        data = json.load(f)
-    model_name = data.get("model_name", "Model")
     df = pd.json_normalize([{key: value for key, value in data.items()}])
     # df.columns = df.columns.str.split(".")  # .split return a list instead of a tuple
     return df.set_index(pd.Index([model_name])).reset_index()
-def load_results_dataframes(*model_ids, result_path_per_model=None):
-    return [load_results_dataframe(model_id, result_path_per_model=result_path_per_model) for model_id in model_ids]
 def display_results(task, *dfs):

     return paths
+def sort_result_paths_per_model(paths):
     from collections import defaultdict
     d = defaultdict(list)
     for path in paths:
         model_id, _ = path[len(RESULTS_DATASET_ID) + 1:].rsplit("/", 1)
         d[model_id].append(path)
+    return {model_id: sorted(paths) for model_id, paths in d.items()}
 def update_load_results_component():
     return (gr.Button("Load", interactive=True), ) * 2
+def load_results_dataframe(model_id, result_paths_per_model=None):
+    if not model_id or not result_paths_per_model:
         return
+    result_paths = result_paths_per_model[model_id]
     fs = HfFileSystem()
+    data = {"results": {}, "configs": {}}
+    for path in result_paths:
+        with fs.open(path, "r") as f:
+            d = json.load(f)
+        data["results"].update(d["results"])
+        data["configs"].update(d["configs"])
+        model_name = d.get("model_name", "Model")
     df = pd.json_normalize([{key: value for key, value in data.items()}])
     # df.columns = df.columns.str.split(".")  # .split return a list instead of a tuple
     return df.set_index(pd.Index([model_name])).reset_index()
+def load_results_dataframes(*model_ids, result_paths_per_model=None):
+    return [load_results_dataframe(model_id, result_paths_per_model=result_paths_per_model) for model_id in model_ids]
 def display_results(task, *dfs):