albertvillanova HF staff committed on
Commit
2f4d877
1 Parent(s): d0f55c6

Load Details asynchronously

Browse files
Files changed (2) hide show
  1. src/details.py +7 -6
  2. src/hub.py +8 -0
src/details.py CHANGED
@@ -1,10 +1,11 @@
1
- import json
2
 
3
  import gradio as gr
4
  import pandas as pd
5
  from huggingface_hub import HfFileSystem
6
 
7
  from src.constants import SUBTASKS, DETAILS_DATASET_ID, DETAILS_FILENAME
 
8
 
9
 
10
  def update_subtasks_component(task):
@@ -22,7 +23,7 @@ def update_load_details_component(model_id_1, model_id_2, subtask):
22
  return gr.Button("Load Details", interactive=False)
23
 
24
 
25
- def load_details_dataframe(model_id, subtask):
26
  fs = HfFileSystem()
27
  if not model_id or not subtask:
28
  return
@@ -35,8 +36,7 @@ def load_details_dataframe(model_id, subtask):
35
  if not paths:
36
  return
37
  path = max(paths)
38
- with fs.open(path, "r") as f:
39
- data = [json.loads(line) for line in f]
40
  df = pd.json_normalize(data)
41
  # df = df.rename_axis("Parameters", axis="columns")
42
  df["model_name"] = model_id # Keep model_name
@@ -44,8 +44,9 @@ def load_details_dataframe(model_id, subtask):
44
  # return df.set_index(pd.Index([model_id])).reset_index()
45
 
46
 
47
- def load_details_dataframes(subtask, *model_ids):
48
- return [load_details_dataframe(model_id, subtask) for model_id in model_ids]
 
49
 
50
 
51
  def display_details(sample_idx, *dfs):
 
1
+ import asyncio
2
 
3
  import gradio as gr
4
  import pandas as pd
5
  from huggingface_hub import HfFileSystem
6
 
7
  from src.constants import SUBTASKS, DETAILS_DATASET_ID, DETAILS_FILENAME
8
+ from src.hub import load_details_file
9
 
10
 
11
  def update_subtasks_component(task):
 
23
  return gr.Button("Load Details", interactive=False)
24
 
25
 
26
+ async def load_details_dataframe(model_id, subtask):
27
  fs = HfFileSystem()
28
  if not model_id or not subtask:
29
  return
 
36
  if not paths:
37
  return
38
  path = max(paths)
39
+ data = await load_details_file(path)
 
40
  df = pd.json_normalize(data)
41
  # df = df.rename_axis("Parameters", axis="columns")
42
  df["model_name"] = model_id # Keep model_name
 
44
  # return df.set_index(pd.Index([model_id])).reset_index()
45
 
46
 
47
async def load_details_dataframes(subtask, *model_ids):
    """Load the details of several models for one subtask concurrently.

    One ``load_details_dataframe(model_id, subtask)`` coroutine is created per
    model id and all of them are awaited together with ``asyncio.gather``.

    Args:
        subtask: Subtask passed unchanged to every ``load_details_dataframe`` call.
        *model_ids: Model identifiers, one load per identifier.

    Returns:
        The list produced by ``asyncio.gather``: one entry per model id, in the
        same order as ``model_ids``.
    """
    pending = (load_details_dataframe(model_id, subtask) for model_id in model_ids)
    return await asyncio.gather(*pending)
50
 
51
 
52
  def display_details(sample_idx, *dfs):
src/hub.py CHANGED
@@ -1,6 +1,8 @@
1
  import httpx
2
  from huggingface_hub import hf_hub_url
 
3
 
 
4
 
5
  client = httpx.AsyncClient()
6
 
@@ -11,6 +13,12 @@ async def load_file(path):
11
  return r.json()
12
 
13
 
 
 
 
 
 
 
14
  def to_url(path):
15
  _, org_name, ds_name, filename = path.split("/", 3)
16
  return hf_hub_url(repo_id=f"{org_name}/{ds_name}", filename=filename, repo_type="dataset")
 
1
  import httpx
2
  from huggingface_hub import hf_hub_url
3
+ from huggingface_hub.utils import build_hf_headers
4
 
5
+ import json
6
 
7
  client = httpx.AsyncClient()
8
 
 
13
  return r.json()
14
 
15
 
16
async def load_details_file(path):
    """Download a JSON Lines details file and decode it line by line.

    Args:
        path: File path; it is turned into a download URL by ``to_url``.

    Returns:
        A list with one decoded JSON value per non-blank line of the response
        body.

    Raises:
        httpx.HTTPStatusError: If ``raise_for_status`` reports an error status.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    url = to_url(path)
    r = await client.get(url, headers=build_hf_headers())
    # Fail loudly on an HTTP error instead of parsing an error body as data.
    r.raise_for_status()
    # Split on "\n" only: str.splitlines() also breaks on characters such as
    # U+2028 or U+0085, which may appear unescaped inside a JSON string and
    # would cut a record in half. Blank lines are skipped rather than decoded;
    # a trailing "\r" is harmless because json.loads ignores outer whitespace.
    return [json.loads(line) for line in r.text.split("\n") if line.strip()]
20
+
21
+
22
  def to_url(path):
23
  _, org_name, ds_name, filename = path.split("/", 3)
24
  return hf_hub_url(repo_id=f"{org_name}/{ds_name}", filename=filename, repo_type="dataset")