File size: 743 Bytes
662ed4b 611a3ed d0f55c6 7e32ac7 2f4d877 d0f55c6 39ff146 7e32ac7 d0f55c6 fae0e19 d0f55c6 fae0e19 2f4d877 662ed4b 2f4d877 d0f55c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import io
import json
import httpx
from huggingface_hub import HfFileSystem, hf_hub_url
from huggingface_hub.utils import build_hf_headers
# Shared asynchronous HTTP client used by the load_* helpers in this module;
# configured to follow redirects.
# NOTE(review): nothing in this file closes the client — confirm that it is
# shut down elsewhere or intentionally lives for the whole process.
client = httpx.AsyncClient(follow_redirects=True)
# Hugging Face Hub filesystem handle used by glob().
fs = HfFileSystem()
def glob(path):
    """Return whatever the module-level Hub filesystem matches for *path*.

    This is a thin wrapper around ``fs.glob``; the pattern is passed through
    unchanged and the result is returned as-is.
    """
    return fs.glob(path)
async def load_json_file(path):
    """Download a JSON file from a Hub dataset repository and parse it.

    Args:
        path: Slash-separated path in the form accepted by ``to_url``
            (``<prefix>/<org>/<dataset>/<filename>``).

    Returns:
        The decoded JSON document.

    Raises:
        httpx.HTTPStatusError: If the final response is not a 2xx status.
        ValueError: If ``path`` has too few segments or the body is not
            valid JSON.
    """
    url = to_url(path)
    # Send the same Hugging Face headers as load_jsonlines_file so both
    # loaders behave identically for repositories that require authentication.
    r = await client.get(url, headers=build_hf_headers())
    # Fail loudly instead of returning an error payload as if it were data.
    r.raise_for_status()
    return r.json()
async def load_jsonlines_file(path):
    """Download a JSON Lines file from a Hub dataset repository and parse it.

    Args:
        path: Slash-separated path in the form accepted by ``to_url``
            (``<prefix>/<org>/<dataset>/<filename>``).

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        httpx.HTTPStatusError: If the final response is not a 2xx status.
        ValueError: If ``path`` has too few segments or a non-blank line is
            not valid JSON.
    """
    url = to_url(path)
    r = await client.get(url, headers=build_hf_headers())
    # Surface HTTP failures directly rather than as a confusing JSON decode
    # error on the error page's text.
    r.raise_for_status()
    # Iterate through StringIO so lines are split on "\n" only; skip blank
    # lines (e.g. extra trailing newlines), which json.loads would reject.
    return [
        json.loads(line)
        for line in io.StringIO(r.text)
        if line.strip()
    ]
def to_url(path):
    """Build the Hub download URL for a dataset file path.

    ``path`` is split on its first three slashes into a leading segment
    (ignored), an organisation name, a dataset name, and the remaining file
    name, which may itself contain slashes. A path with fewer than four
    segments raises ``ValueError`` from the unpacking.
    """
    segments = path.split("/", 3)
    _prefix, owner, dataset, file_path = segments
    repo = f"{owner}/{dataset}"
    return hf_hub_url(repo_id=repo, filename=file_path, repo_type="dataset")
|