|
import asyncio |
|
import io |
|
import json |
|
|
|
import httpx |
|
from huggingface_hub import HfFileSystem, ModelCard, hf_hub_url |
|
from huggingface_hub.utils import build_hf_headers |
|
|
|
import src.constants as constants |
|
|
|
|
|
class Client:
    """Small async HTTP helper built on ``httpx.AsyncClient``.

    ``get`` is the public entry point: it returns the response on success and
    ``None`` on any ``httpx.HTTPError``. Read timeouts are retried with an
    exponentially growing delay before giving up.
    """

    def __init__(self):
        # One shared connection pool; redirects are followed automatically.
        self.client = httpx.AsyncClient(follow_redirects=True)

    async def _get(self, url, headers=None, params=None):
        """Perform a single GET request.

        Returns the response; ``raise_for_status`` turns error status codes
        into an exception instead of a returned response.
        """
        r = await self.client.get(url, headers=headers, params=params)
        r.raise_for_status()
        return r

    async def get(self, url, headers=None, params=None):
        """GET ``url`` and return the response, or ``None`` on failure.

        A read timeout triggers ``retry``; every other ``httpx.HTTPError``
        is swallowed and reported to the caller as ``None``.
        """
        try:
            return await self._get(url, headers=headers, params=params)
        except httpx.ReadTimeout:
            return await self.retry(self._get, url, headers=headers, params=params)
        except httpx.HTTPError:
            return None

    async def retry(self, func, url, max_retries=4, max_wait_time=8, wait_time=1, **kwargs):
        """Call ``func(url, **kwargs)`` again after a read timeout.

        Args:
            func: Coroutine function to call; it receives ``url`` and ``kwargs``.
            url: URL passed through to ``func`` and used in the failure message.
            max_retries: Maximum number of attempts made here.
            max_wait_time: Stop retrying once the next delay would exceed this.
            wait_time: Delay in seconds before the first attempt; doubled after
                every timed-out attempt.
            **kwargs: Extra keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns on the first successful attempt, or
            ``None`` when the attempts are exhausted or a non-timeout
            ``httpx.HTTPError`` occurs.
        """
        for _ in range(max_retries):
            await asyncio.sleep(wait_time)
            try:
                return await func(url, **kwargs)
            except httpx.ReadTimeout:
                wait_time *= 2
                if wait_time > max_wait_time:
                    break
            except httpx.HTTPError:
                # This method runs inside ``get``'s ReadTimeout handler, so the
                # sibling ``except httpx.HTTPError`` in ``get`` cannot catch
                # errors raised here. Handle them locally so callers still get
                # ``None`` instead of an unexpected exception.
                return None
        # Reached both when the back-off limit is hit and when the attempt
        # budget is used up, so the failure is always reported.
        print("HTTP Timeout: max retries exceeded with url:", url)
        return None
|
|
|
|
|
# Module-level singletons shared by the helper functions in this file.
# Shared async HTTP client used for all Hub requests.
client = Client()
# Hugging Face filesystem interface used for path globbing.
fs = HfFileSystem()
|
|
|
|
|
def glob(path):
    """Return whatever the module-level ``fs`` yields for the glob pattern ``path``."""
    return fs.glob(path)
|
|
|
|
|
async def load_json_file(path):
    """Fetch the file at ``path`` and return its decoded JSON content.

    ``path`` is converted to a download URL with ``to_url``. Returns ``None``
    when the shared client reports a failed request.
    """
    response = await client.get(to_url(path))
    return None if response is None else response.json()
|
|
|
|
|
async def load_jsonlines_file(path):
    """Fetch a JSON Lines file and return its records as a list.

    ``path`` is converted to a download URL with ``to_url`` and requested with
    the headers produced by ``build_hf_headers()``.

    Returns:
        A list with one decoded JSON value per non-blank line, or ``None``
        when the shared client reports a failed request.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    url = to_url(path)
    r = await client.get(url, headers=build_hf_headers())
    if r is None:
        return None
    # Iterate through StringIO rather than str.splitlines(): splitlines also
    # breaks on characters such as U+2028, which may appear unescaped inside a
    # JSON string. Blank lines (e.g. a trailing empty line) are skipped so
    # they do not abort the whole load with a decode error.
    return [json.loads(line) for line in io.StringIO(r.text) if line.strip()]
|
|
|
|
|
def to_url(path):
    """Convert a Hub path into a download URL via ``hf_hub_url``.

    Accepted shapes are ``<org>/<name>/<filename>`` and
    ``<prefix>/<org>/<name>/<filename>``, where ``<prefix>`` is a plural repo
    type (``datasets``, ``spaces`` or ``models``) and ``<filename>`` may itself
    contain slashes. Without a recognised prefix the repo type is left as
    ``None``.

    Raises:
        ValueError: If ``path`` has fewer than three ``/``-separated segments.
    """
    parts = path.split("/", 3)
    # Only treat the first segment as a repo type when it really is one.
    # Otherwise a path with a nested file, such as "org/model/sub/file.json",
    # would be misread as prefix "org" + repo "model/sub".
    if len(parts) == 4 and parts[0] in ("datasets", "spaces", "models"):
        repo_type = parts[0][:-1]  # drop the plural "s": "datasets" -> "dataset"
        org_name, ds_name, filename = parts[1:]
    else:
        repo_type = None
        org_name, ds_name, filename = path.split("/", 2)
    return hf_hub_url(repo_id=f"{org_name}/{ds_name}", filename=filename, repo_type=repo_type)
|
|
|
|
|
async def load_model_card(model_id):
    """Fetch ``README.md`` of ``model_id`` and wrap its text in a ``ModelCard``.

    Metadata errors are ignored while building the card. Returns ``None`` when
    the shared client reports a failed request.
    """
    readme_url = to_url(f"{model_id}/README.md")
    response = await client.get(readme_url)
    if response is not None:
        return ModelCard(response.text, ignore_metadata_errors=True)
    return None
|
|
|
|
|
async def list_models(filtering=None):
    """Query the ``/models`` endpoint under ``constants.HF_API_URL``.

    A truthy ``filtering`` value is sent as the ``filter`` query parameter;
    otherwise no parameters are sent. Returns the decoded JSON response, or
    ``None`` when the shared client reports a failed request.
    """
    query = {"filter": filtering} if filtering else {}
    response = await client.get(f"{constants.HF_API_URL}/models", params=query)
    return response.json() if response is not None else None
|
|