Spaces:
Runtime error
Runtime error
import dataclasses | |
import datetime | |
import operator | |
import pathlib | |
import pandas as pd | |
import tqdm.auto | |
import yaml | |
from huggingface_hub import HfApi | |
from constants import (OWNER_CHOICES, SLEEP_TIME_INT_TO_STR, | |
SLEEP_TIME_STR_TO_INT, WHOAMI) | |
class DemoInfo: | |
space_id: str | |
url: str | |
title: str | |
owner: str | |
sdk: str | |
sdk_version: str | |
likes: int | |
status: str | |
last_modified: str | |
sleep_time: int | |
replicas: int | |
private: bool | |
hardware: str | |
suggested_hardware: str | |
created: str = '' | |
arxiv: list[str] = dataclasses.field(default_factory=list) | |
github: list[str] = dataclasses.field(default_factory=list) | |
tags: list[str] = dataclasses.field(default_factory=list) | |
def __post_init__(self): | |
object.__setattr__(self, 'last_modified', | |
DemoInfo.convert_timestamp(self.last_modified)) | |
object.__setattr__(self, 'created', | |
DemoInfo.convert_timestamp(self.created)) | |
def convert_timestamp(timestamp: str) -> str: | |
try: | |
return datetime.datetime.strptime( | |
timestamp, | |
'%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y/%m/%d %H:%M:%S') | |
except ValueError: | |
return timestamp | |
def from_space_id(cls, space_id: str) -> 'DemoInfo': | |
api = HfApi() | |
space_info = api.space_info(repo_id=space_id) | |
card = space_info.cardData | |
runtime = space_info.runtime | |
resources = runtime['resources'] | |
return cls( | |
space_id=space_id, | |
url=f'https://huggingface.co./spaces/{space_id}', | |
title=card['title'] if 'title' in card else '', | |
owner=space_id.split('/')[0], | |
sdk=card['sdk'], | |
sdk_version=card.get('sdk_version', ''), | |
likes=space_info.likes, | |
status=runtime['stage'], | |
last_modified=space_info.lastModified, | |
sleep_time=runtime['gcTimeout'] or 0, | |
replicas=resources['replicas'] if resources is not None else 0, | |
private=space_info.private, | |
hardware=runtime['hardware']['current'] | |
or runtime['hardware']['requested'], | |
suggested_hardware=card.get('suggested_hardware', ''), | |
) | |
def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame: | |
with pathlib.Path(path).open() as f: | |
data = yaml.safe_load(f) | |
demo_info = [] | |
for space_id in tqdm.auto.tqdm(list(data)): | |
base_info = DemoInfo.from_space_id(space_id) | |
info = DemoInfo(**(dataclasses.asdict(base_info) | data[space_id])) | |
demo_info.append(info) | |
return pd.DataFrame([dataclasses.asdict(info) for info in demo_info]) | |
class Prettifier: | |
def get_arxiv_link(links: list[str]) -> str: | |
links = [ | |
Prettifier.create_link(link.split('/')[-1], link) for link in links | |
] | |
return '\n'.join(links) | |
def get_github_link(links: list[str]) -> str: | |
links = [Prettifier.create_link('github', link) for link in links] | |
return '\n'.join(links) | |
def get_tag_list(tags: list[str]) -> str: | |
return ', '.join(tags) | |
def create_link(text: str, url: str) -> str: | |
return f'<a href={url} target="_blank">{text}</a>' | |
def to_div(text: str | None, category_name: str) -> str: | |
if text is None: | |
text = '' | |
class_name = f'{category_name}-{text.lower()}' | |
return f'<div class="{class_name}">{text}</div>' | |
def add_div_tag_to_replicas(replicas: int) -> str: | |
if replicas == 0: | |
return '' | |
if replicas == 1: | |
return '1' | |
return f'<div class="multiple-replicas">{replicas}</div>' | |
def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str: | |
if hardware == 'cpu-basic': | |
return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>' | |
s = sleep_time_s.replace(' ', '-') | |
return f'<div class="sleep-time-{s}">{sleep_time_s}</div>' | |
def __call__(self, df: pd.DataFrame) -> pd.DataFrame: | |
new_rows = [] | |
for _, row in df.iterrows(): | |
new_row = dict(row) | { | |
'status': | |
self.to_div(row.status, 'status'), | |
'hardware': | |
self.to_div(row.hardware, 'hardware'), | |
'suggested_hardware': | |
self.to_div(row.suggested_hardware, 'hardware'), | |
'title': | |
self.create_link(row.title, row.url), | |
'owner': | |
self.create_link(row.owner, | |
f'https://huggingface.co./{row.owner}'), | |
'sdk': | |
self.to_div(row.sdk, 'sdk'), | |
'sleep_time': | |
self.add_div_tag_to_sleep_time( | |
SLEEP_TIME_INT_TO_STR[row.sleep_time], row.hardware), | |
'replicas': | |
self.add_div_tag_to_replicas(row.replicas), | |
'arxiv': | |
self.get_arxiv_link(row.arxiv), | |
'github': | |
self.get_github_link(row.github), | |
'tags': | |
self.get_tag_list(row.tags), | |
} | |
new_rows.append(new_row) | |
return pd.DataFrame(new_rows, columns=df.columns) | |
class DemoList: | |
COLUMN_INFO = [ | |
['status', 'markdown'], | |
['hardware', 'markdown'], | |
['title', 'markdown'], | |
['owner', 'markdown'], | |
['arxiv', 'markdown'], | |
['github', 'markdown'], | |
['likes', 'number'], | |
['tags', 'str'], | |
['last_modified', 'str'], | |
['created', 'str'], | |
['sdk', 'markdown'], | |
['sdk_version', 'str'], | |
['suggested_hardware', 'markdown'], | |
['sleep_time', 'markdown'], | |
['replicas', 'markdown'], | |
['private', 'bool'], | |
] | |
def __init__(self, df: pd.DataFrame): | |
self.df_raw = df | |
self._prettifier = Prettifier() | |
self.df_prettified = self._prettifier(df).loc[:, self.column_names] | |
def column_names(self): | |
return list(map(operator.itemgetter(0), self.COLUMN_INFO)) | |
def column_datatype(self): | |
return list(map(operator.itemgetter(1), self.COLUMN_INFO)) | |
def filter( | |
self, | |
status: list[str], | |
hardware: list[str], | |
sleep_time: list[str], | |
multiple_replicas: bool, | |
sdk: list[str], | |
visibility: list[str], | |
owner: list[str], | |
) -> pd.DataFrame: | |
df = self.df_raw.copy() | |
if multiple_replicas: | |
df = df[self.df_raw.replicas > 1] | |
if visibility == ['public']: | |
df = df[~self.df_raw.private] | |
elif visibility == ['private']: | |
df = df[self.df_raw.private] | |
df = df[(self.df_raw.status.isin(status)) | |
& (self.df_raw.hardware.isin(hardware)) | |
& (self.df_raw.sdk.isin(sdk))] | |
sleep_time_int = [SLEEP_TIME_STR_TO_INT[s] for s in sleep_time] | |
df = df[self.df_raw.sleep_time.isin(sleep_time_int)] | |
if set(owner) == set(OWNER_CHOICES): | |
pass | |
elif WHOAMI in owner: | |
df = df[self.df_raw.owner == WHOAMI] | |
else: | |
df = df[self.df_raw.owner != WHOAMI] | |
return self._prettifier(df).loc[:, self.column_names] | |