Spaces:
Runtime error
Runtime error
import gradio as gr | |
from huggingface_hub import list_spaces, list_models, list_datasets | |
from cachetools import TTLCache, cached | |
from toolz import groupby, valmap | |
import platform | |
from enum import Enum | |
# True only when the interpreter reports a macOS ("Darwin") host.
is_macos = platform.system() == "Darwin"

# Value handed to the Hub listing calls as ``limit``: one million on macOS,
# otherwise None.
# NOTE(review): presumably the macOS cap keeps local development runs bounded
# and None means "no cap" — confirm against the huggingface_hub docs.
LIMIT = None if not is_macos else 1_000_000

# NOTE(review): not referenced in the visible code; presumably meant as the
# display name for repos whose author is None — confirm.
NONE_AUTHOR = "HuggingFace Team"  # TODO deal with this
class HubRepoType(Enum):
    """Kinds of Hub repository this module can rank.

    Each member's value is the lowercase string used for that kind elsewhere
    in this module (for example the "Type of repo" radio choices).
    """

    MODEL = "model"
    DATASET = "dataset"
    SPACE = "space"
# Memoise for 30 minutes (the refresh interval advertised in the UI text) so
# the ranking helpers, which call this on every request, do not re-download
# the whole listing each time.
# NOTE(review): cachetools caches are not thread-safe on their own; pass
# ``lock=`` to ``cached`` if concurrent workers can hit this at once.
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_spaces():
    """Return every Space reported by ``list_spaces`` as a list.

    The listing is requested with ``full=True`` and ``limit=LIMIT``.

    Returns:
        list: the materialised listing. The same list object is handed to
            every caller until the cache entry expires, so callers must not
            mutate it.
    """
    return list(list_spaces(full=True, limit=LIMIT))
# Memoise for 30 minutes (the refresh interval advertised in the UI text) so
# the ranking helpers, which call this on every request, do not re-download
# the whole listing each time.
# NOTE(review): cachetools caches are not thread-safe on their own; pass
# ``lock=`` to ``cached`` if concurrent workers can hit this at once.
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_models():
    """Return every model reported by ``list_models`` as a list.

    The listing is requested with ``full=True`` and ``limit=LIMIT``.

    Returns:
        list: the materialised listing. The same list object is handed to
            every caller until the cache entry expires, so callers must not
            mutate it.
    """
    # list() already consumes any iterable; the extra iter() was redundant.
    return list(list_models(full=True, limit=LIMIT))
# Memoise for 30 minutes (the refresh interval advertised in the UI text) so
# the ranking helpers, which call this on every request, do not re-download
# the whole listing each time.
# NOTE(review): cachetools caches are not thread-safe on their own; pass
# ``lock=`` to ``cached`` if concurrent workers can hit this at once.
@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_datasets():
    """Return every dataset reported by ``list_datasets`` as a list.

    The listing is requested with ``full=True`` and ``limit=LIMIT``.

    Returns:
        list: the materialised listing. The same list object is handed to
            every caller until the cache entry expires, so callers must not
            mutate it.
    """
    # list() already consumes any iterable; the extra iter() was redundant.
    return list(list_datasets(full=True, limit=LIMIT))
# Call each listing helper once at import time and discard the result.
# NOTE(review): this only saves work for later requests if the get_* helpers
# memoise their return value — confirm that they are decorated with a cache.
get_spaces()  # to warm up the cache
get_models()  # to warm up the cache
get_datasets()  # to warm up the cache
def valid_dataset_ids():
    """Collect the ``id`` of every entry returned by ``get_datasets``.

    Returns:
        set: the distinct dataset ids.
    """
    listing = get_datasets()
    return {entry.id for entry in listing}
def valid_model_ids():
    """Collect the ``id`` of every entry returned by ``get_models``.

    Returns:
        set: the distinct model ids.
    """
    listing = get_models()
    return {entry.id for entry in listing}
def valid_space_ids():
    """Collect the ``id`` of every entry returned by ``get_spaces``.

    Returns:
        set: the distinct Space ids.
    """
    listing = get_spaces()
    return {entry.id for entry in listing}
# Snapshots of the known repo ids, taken once at import time (evaluated in the
# order dataset, model, space). They are not refreshed afterwards.
VALID_DATASET_IDS, VALID_MODEL_IDS, VALID_SPACE_IDS = (
    valid_dataset_ids(),
    valid_model_ids(),
    valid_space_ids(),
)
def create_space_to_like_dict():
    """Build a mapping from Space id to that Space's ``likes`` value.

    Returns:
        dict: ``{space.id: space.likes}`` for every entry from ``get_spaces``.
    """
    likes_by_space = {}
    for entry in get_spaces():
        likes_by_space[entry.id] = entry.likes
    return likes_by_space
def create_org_to_space_like_dict():
    """Total the ``likes`` of all Spaces per author.

    Returns:
        dict: ``{author: summed likes}``; keys appear in the order each author
            is first seen in ``get_spaces``.
    """
    likes_by_author = {}
    for entry in get_spaces():
        owner = entry.author
        likes_by_author[owner] = likes_by_author.get(owner, 0) + entry.likes
    return likes_by_author
def create_model_to_like_dict(metric_kind):
    """Build a mapping from model id to the requested metric.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; names the attribute read
            from each model entry.

    Returns:
        dict: ``{model.id: metric value}`` for every entry from ``get_models``.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor
            ``"downloads"``.
    """
    listing = get_models()
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")
    return {entry.id: getattr(entry, metric_kind) for entry in listing}
def create_org_to_model_metrics(metric_kind="likes"):
    """Total a model metric per author.

    The previous implementation tested ``if metric_kind:``, which is true for
    any non-empty string, so ``"downloads"`` silently produced like totals.
    The metric name is now compared explicitly.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; names the attribute summed
            over each author's models.

    Returns:
        dict: ``{author: summed metric}`` for every author that is not None.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor
            ``"downloads"`` (same message as ``create_model_to_like_dict``).
    """
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")

    # Entries without an author cannot be attributed to an org/user.
    attributed = [entry for entry in get_models() if entry.author is not None]
    by_author = groupby(lambda entry: entry.author, attributed)

    # NOTE(review): huggingface_hub types these counters as optional, so a
    # missing value is counted as 0 instead of breaking the sum — confirm.
    return valmap(
        lambda entries: sum(getattr(entry, metric_kind) or 0 for entry in entries),
        by_author,
    )
def create_dataset_to_like_dict(metric_kind="likes"):
    """Build a mapping from dataset id to the requested metric.

    The previous implementation fell off the end and returned None for an
    unsupported metric; it now raises, matching ``create_model_to_like_dict``.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; names the attribute read
            from each dataset entry.

    Returns:
        dict: ``{dataset.id: metric value}`` for every entry from
            ``get_datasets``.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor
            ``"downloads"``.
    """
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")
    return {entry.id: getattr(entry, metric_kind) for entry in get_datasets()}
def create_org_to_dataset_metrics(metric_kind="likes"):
    """Total a dataset metric per author.

    The previous implementation tested ``if metric_kind:``, which is true for
    any non-empty string, so ``"downloads"`` silently produced like totals.
    The metric name is now compared explicitly.

    Args:
        metric_kind: ``"likes"`` or ``"downloads"``; names the attribute summed
            over each author's datasets.

    Returns:
        dict: ``{author: summed metric}`` for every author that is not None.

    Raises:
        ValueError: if ``metric_kind`` is neither ``"likes"`` nor
            ``"downloads"``.
    """
    if metric_kind not in ("likes", "downloads"):
        raise ValueError(f"Unsupported metric_kind: {metric_kind}")

    # Entries without an author cannot be attributed to an org/user.
    attributed = [entry for entry in get_datasets() if entry.author is not None]
    by_author = groupby(lambda entry: entry.author, attributed)

    # NOTE(review): huggingface_hub types these counters as optional, so a
    # missing value is counted as 0 instead of breaking the sum — confirm.
    return valmap(
        lambda entries: sum(getattr(entry, metric_kind) or 0 for entry in entries),
        by_author,
    )
def relative_rank(my_dict, target_key, filter_zero=False):
    """Locate ``target_key`` in a descending ranking of ``my_dict`` by value.

    Args:
        my_dict: mapping of key to numeric score.
        target_key: the key whose standing is reported.
        filter_zero: when true, entries whose score equals 0 are left out of
            the ranking before anything else happens.

    Returns:
        dict with:
            ``rank``: 1-based position as a percentage of the ranked entries,
            ``num_higher``: how many entries sort before the target,
            ``num_lower``: how many entries sort after the target,
            ``value``: the target's own score,
            ``position``: 1-based position in the ranking.

    Raises:
        gr.Error: if ``target_key`` is not among the ranked entries (which
            includes the case where it was removed by ``filter_zero``).
    """
    scores = my_dict
    if filter_zero:
        scores = {key: value for key, value in my_dict.items() if value != 0}

    if target_key not in scores:
        raise gr.Error(f"'{target_key}' not found please check the ID and try again.")

    # Stable descending sort: entries with equal scores keep mapping order.
    ordered_keys = sorted(scores, key=scores.get, reverse=True)
    ahead = ordered_keys.index(target_key)
    total = len(ordered_keys)

    return {
        "rank": (ahead + 1) / total * 100,
        "num_higher": ahead,
        "num_lower": total - ahead - 1,
        "value": scores[target_key],
        "position": ahead + 1,
    }
def relative_rank_for_space(space_id, filter_zero=False):
    """Rank one Space by likes among the Spaces in ``create_space_to_like_dict``.

    Args:
        space_id: id of the Space to look up.
        filter_zero: forwarded to ``relative_rank``.

    Returns:
        The dict produced by ``relative_rank``.
    """
    return relative_rank(
        create_space_to_like_dict(), space_id, filter_zero=filter_zero
    )
def relative_rank_for_model(model_id, metric_kind="likes", filter_zero=False):
    """Rank one model among the models in ``create_model_to_like_dict``.

    Args:
        model_id: id of the model to look up.
        metric_kind: forwarded to ``create_model_to_like_dict``.
        filter_zero: forwarded to ``relative_rank``.

    Returns:
        The dict produced by ``relative_rank``.
    """
    return relative_rank(
        create_model_to_like_dict(metric_kind), model_id, filter_zero=filter_zero
    )
def relative_rank_for_dataset(dataset_id, metric_kind="likes", filter_zero=False):
    """Rank one dataset among the datasets in ``create_dataset_to_like_dict``.

    Args:
        dataset_id: id of the dataset to look up.
        metric_kind: forwarded to ``create_dataset_to_like_dict``.
        filter_zero: forwarded to ``relative_rank``.

    Returns:
        The dict produced by ``relative_rank``.
    """
    return relative_rank(
        create_dataset_to_like_dict(metric_kind),
        dataset_id,
        filter_zero=filter_zero,
    )
def relative_space_rank_for_org(org_id, filter_zero=False):
    """Rank an org/user by the summed likes of its Spaces.

    Args:
        org_id: author name to look up in ``create_org_to_space_like_dict``.
        filter_zero: forwarded to ``relative_rank``.

    Returns:
        The dict produced by ``relative_rank``.
    """
    return relative_rank(
        create_org_to_space_like_dict(), org_id, filter_zero=filter_zero
    )
def relative_model_rank_for_org(org_id, metric_kind="likes", filter_zero=False):
    """Rank an org/user by the per-author totals of its models.

    Args:
        org_id: author name to look up in ``create_org_to_model_metrics``.
        metric_kind: forwarded to ``create_org_to_model_metrics``.
        filter_zero: forwarded to ``relative_rank``.

    Returns:
        The dict produced by ``relative_rank``.
    """
    return relative_rank(
        create_org_to_model_metrics(metric_kind), org_id, filter_zero=filter_zero
    )
def relative_dataset_rank_for_org(org_id, metric_kind="likes", filter_zero=False):
    """Rank an org/user by the per-author totals of its datasets.

    Args:
        org_id: author name to look up in ``create_org_to_dataset_metrics``.
        metric_kind: forwarded to ``create_org_to_dataset_metrics``.
        filter_zero: forwarded to ``relative_rank``.

    Returns:
        The dict produced by ``relative_rank``.
    """
    return relative_rank(
        create_org_to_dataset_metrics(metric_kind), org_id, filter_zero=filter_zero
    )
# @cached(cache=TTLCache(maxsize=100, ttl=60 * 30)) | |
# def rank_space(space_id): | |
# return relative_rank_for_space(space_id) | |
def rank_space_and_org(space_or_org_id, kind, filter_zero):
    """Route a ranking request to the single-repo or org-level report.

    Args:
        space_or_org_id: either ``"owner/name"``, a bare repo id, or a bare
            org/user name.
        kind: ``"space"``, ``"model"`` or ``"dataset"``.
        filter_zero: the string ``"yes"`` enables zero filtering; any other
            value disables it.

    Returns:
        The Markdown string built by ``_rank_single_repo`` or ``_rank_by_org``.

    Raises:
        ValueError: if the id contains more than one ``/``.
    """
    exclude_zero = filter_zero == "yes"
    slash_count = space_or_org_id.count("/")

    # "owner/name" always denotes a single repository.
    if slash_count == 1:
        return _rank_single_repo(space_or_org_id, kind, exclude_zero)

    if slash_count == 0:
        # A bare id is a repo only if it is a known model/dataset id for the
        # requested kind; anything else is treated as an org/user name.
        bare_repo_ids = {
            "model": VALID_MODEL_IDS,
            "dataset": VALID_DATASET_IDS,
        }.get(kind, ())
        if space_or_org_id in bare_repo_ids:
            return _rank_single_repo(space_or_org_id, kind, exclude_zero)
        return _rank_by_org(space_or_org_id, kind, exclude_zero)

    raise ValueError(
        f"Unexpected combination of space_or_org_id '{space_or_org_id}' and kind"
        f" '{kind}'"
    )
def _rank_by_org(space_or_org_id, kind, filter_zero): | |
if kind == "space": | |
org_rank = relative_space_rank_for_org(space_or_org_id, filter_zero=filter_zero) | |
elif kind == "model": | |
org_rank = relative_model_rank_for_org(space_or_org_id, filter_zero=filter_zero) | |
elif kind == "dataset": | |
org_rank = relative_dataset_rank_for_org( | |
space_or_org_id, filter_zero=filter_zero | |
) | |
result = ( | |
f"## βοΈ Org/User {kind.title()} Likes Rankings βοΈ\n" | |
+ f"Here are the rankings for the org/user across all of their {kind}s \n" | |
) | |
result += f"""- You have {org_rank['value']:,} likes for this org/user.\n""" | |
result += f"""- Your org/user is ranked {org_rank['position']:,}\n""" | |
result += f"""- You have {org_rank['num_higher']:,} orgs/users above and {org_rank['num_lower']:,} orgs/users below in the ranking of {kind} likes \n\n""" | |
result += f"""- Organization or user [{space_or_org_id}](https://huggingface.co./{space_or_org_id}) is ranked in the top {org_rank['rank']:.2f}% \n\n""" | |
if kind == "space": | |
result += f"""You can find all your Spaces sorted by likes [here](https://huggingface.co./{space_or_org_id}?sort_spaces=likes#spaces)\n""" | |
if kind == "model": | |
result += f"""You can find all your Models sorted by likes [here](https://huggingface.co./{space_or_org_id}?sort_models=likes#models)\n""" | |
if kind == "dataset": | |
result += f"""You can find all your Datasets sorted by likes [here](https://huggingface.co./{space_or_org_id}?sort_datasets=likes#datasets)\n""" | |
return _create_footer_message(result, kind) | |
def _rank_single_repo(space_or_org_id, kind, filter_zero): | |
if kind == "space": | |
repo_rank = relative_rank_for_space(space_or_org_id, filter_zero=filter_zero) | |
elif kind == "model": | |
repo_rank = relative_rank_for_model(space_or_org_id, filter_zero=filter_zero) | |
elif kind == "dataset": | |
repo_rank = relative_rank_for_dataset(space_or_org_id, filter_zero=filter_zero) | |
result = f"## βοΈ {kind.title()} Likes Rankings βοΈ\n" | |
result += f"""Here are the rankings by likes for [`{space_or_org_id}`](https://huggingface.co./spaces/{space_or_org_id}) across all {kind}s \n""" | |
result += f"""- You have {repo_rank['value']:,} likes for this {kind}.\n""" | |
result += f"""- Your {kind} is ranked {repo_rank['position']:,}.\n""" | |
if kind == "space": | |
result += f"""- Space [{space_or_org_id}](https://huggingface.co./spaces/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n""" | |
if kind == "model": | |
result += f"""- Model [{space_or_org_id}](https://huggingface.co./{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n""" | |
if kind == "dataset": | |
result += f"""- Dataset [{space_or_org_id}](https://huggingface.co./dataset/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n""" | |
result += f"""- You have {repo_rank['num_higher']:,} {kind}s above and {repo_rank['num_lower']:,} {kind}s below in the ranking of {kind}s likes\n\n""" | |
return _create_footer_message(result, kind) | |
def _create_footer_message(result, kind): | |
result += """### β¨ Remember likes aren't everything!β¨\n""" | |
if kind == "space": | |
result += """Some Spaces go very viral whilst other Spaces may be very useful for a smaller audience. If you think your Space is useful, please add it to this [thread](https://huggingface.co./spaces/librarian-bots/ranker/discussions/3) of awesome Spaces. | |
We'll look out for awesome Spaces added to this thread to promote more widely!""" | |
return result | |
def get_top_n_orgs_and_users_spaces(top_n=100):
    """Return the authors with the most Space likes, highest first.

    Args:
        top_n: how many leading entries to keep.

    Returns:
        list of ``(author, total likes)`` tuples, at most ``top_n`` long.
    """

    def total_of(pair):
        return pair[1]

    totals = create_org_to_space_like_dict()
    ranked = sorted(totals.items(), key=total_of, reverse=True)
    return ranked[:top_n]
def get_top_n_orgs_and_users_models(metric, top_n=100):
    """Return the authors with the highest model totals, highest first.

    Args:
        metric: forwarded to ``create_org_to_model_metrics``.
        top_n: how many leading entries to keep.

    Returns:
        list of ``(author, total)`` tuples, at most ``top_n`` long.
    """

    def total_of(pair):
        return pair[1]

    totals = create_org_to_model_metrics(metric)
    ranked = sorted(totals.items(), key=total_of, reverse=True)
    return ranked[:top_n]
def get_top_n_orgs_and_users_datasets(metric, top_n=100):
    """Return the authors with the highest dataset totals, highest first.

    Args:
        metric: forwarded to ``create_org_to_dataset_metrics``.
        top_n: how many leading entries to keep.

    Returns:
        list of ``(author, total)`` tuples, at most ``top_n`` long.
    """

    def total_of(pair):
        return pair[1]

    totals = create_org_to_dataset_metrics(metric)
    ranked = sorted(totals.items(), key=total_of, reverse=True)
    return ranked[:top_n]
def plot_top_n_orgs_and_users(kind, metric="likes", top_n=100):
    """Render the org/user leaderboard for one repo kind as Markdown.

    Changes from the previous version: the three copy-pasted branches are
    merged; the heading and per-line label reflect the requested ``top_n`` and
    ``metric`` instead of the hard-coded "Top 100" / "likes" (output for the
    defaults is unchanged); an unsupported ``kind`` raises ``ValueError``
    instead of returning None; and links use the host ``huggingface.co`` (the
    old text had a stray trailing dot).

    Args:
        kind: ``"space"``, ``"model"`` or ``"dataset"``.
        metric: metric forwarded to the model/dataset helpers; ignored for
            Spaces, whose helper takes no metric.
        top_n: number of leaderboard rows requested.

    Returns:
        str: a Markdown heading followed by a numbered list of linked authors.

    Raises:
        ValueError: if ``kind`` is not one of the three supported kinds.
    """
    if kind == "space":
        leaders = get_top_n_orgs_and_users_spaces(top_n)
        metric_label = "likes"  # the Space helper only ranks by likes
    elif kind == "model":
        leaders = get_top_n_orgs_and_users_models(metric, top_n=top_n)
        metric_label = metric
    elif kind == "dataset":
        leaders = get_top_n_orgs_and_users_datasets(metric, top_n=top_n)
        metric_label = metric
    else:
        raise ValueError(f"Unsupported kind: {kind}")

    # NOTE(review): the heading emoji was mis-encoded in the source; the
    # trophy is chosen to match the page title — confirm.
    header = (
        f"## 🏆 Top {top_n} Orgs and Users by {kind.title()}"
        f" {metric_label.title()} 🏆 "
    )
    body = "".join(
        f"\n{position}. [{org}](https://huggingface.co/{org}) with"
        f" {total:,} {metric_label}"
        for position, (org, total) in enumerate(leaders, start=1)
    )
    return header + body
def get_top_n_spaces(top_n=100):
    """Return the most-liked Spaces, highest first.

    Args:
        top_n: how many leading entries to keep.

    Returns:
        list of ``(space id, likes)`` tuples, at most ``top_n`` long.
    """

    def score_of(pair):
        return pair[1]

    likes_by_space = create_space_to_like_dict()
    ranked = sorted(likes_by_space.items(), key=score_of, reverse=True)
    return ranked[:top_n]
def get_top_n_models(metric_kind, top_n=100):
    """Return the models with the highest metric value, highest first.

    Args:
        metric_kind: forwarded to ``create_model_to_like_dict``.
        top_n: how many leading entries to keep.

    Returns:
        list of ``(model id, metric value)`` tuples, at most ``top_n`` long.
    """

    def score_of(pair):
        return pair[1]

    metric_by_model = create_model_to_like_dict(metric_kind)
    ranked = sorted(metric_by_model.items(), key=score_of, reverse=True)
    return ranked[:top_n]
def get_top_n_datasets(metric, top_n=100):
    """Return the datasets with the highest metric value, highest first.

    Args:
        metric: forwarded to ``create_dataset_to_like_dict``.
        top_n: how many leading entries to keep.

    Returns:
        list of ``(dataset id, metric value)`` tuples, at most ``top_n`` long.
    """

    def score_of(pair):
        return pair[1]

    metric_by_dataset = create_dataset_to_like_dict(metric)
    ranked = sorted(metric_by_dataset.items(), key=score_of, reverse=True)
    return ranked[:top_n]
def _plot_top_n_hub_repos(kind: HubRepoType, metric="likes", top_n=100):
    """Render the per-repository leaderboard for one repo kind as Markdown.

    Changes from the previous version: dataset links used ``/dataset/`` and
    now use ``/datasets/``; the three copy-pasted branches are merged; the
    heading and per-line label reflect the requested ``top_n`` and ``metric``
    instead of the hard-coded "Top 100" / "likes" (output for the defaults is
    otherwise unchanged); an unsupported ``kind`` raises ``ValueError``
    instead of returning None; and links use the host ``huggingface.co`` (the
    old text had a stray trailing dot).

    Args:
        kind: which kind of repository to list.
        metric: metric forwarded to the model/dataset helpers; ignored for
            Spaces, whose helper takes no metric.
        top_n: number of leaderboard rows requested.

    Returns:
        str: a Markdown heading followed by a numbered list of linked repos.

    Raises:
        ValueError: if ``kind`` is not a ``HubRepoType`` member.
    """
    if kind == HubRepoType.SPACE:
        leaders = get_top_n_spaces(top_n)
        url_prefix = "https://huggingface.co/spaces/"
        metric_label = "likes"  # the Space helper only ranks by likes
    elif kind == HubRepoType.MODEL:
        leaders = get_top_n_models(metric, top_n)
        url_prefix = "https://huggingface.co/"
        metric_label = metric
    elif kind == HubRepoType.DATASET:
        leaders = get_top_n_datasets(metric, top_n)
        url_prefix = "https://huggingface.co/datasets/"
        metric_label = metric
    else:
        raise ValueError(f"Unsupported kind: {kind}")

    # NOTE(review): the heading emoji was mis-encoded in the source; the
    # trophy is chosen to match the page title — confirm.
    header = (
        f"## 🏆 Top {top_n} {kind.value.title()} repositories by"
        f" {metric_label.title()} 🏆 "
    )
    body = "".join(
        f"\n{position}. [{repo_id}]({url_prefix}{repo_id}) with"
        f" {count:,} {metric_label}"
        for position, (repo_id, count) in enumerate(leaders, start=1)
    )
    return header + body
def plot_top_n_hub_repos(kind, metric_kind="likes", top_n=100):
    """Render the per-repository leaderboard for a repo kind given as a string.

    The previous Space branch called ``_plot_top_n_hub_repos(HubRepoType.SPACE,
    top_n)``, which bound ``top_n`` to the ``metric`` parameter and left the
    real ``top_n`` at its default. All arguments are now passed by keyword.

    Args:
        kind: ``"space"``, ``"model"`` or ``"dataset"``.
        metric_kind: metric forwarded as ``metric``.
        top_n: number of leaderboard rows requested.

    Returns:
        str: the Markdown produced by ``_plot_top_n_hub_repos``.

    Raises:
        ValueError: if ``kind`` is not the value of a ``HubRepoType`` member
            (previously this case returned None).
    """
    return _plot_top_n_hub_repos(
        HubRepoType(kind), metric=metric_kind, top_n=top_n
    )
# Gradio page: a lookup form for one repo or org/user, followed by a
# leaderboard section.
# NOTE(review): the source lost its indentation; the nesting of the Row and
# Accordion blocks below is reconstructed — confirm the intended layout.
with gr.Blocks() as demo:
    # Page title and introductory text.
    gr.HTML("<h1 style='text-align: center;'> 🏆 HuggyRanker 🏆 </h1>")
    gr.HTML(
        """<p style='text-align: center;'>Rank a single repository or all of the repositories created by an organization or user by likes</p>"""
    )
    gr.HTML(
        """<p style="text-align: center;"><i>Remember likes aren't everything!</i></p>"""
    )
    gr.Markdown(
        """## Rank Specific Hub repositories or rank an organization or user by likes
Provide this app with a Hub ID e.g. `librarian-bots/ranker` or a Username/Organization name e.g. `librarian-bots` to rank by likes."""
    )
    # Inputs for the lookup form.
    with gr.Row():
        space_id = gr.Textbox(
            "librarian-bots", max_lines=1, label="Space or user/organization ID"
        )
        # rank_space_and_org compares this value against the string "yes".
        filter_zero_likes = gr.Radio(
            choices=["no", "yes"],
            label="Filter out repositories with 0 likes in the ranking?",
            value="yes",
        )
        # Also used as the only input of both leaderboard handlers below.
        repo_type = gr.Radio(
            choices=["space", "model", "dataset"],
            label="Type of repo",
            value="space",
            interactive=True,
        )
    run_btn = gr.Button("Show ranking for this Space or org/user!", label="Rank Space")
    result = gr.Markdown()
    # Input order matches rank_space_and_org(space_or_org_id, kind, filter_zero).
    run_btn.click(
        rank_space_and_org,
        inputs=[space_id, repo_type, filter_zero_likes],
        outputs=result,
    )
    # Leaderboard section.
    gr.Markdown("## Leaderboard of Top 100 Spaces and Orgs/Users by Likes")
    gr.Markdown(
        """The leaderboard is updated every 30 minutes.
Choose the type of repo to rank by likes and click the button to show the leaderboard."""
    )
    show_refresh_btn = gr.Button("Show/refresh Leaderboard", label="Refresh")
    # The one refresh button drives both panels. Only repo_type is passed, so
    # the handlers run with their default metric and top_n.
    with gr.Row():
        with gr.Accordion("Show rankings for Orgs and Users", open=False):
            org_user_ranking = gr.Markdown()
            show_refresh_btn.click(
                plot_top_n_orgs_and_users, inputs=[repo_type], outputs=org_user_ranking
            )
        with gr.Accordion("Show rankings for individual repositories", open=False):
            repo_level_ranking = gr.Markdown()
            show_refresh_btn.click(
                plot_top_n_hub_repos, inputs=[repo_type], outputs=repo_level_ranking
            )
# Turn on request queuing, then start the app.
# NOTE(review): `concurrency_count` on queue() and `label=` on gr.Button depend
# on the installed Gradio version — confirm against the pinned release.
demo.queue(concurrency_count=4).launch()