Spaces:
Runtime error
Runtime error
import json | |
import os | |
import re | |
from datetime import datetime | |
from pathlib import Path | |
import gradio as gr | |
from huggingface_hub import CommitScheduler, HfApi | |
from huggingface_hub.utils import HfHubHTTPError | |
# Token handed to the CommitScheduler below; None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Local folder the scheduler watches, and the JSON-lines file inside it that
# usage records are appended to.
JSON_DATASET_DIR = Path("dataset")
JSON_DATASET_DIR.mkdir(exist_ok=True, parents=True)
JSON_DATASET_PATH = JSON_DATASET_DIR.joinpath("dataset.jsonl")

# Background uploader for the usage-stats dataset repo.
# NOTE(review): path_in_repo receives the *file* path ("dataset/dataset.jsonl")
# while folder_path is the directory — confirm this is the intended layout in
# the dataset repo before changing it, since existing data lives there.
scheduler = CommitScheduler(
    token=HF_TOKEN,
    repo_type="dataset",
    repo_id="librarian-bots/collection_cloner-usage-stats",
    folder_path=JSON_DATASET_DIR,
    path_in_repo=str(JSON_DATASET_PATH),
)
def save_json(source_slug: str, destination_slug: str) -> None:
    """Append one clone record to the local JSON-lines usage file.

    Each record is a single JSON object with the keys ``source_collection``,
    ``destination_collection`` and ``datetime`` (ISO-8601, local naive time),
    followed by a newline.

    Args:
        source_slug: Slug of the collection that was cloned.
        destination_slug: Slug of the newly created collection.

    Returns:
        None. Nothing is written when either slug starts with ``"hf_"``.
    """
    # Catch people accidentally pasting a token into a slug field. Do this
    # before taking the lock or opening the file so a rejected call has no
    # side effects at all (no lock contention, no file creation).
    if source_slug.startswith("hf_") or destination_slug.startswith("hf_"):
        return None

    # Hold the scheduler's lock for the whole write so the record is appended
    # atomically with respect to anything else that uses the same lock.
    with scheduler.lock:
        record = {
            "source_collection": source_slug,
            "destination_collection": destination_slug,
            "datetime": datetime.now().isoformat(),
        }
        with JSON_DATASET_PATH.open("a", encoding="utf-8") as f:
            # One write per record keeps each JSON line contiguous.
            f.write(json.dumps(record) + "\n")
    return None
def extract_slug(url: str):
    """Return the collection slug contained in a Hugging Face collection URL.

    Both ``https://huggingface.co/collections/...`` and the fully-qualified
    host form ``https://huggingface.co./collections/...`` are accepted. Any
    query string, fragment or trailing slash after the slug is dropped.

    Args:
        url: Text that may contain a collection URL.

    Returns:
        The slug (e.g. ``"user/my-collection-123"``), or ``None`` when ``url``
        contains no collection URL or the slug part is empty.
    """
    # `\.?` tolerates the trailing-dot host; the character class stops the
    # capture at the first '?', '#' or whitespace so only the slug is kept.
    pattern = r"https://huggingface\.co\.?/collections/([^?#\s]+)"
    match = re.search(pattern, url)
    if match is None:
        return None
    slug = match.group(1).rstrip("/")
    return slug or None
def clone_collection(
    source_slug, dest_title, token, dest_namespace=None, private=False, exist_ok=False
):
    """Clone a Hub collection into a new collection and report the result.

    Args:
        source_slug: Slug of the collection to copy, or its full
            ``https://huggingface.co/collections/...`` URL.
        dest_title: Title for the new collection.
        token: Hub token used for every API call.
        dest_namespace: Namespace to create the collection in. ``None``, a
            blank string, or the UI placeholder ``"username"`` all mean
            "let the Hub pick the token owner's namespace".
        private: Whether the new collection is private. Clones into private
            collections are not recorded by ``save_json``.
        exist_ok: Forwarded to ``create_collection`` as ``exists_ok``.

    Returns:
        A Markdown string linking the source and the new collection.

    Raises:
        gr.Error: If no source is given, the URL contains no slug, or the
            source collection cannot be fetched.
    """
    api = HfApi(token=token)
    source_slug = (source_slug or "").strip()
    if not source_slug:
        raise gr.Error("Please provide a source collection slug or URL.")

    # Accept a URL as well as a bare slug. Both the plain host and the
    # trailing-dot host are recognised; the URL is normalised to the plain
    # host before extraction so this works with a strict slug extractor.
    url_prefixes = (
        "https://huggingface.co/collections/",
        "https://huggingface.co./collections/",
    )
    if source_slug.startswith(url_prefixes):
        canonical_url = source_slug.replace(
            "https://huggingface.co./", "https://huggingface.co/", 1
        )
        extracted = extract_slug(canonical_url)
        if not extracted:
            raise gr.Error(f"Could not find a collection slug in {source_slug}.")
        source_slug = extracted

    # get_collection signals a missing/inaccessible collection by raising, so
    # the friendly error has to come from the exception handler.
    try:
        collection = api.get_collection(source_slug)
    except HfHubHTTPError as err:
        raise gr.Error(
            f"Collection {source_slug} does not exist or you do not have access to it."
        ) from err

    description = f"Copied from {collection.title} using https://huggingface.co./spaces/librarian-bots/collection_cloner."

    # The namespace textbox is pre-filled with "username"; treat that and an
    # emptied box as "no explicit namespace".
    if dest_namespace is not None:
        dest_namespace = dest_namespace.strip()
    if not dest_namespace or dest_namespace == "username":
        dest_namespace = None

    new_collection = api.create_collection(
        dest_title,
        namespace=dest_namespace,
        exists_ok=exist_ok,
        private=private,
        description=description,
        token=token,
    )

    # Copy items one by one; a failure on one item must not abort the clone.
    for item in collection.items:
        try:
            api.add_collection_item(
                new_collection.slug, item.item_id, item_type=item.item_type
            )
        except HfHubHTTPError as err:
            status = getattr(getattr(err, "response", None), "status_code", None)
            if status == 409:
                # 409 Conflict: the item is already in the target collection.
                gr.Info(
                    f"Failed to add item {item.item_id} to collection {new_collection.slug} because it already exists in this collection."
                )
            else:
                # Any other failure: report the real reason instead of
                # claiming the item already exists.
                gr.Info(
                    f"Failed to add item {item.item_id} to collection {new_collection.slug}: {err}"
                )

    # Usage is only recorded for public clones.
    if not private:
        save_json(collection.slug, new_collection.slug)
    return f"[Collection]({collection.url}) has been cloned into [{new_collection.slug}]({new_collection.url})"
# Page heading, rendered as raw HTML at the top of the app.
title = (
    """<h1 style='text-align: center;'> 🧬 Collection Cloner 🧬</h1>"""
)

# Gradio UI: intro text, token input, source/destination inputs, and a button
# that runs clone_collection and shows its Markdown result.
# NOTE(review): the nesting of the Row/Column containers below is inferred —
# confirm against the deployed layout.
with gr.Blocks(css="style.css") as demo:
    gr.HTML(title)
    # NOTE(review): the first paragraph ends with "<p>" rather than "</p>" —
    # looks like an unclosed tag; confirm before changing.
    gr.HTML(
        """<p style='text-align: center;'>
This space allows you to clone a <a href="https://huggingface.co./docs/hub/collections">Collection</a> from the Hugging Face Hub into your own namespace.<p>
<p style='text-align: center;'> You can edit this cloned Collection to your liking!</p>"""
    )
    # Disclosure matching clone_collection: only non-private clones are
    # passed to save_json.
    gr.Markdown(
        """
**Note**: To track interest in this feature this Space keeps a record of clones which are cloned into public collection. Clones into private Collections are not tracked."""
    )
    gr.Markdown("## Authentication")
    # NOTE(review): the link targets ".../settings/token" — presumably the
    # token settings page; verify the URL resolves.
    gr.Markdown(
        "Token is required to create a new collection and clone private collections. You can get your token from your [profile page](https://huggingface.co./settings/token)."
    )
    with gr.Row():
        # Password-type textbox for the user's Hub token.
        token = gr.Textbox(
            label="Token",
            type="password",
        )
    with gr.Column():
        gr.Markdown("## Source Collection")
        # Accepts a bare slug or a full collection URL; clone_collection
        # handles both.
        source_slug = gr.Textbox(
            label="Source Collection slug or URL",
            placeholder="e.g. username/collection-slug",
        )
        gr.Markdown("## Destination Collection info")
        dest_title = gr.Textbox(
            label="Destination Title",
        )
        # The default value "username" is the sentinel clone_collection maps
        # to "no explicit namespace".
        # NOTE(review): the label ends with "))" — likely a typo.
        dest_namespace = gr.Textbox(
            value="username",
            label="Destination Namespace (optional - defaults to your username))",
            interactive=True,
        )
        with gr.Row():
            private = gr.Checkbox(
                False,
                label="Make new collection private?",
            )
            # Passed to clone_collection as its `exist_ok` argument.
            overwrite = gr.Checkbox(
                False,
                label="Overwrite any collection with same slug as the destination?",
            )
    submit_btn = gr.Button("Clone Collection")
    # Receives the Markdown string returned by clone_collection.
    response = gr.Markdown()
    # Input order must match clone_collection's positional parameters:
    # (source_slug, dest_title, token, dest_namespace, private, exist_ok).
    submit_btn.click(
        clone_collection,
        [
            source_slug,
            dest_title,
            token,
            dest_namespace,
            private,
            overwrite,
        ],
        response,
    )

# Start the app.
demo.launch()