# davanstrien's picture
# davanstrien (HF staff)
# Commit message: Remove unused import and variable assignments.
# Commit: 971a659
# raw / history / blame
# File size: 5.1 kB
import json
import os
import re
from datetime import datetime
from pathlib import Path
import gradio as gr
from huggingface_hub import CommitScheduler, HfApi
from huggingface_hub.utils import HfHubHTTPError
# Hub token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Local folder that holds the JSON-lines usage log (created at import time).
JSON_DATASET_DIR = Path("dataset")
JSON_DATASET_DIR.mkdir(exist_ok=True, parents=True)
JSON_DATASET_PATH = JSON_DATASET_DIR.joinpath("dataset.jsonl")

# CommitScheduler wired to the local folder and the usage-stats dataset repo.
# NOTE(review): path_in_repo receives the log *file* path
# ("dataset/dataset.jsonl") rather than a directory — confirm this is the
# intended layout inside the dataset repo.
scheduler = CommitScheduler(
    token=HF_TOKEN,
    repo_type="dataset",
    repo_id="librarian-bots/collection_cloner-usage-stats",
    path_in_repo=str(JSON_DATASET_PATH),
    folder_path=JSON_DATASET_DIR,
)
def save_json(source_slug: str, destination_slug: str) -> None:
    """Append one clone record to the local JSON-lines usage log.

    Nothing is written when either slug starts with ``hf_``: that prefix
    suggests a token was entered where a slug was expected, and it must not
    end up in the log.

    Args:
        source_slug: Slug of the collection that was cloned.
        destination_slug: Slug of the newly created collection.
    """
    # Reject token-looking values up front so a skipped record neither takes
    # the scheduler lock nor opens (and possibly creates) the log file.
    if source_slug.startswith("hf_") or destination_slug.startswith("hf_"):
        return None

    record = {
        "source_collection": source_slug,
        "destination_collection": destination_slug,
        "datetime": datetime.now().isoformat(),
    }
    # Append while holding the scheduler's lock, writing the whole line in
    # a single call.
    with scheduler.lock, JSON_DATASET_PATH.open("a") as f:
        f.write(json.dumps(record) + "\n")
    return None
def extract_slug(url):
    """Return the part of *url* that follows the Hub collections prefix.

    The prefix ``https://huggingface.co/collections/`` may appear anywhere in
    *url*; everything after it up to the end of that line is returned.
    Returns ``None`` when the prefix is not found.
    """
    found = re.search(r"https://huggingface\.co/collections/(.*)", url)
    if found is None:
        return None
    return found.group(1)
def clone_collection(
    source_slug, dest_title, token, dest_namespace=None, private=False, exist_ok=False
):
    """Clone a Hub collection into a new collection and return a Markdown summary.

    Args:
        source_slug: Slug of the collection to copy, or its full
            ``https://huggingface.co/collections/...`` URL.
        dest_title: Title of the collection to create.
        token: Hub token used for the API calls.
        dest_namespace: Namespace for the new collection. ``None``, an empty
            value, or the UI placeholder ``"username"`` are passed on as
            ``None``.
        private: Whether the new collection is created as private. Clones
            into public collections are recorded via ``save_json``.
        exist_ok: Forwarded to ``create_collection`` as ``exists_ok``.

    Returns:
        A Markdown string linking the source and the new collection.

    Raises:
        gr.Error: If no source is given or the source collection cannot be
            fetched.
    """
    api = HfApi(token=token)
    source_slug = source_slug.strip()
    if not source_slug:
        raise gr.Error("Please provide a source collection slug or URL.")
    # Accept a full collection URL as well as a bare slug. The prefix has to
    # agree with the pattern that extract_slug() matches.
    if source_slug.startswith("https://huggingface.co/collections/"):
        source_slug = extract_slug(source_slug)

    not_found_msg = (
        f"Collection {source_slug} does not exist or you do not have access to it."
    )
    # A failed lookup surfaces as an HTTP error from the client, so convert
    # it into the user-facing error instead of letting it propagate raw.
    try:
        collection = api.get_collection(source_slug)
    except HfHubHTTPError as err:
        raise gr.Error(not_found_msg) from err
    if not collection:
        raise gr.Error(not_found_msg)

    description = f"Copied from {collection.title} using https://huggingface.co/spaces/librarian-bots/collection_cloner."

    # The namespace textbox is pre-filled with the placeholder "username";
    # treat it, like an emptied field, as "no namespace given".
    if dest_namespace is not None:
        dest_namespace = dest_namespace.strip()
    if not dest_namespace or dest_namespace == "username":
        dest_namespace = None

    new_collection = api.create_collection(
        dest_title,
        namespace=dest_namespace,
        exists_ok=exist_ok,
        private=private,
        description=description,
        token=token,
    )

    # Copy items one by one; a failure on one item must not abort the clone.
    for item in collection.items:
        try:
            api.add_collection_item(
                new_collection.slug, item.item_id, item_type=item.item_type
            )
        except HfHubHTTPError as err:
            # Only HTTP 409 means the item is already in the collection; do
            # not claim a duplicate for any other failure.
            response = getattr(err, "response", None)
            if getattr(response, "status_code", None) == 409:
                gr.Info(
                    f"Failed to add item {item.item_id} to collection {new_collection.slug} because it already exists in this collection."
                )
            else:
                gr.Info(
                    f"Failed to add item {item.item_id} to collection {new_collection.slug}: {err}"
                )

    # Usage is only recorded for clones into public collections.
    if not private:
        save_json(collection.slug, new_collection.slug)
    return f"[Collection]({collection.url}) has been cloned into [{new_collection.slug}]({new_collection.url})"
# Page heading rendered at the top of the app.
title = (
    """<h1 style='text-align: center;'> &#129516; Collection Cloner &#129516;</h1>"""
)

# Gradio UI: intro text, token field, source/destination inputs and the
# button that triggers clone_collection.
# NOTE(review): the component nesting below was reconstructed from a copy of
# the file whose indentation was lost — confirm it matches the intended layout.
with gr.Blocks(css="style.css") as demo:
    gr.HTML(title)
    gr.HTML(
        "<p style='text-align: center;'>\n"
        'This space allows you to clone a <a href="https://huggingface.co/docs/hub/collections">Collection</a> '
        "from the Hugging Face Hub into your own namespace.</p>\n"
        "<p style='text-align: center;'> You can edit this cloned Collection to your liking!</p>"
    )
    gr.Markdown(
        "\n**Note**: To track interest in this feature this Space keeps a record of "
        "clones which are cloned into public Collections. Clones into private "
        "Collections are not tracked."
    )
    gr.Markdown("## Authentication")
    gr.Markdown(
        "Token is required to create a new collection and clone private collections. You can get your token from your [profile page](https://huggingface.co/settings/tokens)."
    )
    with gr.Row():
        token = gr.Textbox(
            label="Token",
            type="password",
        )
    with gr.Column():
        gr.Markdown("## Source Collection")
        source_slug = gr.Textbox(
            label="Source Collection slug or URL",
            placeholder="e.g. username/collection-slug",
        )
        gr.Markdown("## Destination Collection info")
        dest_title = gr.Textbox(
            label="Destination Title",
        )
        # "username" is a placeholder value; clone_collection compares the
        # namespace against this exact string.
        dest_namespace = gr.Textbox(
            value="username",
            label="Destination Namespace (optional - defaults to your username)",
            interactive=True,
        )
        with gr.Row():
            private = gr.Checkbox(
                False,
                label="Make new collection private?",
            )
            overwrite = gr.Checkbox(
                False,
                label="Overwrite any collection with same slug as the destination?",
            )
    submit_btn = gr.Button("Clone Collection")
    response = gr.Markdown()
    # Input order must match clone_collection's positional parameters:
    # source_slug, dest_title, token, dest_namespace, private, exist_ok.
    submit_btn.click(
        clone_collection,
        [
            source_slug,
            dest_title,
            token,
            dest_namespace,
            private,
            overwrite,
        ],
        response,
    )

demo.launch()