# Start by setting the token and debug mode before starting the schedulers
import os

from huggingface_hub import logging, login

login(token=os.environ.get("HF_TOKEN"), write_permission=True)
logging.set_verbosity_debug()

# Start apps
from pathlib import Path

import gradio as gr

from app_1M_image import get_demo as get_demo_1M_image
from app_image import get_demo as get_demo_image
from app_json import get_demo as get_demo_json


def _get_demo_code(path: str) -> str:
    """Return a demo's source code, rewritten as a standalone script that can be launched as-is."""
    code = Path(path).read_text()
    code = code.replace("def get_demo():", "with gr.Blocks() as demo:")
    code += "\n\ndemo.launch()"
    return code
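
# All three tabs below build on `huggingface_hub.CommitScheduler`, which watches
# a local folder and pushes its content to a repo on the Hub from a background
# thread (see the scheduled-uploads guide linked in DEMO_EXPLANATION). A minimal
# sketch of the pattern, not used by this app: the repo_id and folder below are
# placeholders, not the demo datasets.
def _example_commit_scheduler():
    from huggingface_hub import CommitScheduler

    return CommitScheduler(
        repo_id="my-username/my-dataset",  # placeholder repo, created if it does not exist
        repo_type="dataset",
        folder_path="example-data",  # local folder watched by the scheduler
        every=5,  # push a commit at most every 5 minutes
    )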

DEMO_EXPLANATION = """
# How to persist data from a Space to a Dataset?

This demo shows how to leverage `gradio` and `huggingface_hub` to save data from a Space to a Dataset on the Hub. When doing so, a few things must be taken care of: file formats, concurrent writes, name collisions, number of commits, number of files, and more. The tabs below show different ways of implementing a "save to dataset" feature; depending on the complexity and usage of your app, you might prefer one or the other. This Space is a companion demo for the `huggingface_hub` [scheduled-uploads guide](https://huggingface.co./docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). Please check it out if you need more technical details.
"""

JSON_DEMO_EXPLANATION = """
## Use case

- Save inputs and outputs
- Build an annotation platform

## Data

JSON-serializable data only: text and numeric values, but no binaries.

## Robustness

Works with concurrent users and replicas.

## Limitations

If you expect millions of lines, you must split the local JSON file into multiple files to avoid getting your file tracked as LFS (5MB) on the Hub.

## Demo
"""
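
# A sketch of the append-only pattern demonstrated in the "JSON Dataset" tab
# (illustrative names, not the actual helpers from app_json.py): each entry is
# appended as a single JSON line, and the write is wrapped in `scheduler.lock`
# so the background push never uploads a half-written file.
def _example_save_json(scheduler, json_path: Path, greeting: str) -> None:
    import json
    from datetime import datetime

    with scheduler.lock:  # avoid writing while a commit is in progress
        with json_path.open("a") as f:
            json.dump({"greeting": greeting, "datetime": datetime.now().isoformat()}, f)
            f.write("\n")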

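# A sketch of the image-saving pattern used by the two image tabs described
# below (illustrative names; the real implementations are app_image.py and
# app_1M_image.py, shown in the tabs): each image gets a collision-free
# filename plus one metadata line. The 1M-images variant follows the same idea
# but zips the folder before upload, so the repo stores a few archives instead
# of millions of small files.
def _example_save_image(scheduler, folder: Path, image, caption: str) -> None:
    import json
    import uuid

    name = uuid.uuid4().hex  # collision-free even with concurrent users
    with scheduler.lock:
        image.save(folder / f"{name}.png")  # assumes a PIL.Image
        with (folder / "metadata.jsonl").open("a") as f:
            json.dump({"file_name": f"{name}.png", "caption": caption}, f)
            f.write("\n")
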
IMAGE_DEMO_EXPLANATION = """
## Use case

Save images with metadata (caption, parameters, datetime, etc.).

## Robustness

Works with concurrent users and replicas.

## Limitations

- Only 10k images per folder are supported on the Hub. If you expect more usage, you must save your data in subfolders.
- Only 1M images per repo are supported on the Hub. If you expect more usage, you can zip your data before uploading. See the _1M images Dataset_ demo.

## Demo
"""

IMAGE_1M_DEMO_EXPLANATION = """
## Use case

Save 1M images with metadata (caption, parameters, datetime, etc.).

## Robustness

Works with concurrent users and replicas.

## Limitations

None.

## Demo
"""

with gr.Blocks() as demo:
    gr.Markdown(DEMO_EXPLANATION)

    with gr.Tab("JSON Dataset"):
        gr.Markdown(JSON_DEMO_EXPLANATION)
        get_demo_json()
        gr.Markdown("## Result\n\nhttps://huggingface.co./datasets/Wauplin/example-space-to-dataset-json\n\n## Code")
        with gr.Accordion("Source code", open=True):
            gr.Code(_get_demo_code("app_json.py"), language="python")

    with gr.Tab("Image Dataset"):
        gr.Markdown(IMAGE_DEMO_EXPLANATION)
        get_demo_image()
        gr.Markdown("## Result\n\nhttps://huggingface.co./datasets/Wauplin/example-space-to-dataset-image\n\n## Code")
        with gr.Accordion("Source code", open=True):
            gr.Code(_get_demo_code("app_image.py"), language="python")

    with gr.Tab("1M images Dataset"):
        gr.Markdown(IMAGE_1M_DEMO_EXPLANATION)
        get_demo_1M_image()
        gr.Markdown(
            "## Result\n\nhttps://huggingface.co./datasets/Wauplin/example-space-to-dataset-image-zip\n\n## Code"
        )
        with gr.Accordion("Source code", open=True):
            gr.Code(_get_demo_code("app_1M_image.py"), language="python")

demo.launch()