Merge pull request #1 from rerun-io/jleibs/hugging_face_deploy
Files changed:
- .gitignore +3 -0
- Dockerfile +29 -0
- README.md +32 -4
- app.py +90 -0
- dataset_conversion.py +58 -0
- lychee.toml +1 -0
- main.py +7 -54
- requirements.txt +2 -0
.gitignore
CHANGED
@@ -15,3 +15,6 @@ target_wasm
 # Pixi environment
 .pixi
 .ruff_cache
+
+tmp/**
+venv/**
Dockerfile
ADDED
@@ -0,0 +1,29 @@
+# This Dockerfile is used for creating the Hugging Face docker space
+# See: https://huggingface.co/docs/hub/en/spaces-sdks-docker
+
+FROM python:3.11.8
+
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+
+# Switch to the "user" user
+USER user
+
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+# Set the working directory to the user's home directory
+WORKDIR $HOME/app
+
+# Try and run pip command after setting the user with `USER user` to avoid permission issues with Python
+RUN pip install --no-cache-dir --upgrade pip
+
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+
+# Install requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+# Start the FastAPI app on port 7860, the default port expected by Spaces
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
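The Dockerfile follows the standard recipe for Hugging Face Docker Spaces: run as a non-root `user` (UID 1000), install dependencies into that user's home, and serve uvicorn on port 7860. A minimal sketch of a local smoke test, assuming the image has already been built and started (the `preview_dataset` tag is hypothetical):

```python
# Hypothetical smoke test; assumes the container is already running locally, e.g.:
#   docker build -t preview_dataset . && docker run -p 7860:7860 preview_dataset
import urllib.request

# The Gradio UI is mounted at "/", so the root should answer with HTTP 200:
with urllib.request.urlopen("http://localhost:7860/") as resp:
    assert resp.status == 200
```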
README.md
CHANGED
@@ -1,5 +1,16 @@
-# Rerun visualization of Hugging Face datasets
-Visualize Hugging Face datasets using [Rerun](https://www.rerun.io/).
+---
+title: Preview Dataset
+emoji: 👀
+colorFrom: yellow
+colorTo: yellow
+sdk: docker
+app_port: 7860
+pinned: false
+license: mit
+---
+
+# Rerun visualization of Hugging Face datasets
+Visualize Hugging Face datasets using [Rerun](https://www.rerun.io/).
 
 Originally built for the LeRobot datasets:
 
@@ -8,8 +19,9 @@ Originally built for the LeRobot datasets:
 
 https://github.com/rerun-io/python-example-lerobot/assets/1148717/19e9983c-531f-4c48-9b37-37c5cbe1e0bd
 
+Deployed live on Hugging Face: https://huggingface.co/spaces/rerun/preview_dataset
 
-## Getting started
+## Getting started (native)
 Requires Python 3.10 or higher.
 
 ```sh
@@ -17,7 +29,12 @@ pip install -r requirements.txt
 python main.py --dataset lerobot/aloha_sim_insertion_human
 ```
 
-Example datasets to explore:
+## Getting started (gradio)
+```sh
+pip install -r requirements.txt
+uvicorn app:app --reload
+```
+## Example datasets to explore:
 * `lerobot/aloha_sim_insertion_human`
 * `lerobot/aloha_sim_insertion_scripted`
 * `lerobot/aloha_sim_transfer_cube_human`
@@ -27,5 +44,16 @@ Example datasets to explore:
 * `nateraw/kitti`
 * `sayakpaul/nyu_depth_v2`
 
+## Deploying to Hugging Face
+
+Hugging Face space runs off of the head `main` branch pushed to: https://huggingface.co/spaces/rerun/preview_dataset/tree/main
+
+To update this from the rerun repository, add the Hugging Face repository as an additional remote,
+and then push to it.
+```sh
+git remote add huggingface git@hf.co:spaces/rerun/preview_dataset
+git push huggingface main
+```
+
 ## Note for the maintainer
 You can update this repository with the latest changes from https://github.com/rerun-io/rerun_template by running `scripts/template_update.py update --languages python`.
app.py
ADDED
@@ -0,0 +1,90 @@
+"""
+A Gradio app that uses Rerun to visualize a Hugging Face dataset.
+
+This app mounts the Gradio app inside of FastAPI in order to set the CORS headers.
+
+Run this from the terminal as you would normally start a FastAPI app: `uvicorn app:app`
+and navigate to http://localhost:8000 in your browser.
+"""
+
+from __future__ import annotations
+
+import urllib
+from pathlib import Path
+
+import gradio as gr
+import rerun as rr
+from datasets import load_dataset
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from gradio_huggingfacehub_search import HuggingfaceHubSearch
+
+from dataset_conversion import log_dataset_to_rerun
+
+CUSTOM_PATH = "/"
+
+app = FastAPI()
+
+origins = [
+    "https://app.rerun.io",
+]
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=origins,
+)
+
+
+def html_template(rrd: str, app_url: str = "https://app.rerun.io") -> str:
+    encoded_url = urllib.parse.quote(rrd)
+    return f"""<div style="width:100%; height:70vh;"><iframe style="width:100%; height:100%;" src="{app_url}?url={encoded_url}" frameborder="0" allowfullscreen=""></iframe></div>"""
+
+
+def show_dataset(dataset_id: str, episode_index: int) -> str:
+    rr.init("dataset")
+
+    # TODO(jleibs): manage cache better and put in proper storage
+    filename = Path(f"tmp/{dataset_id}_{episode_index}.rrd")
+    if not filename.exists():
+        filename.parent.mkdir(parents=True, exist_ok=True)
+
+        rr.save(filename.as_posix())
+
+        dataset = load_dataset(dataset_id, split="train", streaming=True)
+
+        # This is for LeRobot datasets (https://huggingface.co/lerobot):
+        ds_subset = dataset.filter(
+            lambda frame: "episode_index" not in frame or frame["episode_index"] == episode_index
+        )
+
+        log_dataset_to_rerun(ds_subset)
+
+    return filename.as_posix()
+
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        search_in = HuggingfaceHubSearch(
+            "lerobot/pusht",
+            label="Search Huggingface Hub",
+            placeholder="Search for models on Huggingface",
+            search_type="dataset",
+        )
+        episode_index = gr.Number(1, label="Episode Index")
+        button = gr.Button("Show Dataset")
+    with gr.Row():
+        rrd = gr.File()
+    with gr.Row():
+        viewer = gr.HTML()
+
+    button.click(show_dataset, inputs=[search_in, episode_index], outputs=rrd)
+    rrd.change(
+        html_template,
+        js="""(rrd) => { console.log(rrd.url); return rrd.url}""",
+        inputs=[rrd],
+        outputs=viewer,
+        preprocess=False,
+    )
+
+
+app = gr.mount_gradio_app(app, demo, path=CUSTOM_PATH)
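Mounting the Gradio `Blocks` inside FastAPI exists purely so the CORS middleware can run: the hosted viewer at app.rerun.io fetches the generated `.rrd` file cross-origin, which only works if responses carry an `Access-Control-Allow-Origin` header. A minimal sketch of how to check that behavior with FastAPI's test client (the `/health` route is hypothetical, added only to have something to request):

```python
# Minimal sketch: verify CORSMiddleware emits the header app.rerun.io needs.
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.testclient import TestClient

app = FastAPI()
app.add_middleware(CORSMiddleware, allow_origins=["https://app.rerun.io"])


@app.get("/health")  # hypothetical route, just something to request
def health() -> dict[str, str]:
    return {"status": "ok"}


client = TestClient(app)
resp = client.get("/health", headers={"Origin": "https://app.rerun.io"})
# For an allowed origin, the middleware reflects it back on simple requests:
assert resp.headers["access-control-allow-origin"] == "https://app.rerun.io"
```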
dataset_conversion.py
ADDED
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+import numpy as np
+import rerun as rr
+from PIL import Image
+from tqdm import tqdm
+
+logger = logging.getLogger(__name__)
+
+
+def to_rerun(column_name: str, value: Any) -> Any:
+    """Do our best to interpret the value and convert it to a Rerun-compatible archetype."""
+    if isinstance(value, Image.Image):
+        if "depth" in column_name:
+            return rr.DepthImage(value)
+        else:
+            return rr.Image(value)
+    elif isinstance(value, np.ndarray):
+        return rr.Tensor(value)
+    elif isinstance(value, list):
+        if isinstance(value[0], float):
+            return rr.BarChart(value)
+        else:
+            return rr.TextDocument(str(value))  # Fallback to text
+    elif isinstance(value, float) or isinstance(value, int):
+        return rr.Scalar(value)
+    else:
+        return rr.TextDocument(str(value))  # Fallback to text
+
+
+def log_dataset_to_rerun(dataset: Any) -> None:
+    # Special time-like columns for LeRobot datasets (https://huggingface.co/datasets/lerobot/):
+    TIME_LIKE = {"index", "frame_id", "timestamp"}
+
+    # Ignore these columns (again, LeRobot-specific):
+    IGNORE = {"episode_data_index_from", "episode_data_index_to", "episode_id"}
+
+    for row in tqdm(dataset):
+        # Handle time-like columns first, since they set a state (time is an index in Rerun):
+        for column_name in TIME_LIKE:
+            if column_name in row:
+                cell = row[column_name]
+                if isinstance(cell, int):
+                    rr.set_time_sequence(column_name, cell)
+                elif isinstance(cell, float):
+                    rr.set_time_seconds(column_name, cell)  # assume seconds
+                else:
+                    print(f"Unknown time-like column {column_name} with value {cell}")
+
+        # Now log actual data columns:
+        for column_name, cell in row.items():
+            if column_name in TIME_LIKE or column_name in IGNORE:
+                continue
+
+            rr.log(column_name, to_rerun(column_name, cell))
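`to_rerun` is a best-effort mapping from dataset cell types to Rerun archetypes: PIL images become `rr.Image` (or `rr.DepthImage` when the column name contains "depth"), numpy arrays become `rr.Tensor`, lists of floats become `rr.BarChart`, plain numbers become `rr.Scalar`, and anything else falls back to `rr.TextDocument`. A quick illustration, assuming the module above is importable; the column names here are made up:

```python
import numpy as np
from PIL import Image

from dataset_conversion import to_rerun

# Hypothetical column names, one per branch of to_rerun:
print(type(to_rerun("observation.image", Image.new("RGB", (64, 64)))))  # rr.Image
print(type(to_rerun("state", np.zeros(7))))                             # rr.Tensor
print(type(to_rerun("action", [0.1, 0.2, 0.3])))                        # rr.BarChart
print(type(to_rerun("reward", 0.5)))                                    # rr.Scalar
print(type(to_rerun("task", "push the block")))                         # rr.TextDocument
```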
lychee.toml
CHANGED
@@ -93,6 +93,7 @@ exclude = [
 'https://stackoverflow.com/.', # Stackoverflow links are no longer accessible from CI.
 'https://www.tensorflow.org/', # tensorflow.org apparently blocks CI.
 'https://9p.io/sys/doc/lexnames.html', # Works locally but on ci we get: `Failed: Network error: error:0A000152:SSL routines:final_renegotiate:unsafe legacy renegotiation disabled:ssl/statem/extensions.c:946:`
+'https://huggingface.co/.*', # huggingface.co apparently blocks CI and returns 401.
 
 # Need GitHub login.
 'https://github.com/rerun-io/landing',
main.py
CHANGED
@@ -4,79 +4,32 @@ from __future__ import annotations
 
 import argparse
 import logging
-from typing import Any
 
-import numpy as np
 import rerun as rr
 from datasets import load_dataset
-from PIL import Image
-from tqdm import tqdm
 
-
-
-
-def to_rerun(column_name: str, value: Any) -> Any:
-    """Do our best to interpret the value and convert it to a Rerun-compatible archetype."""
-    if isinstance(value, Image.Image):
-        if "depth" in column_name:
-            return rr.DepthImage(value)
-        else:
-            return rr.Image(value)
-    elif isinstance(value, np.ndarray):
-        return rr.Tensor(value)
-    elif isinstance(value, list):
-        if isinstance(value[0], float):
-            return rr.BarChart(value)
-        else:
-            return rr.TextDocument(str(value))  # Fallback to text
-    elif isinstance(value, float) or isinstance(value, int):
-        return rr.Scalar(value)
-    else:
-        return rr.TextDocument(str(value))  # Fallback to text
-
-
-def log_dataset_to_rerun(dataset) -> None:
-    # Special time-like columns for LeRobot datasets (https://huggingface.co/datasets/lerobot/):
-    TIME_LIKE = {"index", "frame_id", "timestamp"}
+from dataset_conversion import log_dataset_to_rerun
 
-
-    IGNORE = {"episode_data_index_from", "episode_data_index_to", "episode_id"}
-
-    for row in tqdm(dataset):
-        # Handle time-like columns first, since they set a state (time is an index in Rerun):
-        for column_name in TIME_LIKE:
-            if column_name in row:
-                cell = row[column_name]
-                if isinstance(cell, int):
-                    rr.set_time_sequence(column_name, cell)
-                elif isinstance(cell, float):
-                    rr.set_time_seconds(column_name, cell)  # assume seconds
-                else:
-                    print(f"Unknown time-like column {column_name} with value {cell}")
-
-        # Now log actual data columns:
-        for column_name, cell in row.items():
-            if column_name in TIME_LIKE or column_name in IGNORE:
-                continue
-
-            rr.log(column_name, to_rerun(column_name, cell))
+logger = logging.getLogger(__name__)
 
 
-def main():
+def main() -> None:
     # Ensure the logging gets written to stderr:
     logging.getLogger().addHandler(logging.StreamHandler())
    logging.getLogger().setLevel(logging.INFO)
 
     parser = argparse.ArgumentParser(description="Log a HuggingFace dataset to Rerun.")
     parser.add_argument("--dataset", default="lerobot/pusht", help="The name of the dataset to load")
-    parser.add_argument("--episode-
+    parser.add_argument("--episode-index", default=1, help="Which episode to select")
     args = parser.parse_args()
 
     print("Loading dataset…")
     dataset = load_dataset(args.dataset, split="train", streaming=True)
 
     # This is for LeRobot datasets (https://huggingface.co/lerobot):
-    ds_subset = dataset.filter(
+    ds_subset = dataset.filter(
+        lambda frame: "episode_index" not in frame or frame["episode_index"] == args.episode_index
+    )
 
     print("Starting Rerun…")
     rr.init(f"rerun_example_huggingface {args.dataset}", spawn=True)
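After the refactor, `main.py` keeps only the CLI plumbing and delegates all logging to `dataset_conversion.log_dataset_to_rerun`. The `streaming=True` plus `.filter(...)` combination is the key pattern: `load_dataset` returns an `IterableDataset`, so the episode filter is applied lazily per row instead of after downloading the whole dataset. A standalone sketch of that pattern follows; the dataset name and episode index are just examples, and note that argparse would pass `--episode-index` through as a string unless `type=int` is specified:

```python
from datasets import load_dataset

# streaming=True yields an IterableDataset: rows are fetched lazily.
dataset = load_dataset("lerobot/pusht", split="train", streaming=True)

episode_index = 1  # example value; main.py reads this from --episode-index
ds_subset = dataset.filter(
    lambda frame: "episode_index" not in frame or frame["episode_index"] == episode_index
)

# take(3) previews a few matching rows without materializing the dataset:
for row in ds_subset.take(3):
    print(sorted(row.keys()))
```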
requirements.txt
CHANGED
@@ -1,5 +1,7 @@
 datasets
 h5py
+gradio==4.27.0
+gradio_huggingfacehub_search
 pillow
 rerun-sdk>=0.15.0,<0.16.0
 tqdm