# Spaces: Sleeping
import gradio as gr | |
import pyarrow.parquet as pq | |
from gradio_huggingfacehub_search import HuggingfaceHubSearch | |
from huggingface_hub import HfFileSystem | |
# Shared Hugging Face Hub filesystem client; the preview code uses it to glob
# for parquet files and to open the selected one.
fs = HfFileSystem()

# Page layout: the three input widgets are created in the first column and
# the dataframe preview in the second.
with gr.Blocks() as demo:
    with gr.Column():
        dataset_search = HuggingfaceHubSearch(
            search_type="dataset",
            label="Hub Dataset ID",
            placeholder="Search for dataset id on Huggingface",
        )
        revision_textbox = gr.Textbox(value="main")
        parquet_file_dropdown = gr.Dropdown()
    with gr.Column():
        output_dataframe = gr.DataFrame()
def _show_input_preview(dataset, revision, parquet_file):
    """Yield the UI updates that preview one parquet file of a Hub dataset.

    Args:
        dataset: Hub dataset id, interpolated into the ``datasets/...`` path.
        revision: Revision placed after ``@`` in that path; it is also echoed
            back into the revision textbox.
        parquet_file: Either an ``int`` index into the parquet files found by
            globbing the dataset at ``revision``, or the path of the parquet
            file to open.

    Yields:
        Dicts mapping a component to its new value, in this order: the
        revision textbox, the parquet file dropdown, the output dataframe.

    Raises:
        gr.Error: If ``parquet_file`` is an index and the glob matched no
            parquet files.
    """
    yield {revision_textbox: revision}

    if isinstance(parquet_file, int):
        # An index means "pick from whatever the dataset contains": list the
        # files first so the dropdown choices can be refreshed as well.
        parquet_files = fs.glob(f"datasets/{dataset}@{revision}/**/*.parquet")
        if not parquet_files:
            # Without this guard the indexing below raises a bare IndexError,
            # which gives the user no hint about what went wrong.
            raise gr.Error(
                f"No parquet files found in dataset '{dataset}' at revision '{revision}'."
            )
        parquet_file = parquet_files[parquet_file]
        yield {parquet_file_dropdown: gr.Dropdown(choices=parquet_files, value=parquet_file)}
    else:
        # A concrete path was given: only the selected value changes.
        yield {parquet_file_dropdown: gr.Dropdown(value=parquet_file)}

    # Only the first row group is read and shown.
    first_row_group = pq.ParquetFile(parquet_file, filesystem=fs).read_row_group(0)
    yield {output_dataframe: first_row_group.to_pandas()}
def show_input_from_dataset_search(dataset):
    """Yield the preview updates for ``dataset`` using the default selection.

    The revision is fixed to ``"main"`` and the parquet file is chosen by
    index 0.

    NOTE(review): no event binding for this handler is visible in this
    file -- confirm it is registered on a component.
    """
    updates = _show_input_preview(dataset, revision="main", parquet_file=0)
    yield from updates
def show_input_from_revision(dataset, revision):
    """Yield the preview updates for ``dataset`` at the given ``revision``.

    The parquet file is chosen by index 0.

    NOTE(review): no event binding for this handler is visible in this
    file -- confirm it is registered on a component.
    """
    updates = _show_input_preview(dataset, revision=revision, parquet_file=0)
    yield from updates
def show_input_from_parquet_file(dataset, revision, parquet_file):
    """Yield the preview updates for an explicitly chosen parquet file.

    All three arguments are forwarded unchanged to ``_show_input_preview``.

    NOTE(review): no event binding for this handler is visible in this
    file -- confirm it is registered on a component.
    """
    updates = _show_input_preview(
        dataset,
        revision=revision,
        parquet_file=parquet_file,
    )
    yield from updates
# Start the Gradio app defined above.
demo.launch()