"""Gradio app that previews the first row group of a Parquet file from a Hub dataset.

The user picks a dataset, optionally a revision, and one of the Parquet files
found in that dataset repository; the first row group of the selected file is
displayed as a dataframe.
"""

import gradio as gr
import pyarrow.parquet as pq
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import HfFileSystem

fs = HfFileSystem()

with gr.Blocks() as demo:
    with gr.Column():
        dataset_search = HuggingfaceHubSearch(
            label="Hub Dataset ID",
            placeholder="Search for dataset id on Huggingface",
            search_type="dataset",
        )
        revision_textbox = gr.Textbox("main")
        parquet_file_dropdown = gr.Dropdown()
    with gr.Column():
        output_dataframe = gr.DataFrame()

    def _show_input_preview(dataset, revision, parquet_file):
        """Yield incremental UI updates for the given dataset/revision/file.

        Args:
            dataset: Hub dataset id, as provided by the search component.
            revision: Repository revision used to build the glob pattern.
            parquet_file: Either an ``int`` index into the list of Parquet
                files globbed from the repository (the file list is then
                re-listed and the dropdown choices refreshed), or the path
                of an already-listed Parquet file.

        Yields:
            Dicts mapping output components to their new values: first the
            revision textbox, then the dropdown, then the dataframe.

        Raises:
            gr.Error: If ``parquet_file`` is an index and the glob matches no
                Parquet files (or fewer than the index requires).
        """
        yield {revision_textbox: revision}

        if isinstance(parquet_file, int):
            parquet_files = fs.glob(f"datasets/{dataset}@{revision}/**/*.parquet")
            try:
                parquet_file = parquet_files[parquet_file]
            except IndexError:
                # Surface a readable message instead of a bare IndexError.
                raise gr.Error(
                    f"No Parquet file found for dataset '{dataset}' at revision '{revision}'."
                ) from None
            yield {
                parquet_file_dropdown: gr.Dropdown(
                    choices=parquet_files, value=parquet_file
                )
            }
        else:
            if not parquet_file:
                # Nothing is selected in the dropdown, so there is nothing to preview.
                return
            yield {parquet_file_dropdown: gr.Dropdown(value=parquet_file)}

        # Only the first row group is read to keep the preview cheap.
        parquet = pq.ParquetFile(parquet_file, filesystem=fs)
        yield {output_dataframe: parquet.read_row_group(0).to_pandas()}

    @dataset_search.change(
        inputs=[dataset_search],
        outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
    )
    def show_input_from_dataset_search(dataset):
        """Preview the first Parquet file of ``dataset`` at revision ``main``."""
        yield from _show_input_preview(dataset, revision="main", parquet_file=0)

    @revision_textbox.change(
        inputs=[dataset_search, revision_textbox],
        outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
    )
    def show_input_from_revision(dataset, revision):
        """Preview the first Parquet file of ``dataset`` at the entered revision."""
        yield from _show_input_preview(dataset, revision=revision, parquet_file=0)

    # Bound to the dropdown's own change event: it was previously attached to
    # ``revision_textbox.change``, so selecting a file had no effect and every
    # revision change also reloaded the stale dropdown value.
    @parquet_file_dropdown.change(
        inputs=[dataset_search, revision_textbox, parquet_file_dropdown],
        outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
    )
    def show_input_from_parquet_file(dataset, revision, parquet_file):
        """Preview the Parquet file currently selected in the dropdown."""
        yield from _show_input_preview(
            dataset, revision=revision, parquet_file=parquet_file
        )


demo.launch()