parquet-viewer / app.py
lhoestq's picture
lhoestq HF staff
wip
ec912e5
raw
history blame
2.03 kB
import gradio as gr
import pyarrow.parquet as pq
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import HfFileSystem
# Filesystem-style access to the Hugging Face Hub (used to list and read Parquet files).
fs = HfFileSystem()

with gr.Blocks() as demo:
    # Inputs: which dataset, which revision of it, and which Parquet file to preview.
    with gr.Column():
        dataset_search = HuggingfaceHubSearch(
            label="Hub Dataset ID",
            placeholder="Search for dataset id on Huggingface",
            search_type="dataset",
        )
        # Label the inputs so users can tell what each field expects.
        revision_textbox = gr.Textbox("main", label="Revision")
        parquet_file_dropdown = gr.Dropdown(label="Parquet file")
    # Output: the table preview of the selected Parquet file.
    with gr.Column():
        output_dataframe = gr.DataFrame()
def _show_input_preview(dataset, revision, parquet_file):
yield {revision_textbox: revision}
if isinstance(parquet_file, int):
parquet_files = fs.glob(f"datasets/{dataset}@{revision}/**/*.parquet")
parquet_file = parquet_files[parquet_file]
yield {parquet_file_dropdown: gr.Dropdown(choices=parquet_files, value=parquet_file)}
else:
yield {parquet_file_dropdown: gr.Dropdown(value=parquet_file)}
yield {output_dataframe: pq.ParquetFile(parquet_file, filesystem=fs).read_row_group(0).to_pandas()}
@dataset_search.change(
    inputs=[dataset_search],
    outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
)
def show_input_from_dataset_search(dataset):
    """Preview the first Parquet file of ``dataset`` on the ``main`` revision.

    Runs when the dataset search value changes and relays every update
    produced by ``_show_input_preview``.
    """
    for update in _show_input_preview(dataset, revision="main", parquet_file=0):
        yield update
@revision_textbox.change(
    inputs=[dataset_search, revision_textbox],
    outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
)
def show_input_from_revision(dataset, revision):
    """Preview the first Parquet file of ``dataset`` at the given ``revision``.

    Runs when the revision textbox value changes and relays every update
    produced by ``_show_input_preview``.
    """
    for update in _show_input_preview(dataset, revision=revision, parquet_file=0):
        yield update
# Listen on the dropdown itself: this handler previews the selected file as is,
# so it must run when the selection changes, not when the revision does.
@parquet_file_dropdown.change(
    inputs=[dataset_search, revision_textbox, parquet_file_dropdown],
    outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
)
def show_input_from_parquet_file(dataset, revision, parquet_file):
    """Preview the Parquet file currently selected in the dropdown.

    Args:
        dataset: Current value of the dataset search component.
        revision: Current value of the revision textbox.
        parquet_file: Current value of the Parquet file dropdown.

    Yields:
        Every component update produced by ``_show_input_preview``.
    """
    yield from _show_input_preview(dataset, revision=revision, parquet_file=parquet_file)
# Start the Gradio server only when run as a script, so that importing this
# module does not launch it.
if __name__ == "__main__":
    demo.launch()