File size: 2,026 Bytes
ec912e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
import pyarrow.parquet as pq
from gradio_huggingfacehub_search import HuggingfaceHubSearch
from huggingface_hub import HfFileSystem

# Shared Hub filesystem handle; _show_input_preview uses it both to glob for
# Parquet files and as the filesystem pyarrow reads them through.
fs = HfFileSystem()

# UI layout. The component variables below are referenced by the event
# handlers defined further down inside this same Blocks context.
with gr.Blocks() as demo:
    # Inputs: dataset search box, revision, and the Parquet file selector.
    with gr.Column():
        dataset_search = HuggingfaceHubSearch(
            label="Hub Dataset ID",
            placeholder="Search for dataset id on Huggingface",
            search_type="dataset",
        )
        # Created with "main" as its initial value.
        revision_textbox = gr.Textbox("main")
        # Starts without choices; _show_input_preview fills them in from the
        # Parquet files it finds for the selected dataset and revision.
        parquet_file_dropdown = gr.Dropdown()
    # Output: table showing the previewed rows.
    with gr.Column():
        output_dataframe = gr.DataFrame()

    def _show_input_preview(dataset, revision, parquet_file):
        """Stream UI updates that preview one Parquet file of a Hub dataset.

        Yields Gradio update dicts in this order: the revision textbox value,
        the Parquet file dropdown, and finally the first row group of the
        selected file converted to a pandas DataFrame.

        Args:
            dataset: Hub dataset id used to build the ``datasets/{dataset}@{revision}``
                path that is searched for Parquet files.
            revision: Revision of the dataset repository (for example ``"main"``).
            parquet_file: Either an ``int`` index into the list of Parquet files
                found for ``dataset`` at ``revision`` (the dropdown choices are
                refreshed in that case), or the path of a file to open directly.

        Raises:
            gr.Error: If ``parquet_file`` is an index and no Parquet file exists
                at that position (for instance the dataset has no Parquet files).
        """
        yield {revision_textbox: revision}
        if isinstance(parquet_file, int):
            if not dataset:
                # No dataset selected: there is nothing to list or read.
                return
            parquet_files = fs.glob(f"datasets/{dataset}@{revision}/**/*.parquet")
            try:
                parquet_file = parquet_files[parquet_file]
            except IndexError:
                # Surface a readable message instead of a bare IndexError.
                raise gr.Error(
                    f"No Parquet file found for dataset {dataset!r} at revision {revision!r}."
                ) from None
            yield {parquet_file_dropdown: gr.Dropdown(choices=parquet_files, value=parquet_file)}
        elif parquet_file:
            yield {parquet_file_dropdown: gr.Dropdown(value=parquet_file)}
        else:
            # The dropdown has no selection, so there is no file to open.
            return
        # Only the first row group is read to keep the preview cheap.
        first_row_group = pq.ParquetFile(parquet_file, filesystem=fs).read_row_group(0)
        yield {output_dataframe: first_row_group.to_pandas()}

    @dataset_search.change(
        inputs=[dataset_search],
        outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
    )
    def show_input_from_dataset_search(dataset):
        """Preview the first Parquet file on "main" when the dataset id changes."""
        # Passing index 0 makes the helper list the files and pick the first one.
        yield from _show_input_preview(dataset, "main", 0)

    @revision_textbox.change(
        inputs=[dataset_search, revision_textbox],
        outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
    )
    def show_input_from_revision(dataset, revision):
        """Preview the first Parquet file of the dataset at the edited revision."""
        # Passing index 0 makes the helper re-list the files for this revision.
        yield from _show_input_preview(dataset, revision, 0)

    @parquet_file_dropdown.change(
        inputs=[dataset_search, revision_textbox, parquet_file_dropdown],
        outputs=[revision_textbox, parquet_file_dropdown, output_dataframe],
    )
    def show_input_from_parquet_file(dataset, revision, parquet_file):
        """Preview the Parquet file currently selected in the dropdown.

        Bound to the dropdown's own ``change`` event so that picking another
        file refreshes the table. Binding it to the revision textbox would
        duplicate ``show_input_from_revision`` and leave the dropdown inert.

        Args:
            dataset: Value of the dataset search box.
            revision: Value of the revision textbox.
            parquet_file: Value of the Parquet file dropdown, forwarded as-is
                to ``_show_input_preview``.
        """
        yield from _show_input_preview(dataset, revision=revision, parquet_file=parquet_file)

# Launch the Blocks app built above; this runs at module top level.
demo.launch()