"""Gradio viewer for the WHOOPS! dataset (``nlphuji/whoops``, ``test`` split).

The script loads the split into a pandas DataFrame, shuffles it, JSON-encodes
the annotation columns so they can be shown as text cells, and serves a
paginated table with "initial" and "next" buttons.
"""
import json
import os

import gradio as gr
from datasets import load_dataset

# Token is read from the environment; it is None when the variable is unset.
auth_token = os.environ.get("auth_token")
whoops = load_dataset("nlphuji/whoops", use_auth_token=auth_token)['test']

BUCKET_PATH = 'https://wmtis.s3.eu-west-1.amazonaws.com/wmtis_images'

# Number of rows shown per page.
LINES_NUMBER = 20

# Columns whose values are serialised to JSON text before display.
JSON_COLUMNS = [
    'designer_explanation',
    'selected_caption',
    'crowd_captions',
    'crowd_underspecified_captions',
    'question_answering_pairs',
    'commonsense_category',
    'image_id',
    'image_designer',
]

# Final column order of the displayed table: the image first, then the rest.
DISPLAY_COLUMNS = ['image_url'] + JSON_COLUMNS

df = whoops.to_pandas()
print(f"Got {len(df)} items in dataframe")
df = df.sample(frac=1)  # shuffle the rows


def get_image_url(img_id):
    """Return the bucket URL of the PNG image identified by ``img_id``."""
    return f"{BUCKET_PATH}/{img_id}.png"


def _image_markdown(url):
    """Return a markdown image tag for ``url`` (the column is rendered as markdown)."""
    # NOTE(review): this cell used to be overwritten with a blank string, which
    # discarded the URL; the exact markup originally intended is not known.
    return f"![image]({url})"


def dumps(x, c):
    """Serialise the cell value ``x`` of column ``c`` to a JSON string.

    The caption columns are converted with ``list(x)`` first, and
    ``question_answering_pairs`` with ``list(...)`` on each element, before
    being passed to ``json.dumps``; every other column is dumped as is.
    """
    if c in ['crowd_captions', 'crowd_underspecified_captions']:
        return json.dumps(list(x))
    elif c == 'question_answering_pairs':
        return json.dumps([list(xi) for xi in x])
    return json.dumps(x)


# Build the image cell from the raw image id *before* image_id is JSON-encoded
# below (after encoding, the id would carry surrounding quotes).
df['image_url'] = df['image_id'].apply(get_image_url)
df['image_url'] = df['image_url'].apply(_image_markdown)

for c in JSON_COLUMNS:
    print(c)
    # Bind the column name as a default so the callable does not depend on the
    # loop variable's later value.
    df[c] = df[c].apply(lambda x, column=c: dumps(x, column))

df = df[DISPLAY_COLUMNS]


def display_df():
    """Return the first ``LINES_NUMBER`` rows of the current (shuffled) table."""
    df_images = df.head(LINES_NUMBER)
    return df_images


def display_next(dataframe, end):
    """Return the next page of rows and the new end offset.

    Args:
        dataframe: The table currently displayed; only its length is used, as
            the starting offset when ``end`` is empty.
        end: Stop offset of the previously shown page (falsy on first use).

    Returns:
        A ``(page, end)`` tuple: the rows ``df.iloc[start:end]`` and the stop
        offset to pass back on the next call.

    When the next window would run past the end of the table, the module-level
    ``df`` is reshuffled and paging restarts from the first row.
    """
    global df  # the shuffled table is rebound on wrap-around

    start = int(end or len(dataframe))
    end = start + int(LINES_NUMBER)

    # Wrap only when the window would overrun the table, so the last full page
    # is still shown before reshuffling.
    if end > len(df):
        start = 0
        end = LINES_NUMBER
        df = df.sample(frac=1)
        print("Shuffle")

    df_images = df.iloc[start:end]
    return df_images, end


# Page shown when the app first loads.
initial_dataframe = display_df()

# Gradio Blocks
with gr.Blocks() as demo:
    gr.Markdown("\n\nWHOOPS! Dataset Viewer\n\n")
    with gr.Row():
        # Hidden state: stop offset of the page currently displayed.
        num_end = gr.Number(visible=False)
        b1 = gr.Button("Get Initial dataframe")
        b2 = gr.Button("Next Rows")
    with gr.Row():
        out_dataframe = gr.Dataframe(
            initial_dataframe,
            wrap=True,
            max_rows=LINES_NUMBER,
            overflow_row_behaviour="paginate",
            # One datatype per displayed column: the first two are rendered as
            # markdown, the remaining ones as plain strings.
            datatype=["markdown", "markdown"] + ["str"] * (len(DISPLAY_COLUMNS) - 2),
            interactive=False,
        )
    b1.click(fn=display_df, outputs=out_dataframe, api_name="initial_dataframe")
    b2.click(
        fn=display_next,
        inputs=[out_dataframe, num_end],
        outputs=[out_dataframe, num_end],
        api_name="next_rows",
    )

demo.launch(debug=True, show_error=True)