# Page-capture residue (not part of the program): Spaces status "Runtime error".
import json
import os

import gradio as gr
from datasets import load_dataset
# Access token for the Hugging Face Hub, read from the environment
# (None when the variable is unset).
auth_token = os.environ.get("auth_token")

# NOTE(review): newer `datasets` releases renamed `use_auth_token` to `token`;
# confirm against the installed version before switching.
whoops = load_dataset("nlphuji/whoops", use_auth_token=auth_token)['test']

# Base URL that image file names are appended to.
BUCKET_PATH = 'https://wmtis.s3.eu-west-1.amazonaws.com/wmtis_images'

df = whoops.to_pandas()
print(f"Got {len(df)} items in dataframe")

# frac=1 samples every row, i.e. the frame is shuffled in full.
df = df.sample(frac=1)
def get_image_url(img_id) -> str:
    """Return the URL of the PNG named after *img_id* under ``BUCKET_PATH``."""
    return f"{BUCKET_PATH}/{img_id}.png"
def _image_link_html(url):
    """Return an anchor that opens *url* in a new tab and embeds it as an image."""
    return '<a href= "' + str(url) + '" target="_blank"> <img src= "' + str(url) + '"/> </a>'


# Build the image column in two steps: image id -> URL, then URL -> HTML markup.
df['image_url'] = df['image_id'].apply(get_image_url)
df['image_url'] = df['image_url'].apply(_image_link_html)
def dumps(x, c):
    """Serialize the cell value *x* of column *c* to a JSON string.

    Args:
        x: The cell value.
        c: Name of the column the value comes from; selects the conversion
            applied before encoding.

    Returns:
        The JSON text for *x*.
    """
    # These columns are converted to a plain list before encoding
    # (presumably they arrive as array-like values — verify against the data).
    if c in ('crowd_captions', 'crowd_underspecified_captions'):
        return json.dumps(list(x))
    # Here each element is itself converted to a list as well.
    if c == 'question_answering_pairs':
        return json.dumps([list(xi) for xi in x])
    return json.dumps(x)
# Columns whose cells are replaced by their JSON text. Defined once so the
# encoding loop and the final column selection cannot drift apart.
JSON_COLUMNS = ['designer_explanation', 'selected_caption', 'crowd_captions', 'crowd_underspecified_captions',
                'question_answering_pairs', 'commonsense_category', 'image_id', 'image_designer']

for c in JSON_COLUMNS:
    print(c)
    # The column name is passed explicitly instead of being captured by a closure.
    df[c] = df[c].apply(dumps, args=(c,))

# Keep only the displayed columns, with the image markup first.
df = df[['image_url', *JSON_COLUMNS]]
# Number of rows returned per page.
LINES_NUMBER = 20


def display_df():
    """Return the first ``LINES_NUMBER`` rows of the module-level ``df``."""
    return df.head(LINES_NUMBER)
def display_next(dataframe, end):
    """Return the next page of rows and the index at which that page ends.

    Args:
        dataframe: The rows currently shown. Only its length is used, as the
            starting offset when *end* is falsy.
        end: Exclusive end index of the page currently shown; a falsy value
            (``None`` or ``0``) means no page has been requested yet.

    Returns:
        A ``(rows, end)`` tuple: the slice of the module-level ``df`` to show
        next and its exclusive end index. ``rows`` holds fewer than
        ``LINES_NUMBER`` rows only when ``df`` itself is shorter than a page.

    Side effects:
        When the next page would run past the last row, the module-level
        ``df`` is reshuffled and paging restarts from row 0.
    """
    global df

    start = int(end or len(dataframe))
    end = start + int(LINES_NUMBER)

    # Wrap only when the requested page really exceeds the frame: a page that
    # ends exactly on the last row is still complete and must be shown.
    if end > len(df):
        start = 0
        end = LINES_NUMBER
        df = df.sample(frac=1)
        print("Shuffle")

    return df.iloc[start:end], end
# Rows shown in the table before any button is pressed.
initial_dataframe = display_df()

# Gradio Blocks
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>WHOOPS! Dataset Viewer</center></h1>")
    with gr.Row():
        # Hidden field: fed to display_next and overwritten with the end index it returns.
        num_end = gr.Number(visible=False)
        b1 = gr.Button("Get Initial dataframe")
        b2 = gr.Button("Next Rows")
    with gr.Row():
        # One datatype entry per displayed column (9 columns).
        # NOTE(review): `max_rows` / `overflow_row_behaviour` are not accepted by
        # every Gradio release — confirm against the installed version.
        out_dataframe = gr.Dataframe(initial_dataframe, wrap=True, max_rows=LINES_NUMBER,
                                     overflow_row_behaviour="paginate",
                                     datatype=["markdown", "markdown", "str", "str", "str", "str", "str", "str",
                                               "str"],
                                     interactive=False)
    b1.click(fn=display_df, outputs=out_dataframe, api_name="initial_dataframe")
    b2.click(fn=display_next, inputs=[out_dataframe, num_end], outputs=[out_dataframe, num_end],
             api_name="next_rows")

demo.launch(debug=True, show_error=True)