# yonatanbitton's picture
# Update app.py
# c2f1a80
import json
import gradio as gr
from datasets import load_dataset
import os
# Access token for the dataset hub, read from the `auth_token` environment
# variable; None when the variable is not set.
auth_token = os.environ.get("auth_token")
# Load the "nlphuji/whoops" dataset and keep only its 'test' split.
# NOTE(review): `use_auth_token` may be deprecated in newer `datasets` releases
# in favour of `token` - confirm against the installed version before changing.
whoops = load_dataset("nlphuji/whoops", use_auth_token=auth_token)['test']
# Base URL of the image bucket; get_image_url() appends "/<image_id>.png".
BUCKET_PATH = 'https://wmtis.s3.eu-west-1.amazonaws.com/wmtis_images'
# Work on a pandas DataFrame from here on.
df = whoops.to_pandas()
print(f"Got {len(df)} items in dataframe")
# frac=1 samples every row, i.e. this shuffles the whole table so each start
# of the app shows the rows in a different order.
df = df.sample(frac=1)
def get_image_url(img_id):
    """Return the URL of the PNG for ``img_id`` below ``BUCKET_PATH``."""
    file_name = f"{img_id}.png"
    return f"{BUCKET_PATH}/{file_name}"
# First turn every image id into the URL of its PNG in the bucket ...
df['image_url'] = df['image_id'].apply(get_image_url)
# ... then wrap the URL in a link that opens the full image in a new tab and
# shows the image itself inline in the table cell.
df['image_url'] = df['image_url'].apply(
    lambda url: f'<a href= "{url}" target="_blank"> <img src= "{url}"/> </a>'
)
def dumps(x, c):
    """Serialize the cell value ``x`` of column ``c`` to a JSON string.

    Args:
        x: The cell value. For the caption columns it must be iterable; for
            ``question_answering_pairs`` it must be an iterable of iterables.
        c: Name of the column the value comes from; selects the conversion.

    Returns:
        The JSON text produced by ``json.dumps``.
    """
    if c == 'question_answering_pairs':
        # Sequence of pairs: convert every pair to a plain list.
        payload = [list(pair) for pair in x]
    elif c in ('crowd_captions', 'crowd_underspecified_captions'):
        # Flat sequence: convert the container itself to a plain list.
        payload = list(x)
    else:
        # Every other column is serialized as it is.
        payload = x
    return json.dumps(payload)
# Columns whose cells are converted to JSON text for display. The list is
# written once and reused for the final column selection so the two cannot
# drift apart.
json_columns = [
    'designer_explanation',
    'selected_caption',
    'crowd_captions',
    'crowd_underspecified_captions',
    'question_answering_pairs',
    'commonsense_category',
    'image_id',
    'image_designer',
]
for column in json_columns:
    print(column)
    # `args` hands the column name to dumps() as its second argument.
    df[column] = df[column].apply(dumps, args=(column,))
# Keep only the displayed columns, with the image link first.
df = df[['image_url'] + json_columns]
# Number of rows shown per page.
LINES_NUMBER = 20


def display_df():
    """Return the first ``LINES_NUMBER`` rows of the module-level ``df``."""
    return df.head(LINES_NUMBER)
def display_next(dataframe, end):
    """Return the next page of rows from ``df`` and the new end offset.

    Args:
        dataframe: The table currently shown. Only its length is used, as the
            starting offset when ``end`` is empty (``None`` or ``0``).
        end: End offset of the page shown before this call, or an empty value
            on the first call.

    Returns:
        A ``(rows, end)`` tuple: ``df.iloc[start:end]`` and the end offset to
        pass to the next call.

    Side effects:
        When the next page would run past the last row, the module-level
        ``df`` is reshuffled and paging restarts from row 0.
    """
    global df
    start = int(end or len(dataframe))
    end = start + int(LINES_NUMBER)
    # Wrap only when the page would run past the last row. The previous test
    # (``end >= len(df) - 1``) also wrapped when the page ended exactly on, or
    # one row before, the end of the table, so the final full page was never
    # displayed.
    if end > len(df):
        start = 0
        end = LINES_NUMBER
        # New random order for the next pass over the data.
        df = df.sample(frac=1)
        print("Shuffle")
    # A page shorter than LINES_NUMBER can only occur when the whole table has
    # fewer rows than one page; return what exists instead of failing.
    return df.iloc[start:end], end
# Rows shown in the table when the page first loads.
initial_dataframe = display_df()  # add

# Gradio Blocks
# NOTE(review): the nesting below is reconstructed - the source had lost its
# indentation. Confirm which widgets belong to each Row.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>WHOOPS! Dataset Viewer</center></h1>")

    with gr.Row():
        # Hidden field that carries the end offset of the current page from
        # one "Next Rows" click to the next.
        num_end = gr.Number(visible=False)
        b1 = gr.Button("Get Initial dataframe")
        b2 = gr.Button("Next Rows")

    with gr.Row():
        # NOTE(review): `datatype` lists ten entries while the table built
        # earlier in this file has nine columns - confirm the extra entry is
        # intentional.
        out_dataframe = gr.Dataframe(
            initial_dataframe,
            wrap=True,
            max_rows=LINES_NUMBER,
            overflow_row_behaviour="paginate",
            datatype=["markdown", "markdown", "str", "str", "str", "str", "str", "str", "str", "str"],
            interactive=False,
        )  # add initial dataframe before

    # "Get Initial dataframe" shows the first page again.
    b1.click(fn=display_df, outputs=out_dataframe, api_name="initial_dataframe")
    # "Next Rows" advances one page and stores the new end offset in num_end.
    b2.click(
        fn=display_next,
        inputs=[out_dataframe, num_end],
        outputs=[out_dataframe, num_end],
        api_name="next_rows",
    )

demo.launch(debug=True, show_error=True)