import json
import gradio as gr
from datasets import load_dataset
import os
# Base URL under which the dataset images are hosted (used by get_image_url).
BUCKET_PATH = 'https://wmtis.s3.eu-west-1.amazonaws.com/wmtis_images'

# Token read from the "auth_token" environment variable (None when unset) and
# handed to load_dataset as use_auth_token.
auth_token = os.getenv("auth_token")
whoops = load_dataset("nlphuji/whoops", use_auth_token=auth_token)['test']

# Convert the 'test' split to a pandas DataFrame, then shuffle its rows:
# sample(frac=1) returns every row in random order.
df = whoops.to_pandas()
print(f"Got {len(df)} items in dataframe")
df = df.sample(frac=1)
def get_image_url(img_id):
    """Return the URL of the PNG named after *img_id* under ``BUCKET_PATH``."""
    return "{}/{}.png".format(BUCKET_PATH, img_id)
# Build the 'image_url' column in two passes: image id -> hosted URL, then
# URL -> link/image markup. The URL must stay inside the markup; mapping it to
# a constant placeholder would discard it and leave the column blank.
# NOTE(review): assumes the table that displays this column renders
# HTML/markdown cells -- confirm against the Gradio component's datatype.
df['image_url'] = df['image_id'].apply(get_image_url)
df['image_url'] = df['image_url'].apply(
    lambda url: f'<a href="{url}" target="_blank"><img src="{url}"/></a>'
)
def dumps(x, c):
    """Serialize the cell value *x* of column *c* to a JSON string.

    The two caption-list columns are converted to a plain ``list`` first, and
    ``question_answering_pairs`` to a list of lists, before encoding. Values
    of every other column are passed to ``json.dumps`` unchanged.
    """
    if c == 'question_answering_pairs':
        payload = [list(pair) for pair in x]
    elif c in ('crowd_captions', 'crowd_underspecified_captions'):
        payload = list(x)
    else:
        payload = x
    return json.dumps(payload)
# JSON-encode every cell of the metadata columns; dumps receives the column
# name as its second argument so it can pick the right conversion.
for c in [
    'designer_explanation',
    'selected_caption',
    'crowd_captions',
    'crowd_underspecified_captions',
    'question_answering_pairs',
    'commonsense_category',
    'image_id',
    'image_designer',
]:
    print(c)
    df[c] = df[c].apply(dumps, args=(c,))

# Keep only these columns, with 'image_url' first.
df = df[[
    'image_url',
    'designer_explanation',
    'selected_caption',
    'crowd_captions',
    'crowd_underspecified_captions',
    'question_answering_pairs',
    'commonsense_category',
    'image_id',
    'image_designer',
]]
# Number of rows returned per page by display_df / display_next.
LINES_NUMBER = 20


def display_df():
    """Return the first ``LINES_NUMBER`` rows of the module-level ``df``."""
    return df.head(LINES_NUMBER)
def display_next(dataframe, end):
    """Return the next page of rows from the module-level ``df``.

    Args:
        dataframe: The rows currently shown. Only its length is used, as the
            starting offset when ``end`` is empty (``None`` or ``0``).
        end: Exclusive end offset of the page currently shown.

    Returns:
        A ``(page, new_end)`` tuple: the next ``LINES_NUMBER`` rows of ``df``
        and the exclusive end offset of that page. If ``df`` holds fewer than
        ``LINES_NUMBER`` rows, the page contains all of them.

    Side effects:
        When the next page would run past the end of ``df`` (or the offset is
        negative), the global ``df`` is reshuffled, "Shuffle" is printed, and
        paging restarts from the first row.
    """
    global df
    start = int(end or len(dataframe))
    end = start + int(LINES_NUMBER)
    # Wrap only when the page would overrun the data. A page ending exactly at
    # len(df) is still complete, so it must be shown before reshuffling.
    if start < 0 or end > len(df):
        start = 0
        end = LINES_NUMBER
        df = df.sample(frac=1)
        print("Shuffle")
    df_images = df.iloc[start:end]
    return df_images, end
# First page of rows, computed once when the module is loaded.
# NOTE(review): presumably the initial value of the table in the UI below -- confirm.
initial_dataframe = display_df()
# Gradio Blocks
with gr.Blocks() as demo:
gr.Markdown("