File size: 2,741 Bytes
7930e1d
 
 
0e7ad7d
7930e1d
263f8dd
 
7930e1d
 
 
01c1744
75812ad
7930e1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01c1744
 
 
 
 
 
 
9a6688c
 
01c1744
 
7930e1d
 
c2f1a80
7930e1d
 
 
 
 
 
 
 
 
 
c2f1a80
7930e1d
c2f1a80
7930e1d
 
 
 
 
c2f1a80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import json
import gradio as gr
from datasets import load_dataset
import os 

# Hugging Face access token taken from the environment (None when unset).
auth_token = os.getenv("auth_token")
# Base URL of the bucket that hosts the dataset images.
BUCKET_PATH = 'https://wmtis.s3.eu-west-1.amazonaws.com/wmtis_images'

# Only the 'test' split of the dataset is used by the viewer.
whoops = load_dataset("nlphuji/whoops", use_auth_token=auth_token)['test']

# Convert the split to a pandas DataFrame, then shuffle it: frac=1 samples
# every row, so the result is the same rows in random order.
df = whoops.to_pandas()
print(f"Got {len(df)} items in dataframe")
df = df.sample(frac=1)
def get_image_url(img_id):
    """Return the URL of the PNG image identified by ``img_id``.

    The URL is ``BUCKET_PATH`` followed by ``/<img_id>.png``.
    """
    return "{}/{}.png".format(BUCKET_PATH, img_id)

# Build the 'image_url' column in one pass: map each image id to its bucket
# URL, then wrap that URL in an <img> nested inside a link that opens the
# same URL in a new tab.
df['image_url'] = df['image_id'].apply(get_image_url).apply(
    lambda url: f'<a href= "{url}" target="_blank"> <img src= "{url}"/> </a>')

def dumps(x, c):
    """Serialise one cell value ``x`` of column ``c`` to a JSON string.

    Values are first normalised to plain Python containers according to the
    column they come from:

    * ``question_answering_pairs``: every element of ``x`` becomes a list.
    * ``crowd_captions`` / ``crowd_underspecified_captions``: ``x`` itself
      becomes a list.
    * any other column: ``x`` is passed to ``json.dumps`` unchanged.
    """
    if c == 'question_answering_pairs':
        payload = [list(pair) for pair in x]
    elif c in ('crowd_captions', 'crowd_underspecified_captions'):
        payload = list(x)
    else:
        payload = x
    return json.dumps(payload)

# Columns whose cells are converted to JSON text before being displayed.
JSON_COLUMNS = [
    'designer_explanation',
    'selected_caption',
    'crowd_captions',
    'crowd_underspecified_captions',
    'question_answering_pairs',
    'commonsense_category',
    'image_id',
    'image_designer',
]

for c in JSON_COLUMNS:
    print(c)
    # The column name is bound as a default argument so each cell is
    # serialised with the rules of the column currently being processed.
    df[c] = df[c].apply(lambda value, column=c: dumps(value, column))

# Keep only the displayed columns, with the image column first.
df = df[['image_url'] + JSON_COLUMNS]

# Number of rows shown per page.
LINES_NUMBER = 20

def display_df():
    """Return the first ``LINES_NUMBER`` rows of the module-level ``df``."""
    return df.head(LINES_NUMBER)

def display_next(dataframe, end):
    """Return the next page of rows and the index at which that page ends.

    Args:
        dataframe: The table currently on display. Only its length is used,
            as the starting offset when ``end`` is empty.
        end: Exclusive end index of the previously returned page. A falsy
            value (``None`` or ``0``) means no page has been requested yet.

    Returns:
        A ``(page, end)`` tuple: the rows ``[start, end)`` of the
        module-level ``df`` and the new exclusive end index, to be passed
        back in on the next call.

    Side effects:
        When the next page would run past the last row, the module-level
        ``df`` is replaced by a reshuffled copy, ``"Shuffle"`` is printed,
        and paging restarts from row 0.
    """
    global df
    start = int(end or len(dataframe))
    end = start + int(LINES_NUMBER)
    # Wrap only when the page would run past the last row. A page ending
    # exactly at len(df) is complete and valid (the slice end is exclusive),
    # so it is shown instead of being skipped.
    if end > len(df):
        start = 0
        end = LINES_NUMBER
        df = df.sample(frac=1)
        print("Shuffle")
    # After the check above the page is always full unless df itself holds
    # fewer than LINES_NUMBER rows; in that case the short page is returned
    # as-is, matching what display_df() does for the first page.
    df_images = df.iloc[start:end]
    return df_images, end

# First page of rows, computed once at start-up and passed to the table
# component below as its initial value.
initial_dataframe = display_df()
# Gradio Blocks: a title, a row of controls, and a row holding the table.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>WHOOPS! Dataset Viewer</center></h1>")

    with gr.Row():
        # Hidden number that carries the end index of the current page
        # between successive display_next calls (it is both an input and an
        # output of the "Next Rows" handler below).
        num_end = gr.Number(visible=False)
        b1 = gr.Button("Get Initial dataframe")
        b2 = gr.Button("Next Rows")

    with gr.Row():
        # Read-only table, initialised with the first page.
        # NOTE(review): `datatype` lists ten entries while `df` is reduced to
        # nine columns earlier in this file — confirm the extra entry is
        # harmless for the Gradio version in use.
        out_dataframe = gr.Dataframe(initial_dataframe, wrap=True, max_rows=LINES_NUMBER, overflow_row_behaviour="paginate",
                                     datatype=["markdown", "markdown", "str", "str", "str", "str", "str", "str","str","str"],
                                     interactive=False)

    # "Get Initial dataframe" reloads the first LINES_NUMBER rows of df.
    # NOTE(review): this handler does not write to num_end, so a following
    # "Next Rows" click continues from the previous position — confirm that
    # this is intended.
    b1.click(fn=display_df, outputs=out_dataframe, api_name="initial_dataframe")
    # "Next Rows" receives the displayed table and the stored end index, and
    # writes back the next page together with its new end index.
    b2.click(fn=display_next, inputs=[out_dataframe, num_end], outputs=[out_dataframe, num_end],
             api_name="next_rows")

# Start the app at import time with debug mode and error display enabled.
demo.launch(debug=True, show_error=True)