File size: 3,784 Bytes
d18a9b2
 
 
6a483a1
 
d18a9b2
 
 
 
 
 
 
 
 
 
6a483a1
 
 
 
 
 
 
 
 
 
d18a9b2
 
 
 
 
 
6a483a1
d18a9b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2eb115d
d18a9b2
 
 
2eb115d
d18a9b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import gradio as gr
import pandas as pd
from datasets import load_dataset
from PIL import Image, ImageOps


# Per-sample evaluation results. The columns read elsewhere in this file are
# "task", "custom_id", "content_raw", "gt" and "is_correct".
# NOTE(review): unpickling can execute arbitrary code, so ./df_final.pkl must
# only ever come from a trusted source.
df_final = pd.read_pickle("./df_final.pkl")
# Benchmark dataset; its "valid" split is indexed by a row's custom_id to get
# the matching "image" and "prompt".
dataset = load_dataset("XAI/vlmsareblind")


def show_row(row_index, selected_task):
    """Collect everything the UI displays for one result row of a task.

    Args:
        row_index: Zero-based position of the row among the rows of
            ``df_final`` whose "task" equals ``selected_task``. It is
            converted with ``int()`` before use.
        selected_task: Value of the "task" column used to filter ``df_final``.

    Returns:
        A 6-tuple ``(image_with_padding, prompt, model_output, ground_truth,
        task, is_correct)``. The image and prompt come from the "valid" split
        of ``dataset`` at the row's "custom_id"; the remaining values come
        from the row's "content_raw", "gt", "task" and "is_correct" columns.

    Raises:
        IndexError: If ``row_index`` is outside the rows available for the
            selected task.
    """
    task_df = df_final[df_final["task"] == selected_task]
    row = task_df.iloc[int(row_index)]

    # Fetch the dataset sample once and reuse it for both the image and the
    # prompt, instead of indexing the split a second time for the same row.
    sample = dataset["valid"][int(row["custom_id"])]
    image = sample["image"]

    # Add white padding: half the image width on the left and right, half the
    # image height on the top and bottom.
    width, height = image.size
    image_with_padding = ImageOps.expand(
        image,
        border=(width // 2, height // 2),
        fill="white",
    )

    return (
        image_with_padding,
        sample["prompt"],
        row["content_raw"],
        row["gt"],
        row["task"],
        row["is_correct"],
    )


def update_slider(selected_task):
    """Return a row-index slider sized to the rows of ``selected_task``.

    The slider runs from 0 to the last row position among the ``df_final``
    rows whose "task" equals ``selected_task``, in steps of 1, and is reset
    to 0.
    """
    matching_rows = df_final.loc[df_final["task"] == selected_task]
    last_index = len(matching_rows) - 1

    slider_settings = {
        "minimum": 0,
        "maximum": last_index,
        "step": 1,
        "label": f"Select Row Index (0-{last_index})",
        "value": 0,
    }
    return gr.Slider(**slider_settings)


# Per-task accuracy table: the mean of "is_correct" for each task, sorted from
# highest to lowest, scaled to a percentage and rendered with two decimals.
accuracy_breakdown = (
    (df_final.groupby("task")["is_correct"].mean().sort_values(ascending=False) * 100)
    .map("{:.2f}".format)
    .rename_axis("Task")
    .rename("Accuracy (%)")
    .reset_index()
)

# Create the Gradio interface: a task dropdown and row slider at the top, the
# selected sample's image and text fields below, then the accuracy table.
with gr.Blocks() as app:
    gr.Markdown("# BlindTest Results Review (GPT-4o mini)")
    gr.HTML(
        """
        <p style="text-align: center;">
            This is a review of results from the GPT-4o mini model on the VLMs Are Blind dataset.
            <br>
            <a href="https://vlmsareblind.github.io/" target="_blank">Project Website</a> | 
            <a href="https://arxiv.org/abs/2407.06581" target="_blank">arXiv Paper</a>
        </p>
    """
    )

    # Compute the task list and the first task's last row position once,
    # rather than re-filtering df_final for every widget argument.
    task_choices = df_final["task"].unique().tolist()
    initial_task = task_choices[0]
    initial_last_index = len(df_final[df_final["task"] == initial_task]) - 1

    with gr.Row():
        task_dropdown = gr.Dropdown(
            choices=task_choices,
            label="Select Task",
            value=initial_task,
        )
        row_selector = gr.Slider(
            minimum=0,
            maximum=initial_last_index,
            step=1,
            label=f"Select Row Index (0-{initial_last_index})",
            value=0,
        )

    with gr.Row():
        with gr.Column(scale=2):
            image_output = gr.Image(label="Image", type="pil")

        with gr.Column(scale=3):
            prompt_output = gr.Textbox(label="Prompt", lines=3)
            model_output = gr.Textbox(label="Model Output", lines=2)
            ground_truth = gr.Textbox(label="Ground Truth", lines=2)
            task = gr.Textbox(label="Task")
            is_correct = gr.Checkbox(label="Is Correct")

    gr.Markdown("## Accuracy Breakdown by Task")
    gr.DataFrame(accuracy_breakdown)

    # Components filled by show_row, in the order of its returned tuple.
    detail_outputs = [
        image_output,
        prompt_output,
        model_output,
        ground_truth,
        task,
        is_correct,
    ]

    # Changing the task resizes the row slider and resets it to 0.
    task_dropdown.change(update_slider, inputs=task_dropdown, outputs=row_selector)

    # Changing the task also shows row 0 of the new task directly. The hidden
    # slider only supplies the constant index 0, so this handler does not
    # depend on the visible slider's current value.
    first_row_index = gr.Slider(value=0, visible=False)
    task_dropdown.change(
        show_row,
        inputs=[first_row_index, task_dropdown],
        outputs=detail_outputs,
    )

    # Moving the slider shows the chosen row of the current task.
    row_selector.change(
        show_row,
        inputs=[row_selector, task_dropdown],
        outputs=detail_outputs,
    )

# Launch the app.
# NOTE: this runs unconditionally at module level (there is no
# `if __name__ == "__main__":` guard), so importing this file also launches it.
app.launch()