taesiri committed on
Commit
d18a9b2
1 Parent(s): 589beac
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from datasets import load_dataset
4
+
5
# Per-sample model results. The rest of this file reads the columns
# "task", "custom_id", "content_raw", "gt" and "is_correct" from it.
# NOTE(review): read_pickle goes through the pickle deserializer, which can
# execute arbitrary code while loading. Only ship a df_final.pkl that was
# produced by a trusted process, or consider a data-only format (Parquet/CSV).
df_final = pd.read_pickle("./df_final.pkl")
# Benchmark data; rows of its "valid" split are looked up by `custom_id` to
# obtain the "image" and "prompt" that go with each result row.
dataset = load_dataset("XAI/vlmsareblind")
7
+
8
+
9
def show_row(row_index, selected_task):
    """Return everything needed to display one result row of a task.

    Args:
        row_index: Zero-based position of the row among the rows of
            ``selected_task``. Coerced with ``int()`` so a float coming from
            a slider is accepted.
        selected_task: Value of the ``task`` column to filter ``df_final`` on.

    Returns:
        A tuple ``(image, prompt, model_output, ground_truth, task,
        is_correct)`` in the order expected by the UI outputs.

    Raises:
        IndexError: If ``row_index`` is outside the rows of ``selected_task``.
    """
    task_df = df_final[df_final["task"] == selected_task]
    row = task_df.iloc[int(row_index)]

    # Index the split once and reuse the example: every row lookup builds the
    # whole example (image included), so fetching it separately for "image"
    # and "prompt" did that work twice per call.
    sample = dataset["valid"][int(row["custom_id"])]

    return (
        sample["image"],
        sample["prompt"],
        row["content_raw"],
        row["gt"],
        row["task"],
        row["is_correct"],
    )
21
+
22
+
23
def update_slider(selected_task):
    """Build the row-index slider for the rows of ``selected_task``.

    Args:
        selected_task: Value of the ``task`` column whose rows the slider
            should range over.

    Returns:
        A ``gr.Slider`` spanning ``0 .. (number of matching rows - 1)`` and
        reset to 0, used as the update for the row selector.
    """
    row_count = int((df_final["task"] == selected_task).sum())
    # Clamp the upper bound: a task with no rows would otherwise produce
    # maximum=-1 (below the minimum) and a "0--1" label.
    last_index = max(row_count - 1, 0)
    return gr.Slider(
        minimum=0,
        maximum=last_index,
        step=1,
        label=f"Select Row Index (0-{last_index})",
        value=0,
    )
32
+
33
+
34
# Accuracy table shown in the UI: mean of `is_correct` per task, ordered from
# best to worst, expressed as a percentage string with two decimals.
_task_accuracy = df_final.groupby("task")["is_correct"].mean()
_ranked_percent = _task_accuracy.sort_values(ascending=False) * 100
accuracy_breakdown = _ranked_percent.map("{:.2f}".format).reset_index()
accuracy_breakdown.columns = ["Task", "Accuracy (%)"]
44
+
45
# Create the Gradio interface
with gr.Blocks() as app:
    gr.Markdown("# VLMs Are Blind Results Review (GPT-4o-mini)")
    # The description names the same model as the title above.
    gr.HTML(
        """
        <p style="text-align: center;">
        This is a review of results from the GPT-4o-mini model on the VLMs Are Blind dataset.
        <br>
        <a href="https://vlmsareblind.github.io/" target="_blank">Project Website</a> |
        <a href="https://arxiv.org/abs/2407.06581" target="_blank">arXiv Paper</a>
        </p>
        """
    )

    # Compute the task list and the first task's row range once instead of
    # re-filtering df_final for every widget argument.
    task_choices = df_final["task"].unique().tolist()
    # Fall back to no selection when there are no results at all, rather than
    # failing on an empty task list.
    initial_task = task_choices[0] if task_choices else None
    initial_last_index = max(int((df_final["task"] == initial_task).sum()) - 1, 0)

    with gr.Row():
        task_dropdown = gr.Dropdown(
            choices=task_choices,
            label="Select Task",
            value=initial_task,
        )
        row_selector = gr.Slider(
            minimum=0,
            maximum=initial_last_index,
            step=1,
            label=f"Select Row Index (0-{initial_last_index})",
            value=0,
        )

    with gr.Row():
        with gr.Column(scale=2):
            image_output = gr.Image(label="Image", type="pil")

        with gr.Column(scale=3):
            prompt_output = gr.Textbox(label="Prompt", lines=3)
            model_output = gr.Textbox(label="Model Output", lines=2)
            ground_truth = gr.Textbox(label="Ground Truth", lines=2)
            task = gr.Textbox(label="Task")
            is_correct = gr.Checkbox(label="Is Correct")

    gr.Markdown("## Accuracy Breakdown by Task")
    gr.DataFrame(accuracy_breakdown)

    # Components filled by show_row, in the order of its return tuple.
    row_outputs = [
        image_output,
        prompt_output,
        model_output,
        ground_truth,
        task,
        is_correct,
    ]

    # Switching task resizes/resets the slider ...
    task_dropdown.change(update_slider, inputs=task_dropdown, outputs=row_selector)

    # ... and shows the first row of the new task. The index is passed as a
    # constant instead of creating a hidden slider just to supply a 0.
    task_dropdown.change(
        lambda selected_task: show_row(0, selected_task),
        inputs=task_dropdown,
        outputs=row_outputs,
    )

    row_selector.change(
        show_row,
        inputs=[row_selector, task_dropdown],
        outputs=row_outputs,
    )

    # Populate the view when the page opens so it is not blank until the
    # first interaction; skipped when there is nothing to show.
    if task_choices:
        app.load(
            show_row,
            inputs=[row_selector, task_dropdown],
            outputs=row_outputs,
        )

# Launch the app
app.launch()