import gradio as gr import pandas as pd from datasets import load_dataset from PIL import Image, ImageOps df_final = pd.read_pickle("./df_final.pkl") dataset = load_dataset("XAI/vlmsareblind") def show_row(row_index, selected_task): task_df = df_final[df_final["task"] == selected_task] row = task_df.iloc[int(row_index)] custom_id = int(row["custom_id"]) image = dataset["valid"][custom_id]["image"] # Add white padding to the image original_size = image.size new_size = (original_size[0] * 2, original_size[1] * 2) padding = ( (new_size[0] - original_size[0]) // 2, (new_size[1] - original_size[1]) // 2, ) image_with_padding = ImageOps.expand(image, border=padding, fill="white") prompt = dataset["valid"][custom_id]["prompt"] model_output = row["content_raw"] ground_truth = row["gt"] task = row["task"] is_correct = row["is_correct"] return image_with_padding, prompt, model_output, ground_truth, task, is_correct def update_slider(selected_task): task_df = df_final[df_final["task"] == selected_task] return gr.Slider( minimum=0, maximum=len(task_df) - 1, step=1, label=f"Select Row Index (0-{len(task_df) - 1})", value=0, ) # Create accuracy breakdown dataframe accuracy_breakdown = ( df_final.groupby("task")["is_correct"] .mean() .sort_values(ascending=False) .mul(100) .apply(lambda x: f"{x:.2f}") .reset_index() ) accuracy_breakdown.columns = ["Task", "Accuracy (%)"] # Create the Gradio interface with gr.Blocks() as app: gr.Markdown("# BlindTest Results Review (GPT-4o mini)") gr.HTML( """
This is a review of results from the GPT-4o mini model on the VLMs Are Blind dataset.
Project Website |
arXiv Paper