Spaces:

ahmedheakl
/

CAMEL-Bench-leaderboard

Running

File size: 1,738 Bytes

8460af1
b0ee7b4
 
 
aa9bb5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b0ee7b4
 
 
aae65ae
 
f2e3361
b0ee7b4
 
 
20f0a61
 
 
 
 
 
b0ee7b4
f2e3361

import pandas as pd
import gradio as gr

# Per-task benchmark scores for each model. Every score list is aligned with
# the "Method" list by position (index i in each list belongs to model i).
data = {
    "Method": [
        "GPT-4o", "GPT-4o-mini", "Gemini-1.5-Pro", "Gemini-1.5-Flash", "Qwen2-VL-2B", "Pangea-7B", "InternVL2-8B", "LLaVa-NeXt-7B"
    ],
    "MM Understanding & Reasoning": [
        57.90, 48.82, 46.67, 45.58, 40.59, 40.09, 30.41, 26.33
    ],
    "OCR & Document Understanding": [
        59.11, 42.89, 36.59, 33.59, 25.68, 26.47, 15.91, 19.12
    ],
    "Charts & Diagram Understanding": [
        73.57, 64.98, 47.06, 48.25, 27.83, 38.87, 30.27, 27.56
    ],
    "Video Understanding": [
        74.27, 68.11, 42.94, 53.31, 38.90, 49.01, 51.42, 44.90
    ],
    "Cultural Specific Understanding": [
        80.86, 65.92, 56.24, 46.54, 34.27, 20.34, 20.88, 28.30
    ],
    "Medical Imaging": [
        49.90, 47.37, 33.77, 42.86, 29.12, 31.99, 29.48, 22.54
    ],
    "Agro Specific": [
        80.75, 79.58, 72.12, 76.06, 52.02, 74.51, 44.47, 42.00
    ],
    "Remote Sensing Understanding": [
        22.85, 16.93, 17.07, 14.95, 12.56, 6.67, 5.36, 8.33
    ],
}

# Every column except the model name holds a numeric task score.
SCORE_COLUMNS = [column for column in data if column != "Method"]

df = pd.DataFrame(data)
# Average over the explicitly named score columns rather than a positional
# slice, so the result does not depend on "Method" being the first column.
# Round to two decimals to match the precision of the source scores; the raw
# mean would otherwise be displayed with long floating-point tails.
df["Average Score"] = df[SCORE_COLUMNS].mean(axis=1).round(2)

def display_data() -> pd.DataFrame:
    """Return the module-level ``df`` table.

    The same DataFrame object is returned on every call (no copy is made).
    NOTE(review): nothing in the visible part of this file calls this
    function — confirm whether it is still needed.
    """
    return df

# Page layout: icon, title, a short description, then the read-only table.
with gr.Blocks() as demo:
    # NOTE(review): an earlier comment here said to replace this with the
    # actual camel icon URL — confirm this upload is the final asset.
    gr.Markdown("![camel icon](https://cdn-uploads.huggingface.co/production/uploads/656864e12d73834278a8dea7/n-XfVKd1xVywH_vgPyJyQ.png)", elem_id="camel-icon")
    gr.Markdown("# **CAMEL-Bench: Model Performance Across Vision Understanding Tasks**")
    gr.Markdown("""
    This table shows the performance of different models across various tasks including OCR, chart understanding, video, medical imaging, and more. 
    """)
    # interactive=False keeps the table display-only.
    gr.Dataframe(value=df, label="CAMEL-Bench Model Performance", interactive=False)

# Start the server only when this file is run as a script, so that importing
# the module (e.g. from tests or tooling) does not launch a web server.
if __name__ == "__main__":
    demo.launch()