Spaces:

CultriX
/

Tiny-LeaderBoard

Running

File size: 37,398 Bytes

import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr
import io
import os
import base64
import zipfile
from PIL import Image
from io import BytesIO
import tempfile
import sys

# --------------------------------------------------------------------
# PART 1: TINY DATA + PLOTS
# --------------------------------------------------------------------

# "Tiny" benchmark results used for plotting and demonstration in the Gradio app.
# Each row is [model name, model page URL, then one score per task]; the field
# order matches the `columns` list defined right after this literal.
# NOTE(review): the rows for sometimesanotion/Lamarck-14B-v0.6 and
# CultriX/Qwen2.5-14B-Hyper use a different host than every other row, and the
# remaining URLs carry a trailing dot after "huggingface.co" - confirm that
# these links are what the scraper is supposed to fetch.
data_full = [
    ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co./CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
    ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co./djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
    ['CultriX/Qwen2.5-14B-FinalMerge', 'https://huggingface.co./CultriX/Qwen2.5-14B-FinalMerge', 0.7248, 0.8277, 0.7113, 0.7052, 0.57, 0.7001],
    ['CultriX/Qwen2.5-14B-MultiCultyv2', 'https://huggingface.co./CultriX/Qwen2.5-14B-MultiCultyv2', 0.7295, 0.8359, 0.7363, 0.5767, 0.44, 0.7316],
    ['CultriX/Qwen2.5-14B-Brocav7', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav7', 0.7445, 0.8353, 0.7508, 0.6292, 0.46, 0.7629],
    ['CultriX/Qwen2.5-14B-Broca', 'https://huggingface.co./CultriX/Qwen2.5-14B-Broca', 0.7456, 0.8352, 0.748, 0.6034, 0.44, 0.7716],
    ['CultriX/Qwen2.5-14B-Brocav3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav3', 0.7395, 0.8388, 0.7393, 0.6405, 0.47, 0.7659],
    ['CultriX/Qwen2.5-14B-Brocav4', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav4', 0.7432, 0.8377, 0.7444, 0.6277, 0.48, 0.758],
    ['CultriX/Qwen2.5-14B-Brocav2', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav2', 0.7492, 0.8302, 0.7508, 0.6377, 0.51, 0.7478],
    ['CultriX/Qwen2.5-14B-Brocav5', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav5', 0.7445, 0.8313, 0.7547, 0.6376, 0.5, 0.7304],
    ['CultriX/Qwen2.5-14B-Brocav6', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav6', 0.7179, 0.8354, 0.7531, 0.6378, 0.49, 0.7524],
    ['CultriX/Qwenfinity-2.5-14B', 'https://huggingface.co./CultriX/Qwenfinity-2.5-14B', 0.7347, 0.8254, 0.7279, 0.7267, 0.56, 0.697],
    ['CultriX/Qwen2.5-14B-Emergedv2', 'https://huggingface.co./CultriX/Qwen2.5-14B-Emergedv2', 0.7137, 0.8335, 0.7363, 0.5836, 0.44, 0.7344],
    ['CultriX/Qwen2.5-14B-Unity', 'https://huggingface.co./CultriX/Qwen2.5-14B-Unity', 0.7063, 0.8343, 0.7423, 0.682, 0.57, 0.7498],
    ['CultriX/Qwen2.5-14B-MultiCultyv3', 'https://huggingface.co./CultriX/Qwen2.5-14B-MultiCultyv3', 0.7132, 0.8216, 0.7395, 0.6792, 0.55, 0.712],
    ['CultriX/Qwen2.5-14B-Emergedv3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Emergedv3', 0.7436, 0.8312, 0.7519, 0.6585, 0.55, 0.7068],
    ['CultriX/SeQwence-14Bv1', 'https://huggingface.co./CultriX/SeQwence-14Bv1', 0.7278, 0.841, 0.7541, 0.6816, 0.52, 0.7539],
    ['CultriX/Qwen2.5-14B-Wernickev2', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev2', 0.7391, 0.8168, 0.7273, 0.622, 0.45, 0.7572],
    ['CultriX/Qwen2.5-14B-Wernickev3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev3', 0.7357, 0.8148, 0.7245, 0.7023, 0.55, 0.7869],
    ['CultriX/Qwen2.5-14B-Wernickev4', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev4', 0.7355, 0.829, 0.7497, 0.6306, 0.48, 0.7635],
    ['CultriX/SeQwential-14B-v1', 'https://huggingface.co./CultriX/SeQwential-14B-v1', 0.7355, 0.8205, 0.7549, 0.6367, 0.48, 0.7626],
    ['CultriX/Qwen2.5-14B-Wernickev5', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev5', 0.7224, 0.8272, 0.7541, 0.679, 0.51, 0.7578],
    ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
    ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
    ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co./CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
    ['CultriX/Qwen2.5-14B-BrocaV8', 'https://huggingface.co./CultriX/Qwen2.5-14B-BrocaV8', 0.7415, 0.8396, 0.7334, 0.5785, 0.43, 0.7646],
    ['CultriX/Qwexit-2.5-14B-2024', 'https://huggingface.co./CultriX/Qwexit-2.5-14B-2024', 0.7253, 0.8174, 0.7456, 0.6688, 0.5300, 0.7027],
    ['CultriX/Qwen2.5-14B-BrocaV9', 'https://huggingface.co./CultriX/Qwen2.5-14B-BrocaV9', 0.7432, 0.8307, 0.7467, 0.6221, 0.5000, 0.7623],
    ['CultriX/Qwen2.5-14B-partialmergept1', 'https://huggingface.co./CultriX/Qwen2.5-14B-partialmergept1', 0.7389, 0.8370, 0.7451, 0.6715, 0.5700, 0.7308],
    ['CultriX/Qwen2.5-14B-partialmergept2', 'https://huggingface.co./CultriX/Qwen2.5-14B-partialmergept2', 0.7300, 0.8428, 0.7371, 0.5944, 0.4200, 0.7581],
    ['CultriX/model', 'https://huggingface.co./CultriX/model', 0.7010, 0.8320, 0.7194, 0.6158, 0.4700, 0.7385],
    ['CultriX/Qwen2.5-14B-BrocaFinal', 'https://huggingface.co./CultriX/Qwen2.5-14B-BrocaFinal', 0.6265, 0.7688, 0.7007, 0.7035, 0.5100, 0.7218],
    ['CultriX/Qwen2.5-14B-Hyperionv1', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv1', 0.7300, 0.8477, 0.7448, 0.6063, 0.4400, 0.7651],
    ['CultriX/Qwen2.5-14B-Hyperionv3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv3', 0.7445, 0.8414, 0.7458, 0.6371, 0.4900, 0.7543],
    ['sometimesanotion/Lamarck-14B-v0.6', 'https://hf.xwall.us.kg.m/sometimesanotion/Lamarck-14B-v0.6', 0.7446, 0.8294, 0.7368, 0.6008, 0.4300, 0.7423],
    ['CultriX/Qwen2.5-14B-Hyper', 'https://hf.xwall.us.kg.m/CultriX/Qwen2.5-14B-Hyper', 0.7372, 0.8411, 0.7424, 0.5830, 0.4400, 0.7792],
    ['CultriX/Qwen2.5-14B-Hyperionv4', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv4', 0.7305, 0.8359, 0.7454, 0.5827, 0.4600, 0.7797],
    ['CultriX/Qwen2.5-14B-Hyperionv5', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv5', 0.7458, 0.8290, 0.7508, 0.6228, 0.5200, 0.7540],
    ['CultriX/Qwen2.5-14B-Hyperionv6', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv6', 0.7430, 0.8308, 0.7353, 0.6184, 0.4500, 0.7665],
    ['CultriX/Qwen2.5-14B-Hyperionv7', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv7', 0.7412, 0.8287, 0.7508, 0.6208, 0.4800, 0.7532],

]
# Column labels for data_full: two identification columns (name, link)
# followed by the six task-score columns. Code below slices the score columns
# positionally from index 2 onward.
columns = [
    "Model Configuration", "Model Link", "tinyArc", "tinyHellaswag",
    "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"
]
# Module-level frame shared (and mutated) by the plotting helpers below.
df_full = pd.DataFrame(data_full, columns=columns)

def plot_average_scores():
    """
    Render a horizontal bar chart of each model's mean score across the tiny tasks.

    The mean is computed over the task-score columns only (``columns[2:]``), so
    the result does not change when other derived columns (for example
    "Total Scores") have already been added to ``df_full`` by another plot.

    Side effect: stores the per-model mean in ``df_full["Average Score"]``
    (retained so that code reading that column keeps working).

    Returns:
        tuple: ``(pil_image, png_path)`` - the chart as a PIL image and the path
        of a temporary PNG file with the same content. The file is created with
        ``delete=False``; the caller is responsible for removing it.
    """
    task_columns = list(columns[2:])
    df_full["Average Score"] = df_full[task_columns].mean(axis=1)
    ranked = df_full.sort_values(by="Average Score", ascending=False)

    fig = plt.figure(figsize=(14, 10))
    try:
        plt.barh(ranked["Model Configuration"], ranked["Average Score"])
        plt.title("Average Performance of Models Across Tasks", fontsize=16)
        plt.xlabel("Average Score", fontsize=14)
        plt.ylabel("Model Configuration", fontsize=14)
        # barh draws the first row at the bottom; flip so the best model is on top.
        plt.gca().invert_yaxis()
        plt.grid(axis='x', linestyle='--', alpha=0.7)
        plt.tight_layout()

        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, format='png')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    png_bytes = png_buffer.getvalue()
    pil_image = Image.open(io.BytesIO(png_bytes))
    pil_image.load()  # decode now so the image does not depend on lazy reads

    # Write the PNG once and close the handle before handing the path out.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        temp_image_file.write(png_bytes)
    return pil_image, temp_image_file.name

def plot_task_performance():
    """
    Render one line per model showing its score on every tiny task.

    Only the task-score columns (``columns[2:]``) are melted, so derived columns
    that other plot functions add to ``df_full`` ("Average Score",
    "Total Scores") never show up as extra "tasks" on the x axis.

    Returns:
        tuple: ``(pil_image, png_path)`` - the chart as a PIL image and the path
        of a temporary PNG file with the same content. The file is created with
        ``delete=False``; the caller is responsible for removing it.
    """
    task_columns = list(columns[2:])
    long_scores = df_full.melt(
        id_vars=["Model Configuration", "Model Link"],
        value_vars=task_columns,
        var_name="Task", value_name="Score"
    )

    fig = plt.figure(figsize=(16, 12))
    try:
        # sort=False keeps the models in their df_full order for the legend.
        for model, model_scores in long_scores.groupby("Model Configuration", sort=False):
            plt.plot(model_scores["Task"], model_scores["Score"], marker="o", label=model)

        plt.title("Performance of All Models Across Tasks", fontsize=16)
        plt.xlabel("Task", fontsize=14)
        plt.ylabel("Score", fontsize=14)
        plt.xticks(rotation=45)
        # Legend is placed outside the axes because there is one entry per model.
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()

        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, format='png')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    png_bytes = png_buffer.getvalue()
    pil_image = Image.open(io.BytesIO(png_bytes))
    pil_image.load()  # decode now so the image does not depend on lazy reads

    # Write the PNG once and close the handle before handing the path out.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        temp_image_file.write(png_bytes)
    return pil_image, temp_image_file.name

def plot_task_specific_top_models():
    """
    Render a bar chart of the best score per tiny task, labelled with the winner.

    Scores are indexed by model name before ``idxmax`` so that the "Top Model"
    column holds model names rather than integer row labels, and each bar is
    annotated with the name of the model that achieved it. Only the task-score
    columns (``columns[2:]``) are considered, so derived columns added to
    ``df_full`` by other plot functions are ignored.

    Returns:
        tuple: ``(pil_image, png_path)`` - the chart as a PIL image and the path
        of a temporary PNG file with the same content. The file is created with
        ``delete=False``; the caller is responsible for removing it.
    """
    task_columns = list(columns[2:])
    scores_by_model = df_full.set_index("Model Configuration")[task_columns]
    results = (
        pd.DataFrame({
            "Top Model": scores_by_model.idxmax(),
            "Score": scores_by_model.max(),
        })
        .rename_axis("Task")
        .reset_index()
    )

    fig = plt.figure(figsize=(14, 8))
    try:
        bars = plt.bar(results["Task"], results["Score"])
        # Put the winning model's name just above its bar.
        for bar, model in zip(bars, results["Top Model"]):
            plt.text(
                bar.get_x() + bar.get_width() / 2, bar.get_height(), str(model),
                ha="center", va="bottom", fontsize=8
            )
        plt.title("Task-Specific Top Models", fontsize=16)
        plt.xlabel("Task", fontsize=14)
        plt.ylabel("Score", fontsize=14)
        plt.grid(axis="y", linestyle="--", alpha=0.7)
        plt.tight_layout()

        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, format='png')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    png_bytes = png_buffer.getvalue()
    pil_image = Image.open(io.BytesIO(png_bytes))
    pil_image.load()  # decode now so the image does not depend on lazy reads

    # Write the PNG once and close the handle before handing the path out.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        temp_image_file.write(png_bytes)
    return pil_image, temp_image_file.name



def plot_heatmap():
    """
    Render a heatmap of every model's task scores plus their per-model total.

    Colours use a per-column min-max normalisation so each column spans the full
    colour range; the cell annotations show the actual (un-normalised) values.
    A column whose values are all equal is normalised to 0 instead of dividing
    by zero.

    The total is summed over the task-score columns only (``columns[2:]``), so
    calling this function repeatedly, or after another plot has added derived
    columns to ``df_full``, yields the same figure and keeps the tick labels in
    step with the plotted columns.

    Side effect: stores the per-model total in ``df_full["Total Scores"]``
    (retained so that code reading that column keeps working).

    Returns:
        tuple: ``(pil_image, png_path)`` - the chart as a PIL image and the path
        of a temporary PNG file with the same content. The file is created with
        ``delete=False``; the caller is responsible for removing it.
    """
    task_columns = list(columns[2:])
    df_full["Total Scores"] = df_full[task_columns].sum(axis=1)

    heat_columns = task_columns + ["Total Scores"]
    actual_values = df_full[heat_columns]

    # Normalize each column individually for consistent coloring.
    column_min = actual_values.min()
    column_span = (actual_values.max() - column_min).replace(0, 1)
    normalized_data = (actual_values - column_min) / column_span

    fig = plt.figure(figsize=(14, 10))
    try:
        sns.heatmap(
            normalized_data,
            annot=actual_values,  # Show actual values in annotations
            cmap="YlGnBu",
            xticklabels=heat_columns,
            yticklabels=df_full["Model Configuration"]
        )
        plt.title("Performance Heatmap", fontsize=16)
        plt.tight_layout()

        png_buffer = io.BytesIO()
        fig.savefig(png_buffer, format='png')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    png_bytes = png_buffer.getvalue()
    pil_image = Image.open(io.BytesIO(png_bytes))
    pil_image.load()  # decode now so the image does not depend on lazy reads

    # Write the PNG once and close the handle before handing the path out.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
        temp_image_file.write(png_bytes)
    return pil_image, temp_image_file.name


    


def scrape_mergekit_config(model_name):
    """
    Fetch the model page for a row of the *tiny* table and return its first <pre> block.

    The first <pre> element on the page is assumed to hold the YAML config.

    Args:
        model_name: Value to look up in ``df_full["Model Configuration"]``.

    Returns:
        str: The stripped text of the first <pre> element, or one of these
        human-readable messages (callers detect them by substring):
        "No data found for model ...", "Failed to fetch model page for ...",
        "No YAML configuration found for ...".
    """
    df_row = df_full.loc[df_full["Model Configuration"] == model_name]
    if df_row.empty:
        return f"No data found for model {model_name}."

    model_link = df_row["Model Link"].values[0]
    try:
        # A timeout keeps one unresponsive host from hanging the caller forever.
        response = requests.get(model_link, timeout=30)
    except requests.RequestException:
        # Connection errors, timeouts and malformed URLs are reported the same
        # way as a non-200 answer so callers only need one failure check.
        return f"Failed to fetch model page for {model_name}. Please check the link."
    if response.status_code != 200:
        return f"Failed to fetch model page for {model_name}. Please check the link."

    soup = BeautifulSoup(response.text, "html.parser")
    yaml_config = soup.find("pre")  # Assume YAML is in <pre> tags
    if yaml_config:
        return yaml_config.text.strip()
    return f"No YAML configuration found for {model_name}."

def download_yaml(yaml_content, model_name):
    """
    Let users download the scraped YAML if it exists.

    The YAML text is written to a real file named ``<model>_config.yaml`` inside
    a fresh temporary directory, and a ``gr.File`` pointing at that path is
    returned. The file component is given a path rather than raw bytes, and the
    download name comes from the file name on disk.

    Args:
        yaml_content: Text returned by the scraper. Empty input and any of the
            scraper's status messages count as "nothing to download".
        model_name: Model identifier; "/" is replaced with "_" for the file name.

    Returns:
        ``None`` when there is nothing to download, otherwise a ``gr.File``
        whose value is the path of the written YAML file.
    """
    if not yaml_content:
        return None
    failure_markers = (
        "No YAML configuration found",
        "Failed to fetch model page",
        "No data found for model",
    )
    if any(marker in yaml_content for marker in failure_markers):
        return None

    filename = f"{model_name.replace('/', '_')}_config.yaml"
    # A dedicated directory lets the file keep its descriptive name without
    # colliding with earlier downloads.
    yaml_path = os.path.join(tempfile.mkdtemp(), filename)
    with open(yaml_path, "w", encoding="utf-8") as yaml_file:
        yaml_file.write(yaml_content)
    return gr.File(value=yaml_path)

def scrape_model_page(model_url):
    """
    Used for the "Live Scraping" text box in the Gradio UI.

    Fetches ``model_url`` and extracts the first <pre> element (treated as the
    YAML configuration) and the first ``<div class="metadata">`` element.

    Args:
        model_url: URL of the page to scrape.

    Returns:
        str: A Markdown-formatted summary with both sections, or a string
        starting with "Error:" when the page cannot be fetched or parsed.
    """
    try:
        # A timeout keeps an unresponsive host from blocking the UI forever;
        # the resulting exception is reported through the handler below.
        response = requests.get(model_url, timeout=30)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

        soup = BeautifulSoup(response.text, "html.parser")
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
        return f"**YAML Configuration:**\n{yaml_text}\n\n**Metadata:**\n{metadata_text}"
    except Exception as e:
        # UI boundary: every failure is shown to the user as text.
        return f"Error: {str(e)}"

def display_scraped_model_data(model_url):
    """
    Gradio callback for the "Live Scraping Features" section.

    Delegates to ``scrape_model_page`` and passes its result through unchanged.
    """
    scraped_summary = scrape_model_page(model_url)
    return scraped_summary

def download_all_data():
    """
    Builds and returns a zip of:
      - the CSV of your 'tiny' data (the original columns only),
      - four plots (average performance, task performance, top models, heatmap),
      - any YAML configurations for the 'tiny' table's models (if found).

    Plots are stored under descriptive names (``average_performance.png`` and
    so on) rather than under the path of their temporary files, and those
    temporary files are removed once the images are in the archive. A model
    whose page cannot be scraped is skipped instead of aborting the archive.

    Returns:
        tuple: ``(zip_buffer, "analysis_data.zip")`` where ``zip_buffer`` is an
        ``io.BytesIO`` positioned at the start of the archive.
    """
    # Restrict to the base columns so the CSV does not change depending on
    # which plot functions have already added derived columns to df_full.
    csv_data = df_full[columns].to_csv(index=False).encode('utf-8')

    # Each plot function returns (PIL image, temp file path).
    plot_dict = {
        "average_performance": plot_average_scores(),
        "task_performance": plot_task_performance(),
        "top_models": plot_task_specific_top_models(),
        "heatmap": plot_heatmap(),
    }

    failure_markers = (
        "No YAML configuration found",
        "Failed to fetch model page",
        "No data found for model",
    )

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w') as zf:
        zf.writestr("model_scores.csv", csv_data)

        # Add the images
        for name, (pil_image, temp_path) in plot_dict.items():
            image_bytes = io.BytesIO()
            pil_image.save(image_bytes, format='PNG')
            zf.writestr(f"{name}.png", image_bytes.getvalue())
            # The temp file is not needed here; clean it up on a best-effort basis.
            try:
                os.remove(temp_path)
            except OSError:
                pass

        # Also try scraping each model in the *tiny* dataset for a YAML config
        for model_name in df_full["Model Configuration"].to_list():
            try:
                yaml_content = scrape_mergekit_config(model_name)
            except requests.RequestException:
                # One unreachable page must not abort the whole download.
                continue
            if not any(marker in yaml_content for marker in failure_markers):
                zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())

    zip_buffer.seek(0)
    return zip_buffer, "analysis_data.zip"

# --------------------------------------------------------------------
# PART 2: THE "DATA START" SNIPPET (RANKS 44–105) + Parser
# --------------------------------------------------------------------
# Larger benchmark dataset. Ranks run from 44 to 105 but are not contiguous.
# Every entry is a dict with:
#   "rank"         - integer rank of the model,
#   "name"         - model identifier in "owner/model" form,
#   "scores"       - dict with "average", "IFEval", "BBH", "MATH", "GPQA",
#                    "MUSR" and "MMLU-PRO" values (printed as percentages),
#   "hf_url"       - model page URL used when scraping for a config,
#   "known_config" - None, or a dict with "models", "merge_method",
#                    "base_model", "dtype" and "parameters" -> "t".
# Only the first entry carries a known_config; the others are scraped on demand.
benchmark_data = [
    {
        "rank": 44,
        "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
        "scores": {
            "average": 40.10,
            "IFEval": 72.57,
            "BBH": 48.58,
            "MATH": 34.44,
            "GPQA": 17.34,
            "MUSR": 19.39,
            "MMLU-PRO": 48.26
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
        "known_config": {
            "models": [
                {"model": "CultriX/SeQwence-14Bv1"},
                {"model": "allknowingroger/Qwenslerp5-14B"}
            ],
            "merge_method": "slerp",
            "base_model": "CultriX/SeQwence-14Bv1",
            "dtype": "bfloat16",
            "parameters": {
                "t": [0, 0.5, 1, 0.5, 0]
            }
        }
    },
    {
        "rank": 45,
        "name": "sthenno-com/miscii-14b-1225",
        "scores": {
            "average": 40.08,
            "IFEval": 78.78,
            "BBH": 50.91,
            "MATH": 31.57,
            "GPQA": 17.00,
            "MUSR": 14.77,
            "MMLU-PRO": 47.46
        },
        "hf_url": "https://huggingface.co./sthenno-com/miscii-14b-1225",
        "known_config": None
    },
    {
        "rank": 46,
        "name": "djuna/Q2.5-Veltha-14B-0.5",
        "scores": {
            "average": 39.96,
            "IFEval": 77.96,
            "BBH": 50.32,
            "MATH": 33.84,
            "GPQA": 15.77,
            "MUSR": 14.17,
            "MMLU-PRO": 47.72
        },
        "hf_url": "https://huggingface.co./djuna/Q2.5-Veltha-14B-0.5",
        "known_config": None
    },
    {
        "rank": 48,
        "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
        "scores": {
            "average": 39.81,
            "IFEval": 71.62,
            "BBH": 48.76,
            "MATH": 33.99,
            "GPQA": 17.34,
            "MUSR": 19.23,
            "MMLU-PRO": 47.95
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
        "known_config": None
    },
    {
        "rank": 50,
        "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
        "scores": {
            "average": 39.46,
            "IFEval": 68.72,
            "BBH": 47.71,
            "MATH": 35.05,
            "GPQA": 18.23,
            "MUSR": 19.56,
            "MMLU-PRO": 47.50
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
        "known_config": None
    },
    {
        "rank": 52,
        "name": "arcee-ai/Virtuoso-Small",
        "scores": {
            "average": 39.43,
            "IFEval": 79.35,
            "BBH": 50.40,
            "MATH": 34.29,
            "GPQA": 11.52,
            "MUSR": 14.44,
            "MMLU-PRO": 46.57
        },
        "hf_url": "https://huggingface.co./arcee-ai/Virtuoso-Small",
        "known_config": None
    },
    {
        "rank": 54,
        "name": "sometimesanotion/Qwentinuum-14B-v6",
        "scores": {
            "average": 39.23,
            "IFEval": 63.04,
            "BBH": 50.23,
            "MATH": 33.84,
            "GPQA": 18.23,
            "MUSR": 21.18,
            "MMLU-PRO": 48.89
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v6",
        "known_config": None
    },
    {
        "rank": 55,
        "name": "djuna/Q2.5-Veltha-14B",
        "scores": {
            "average": 39.21,
            "IFEval": 82.92,
            "BBH": 49.75,
            "MATH": 28.02,
            "GPQA": 14.54,
            "MUSR": 12.26,
            "MMLU-PRO": 47.76
        },
        "hf_url": "https://huggingface.co./djuna/Q2.5-Veltha-14B",
        "known_config": None
    },
    {
        "rank": 57,
        "name": "allknowingroger/QwenSlerp6-14B",
        "scores": {
            "average": 39.02,
            "IFEval": 68.67,
            "BBH": 47.59,
            "MATH": 34.14,
            "GPQA": 16.44,
            "MUSR": 18.32,
            "MMLU-PRO": 48.95
        },
        "hf_url": "https://huggingface.co./allknowingroger/QwenSlerp6-14B",
        "known_config": None
    },
    {
        "rank": 58,
        "name": "allknowingroger/QwenSlerp5-14B",
        "scores": {
            "average": 38.94,
            "IFEval": 71.19,
            "BBH": 47.39,
            "MATH": 33.16,
            "GPQA": 15.32,
            "MUSR": 17.81,
            "MMLU-PRO": 48.78
        },
        "hf_url": "https://huggingface.co./allknowingroger/QwenSlerp5-14B",
        "known_config": None
    },
    {
        "rank": 59,
        "name": "sometimesanotion/Qwentinuum-14B-v5",
        "scores": {
            "average": 38.87,
            "IFEval": 62.86,
            "BBH": 50.28,
            "MATH": 31.57,
            "GPQA": 18.34,
            "MUSR": 21.09,
            "MMLU-PRO": 49.09
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v5",
        "known_config": None
    },
    {
        "rank": 60,
        "name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
        "scores": {
            "average": 38.82,
            "IFEval": 59.90,
            "BBH": 50.12,
            "MATH": 34.89,
            "GPQA": 18.46,
            "MUSR": 21.02,
            "MMLU-PRO": 48.56
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwenvergence-14B-v6-Prose",
        "known_config": None
    },
    {
        "rank": 61,
        "name": "CultriX/Qwen2.5-14B-Brocav3",
        "scores": {
            "average": 38.76,
            "IFEval": 69.52,
            "BBH": 49.05,
            "MATH": 32.25,
            "GPQA": 14.54,
            "MUSR": 19.25,
            "MMLU-PRO": 47.97
        },
        "hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Brocav3",
        "known_config": None
    },
    {
        "rank": 62,
        "name": "sometimesanotion/Qwentinuum-14B-v7",
        "scores": {
            "average": 38.76,
            "IFEval": 61.09,
            "BBH": 50.35,
            "MATH": 33.38,
            "GPQA": 18.79,
            "MUSR": 19.95,
            "MMLU-PRO": 49.00
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v7",
        "known_config": None
    },
    {
        "rank": 64,
        "name": "sometimesanotion/Qwentinuum-14B-v3",
        "scores": {
            "average": 38.74,
            "IFEval": 61.58,
            "BBH": 50.04,
            "MATH": 32.85,
            "GPQA": 18.34,
            "MUSR": 20.62,
            "MMLU-PRO": 49.03
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v3",
        "known_config": None
    },
    {
        "rank": 65,
        "name": "allura-org/TQ2.5-14B-Aletheia-v1",
        "scores": {
            "average": 38.74,
            "IFEval": 75.30,
            "BBH": 50.88,
            "MATH": 29.53,
            "GPQA": 14.99,
            "MUSR": 14.61,
            "MMLU-PRO": 47.12
        },
        "hf_url": "https://huggingface.co./allura-org/TQ2.5-14B-Aletheia-v1",
        "known_config": None
    },
    {
        "rank": 66,
        "name": "qingy2024/Fusion4-14B-Instruct",
        "scores": {
            "average": 38.73,
            "IFEval": 76.49,
            "BBH": 50.70,
            "MATH": 33.91,
            "GPQA": 10.74,
            "MUSR": 13.97,
            "MMLU-PRO": 46.60
        },
        "hf_url": "https://huggingface.co./qingy2024/Fusion4-14B-Instruct",
        "known_config": None
    },
    {
        "rank": 68,
        "name": "CultriX/Qwen2.5-14B-Brocav7",
        "scores": {
            "average": 38.52,
            "IFEval": 67.24,
            "BBH": 48.91,
            "MATH": 31.87,
            "GPQA": 15.66,
            "MUSR": 20.15,
            "MMLU-PRO": 47.31
        },
        "hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Brocav7",
        "known_config": None
    },
    {
        "rank": 71,
        "name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
        "scores": {
            "average": 38.46,
            "IFEval": 56.43,
            "BBH": 50.14,
            "MATH": 35.57,
            "GPQA": 18.46,
            "MUSR": 21.34,
            "MMLU-PRO": 48.80
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v6-Prose",
        "known_config": None
    },
    {
        "rank": 76,
        "name": "CultriX/Qwen2.5-14B-Brocav6",
        "scores": {
            "average": 38.32,
            "IFEval": 69.95,
            "BBH": 47.82,
            "MATH": 29.61,
            "GPQA": 15.66,
            "MUSR": 18.88,
            "MMLU-PRO": 47.99
        },
        "hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Brocav6",
        "known_config": None
    },
    {
        "rank": 80,
        "name": "CultriX/SeQwence-14Bv1",
        "scores": {
            "average": 38.20,
            "IFEval": 66.78,
            "BBH": 47.19,
            "MATH": 33.53,
            "GPQA": 14.88,
            "MUSR": 18.80,
            "MMLU-PRO": 48.00
        },
        "hf_url": "https://huggingface.co./CultriX/SeQwence-14Bv1",
        "known_config": None
    },
    {
        "rank": 85,
        "name": "sometimesanotion/Qwentinuum-14B-v013",
        "scores": {
            "average": 37.96,
            "IFEval": 67.11,
            "BBH": 43.97,
            "MATH": 33.01,
            "GPQA": 14.32,
            "MUSR": 24.99,
            "MMLU-PRO": 44.34
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v013",
        "known_config": None
    },
    {
        "rank": 86,
        "name": "CultriX/Qwen2.5-14B-Wernickev3",
        "scores": {
            "average": 37.94,
            "IFEval": 70.48,
            "BBH": 44.58,
            "MATH": 32.78,
            "GPQA": 14.99,
            "MUSR": 18.69,
            "MMLU-PRO": 46.13
        },
        "hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev3",
        "known_config": None
    },
    {
        "rank": 88,
        "name": "allknowingroger/QwenSlerp4-14B",
        "scores": {
            "average": 37.80,
            "IFEval": 63.28,
            "BBH": 49.38,
            "MATH": 30.97,
            "GPQA": 16.33,
            "MUSR": 17.59,
            "MMLU-PRO": 49.28
        },
        "hf_url": "https://huggingface.co./allknowingroger/QwenSlerp4-14B",
        "known_config": None
    },
    {
        "rank": 89,
        "name": "CultriX/Qwen2.5-14B-Broca",
        "scores": {
            "average": 37.72,
            "IFEval": 56.04,
            "BBH": 50.03,
            "MATH": 34.59,
            "GPQA": 18.23,
            "MUSR": 18.95,
            "MMLU-PRO": 48.49
        },
        "hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Broca",
        "known_config": None
    },
    {
        "rank": 90,
        "name": "CultriX/Qwen2.5-14B-Emerged",
        "scores": {
            "average": 37.66,
            "IFEval": 70.00,
            "BBH": 45.93,
            "MATH": 30.74,
            "GPQA": 14.32,
            "MUSR": 18.47,
            "MMLU-PRO": 46.51
        },
        "hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Emerged",
        "known_config": None
    },
    {
        "rank": 91,
        "name": "sometimesanotion/Qwentinuum-14B-v8",
        "scores": {
            "average": 37.65,
            "IFEval": 54.12,
            "BBH": 50.11,
            "MATH": 34.14,
            "GPQA": 17.79,
            "MUSR": 20.75,
            "MMLU-PRO": 49.02
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v8",
        "known_config": None
    },
    {
        "rank": 92,
        "name": "qingy2024/Fusion-14B-Instruct",
        "scores": {
            "average": 37.64,
            "IFEval": 72.60,
            "BBH": 48.58,
            "MATH": 30.97,
            "GPQA": 13.98,
            "MUSR": 14.81,
            "MMLU-PRO": 44.93
        },
        "hf_url": "https://huggingface.co./qingy2024/Fusion-14B-Instruct",
        "known_config": None
    },
    {
        "rank": 94,
        "name": "CultriX/Qwestion-14B",
        "scores": {
            "average": 37.63,
            "IFEval": 63.18,
            "BBH": 48.76,
            "MATH": 31.72,
            "GPQA": 15.77,
            "MUSR": 17.22,
            "MMLU-PRO": 49.14
        },
        "hf_url": "https://huggingface.co./CultriX/Qwestion-14B",
        "known_config": None
    },
    {
        "rank": 99,
        "name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
        "scores": {
            "average": 37.37,
            "IFEval": 49.18,
            "BBH": 49.80,
            "MATH": 35.57,
            "GPQA": 19.35,
            "MUSR": 21.77,
            "MMLU-PRO": 48.55
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwenvergence-14B-v3-Prose",
        "known_config": None
    },
    {
        "rank": 102,
        "name": "CultriX/SeQwence-14B-v5",
        "scores": {
            "average": 37.27,
            "IFEval": 59.20,
            "BBH": 50.00,
            "MATH": 31.04,
            "GPQA": 16.00,
            "MUSR": 18.33,
            "MMLU-PRO": 49.05
        },
        "hf_url": "https://huggingface.co./CultriX/SeQwence-14B-v5",
        "known_config": None
    },
    {
        "rank": 103,
        "name": "sometimesanotion/Qwen-14B-ProseStock-v4",
        "scores": {
            "average": 37.23,
            "IFEval": 49.42,
            "BBH": 49.54,
            "MATH": 35.50,
            "GPQA": 18.46,
            "MUSR": 21.70,
            "MMLU-PRO": 48.74
        },
        "hf_url": "https://huggingface.co./sometimesanotion/Qwen-14B-ProseStock-v4",
        "known_config": None
    },
    {
        "rank": 104,
        "name": "sometimesanotion/IF-reasoning-experiment-40",
        "scores": {
            "average": 37.21,
            "IFEval": 63.30,
            "BBH": 44.31,
            "MATH": 27.72,
            "GPQA": 17.34,
            "MUSR": 25.86,
            "MMLU-PRO": 44.72
        },
        "hf_url": "https://huggingface.co./sometimesanotion/IF-reasoning-experiment-40",
        "known_config": None
    },
    {
        "rank": 105,
        "name": "CultriX/SeQwence-14B-EvolMerge",
        "scores": {
            "average": 37.20,
            "IFEval": 53.82,
            "BBH": 50.78,
            "MATH": 31.80,
            "GPQA": 17.45,
            "MUSR": 20.26,
            "MMLU-PRO": 49.10
        },
        "hf_url": "https://huggingface.co./CultriX/SeQwence-14B-EvolMerge",
        "known_config": None
    }
]

def snippet_scrape_model_page(url):
    """
    Equivalent scraping function for the larger dataset
    to look for <pre> YAML and a .metadata section.

    Args:
        url: URL of the model page to fetch.

    Returns:
        dict: ``{"yaml_configuration": ..., "metadata": ...}`` on success, where
        each value is the stripped element text or a "No ... found." placeholder.
        str: A message starting with "Error:" when the page cannot be fetched
        or parsed. Callers tell the two cases apart with ``isinstance``.
    """
    try:
        # A timeout keeps one unresponsive host from stalling the whole
        # benchmark listing; the resulting exception is reported below.
        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

        soup = BeautifulSoup(response.text, "html.parser")

        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."

        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."

        return {
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }

    except Exception as e:
        # Best-effort scraping: every failure is turned into an error string.
        return f"Error: {str(e)}"

def snippet_print_benchmark_and_config_info(model_info):
    """
    Print a report for one entry of the larger benchmark dataset.

    The report lists the rank, name and all benchmark scores. It then shows the
    MergeKit configuration: the stored ``known_config`` when the entry has one,
    otherwise whatever ``snippet_scrape_model_page`` finds on the model page.
    When no configuration is found, a ready-to-run scraping script for the
    model's URL is printed instead.
    """
    print(f"---\nModel Rank: {model_info['rank']}")
    print(f"Model Name: {model_info['name']}")

    scores = model_info['scores']
    print(f"Model average score across benchmarks in %: {scores['average']}")
    # The wording ("on" vs "in") differs between benchmarks and is kept as-is.
    benchmark_wording = (
        ("on", "IFEval"),
        ("on", "BBH"),
        ("on", "MATH"),
        ("in", "GPQA"),
        ("in", "MUSR"),
        ("in", "MMLU-PRO"),
    )
    for preposition, benchmark in benchmark_wording:
        print(f"Models average score {preposition} {benchmark} benchmarks in %: {scores[benchmark]}")

    # A stored configuration takes precedence: print it in YAML form and stop.
    known = model_info["known_config"]
    if known is not None:
        print("###")
        print("models:")
        for entry in known["models"]:
            print(f"  - model: {entry['model']}")
        print(f"merge_method: {known['merge_method']}")
        print(f"base_model: {known['base_model']}")
        print(f"dtype: {known['dtype']}")
        print("parameters:")
        t_vals = known["parameters"]["t"]
        print(f"  t: {t_vals} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
        print("###")
        return

    # No stored configuration: fall back to scraping the model page.
    scraped = snippet_scrape_model_page(model_info["hf_url"])
    if isinstance(scraped, str):
        # A plain string is the scraper's error report.
        print("(No MergeKit configuration found or scraping error.)")
        print(scraped)
        return

    yaml_text = scraped["yaml_configuration"]
    if "No YAML configuration found." not in yaml_text:
        # The page contained a configuration block.
        print("###")
        print(yaml_text)
        print("###")
        return

    print("(No MergeKit configuration found.)\n")
    print("You can try the following Python script to scrape the model page:\n")
    separator = "#" * 70
    print(separator)
    script = f'''import requests
from bs4 import BeautifulSoup

def scrape_model_page(model_url):
    try:
        response = requests.get(model_url)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
        
        soup = BeautifulSoup(response.text, "html.parser")

        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."

        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."

        return {{
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }}

    except Exception as e:
        return f"Error: {{str(e)}}"

if __name__ == "__main__":
    model_url = "{model_info['hf_url']}"
    result = scrape_model_page(model_url)
    print(result)'''
    print(script)
    print(separator)

def run_non_tiny_benchmarks():
    """
    Capture the printed report for every model in ``benchmark_data``.

    ``snippet_print_benchmark_and_config_info`` reports via ``print``, so
    ``sys.stdout`` is temporarily replaced by an in-memory buffer while each
    model (ranks 44..105) is processed.

    Returns:
        str: Everything printed for all models, as a single string for
        Gradio to display.

    Raises:
        Any exception raised while printing a model propagates unchanged;
        ``sys.stdout`` is restored before it does.
    """
    old_stdout = sys.stdout
    buffer = io.StringIO()
    sys.stdout = buffer
    try:
        for model in benchmark_data:
            snippet_print_benchmark_and_config_info(model)
    finally:
        # Restore stdout even when a model fails to print; otherwise the
        # process would keep writing into this buffer for the rest of its
        # lifetime and all later console output would be lost.
        sys.stdout = old_stdout

    return buffer.getvalue()

# --------------------------------------------------------------------
# PART 3: The Gradio App
# --------------------------------------------------------------------
# Build the whole UI inside a single Blocks context. Components are created
# in display order, so the order of the statements below is significant.
with gr.Blocks() as demo:
    gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")

    # The existing UI for the “tiny” data.
    # Each of the four rows below follows the same pattern: a button, an image
    # component, and a file component; the button's callback takes no inputs
    # and its results are routed to the image and the file, in that order.
    with gr.Row():
        btn1 = gr.Button("Show Average Performance")
        img1 = gr.Image(type="pil", label="Average Performance Plot")
        img1_download = gr.File(label="Download Average Performance")
        btn1.click(plot_average_scores, outputs=[img1, img1_download])
        
    with gr.Row():
        btn2 = gr.Button("Show Task Performance")
        img2 = gr.Image(type="pil", label="Task Performance Plot")
        img2_download = gr.File(label="Download Task Performance")
        btn2.click(plot_task_performance, outputs=[img2, img2_download])

    with gr.Row():
        btn3 = gr.Button("Task-Specific Top Models")
        img3 = gr.Image(type="pil", label="Task-Specific Top Models Plot")
        img3_download = gr.File(label="Download Top Models")
        btn3.click(plot_task_specific_top_models, outputs=[img3, img3_download])
    
    with gr.Row():
        btn4 = gr.Button("Plot Performance Heatmap")
        heatmap_img = gr.Image(type="pil", label="Performance Heatmap")
        heatmap_download = gr.File(label="Download Heatmap")
        btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])

    # Scraping & YAML handling for the *tiny* table
    with gr.Row():
        # Dropdown options are the values of df_full's "Model Configuration" column.
        model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
        with gr.Column():
            # Scrape: selected model -> text shown in yaml_output.
            scrape_btn = gr.Button("Scrape MergeKit Configuration")
            yaml_output = gr.Textbox(lines=10, placeholder="YAML Configuration will appear here.")
            scrape_btn.click(scrape_mergekit_config, inputs=model_selector, outputs=yaml_output)
        with gr.Column():
            # Save: current yaml_output text plus the selected model -> downloadable file.
            save_yaml_btn = gr.Button("Save MergeKit Configuration")
            yaml_download = gr.File(label="Download MergeKit Configuration")
            save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)

    # Download everything (CSV, plots, any found YAML)
    with gr.Row():
        download_all_btn = gr.Button("Download Everything")
        all_downloads = gr.File(label="Download All Data")
        # No inputs: download_all_data is called without arguments.
        download_all_btn.click(download_all_data, outputs=all_downloads)
        
    # Live Scraping: user-supplied URL -> display_scraped_model_data -> textbox.
    gr.Markdown("## Live Scraping Features")
    with gr.Row():
        url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co./<model>")
        live_scrape_btn = gr.Button("Scrape Model Page")
        live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
        live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)

    # Non-Tiny Benchmarks: run_non_tiny_benchmarks takes no inputs and its
    # returned string is shown in the textbox.
    gr.Markdown("## Non-Tiny Benchmark Parser (Ranks 44–105)")
    with gr.Row():
        parse_non_tiny_btn = gr.Button("Parse Non-Tiny Benchmarks")
        parse_non_tiny_output = gr.Textbox(label="Non-Tiny Benchmark Output", lines=30)
        parse_non_tiny_btn.click(fn=run_non_tiny_benchmarks, outputs=parse_non_tiny_output)

# NOTE(review): launch() runs unconditionally whenever this module is executed
# or imported (there is no `if __name__ == "__main__":` guard) — confirm this
# is intended for the deployment target.
demo.launch()