Spaces:

CultriX
/

Tiny-LeaderBoard

Running

App Files Files Community

Tiny-LeaderBoard / app.py

CultriX

Update app.py

d8ef618 verified 15 days ago

raw

history blame contribute delete

37.4 kB

	import requests
	from bs4 import BeautifulSoup
	import pandas as pd
	import matplotlib.pyplot as plt
	import seaborn as sns
	import gradio as gr
	import io
	import os
	import base64
	import zipfile
	from PIL import Image
	from io import BytesIO
	import tempfile
	import sys

	# --------------------------------------------------------------------
	# PART 1: TINY DATA + PLOTS
	# --------------------------------------------------------------------

	# This dataframe is your “tiny” version of model performance data.
	# Used for plotting & demonstration in the Gradio app.
	# One row per model: model id, model page URL, then one score per tiny
	# benchmark, in the same order as `columns` below.
	# NOTE(review): every link writes the host with a trailing dot
	# ("huggingface.co."); the two rows that previously pointed at the
	# "hf.xwall.us.kg.m" host were brought in line with the rest of the table.
	# Confirm whether the trailing dot is intended.
	data_full = [
	    ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co./CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
	    ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co./djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
	    ['CultriX/Qwen2.5-14B-FinalMerge', 'https://huggingface.co./CultriX/Qwen2.5-14B-FinalMerge', 0.7248, 0.8277, 0.7113, 0.7052, 0.57, 0.7001],
	    ['CultriX/Qwen2.5-14B-MultiCultyv2', 'https://huggingface.co./CultriX/Qwen2.5-14B-MultiCultyv2', 0.7295, 0.8359, 0.7363, 0.5767, 0.44, 0.7316],
	    ['CultriX/Qwen2.5-14B-Brocav7', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav7', 0.7445, 0.8353, 0.7508, 0.6292, 0.46, 0.7629],
	    ['CultriX/Qwen2.5-14B-Broca', 'https://huggingface.co./CultriX/Qwen2.5-14B-Broca', 0.7456, 0.8352, 0.748, 0.6034, 0.44, 0.7716],
	    ['CultriX/Qwen2.5-14B-Brocav3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav3', 0.7395, 0.8388, 0.7393, 0.6405, 0.47, 0.7659],
	    ['CultriX/Qwen2.5-14B-Brocav4', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav4', 0.7432, 0.8377, 0.7444, 0.6277, 0.48, 0.758],
	    ['CultriX/Qwen2.5-14B-Brocav2', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav2', 0.7492, 0.8302, 0.7508, 0.6377, 0.51, 0.7478],
	    ['CultriX/Qwen2.5-14B-Brocav5', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav5', 0.7445, 0.8313, 0.7547, 0.6376, 0.5, 0.7304],
	    ['CultriX/Qwen2.5-14B-Brocav6', 'https://huggingface.co./CultriX/Qwen2.5-14B-Brocav6', 0.7179, 0.8354, 0.7531, 0.6378, 0.49, 0.7524],
	    ['CultriX/Qwenfinity-2.5-14B', 'https://huggingface.co./CultriX/Qwenfinity-2.5-14B', 0.7347, 0.8254, 0.7279, 0.7267, 0.56, 0.697],
	    ['CultriX/Qwen2.5-14B-Emergedv2', 'https://huggingface.co./CultriX/Qwen2.5-14B-Emergedv2', 0.7137, 0.8335, 0.7363, 0.5836, 0.44, 0.7344],
	    ['CultriX/Qwen2.5-14B-Unity', 'https://huggingface.co./CultriX/Qwen2.5-14B-Unity', 0.7063, 0.8343, 0.7423, 0.682, 0.57, 0.7498],
	    ['CultriX/Qwen2.5-14B-MultiCultyv3', 'https://huggingface.co./CultriX/Qwen2.5-14B-MultiCultyv3', 0.7132, 0.8216, 0.7395, 0.6792, 0.55, 0.712],
	    ['CultriX/Qwen2.5-14B-Emergedv3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Emergedv3', 0.7436, 0.8312, 0.7519, 0.6585, 0.55, 0.7068],
	    ['CultriX/SeQwence-14Bv1', 'https://huggingface.co./CultriX/SeQwence-14Bv1', 0.7278, 0.841, 0.7541, 0.6816, 0.52, 0.7539],
	    ['CultriX/Qwen2.5-14B-Wernickev2', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev2', 0.7391, 0.8168, 0.7273, 0.622, 0.45, 0.7572],
	    ['CultriX/Qwen2.5-14B-Wernickev3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev3', 0.7357, 0.8148, 0.7245, 0.7023, 0.55, 0.7869],
	    ['CultriX/Qwen2.5-14B-Wernickev4', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev4', 0.7355, 0.829, 0.7497, 0.6306, 0.48, 0.7635],
	    ['CultriX/SeQwential-14B-v1', 'https://huggingface.co./CultriX/SeQwential-14B-v1', 0.7355, 0.8205, 0.7549, 0.6367, 0.48, 0.7626],
	    ['CultriX/Qwen2.5-14B-Wernickev5', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev5', 0.7224, 0.8272, 0.7541, 0.679, 0.51, 0.7578],
	    ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
	    ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
	    ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co./CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
	    ['CultriX/Qwen2.5-14B-BrocaV8', 'https://huggingface.co./CultriX/Qwen2.5-14B-BrocaV8', 0.7415, 0.8396, 0.7334, 0.5785, 0.43, 0.7646],
	    ['CultriX/Qwexit-2.5-14B-2024', 'https://huggingface.co./CultriX/Qwexit-2.5-14B-2024', 0.7253, 0.8174, 0.7456, 0.6688, 0.5300, 0.7027],
	    ['CultriX/Qwen2.5-14B-BrocaV9', 'https://huggingface.co./CultriX/Qwen2.5-14B-BrocaV9', 0.7432, 0.8307, 0.7467, 0.6221, 0.5000, 0.7623],
	    ['CultriX/Qwen2.5-14B-partialmergept1', 'https://huggingface.co./CultriX/Qwen2.5-14B-partialmergept1', 0.7389, 0.8370, 0.7451, 0.6715, 0.5700, 0.7308],
	    ['CultriX/Qwen2.5-14B-partialmergept2', 'https://huggingface.co./CultriX/Qwen2.5-14B-partialmergept2', 0.7300, 0.8428, 0.7371, 0.5944, 0.4200, 0.7581],
	    ['CultriX/model', 'https://huggingface.co./CultriX/model', 0.7010, 0.8320, 0.7194, 0.6158, 0.4700, 0.7385],
	    ['CultriX/Qwen2.5-14B-BrocaFinal', 'https://huggingface.co./CultriX/Qwen2.5-14B-BrocaFinal', 0.6265, 0.7688, 0.7007, 0.7035, 0.5100, 0.7218],
	    ['CultriX/Qwen2.5-14B-Hyperionv1', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv1', 0.7300, 0.8477, 0.7448, 0.6063, 0.4400, 0.7651],
	    ['CultriX/Qwen2.5-14B-Hyperionv3', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv3', 0.7445, 0.8414, 0.7458, 0.6371, 0.4900, 0.7543],
	    ['sometimesanotion/Lamarck-14B-v0.6', 'https://huggingface.co./sometimesanotion/Lamarck-14B-v0.6', 0.7446, 0.8294, 0.7368, 0.6008, 0.4300, 0.7423],
	    ['CultriX/Qwen2.5-14B-Hyper', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyper', 0.7372, 0.8411, 0.7424, 0.5830, 0.4400, 0.7792],
	    ['CultriX/Qwen2.5-14B-Hyperionv4', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv4', 0.7305, 0.8359, 0.7454, 0.5827, 0.4600, 0.7797],
	    ['CultriX/Qwen2.5-14B-Hyperionv5', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv5', 0.7458, 0.8290, 0.7508, 0.6228, 0.5200, 0.7540],
	    ['CultriX/Qwen2.5-14B-Hyperionv6', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv6', 0.7430, 0.8308, 0.7353, 0.6184, 0.4500, 0.7665],
	    ['CultriX/Qwen2.5-14B-Hyperionv7', 'https://huggingface.co./CultriX/Qwen2.5-14B-Hyperionv7', 0.7412, 0.8287, 0.7508, 0.6208, 0.4800, 0.7532],
	]

	# Column names for `data_full`: two identifying columns followed by the six
	# tiny-benchmark score columns (everything from index 2 onward is a score).
	columns = [
	    "Model Configuration", "Model Link", "tinyArc", "tinyHellaswag",
	    "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"
	]
	df_full = pd.DataFrame(data_full, columns=columns)

	def plot_average_scores():
	    """
	    Draw a horizontal bar chart of every model's mean score over the tiny tasks.

	    The mean is taken over the score columns named in ``columns[2:]`` only and
	    is computed on a local frame, so the shared ``df_full`` is left unmodified
	    (adding a derived column to it would leak into every later ``iloc[:, 2:]``
	    slice taken by the other plots).

	    Returns:
	        A ``(pil_image, png_path)`` tuple: the chart as a PIL image and the path
	        of a temporary ``.png`` file containing the same chart. The temporary
	        file is created with ``delete=False`` and is not removed here.
	    """
	    task_columns = columns[2:]
	    ranked = pd.DataFrame({
	        "Model Configuration": df_full["Model Configuration"],
	        "Average Score": df_full[task_columns].mean(axis=1),
	    }).sort_values(by="Average Score", ascending=False)

	    plt.figure(figsize=(14, 10))
	    try:
	        plt.barh(ranked["Model Configuration"], ranked["Average Score"])
	        plt.title("Average Performance of Models Across Tasks", fontsize=16)
	        plt.xlabel("Average Score", fontsize=14)
	        plt.ylabel("Model Configuration", fontsize=14)
	        # Best model at the top of the chart.
	        plt.gca().invert_yaxis()
	        plt.grid(axis='x', linestyle='--', alpha=0.7)
	        plt.tight_layout()

	        img_buffer = io.BytesIO()
	        plt.savefig(img_buffer, format='png')
	    finally:
	        # Always release the figure, even if drawing or saving failed.
	        plt.close()

	    png_bytes = img_buffer.getvalue()
	    pil_image = Image.open(io.BytesIO(png_bytes))
	    # Write the PNG once and close the handle; only the path is handed out.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
	        temp_image_file.write(png_bytes)
	    return pil_image, temp_image_file.name

	def plot_task_performance():
	    """
	    Draw one line per model showing its score on each tiny task.

	    Only the score columns named in ``columns[2:]`` are plotted, so derived
	    columns that other code may have added to ``df_full`` never show up as
	    extra "tasks" on the x axis.

	    Returns:
	        A ``(pil_image, png_path)`` tuple: the chart as a PIL image and the path
	        of a temporary ``.png`` file containing the same chart. The temporary
	        file is created with ``delete=False`` and is not removed here.
	    """
	    task_columns = list(columns[2:])
	    score_rows = df_full[task_columns].to_numpy(dtype=float)

	    plt.figure(figsize=(16, 12))
	    try:
	        for model, scores in zip(df_full["Model Configuration"], score_rows):
	            plt.plot(task_columns, scores, marker="o", label=model)

	        plt.title("Performance of All Models Across Tasks", fontsize=16)
	        plt.xlabel("Task", fontsize=14)
	        plt.ylabel("Score", fontsize=14)
	        plt.xticks(rotation=45)
	        # Legend sits outside the axes so it does not cover the lines.
	        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
	        plt.grid(axis='y', linestyle='--', alpha=0.7)
	        plt.tight_layout()

	        img_buffer = io.BytesIO()
	        plt.savefig(img_buffer, format='png')
	    finally:
	        # Always release the figure, even if drawing or saving failed.
	        plt.close()

	    png_bytes = img_buffer.getvalue()
	    pil_image = Image.open(io.BytesIO(png_bytes))
	    # Write the PNG once and close the handle; only the path is handed out.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
	        temp_image_file.write(png_bytes)
	    return pil_image, temp_image_file.name

	def plot_task_specific_top_models():
	    """
	    Draw a bar chart of the best score per tiny task, labelled with the model
	    that achieved it.

	    ``idxmax`` yields row labels of ``df_full``; they are translated to the
	    corresponding "Model Configuration" names so the chart can say which model
	    is on top. Only the score columns named in ``columns[2:]`` are considered.

	    Returns:
	        A ``(pil_image, png_path)`` tuple: the chart as a PIL image and the path
	        of a temporary ``.png`` file containing the same chart. The temporary
	        file is created with ``delete=False`` and is not removed here.
	    """
	    task_scores = df_full[list(columns[2:])]
	    best_rows = task_scores.idxmax()
	    results = pd.DataFrame({
	        "Task": list(task_scores.columns),
	        "Top Model": df_full.loc[best_rows.to_list(), "Model Configuration"].to_numpy(),
	        "Score": task_scores.max().to_numpy(),
	    })

	    plt.figure(figsize=(14, 8))
	    try:
	        plt.bar(results["Task"], results["Score"])
	        # Categorical bars are placed at x = 0, 1, 2, ...; put each winner's
	        # name just above its bar.
	        for position, (model, score) in enumerate(zip(results["Top Model"], results["Score"])):
	            plt.text(position, score, model, ha="center", va="bottom", fontsize=9)
	        plt.title("Task-Specific Top Models", fontsize=16)
	        plt.xlabel("Task", fontsize=14)
	        plt.ylabel("Score", fontsize=14)
	        plt.grid(axis="y", linestyle="--", alpha=0.7)
	        plt.tight_layout()

	        img_buffer = io.BytesIO()
	        plt.savefig(img_buffer, format='png')
	    finally:
	        # Always release the figure, even if drawing or saving failed.
	        plt.close()

	    png_bytes = img_buffer.getvalue()
	    pil_image = Image.open(io.BytesIO(png_bytes))
	    # Write the PNG once and close the handle; only the path is handed out.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
	        temp_image_file.write(png_bytes)
	    return pil_image, temp_image_file.name



	def plot_heatmap():
	    """
	    Draw a heatmap of every model's tiny-task scores plus a "Total Scores" column.

	    Cell colours come from a per-column min-max normalisation; the annotations
	    show the actual values. The total is computed on a local copy of the score
	    columns (``columns[2:]``), so the shared ``df_full`` is left unmodified and
	    repeated calls always produce the same seven columns.

	    Returns:
	        A ``(pil_image, png_path)`` tuple: the chart as a PIL image and the path
	        of a temporary ``.png`` file containing the same chart. The temporary
	        file is created with ``delete=False`` and is not removed here.
	    """
	    scores = df_full[list(columns[2:])].copy()
	    # Total across all tasks, shown as the last heatmap column.
	    scores["Total Scores"] = scores.sum(axis=1)

	    def _min_max(column):
	        # A constant column has zero range; map it to 0 instead of dividing by 0.
	        span = column.max() - column.min()
	        if span == 0:
	            return column * 0.0
	        return (column - column.min()) / span

	    # Normalize each column individually for consistent coloring.
	    normalized_data = scores.apply(_min_max, axis=0)

	    plt.figure(figsize=(14, 10))
	    try:
	        sns.heatmap(
	            normalized_data,
	            annot=scores,  # Show actual values in annotations
	            cmap="YlGnBu",
	            xticklabels=list(scores.columns),
	            yticklabels=df_full["Model Configuration"],
	        )
	        plt.title("Performance Heatmap", fontsize=16)
	        plt.tight_layout()

	        img_buffer = io.BytesIO()
	        plt.savefig(img_buffer, format='png')
	    finally:
	        # Always release the figure, even if drawing or saving failed.
	        plt.close()

	    png_bytes = img_buffer.getvalue()
	    pil_image = Image.open(io.BytesIO(png_bytes))
	    # Write the PNG once and close the handle; only the path is handed out.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_image_file:
	        temp_image_file.write(png_bytes)
	    return pil_image, temp_image_file.name





	def scrape_mergekit_config(model_name):
	    """
	    For the tiny table's model links.
	    Fetches the model page listed for *model_name* in ``df_full`` and returns
	    the text of the first <pre> tag, which is assumed to hold the YAML config.

	    Args:
	        model_name: value to look up in the "Model Configuration" column.

	    Returns:
	        The stripped <pre> text on success, otherwise one of these messages:
	        "No data found for model ...", "Failed to fetch model page for ..."
	        (non-200 status or any network error) or "No YAML configuration
	        found for ...".
	    """
	    df_row = df_full.loc[df_full["Model Configuration"] == model_name]
	    if df_row.empty:
	        return f"No data found for model {model_name}."

	    fetch_failed = f"Failed to fetch model page for {model_name}. Please check the link."
	    model_link = df_row["Model Link"].values[0]
	    try:
	        # A timeout keeps one unresponsive host from blocking the caller forever.
	        response = requests.get(model_link, timeout=30)
	    except requests.RequestException:
	        # Callers recognise failures by this message, so report network
	        # errors the same way as a bad status code.
	        return fetch_failed
	    if response.status_code != 200:
	        return fetch_failed

	    soup = BeautifulSoup(response.text, "html.parser")
	    yaml_config = soup.find("pre")  # Assume YAML is in <pre> tags
	    if yaml_config:
	        return yaml_config.text.strip()
	    return f"No YAML configuration found for {model_name}."

	def download_yaml(yaml_content, model_name):
	    """
	    Let users download the scraped YAML if it exists.

	    Writes *yaml_content* to ``<model name with '/' replaced by '_'>_config.yaml``
	    inside a fresh temporary directory and returns that path, which is the
	    form a file-download output expects.

	    Args:
	        yaml_content: text produced by the scraping step.
	        model_name: model id used to build the file name.

	    Returns:
	        Path of the written file, or ``None`` when *yaml_content* is empty or
	        is one of the scraper's failure messages.
	    """
	    failure_markers = (
	        "No YAML configuration found",
	        "Failed to fetch model page",
	        "No data found for model",
	    )
	    if not yaml_content or any(marker in yaml_content for marker in failure_markers):
	        return None

	    # Keep the file name flat: model ids contain '/', which must not create
	    # sub-directories.
	    safe_name = str(model_name or "model").replace("/", "_").replace(os.sep, "_")
	    filename = f"{safe_name}_config.yaml"
	    target_path = os.path.join(tempfile.mkdtemp(), filename)
	    with open(target_path, "w", encoding="utf-8") as handle:
	        handle.write(yaml_content)
	    return target_path

	def scrape_model_page(model_url):
	    """
	    Used for the "Live Scraping" text box in the Gradio UI.

	    Fetches *model_url* and reports the text of the first <pre> tag (taken to
	    be the YAML configuration) and of the first ``<div class="metadata">``.

	    Args:
	        model_url: URL typed by the user.

	    Returns:
	        A single display string. On success it has a "YAML Configuration:"
	        and a "Metadata:" section; on any failure it starts with "Error:".
	    """
	    if not model_url or not str(model_url).strip():
	        return "Error: No model URL provided."

	    # NOTE(review): the URL comes straight from the UI and is fetched
	    # server-side without any host restriction - confirm that is acceptable.
	    try:
	        # A timeout keeps one unresponsive host from blocking the UI forever.
	        response = requests.get(str(model_url).strip(), timeout=30)
	        if response.status_code != 200:
	            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

	        soup = BeautifulSoup(response.text, "html.parser")
	        yaml_config = soup.find("pre")
	        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
	        metadata_section = soup.find("div", class_="metadata")
	        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
	        return f"YAML Configuration:\n{yaml_text}\n\nMetadata:\n{metadata_text}"
	    except Exception as e:
	        # UI boundary: show the problem in the text box instead of raising.
	        return f"Error: {str(e)}"

	def display_scraped_model_data(model_url):
	    """
	    Callback behind the "Live Scraping Features" section of the Gradio app:
	    passes the URL to scrape_model_page and hands its result back unchanged.
	    """
	    scraped_text = scrape_model_page(model_url)
	    return scraped_text

	def download_all_data():
	    """
	    Builds a zip archive and returns the path to it. The archive holds:
	    - the CSV of your 'tiny' data (``model_scores.csv``),
	    - four plots (``average_performance.png``, ``task_performance.png``,
	      ``top_models.png``, ``heatmap.png``),
	    - any YAML configurations for the 'tiny' table's models (if found).

	    Returns:
	        Path of ``analysis_data.zip`` inside a fresh temporary directory. A
	        single file-download output needs one path, not a (buffer, name) pair.
	    """
	    # Snapshot the table before the plot functions run.
	    csv_data = df_full.to_csv(index=False).encode('utf-8')

	    plots = {
	        "average_performance": plot_average_scores(),
	        "task_performance": plot_task_performance(),
	        "top_models": plot_task_specific_top_models(),
	        "heatmap": plot_heatmap(),
	    }
	    # Messages the scraper returns instead of a configuration.
	    skip_markers = (
	        "No YAML configuration found",
	        "Failed to fetch model page",
	        "No data found for model",
	    )

	    zip_path = os.path.join(tempfile.mkdtemp(), "analysis_data.zip")
	    with zipfile.ZipFile(zip_path, 'w') as zf:
	        zf.writestr("model_scores.csv", csv_data)

	        # Add the images under readable names rather than temp-file paths.
	        for plot_name, (pil_image, temp_png_path) in plots.items():
	            image_bytes = io.BytesIO()
	            pil_image.save(image_bytes, format='PNG')
	            zf.writestr(f"{plot_name}.png", image_bytes.getvalue())
	            # The plot helpers leave a temp PNG behind for per-plot downloads;
	            # it is not needed once the image is in the archive.
	            try:
	                os.remove(temp_png_path)
	            except OSError:
	                pass  # best-effort cleanup only

	        # Also try scraping each model in the tiny dataset for a YAML config
	        for model_name in df_full["Model Configuration"].to_list():
	            try:
	                yaml_content = scrape_mergekit_config(model_name)
	            except requests.RequestException:
	                # One unreachable page must not abort the whole bundle.
	                continue
	            if any(marker in yaml_content for marker in skip_markers):
	                continue
	            zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())

	    return zip_path

	# --------------------------------------------------------------------
	# PART 2: THE "DATA START" SNIPPET (RANKS 44–105) + Parser
	# --------------------------------------------------------------------
	# This is your larger dataset, rank = 44..105 (the ranks are not contiguous).
	# Each entry is a dict with:
	#   "rank"         - integer rank of the model,
	#   "name"         - model id in "<owner>/<model>" form,
	#   "scores"       - "average" plus one value per benchmark
	#                    (IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO),
	#   "hf_url"       - model page URL, used for scraping when no config is known,
	#   "known_config" - a merge configuration dict, or None. Only the first
	#                    entry (rank 44) carries one.
	benchmark_data = [
	{
	"rank": 44,
	"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
	"scores": {
	"average": 40.10,
	"IFEval": 72.57,
	"BBH": 48.58,
	"MATH": 34.44,
	"GPQA": 17.34,
	"MUSR": 19.39,
	"MMLU-PRO": 48.26
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
	"known_config": {
	"models": [
	{"model": "CultriX/SeQwence-14Bv1"},
	{"model": "allknowingroger/Qwenslerp5-14B"}
	],
	"merge_method": "slerp",
	"base_model": "CultriX/SeQwence-14Bv1",
	"dtype": "bfloat16",
	"parameters": {
	"t": [0, 0.5, 1, 0.5, 0]
	}
	}
	},
	{
	"rank": 45,
	"name": "sthenno-com/miscii-14b-1225",
	"scores": {
	"average": 40.08,
	"IFEval": 78.78,
	"BBH": 50.91,
	"MATH": 31.57,
	"GPQA": 17.00,
	"MUSR": 14.77,
	"MMLU-PRO": 47.46
	},
	"hf_url": "https://huggingface.co./sthenno-com/miscii-14b-1225",
	"known_config": None
	},
	{
	"rank": 46,
	"name": "djuna/Q2.5-Veltha-14B-0.5",
	"scores": {
	"average": 39.96,
	"IFEval": 77.96,
	"BBH": 50.32,
	"MATH": 33.84,
	"GPQA": 15.77,
	"MUSR": 14.17,
	"MMLU-PRO": 47.72
	},
	"hf_url": "https://huggingface.co./djuna/Q2.5-Veltha-14B-0.5",
	"known_config": None
	},
	{
	"rank": 48,
	"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
	"scores": {
	"average": 39.81,
	"IFEval": 71.62,
	"BBH": 48.76,
	"MATH": 33.99,
	"GPQA": 17.34,
	"MUSR": 19.23,
	"MMLU-PRO": 47.95
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
	"known_config": None
	},
	{
	"rank": 50,
	"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
	"scores": {
	"average": 39.46,
	"IFEval": 68.72,
	"BBH": 47.71,
	"MATH": 35.05,
	"GPQA": 18.23,
	"MUSR": 19.56,
	"MMLU-PRO": 47.50
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
	"known_config": None
	},
	{
	"rank": 52,
	"name": "arcee-ai/Virtuoso-Small",
	"scores": {
	"average": 39.43,
	"IFEval": 79.35,
	"BBH": 50.40,
	"MATH": 34.29,
	"GPQA": 11.52,
	"MUSR": 14.44,
	"MMLU-PRO": 46.57
	},
	"hf_url": "https://huggingface.co./arcee-ai/Virtuoso-Small",
	"known_config": None
	},
	{
	"rank": 54,
	"name": "sometimesanotion/Qwentinuum-14B-v6",
	"scores": {
	"average": 39.23,
	"IFEval": 63.04,
	"BBH": 50.23,
	"MATH": 33.84,
	"GPQA": 18.23,
	"MUSR": 21.18,
	"MMLU-PRO": 48.89
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v6",
	"known_config": None
	},
	{
	"rank": 55,
	"name": "djuna/Q2.5-Veltha-14B",
	"scores": {
	"average": 39.21,
	"IFEval": 82.92,
	"BBH": 49.75,
	"MATH": 28.02,
	"GPQA": 14.54,
	"MUSR": 12.26,
	"MMLU-PRO": 47.76
	},
	"hf_url": "https://huggingface.co./djuna/Q2.5-Veltha-14B",
	"known_config": None
	},
	{
	"rank": 57,
	"name": "allknowingroger/QwenSlerp6-14B",
	"scores": {
	"average": 39.02,
	"IFEval": 68.67,
	"BBH": 47.59,
	"MATH": 34.14,
	"GPQA": 16.44,
	"MUSR": 18.32,
	"MMLU-PRO": 48.95
	},
	"hf_url": "https://huggingface.co./allknowingroger/QwenSlerp6-14B",
	"known_config": None
	},
	{
	"rank": 58,
	"name": "allknowingroger/QwenSlerp5-14B",
	"scores": {
	"average": 38.94,
	"IFEval": 71.19,
	"BBH": 47.39,
	"MATH": 33.16,
	"GPQA": 15.32,
	"MUSR": 17.81,
	"MMLU-PRO": 48.78
	},
	"hf_url": "https://huggingface.co./allknowingroger/QwenSlerp5-14B",
	"known_config": None
	},
	{
	"rank": 59,
	"name": "sometimesanotion/Qwentinuum-14B-v5",
	"scores": {
	"average": 38.87,
	"IFEval": 62.86,
	"BBH": 50.28,
	"MATH": 31.57,
	"GPQA": 18.34,
	"MUSR": 21.09,
	"MMLU-PRO": 49.09
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v5",
	"known_config": None
	},
	{
	"rank": 60,
	"name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
	"scores": {
	"average": 38.82,
	"IFEval": 59.90,
	"BBH": 50.12,
	"MATH": 34.89,
	"GPQA": 18.46,
	"MUSR": 21.02,
	"MMLU-PRO": 48.56
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwenvergence-14B-v6-Prose",
	"known_config": None
	},
	{
	"rank": 61,
	"name": "CultriX/Qwen2.5-14B-Brocav3",
	"scores": {
	"average": 38.76,
	"IFEval": 69.52,
	"BBH": 49.05,
	"MATH": 32.25,
	"GPQA": 14.54,
	"MUSR": 19.25,
	"MMLU-PRO": 47.97
	},
	"hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Brocav3",
	"known_config": None
	},
	{
	"rank": 62,
	"name": "sometimesanotion/Qwentinuum-14B-v7",
	"scores": {
	"average": 38.76,
	"IFEval": 61.09,
	"BBH": 50.35,
	"MATH": 33.38,
	"GPQA": 18.79,
	"MUSR": 19.95,
	"MMLU-PRO": 49.00
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v7",
	"known_config": None
	},
	{
	"rank": 64,
	"name": "sometimesanotion/Qwentinuum-14B-v3",
	"scores": {
	"average": 38.74,
	"IFEval": 61.58,
	"BBH": 50.04,
	"MATH": 32.85,
	"GPQA": 18.34,
	"MUSR": 20.62,
	"MMLU-PRO": 49.03
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v3",
	"known_config": None
	},
	{
	"rank": 65,
	"name": "allura-org/TQ2.5-14B-Aletheia-v1",
	"scores": {
	"average": 38.74,
	"IFEval": 75.30,
	"BBH": 50.88,
	"MATH": 29.53,
	"GPQA": 14.99,
	"MUSR": 14.61,
	"MMLU-PRO": 47.12
	},
	"hf_url": "https://huggingface.co./allura-org/TQ2.5-14B-Aletheia-v1",
	"known_config": None
	},
	{
	"rank": 66,
	"name": "qingy2024/Fusion4-14B-Instruct",
	"scores": {
	"average": 38.73,
	"IFEval": 76.49,
	"BBH": 50.70,
	"MATH": 33.91,
	"GPQA": 10.74,
	"MUSR": 13.97,
	"MMLU-PRO": 46.60
	},
	"hf_url": "https://huggingface.co./qingy2024/Fusion4-14B-Instruct",
	"known_config": None
	},
	{
	"rank": 68,
	"name": "CultriX/Qwen2.5-14B-Brocav7",
	"scores": {
	"average": 38.52,
	"IFEval": 67.24,
	"BBH": 48.91,
	"MATH": 31.87,
	"GPQA": 15.66,
	"MUSR": 20.15,
	"MMLU-PRO": 47.31
	},
	"hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Brocav7",
	"known_config": None
	},
	{
	"rank": 71,
	"name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
	"scores": {
	"average": 38.46,
	"IFEval": 56.43,
	"BBH": 50.14,
	"MATH": 35.57,
	"GPQA": 18.46,
	"MUSR": 21.34,
	"MMLU-PRO": 48.80
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v6-Prose",
	"known_config": None
	},
	{
	"rank": 76,
	"name": "CultriX/Qwen2.5-14B-Brocav6",
	"scores": {
	"average": 38.32,
	"IFEval": 69.95,
	"BBH": 47.82,
	"MATH": 29.61,
	"GPQA": 15.66,
	"MUSR": 18.88,
	"MMLU-PRO": 47.99
	},
	"hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Brocav6",
	"known_config": None
	},
	{
	"rank": 80,
	"name": "CultriX/SeQwence-14Bv1",
	"scores": {
	"average": 38.20,
	"IFEval": 66.78,
	"BBH": 47.19,
	"MATH": 33.53,
	"GPQA": 14.88,
	"MUSR": 18.80,
	"MMLU-PRO": 48.00
	},
	"hf_url": "https://huggingface.co./CultriX/SeQwence-14Bv1",
	"known_config": None
	},
	{
	"rank": 85,
	"name": "sometimesanotion/Qwentinuum-14B-v013",
	"scores": {
	"average": 37.96,
	"IFEval": 67.11,
	"BBH": 43.97,
	"MATH": 33.01,
	"GPQA": 14.32,
	"MUSR": 24.99,
	"MMLU-PRO": 44.34
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v013",
	"known_config": None
	},
	{
	"rank": 86,
	"name": "CultriX/Qwen2.5-14B-Wernickev3",
	"scores": {
	"average": 37.94,
	"IFEval": 70.48,
	"BBH": 44.58,
	"MATH": 32.78,
	"GPQA": 14.99,
	"MUSR": 18.69,
	"MMLU-PRO": 46.13
	},
	"hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Wernickev3",
	"known_config": None
	},
	{
	"rank": 88,
	"name": "allknowingroger/QwenSlerp4-14B",
	"scores": {
	"average": 37.80,
	"IFEval": 63.28,
	"BBH": 49.38,
	"MATH": 30.97,
	"GPQA": 16.33,
	"MUSR": 17.59,
	"MMLU-PRO": 49.28
	},
	"hf_url": "https://huggingface.co./allknowingroger/QwenSlerp4-14B",
	"known_config": None
	},
	{
	"rank": 89,
	"name": "CultriX/Qwen2.5-14B-Broca",
	"scores": {
	"average": 37.72,
	"IFEval": 56.04,
	"BBH": 50.03,
	"MATH": 34.59,
	"GPQA": 18.23,
	"MUSR": 18.95,
	"MMLU-PRO": 48.49
	},
	"hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Broca",
	"known_config": None
	},
	{
	"rank": 90,
	"name": "CultriX/Qwen2.5-14B-Emerged",
	"scores": {
	"average": 37.66,
	"IFEval": 70.00,
	"BBH": 45.93,
	"MATH": 30.74,
	"GPQA": 14.32,
	"MUSR": 18.47,
	"MMLU-PRO": 46.51
	},
	"hf_url": "https://huggingface.co./CultriX/Qwen2.5-14B-Emerged",
	"known_config": None
	},
	{
	"rank": 91,
	"name": "sometimesanotion/Qwentinuum-14B-v8",
	"scores": {
	"average": 37.65,
	"IFEval": 54.12,
	"BBH": 50.11,
	"MATH": 34.14,
	"GPQA": 17.79,
	"MUSR": 20.75,
	"MMLU-PRO": 49.02
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwentinuum-14B-v8",
	"known_config": None
	},
	{
	"rank": 92,
	"name": "qingy2024/Fusion-14B-Instruct",
	"scores": {
	"average": 37.64,
	"IFEval": 72.60,
	"BBH": 48.58,
	"MATH": 30.97,
	"GPQA": 13.98,
	"MUSR": 14.81,
	"MMLU-PRO": 44.93
	},
	"hf_url": "https://huggingface.co./qingy2024/Fusion-14B-Instruct",
	"known_config": None
	},
	{
	"rank": 94,
	"name": "CultriX/Qwestion-14B",
	"scores": {
	"average": 37.63,
	"IFEval": 63.18,
	"BBH": 48.76,
	"MATH": 31.72,
	"GPQA": 15.77,
	"MUSR": 17.22,
	"MMLU-PRO": 49.14
	},
	"hf_url": "https://huggingface.co./CultriX/Qwestion-14B",
	"known_config": None
	},
	{
	"rank": 99,
	"name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
	"scores": {
	"average": 37.37,
	"IFEval": 49.18,
	"BBH": 49.80,
	"MATH": 35.57,
	"GPQA": 19.35,
	"MUSR": 21.77,
	"MMLU-PRO": 48.55
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwenvergence-14B-v3-Prose",
	"known_config": None
	},
	{
	"rank": 102,
	"name": "CultriX/SeQwence-14B-v5",
	"scores": {
	"average": 37.27,
	"IFEval": 59.20,
	"BBH": 50.00,
	"MATH": 31.04,
	"GPQA": 16.00,
	"MUSR": 18.33,
	"MMLU-PRO": 49.05
	},
	"hf_url": "https://huggingface.co./CultriX/SeQwence-14B-v5",
	"known_config": None
	},
	{
	"rank": 103,
	"name": "sometimesanotion/Qwen-14B-ProseStock-v4",
	"scores": {
	"average": 37.23,
	"IFEval": 49.42,
	"BBH": 49.54,
	"MATH": 35.50,
	"GPQA": 18.46,
	"MUSR": 21.70,
	"MMLU-PRO": 48.74
	},
	"hf_url": "https://huggingface.co./sometimesanotion/Qwen-14B-ProseStock-v4",
	"known_config": None
	},
	{
	"rank": 104,
	"name": "sometimesanotion/IF-reasoning-experiment-40",
	"scores": {
	"average": 37.21,
	"IFEval": 63.30,
	"BBH": 44.31,
	"MATH": 27.72,
	"GPQA": 17.34,
	"MUSR": 25.86,
	"MMLU-PRO": 44.72
	},
	"hf_url": "https://huggingface.co./sometimesanotion/IF-reasoning-experiment-40",
	"known_config": None
	},
	{
	"rank": 105,
	"name": "CultriX/SeQwence-14B-EvolMerge",
	"scores": {
	"average": 37.20,
	"IFEval": 53.82,
	"BBH": 50.78,
	"MATH": 31.80,
	"GPQA": 17.45,
	"MUSR": 20.26,
	"MMLU-PRO": 49.10
	},
	"hf_url": "https://huggingface.co./CultriX/SeQwence-14B-EvolMerge",
	"known_config": None
	}
	]

	def snippet_scrape_model_page(url):
	    """
	    Equivalent scraping function for the larger dataset
	    to look for <pre> YAML and a .metadata section.

	    Args:
	        url: address of the model page to fetch.

	    Returns:
	        On success a dict with the keys "yaml_configuration" and "metadata"
	        (each holds the stripped text found, or a "No ... found." message).
	        On any failure a string starting with "Error:".
	    """
	    if not url or not str(url).strip():
	        return "Error: No model URL provided."

	    try:
	        # A timeout keeps one unresponsive host from blocking the caller forever.
	        response = requests.get(str(url).strip(), timeout=30)
	        if response.status_code != 200:
	            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

	        soup = BeautifulSoup(response.text, "html.parser")

	        yaml_config = soup.find("pre")
	        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."

	        metadata_section = soup.find("div", class_="metadata")
	        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."

	        return {
	            "yaml_configuration": yaml_text,
	            "metadata": metadata_text,
	        }

	    except Exception as e:
	        # Callers tell success from failure by the return type (dict vs str),
	        # so every problem is reported as an "Error: ..." string.
	        return f"Error: {str(e)}"

	def _snippet_standalone_scraper_script(hf_url):
	    """Return the text of a self-contained scraping script aimed at *hf_url*."""
	    # Built line by line so the emitted script keeps valid Python indentation
	    # no matter how this source file itself is indented.
	    return "\n".join((
	        "import requests",
	        "from bs4 import BeautifulSoup",
	        "",
	        "def scrape_model_page(model_url):",
	        "    try:",
	        "        response = requests.get(model_url)",
	        "        if response.status_code != 200:",
	        '            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"',
	        "",
	        '        soup = BeautifulSoup(response.text, "html.parser")',
	        "",
	        '        yaml_config = soup.find("pre")',
	        '        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."',
	        "",
	        '        metadata_section = soup.find("div", class_="metadata")',
	        '        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."',
	        "",
	        "        return {",
	        '            "yaml_configuration": yaml_text,',
	        '            "metadata": metadata_text',
	        "        }",
	        "",
	        "    except Exception as e:",
	        '        return f"Error: {str(e)}"',
	        "",
	        'if __name__ == "__main__":',
	        f'    model_url = "{hf_url}"',
	        "    result = scrape_model_page(model_url)",
	        "    print(result)",
	    ))


	def _snippet_print_known_config(known_config):
	    """Print a stored merge configuration in YAML-like form between ### markers."""
	    print("###")
	    print("models:")
	    for entry in known_config.get("models", []):
	        print(f" - model: {entry['model']}")
	    # Only print the settings a configuration actually provides.
	    for key in ("merge_method", "base_model", "dtype"):
	        if key in known_config:
	            print(f"{key}: {known_config[key]}")
	    parameters = known_config.get("parameters") or {}
	    if "t" in parameters:
	        print("parameters:")
	        print(f" t: {parameters['t']} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
	    print("###")


	def snippet_print_benchmark_and_config_info(model_info):
	    """
	    Prints an overview for one model of the rank=44..105 dataset.

	    The rank, name and scores are always printed. If ``known_config`` is not
	    None it is printed and nothing is fetched. Otherwise the model page at
	    ``hf_url`` is scraped: a found YAML block is printed between ### markers,
	    a scraping error is printed as is, and when no YAML is found a standalone
	    scraping script for that URL is printed instead.

	    Args:
	        model_info: dict with the keys "rank", "name", "scores", "hf_url"
	            and "known_config".

	    Returns:
	        None. All output goes to stdout.
	    """
	    scores = model_info["scores"]
	    print(f"---\nModel Rank: {model_info['rank']}")
	    print(f"Model Name: {model_info['name']}")
	    print(f"Model average score across benchmarks in %: {scores['average']}")
	    # The preposition differs per line in the established output; keep it.
	    for preposition, benchmark in (
	        ("on", "IFEval"),
	        ("on", "BBH"),
	        ("on", "MATH"),
	        ("in", "GPQA"),
	        ("in", "MUSR"),
	        ("in", "MMLU-PRO"),
	    ):
	        print(f"Models average score {preposition} {benchmark} benchmarks in %: {scores[benchmark]}")

	    # If there's a known_config, print it in YAML form and stop.
	    known_config = model_info["known_config"]
	    if known_config is not None:
	        _snippet_print_known_config(known_config)
	        return

	    # Otherwise, do scraping:
	    scraped = snippet_scrape_model_page(model_info["hf_url"])
	    if isinstance(scraped, str):
	        # The scraper reports failures as a plain string.
	        print("(No MergeKit configuration found or scraping error.)")
	        print(scraped)
	        return

	    if "No YAML configuration found." in scraped["yaml_configuration"]:
	        print("(No MergeKit configuration found.)\n")
	        print("You can try the following Python script to scrape the model page:\n")
	        print("#" * 70)
	        print(_snippet_standalone_scraper_script(model_info["hf_url"]))
	        print("#" * 70)
	        return

	    # Found some YAML
	    print("###")
	    print(scraped["yaml_configuration"])
	    print("###")

	def run_non_tiny_benchmarks():
	    """
	    Captures the stdout from printing each model in benchmark_data (ranks 44..105),
	    returning the entire output as a single string for Gradio to display.

	    stdout is redirected through a context manager, so it is restored even when
	    printing one of the models raises.
	    """
	    from contextlib import redirect_stdout

	    buffer = io.StringIO()
	    with redirect_stdout(buffer):
	        for model in benchmark_data:
	            snippet_print_benchmark_and_config_info(model)
	    return buffer.getvalue()

	# --------------------------------------------------------------------
	# PART 3: The Gradio App
	# --------------------------------------------------------------------
	# Builds the Gradio UI and wires every button to its callback, then starts
	# the app. Sections, in order: four plot buttons for the tiny table, YAML
	# scraping/saving for a selected model, a "download everything" button,
	# live scraping of an arbitrary URL, and the non-tiny benchmark parser.
	# NOTE(review): the indentation of this block was lost in this copy, so the
	# nesting of the statements under the `with` lines below has to be restored
	# before the file can run - confirm which components belong to which
	# Row/Column.
	with gr.Blocks() as demo:
	gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")

	# The existing UI for the “tiny” data
	# Each plot callback returns (PIL image, temp PNG path), which feed the
	# image component and the file-download component respectively.
	with gr.Row():
	btn1 = gr.Button("Show Average Performance")
	img1 = gr.Image(type="pil", label="Average Performance Plot")
	img1_download = gr.File(label="Download Average Performance")
	btn1.click(plot_average_scores, outputs=[img1, img1_download])

	with gr.Row():
	btn2 = gr.Button("Show Task Performance")
	img2 = gr.Image(type="pil", label="Task Performance Plot")
	img2_download = gr.File(label="Download Task Performance")
	btn2.click(plot_task_performance, outputs=[img2, img2_download])

	with gr.Row():
	btn3 = gr.Button("Task-Specific Top Models")
	img3 = gr.Image(type="pil", label="Task-Specific Top Models Plot")
	img3_download = gr.File(label="Download Top Models")
	btn3.click(plot_task_specific_top_models, outputs=[img3, img3_download])

	with gr.Row():
	btn4 = gr.Button("Plot Performance Heatmap")
	heatmap_img = gr.Image(type="pil", label="Performance Heatmap")
	heatmap_download = gr.File(label="Download Heatmap")
	btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])

	# Scraping & YAML handling for the tiny table
	# The dropdown lists the model names of df_full; the scraped text shown in
	# yaml_output is also the input of the save button's callback.
	with gr.Row():
	model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
	with gr.Column():
	scrape_btn = gr.Button("Scrape MergeKit Configuration")
	yaml_output = gr.Textbox(lines=10, placeholder="YAML Configuration will appear here.")
	scrape_btn.click(scrape_mergekit_config, inputs=model_selector, outputs=yaml_output)
	with gr.Column():
	save_yaml_btn = gr.Button("Save MergeKit Configuration")
	yaml_download = gr.File(label="Download MergeKit Configuration")
	save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)

	# Download everything (CSV, plots, any found YAML)
	# download_all_data takes no inputs; its result goes to one file component.
	with gr.Row():
	download_all_btn = gr.Button("Download Everything")
	all_downloads = gr.File(label="Download All Data")
	download_all_btn.click(download_all_data, outputs=all_downloads)

	# Live Scraping
	# The URL typed by the user is passed to display_scraped_model_data and the
	# returned text is shown in the "Scraped Data" box.
	gr.Markdown("## Live Scraping Features")
	with gr.Row():
	url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co./<model>")
	live_scrape_btn = gr.Button("Scrape Model Page")
	live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
	live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)

	# Non-Tiny Benchmarks
	# run_non_tiny_benchmarks returns the captured printout as one string.
	gr.Markdown("## Non-Tiny Benchmark Parser (Ranks 44–105)")
	with gr.Row():
	parse_non_tiny_btn = gr.Button("Parse Non-Tiny Benchmarks")
	parse_non_tiny_output = gr.Textbox(label="Non-Tiny Benchmark Output", lines=30)
	parse_non_tiny_btn.click(fn=run_non_tiny_benchmarks, outputs=parse_non_tiny_output)

	# Start the app; this runs whenever the module is executed.
	demo.launch()