# Code adapted from: https://huggingface.co./spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
"""Gradio dashboard for the Finetuning Bittensor subnet leaderboard.

Renders per-competition evaluation results, top-model benchmark plots, and
validator stats, and periodically restarts the hosting Hugging Face Space so
the dashboard reloads fresh data.
"""

import datetime
import os

import gradio as gr
import matplotlib.pyplot as plt
from apscheduler.schedulers.background import BackgroundScheduler
from dotenv import load_dotenv
from huggingface_hub import HfApi

import competitions
import utils

# Custom font markup injected at the top of the page (empty in this revision).
FONT = """"""

TITLE = """

Finetuning Subnet Leaderboard

"""

HEADER = """

Finetuning is a Bittensor subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receives a steady emission of TAO.

"""

EVALUATION_HEADER = """

Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator

"""

# Hugging Face Space hosting this leaderboard (target of scheduled restarts).
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
# Average Bittensor block time, used to convert block counts to wall-clock time.
SECONDS_PER_BLOCK = 12

load_dotenv()

HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)


def get_next_update_div(current_block: int, next_update_block: int) -> str:
    """Return an HTML snippet estimating when the next reward update occurs.

    Args:
        current_block: The chain's current block height.
        next_update_block: The block at which the next reward update happens.

    Returns:
        An HTML fragment stating the number of blocks remaining and the
        approximate wall-clock minutes until then.
    """
    now = datetime.datetime.now()
    blocks_to_go = next_update_block - current_block
    next_update_time = now + datetime.timedelta(
        seconds=blocks_to_go * SECONDS_PER_BLOCK
    )
    delta = next_update_time - now
    return f"""
Next reward update: {blocks_to_go} blocks (~{int(delta.total_seconds() // 60)} minutes)
"""


def get_last_updated_div() -> str:
    """Return an HTML snippet with the current UTC timestamp."""
    # Fix: datetime.utcnow() is deprecated (Python 3.12+); use an aware UTC
    # datetime instead. The rendered string is byte-identical.
    return f"""
Last Updated: {datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S")} (UTC)
"""


def restart_space():
    """Restart the hosting Space so the dashboard reloads fresh data."""
    API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)


def main():
    # Load all state needed to populate the dashboard.
    # NOTE(review): a previous comment claimed this "infinitely tries" until
    # all data is available, but there is no retry loop here — confirm whether
    # utils.load_state_vars() retries internally.
    state_vars = utils.load_state_vars()
    model_data = state_vars["model_data"]
    vali_runs = state_vars["vali_runs"]
    scores = state_vars["scores"]
    validator_df = state_vars["validator_df"]
    benchmarks_df = state_vars["benchmarks_df"]
    benchmarks_targets = state_vars["benchmarks_targets"]

    demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
    with demo:
        gr.HTML(FONT)
        gr.HTML(TITLE)
        gr.HTML(HEADER)

        # Emissions summary: one entry per model that currently has incentive.
        gr.Label(
            label="Emissions",
            value={
                f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
                for c in model_data
                if c.incentive
            },
            num_top_classes=10,
        )

        # Competitions currently shown on the dashboard.
        comp_ids = [2, 3]

        with gr.Accordion("Competition Results"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
            competition_leaderboards = []
            for comp_id in comp_ids:
                details = competitions.COMPETITION_DETAILS[comp_id]
                with gr.Accordion(f"{details.name} Competition"):
                    gr.HTML(details.html_description)
                    competition_leaderboards.append(
                        gr.components.Dataframe(
                            value=utils.leaderboard_data(
                                model_data, scores, comp_id, show_stale.value
                            ),
                            headers=[
                                "Name",
                                "Win Rate",
                                "Score",
                                "Weight",
                                "UID",
                                "Block",
                            ],
                            datatype=[
                                "markdown",
                                "number",
                                "number",
                                "number",
                                "number",
                                "number",
                            ],
                            elem_id=f"comp{comp_id}-table",
                            interactive=False,
                            visible=True,
                        )
                    )
            gr.HTML(
                """
More stats on taostats."""
            )
            # Refresh every competition leaderboard when the toggle changes.
            # (Renamed the loop variable from `id`, which shadowed the builtin.)
            show_stale.change(
                lambda stale: [
                    utils.leaderboard_data(model_data, scores, comp_id, stale)
                    for comp_id in comp_ids
                ],
                inputs=[show_stale],
                outputs=competition_leaderboards,
            )

        if benchmarks_df is not None:

            def create_benchmark_plot(benchmark: str, comp_id: int):
                """Plot one benchmark's history for a competition, with dotted
                horizontal reference lines for well-known target models."""
                fig = plt.figure(figsize=(10, 8))

                # Filter to just entries for this competition.
                df = benchmarks_df[benchmarks_df["competition_id"] == comp_id]
                plt.plot(df["timestamp"], df[benchmark])

                # Adding horizontal dotted lines for various benchmark targets
                # (well-known models), labeled at the right edge of the plot.
                for model, score in benchmarks_targets[benchmark].items():
                    plt.axhline(y=score, linestyle="--", label=f"{model}")
                    plt.text(
                        benchmarks_df["timestamp"].max(),
                        score,
                        f"{model}",
                        va="center",
                        ha="right",
                        backgroundcolor="white",
                    )

                # Adding labels and title
                plt.ylabel(benchmark.upper())
                plt.title(f"{benchmark.upper()} Over Time")
                plt.xticks(rotation=45)

                return fig

            with gr.Accordion("Top Model Benchmarks"):
                for comp_id in comp_ids:
                    details = competitions.COMPETITION_DETAILS[comp_id]
                    with gr.Accordion(f"{details.name} Benchmarks"):
                        mmlu = create_benchmark_plot("mmlu", comp_id)
                        mmlu_pro = create_benchmark_plot("mmlu_pro", comp_id)
                        gr.Plot(mmlu)
                        gr.Plot(mmlu_pro)
                gr.HTML(
                    """
Benchmarks computed using lm-eval harness
"""
                )

        gr.HTML(
            """"""
        )
        with gr.Accordion("Validator Stats"):
            gr.components.Dataframe(
                utils.make_validator_dataframe(validator_df, model_data),
                interactive=False,
                visible=True,
            )
        gr.HTML(value=get_last_updated_div())

    scheduler = BackgroundScheduler()
    # Fix: the previous comment said "every 15 minutes" but the configured
    # interval is 60 * 30 seconds = 30 minutes. Interval preserved; comment
    # corrected to match the code.
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # restart every 30 minutes
    scheduler.start()

    demo.launch()


# Invoked unconditionally (no __main__ guard) so the hosting Space launches
# the app whether the file is executed or imported — preserved as-is.
main()