# Code adapted from: https://huggingface.co./spaces/RaoFoundation/pretraining-leaderboard/blob/main/app.py
import datetime
import os
import time
import gradio as gr
from apscheduler.schedulers.background import BackgroundScheduler
from dotenv import load_dotenv
from huggingface_hub import HfApi
import competitions
import utils
# Assumed stylesheet link for the 'JMH Typewriter' font used by the CSS below;
# the original constant was lost.
FONT = (
    """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
)
TITLE = """
Finetuning Subnet Leaderboard """
HEADER = """Finetuning is a Bittensor subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receive a steady emission of TAO."""
EVALUATION_HEADER = """Shows the latest per-competition evaluation statistics as calculated by the Taoverse validator """
HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
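# Bittensor produces a new block roughly every 12 seconds.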
SECONDS_PER_BLOCK = 12
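# Pull secrets such as HF_TOKEN from a local .env file when present.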
load_dotenv()
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)
def get_next_update_div(current_block: int, next_update_block: int) -> str:
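    """Returns an HTML snippet estimating when the next reward update will occur."""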
now = datetime.datetime.now()
blocks_to_go = next_update_block - current_block
next_update_time = now + datetime.timedelta(
seconds=blocks_to_go * SECONDS_PER_BLOCK
)
delta = next_update_time - now
return f"""Next reward update: {blocks_to_go} blocks (~{int(delta.total_seconds() // 60)} minutes)
"""
def get_last_updated_div() -> str:
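    """Returns an HTML snippet with the current UTC time as a last-updated stamp."""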
return f"""Last Updated: {datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")} (UTC)
"""
def restart_space():
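    """Restarts the space so the dashboard reloads fresh data on boot."""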
API.restart_space(repo_id=HF_REPO_ID, token=HF_TOKEN)
def main():
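    """Loads leaderboard state and builds the Gradio dashboard."""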
    # To avoid leaderboard failures, infinitely try until we get all data
    # needed to populate the dashboard.
    state_vars = None
    while state_vars is None:
        try:
            state_vars = utils.load_state_vars()
        except Exception as e:
            print(f"Failed to load state: {e}")
            time.sleep(30)

    model_data = state_vars["model_data"]
    vali_runs = state_vars["vali_runs"]
    scores = state_vars["scores"]
    validator_df = state_vars["validator_df"]
    benchmarks = state_vars.get("benchmarks", None)
    benchmark_timestamp = state_vars.get("benchmark_timestamp", None)
    losses_1 = state_vars["losses_1"]
    losses_2 = state_vars["losses_2"]
demo = gr.Blocks(css=".typewriter {font-family: 'JMH Typewriter', sans-serif;}")
with demo:
gr.HTML(FONT)
gr.HTML(TITLE)
gr.HTML(HEADER)
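        # Incentive-weighted emissions summary for the top models across competitions.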
gr.Label(
label="Emissions",
value={
f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
for c in model_data
if c.incentive
},
num_top_classes=10,
)
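        # The benchmarks section is optional; render it only when benchmark data was loaded.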
if benchmarks is not None:
with gr.Accordion("Top Model Benchmarks"):
gr.components.Dataframe(benchmarks)
gr.HTML("""PPL computed using a stride of 512. See
here for the full code.
""")
gr.HTML(f"""Last Updated: {benchmark_timestamp.strftime("%Y-%m-%d %H:%M:%S")} (UTC)
""")
with gr.Accordion("Competition Results"):
gr.HTML(EVALUATION_HEADER)
show_stale = gr.Checkbox(label="Show Stale", interactive=True)
competition_leaderboards = []
comp_1 = competitions.COMPETITION_DETAILS[1]
with gr.Accordion(f"{comp_1.name} Competition"):
gr.HTML(comp_1.html_description)
competition_leaderboards.append(gr.components.Dataframe(
value=utils.leaderboard_data(model_data, scores, 1, show_stale.value),
headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
datatype=["markdown", "number", "number", "number", "number", "number"],
elem_id="comp1-table",
interactive=False,
visible=True,
))
gr.LinePlot(
losses_1,
x="timestamp",
x_title="Date",
y="losses",
y_title="Average Loss",
interactive=True,
visible=True,
width=1024,
title="Best Average Loss Over Time",
)
comp_2 = competitions.COMPETITION_DETAILS[2]
            # Convert the losses into the fraction of correct answers (the MC score).
losses_2["losses"] = losses_2["losses"].apply(lambda x: 1 - x if x else None)
with gr.Accordion(f"{comp_2.name} Competition"):
gr.HTML(comp_2.html_description)
competition_leaderboards.append(gr.components.Dataframe(
value=utils.leaderboard_data(model_data, scores, 2, show_stale.value),
headers=["Name", "Win Rate", "MC Score", "Weight", "UID", "Block"],
datatype=["markdown", "number", "number", "number", "number", "number"],
elem_id="comp2-table",
interactive=False,
visible=True,
))
gr.LinePlot(
losses_2,
x="timestamp",
x_title="Date",
y="losses",
y_title="MC Score",
interactive=True,
visible=True,
width=1024,
title="Best MC Score Over Time",
)
gr.HTML("""
Name: the 🤗 Hugging Face repo (click to go to the model card)
Win Rate: % of head-to-head evals won vs. other eval'd models, given an epsilon advantage or disadvantage
Average Loss: the last loss value on the evaluation data for the model as calculated by the OTF validator (lower is better)
MC Score: the % of correct multiple choice answers given by the model as calculated by the OTF validator (higher is better)
UID: the Bittensor UID of the miner
Weight: the bittensor weight set for this model
Block: the Bittensor block that the model was submitted in More stats on taostats .""")
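            # Re-query both competition tables whenever the "Show Stale" box is toggled.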
show_stale.change(
lambda stale: [utils.leaderboard_data(model_data, scores, 1, stale), utils.leaderboard_data(model_data, scores, 2, stale)],
inputs=[show_stale],
outputs=competition_leaderboards,
)
with gr.Accordion("Validator Stats"):
gr.components.Dataframe(
utils.make_validator_dataframe(validator_df, model_data),
interactive=False,
visible=True,
)
gr.HTML(value=get_last_updated_div())
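    # Periodically restart the space so it reloads fresh chain data.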
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        restart_space, "interval", seconds=60 * 30
    )  # restart every 30 minutes
scheduler.start()
demo.launch()
if __name__ == "__main__":
    main()