Commit · 17bb6e0
Parent(s): f2a2662

First pass at finetuning api
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title:
-emoji:
+title: Finetuning subnet
+emoji: :em
 colorFrom: purple
 colorTo: pink
 sdk: gradio
app.py CHANGED
@@ -2,6 +2,7 @@
 
 import os
 import datetime
+from typing import Dict
 import gradio as gr
 
 from dotenv import load_dotenv
@@ -13,14 +14,18 @@ import utils
 FONT = (
     """<link href="https://fonts.cdnfonts.com/css/jmh-typewriter" rel="stylesheet">"""
 )
-TITLE = """<h1 align="center" id="space-title" class="typewriter">Subnet
-HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/
-
+TITLE = """<h1 align="center" id="space-title" class="typewriter">Finetuning Subnet Leaderboard</h1>"""
+HEADER = """<h2 align="center" class="typewriter"><a href="https://github.com/macrocosm-os/finetuning" target="_blank">Finetuning</a> is a <a href="https://bittensor.com/" target="_blank">Bittensor</a> subnet that rewards miners for producing finetuned models in defined competitions. The model with the best head-to-head score in each competition receives a steady emission of TAO.</h2>"""
+# TODO: Update links once subnet is regged.
 EVALUATION_DETAILS = """<ul><li><b>Name:</b> the 🤗 Hugging Face model name (click to go to the model card)</li><li><b>Rewards / Day:</b> the expected rewards per day based on current ranking.</li><li><b>Last Average Loss:</b> the last loss value on the evaluation data for the model as calculated by a validator (lower is better)</li><li><b>UID:</b> the Bittensor UID of the miner</li><li><b>Block:</b> the Bittensor block that the model was submitted in</li></ul><br/>More stats on <a href="https://taostats.io/subnets/netuid-9/" target="_blank">taostats</a>."""
 EVALUATION_HEADER = """<h3 align="center">Shows the latest internal evaluation statistics as calculated by the Opentensor validator</h3>"""
 
+# A map of competition IDs to HTML descriptions.
+COMPETITION_DETAILS: Dict[int, str] = {
+    1: """<b>Competition ID 1:</b> Produce the best fine-tuned model from a Subnet 9 pretrained model. Models are evaluated using synthetic prompt/response data from Subnet 18."""
+}
 
-HF_REPO_ID = "macrocosm-os/
+HF_REPO_ID = "macrocosm-os/finetuning-leaderboard"
 SECONDS_PER_BLOCK = 12
 
 load_dotenv()
@@ -68,6 +73,7 @@ def main():
         # TODO: Re-enable once ""SubtensorModule.BlocksSinceEpoch" not found" issue is resolved.
         # gr.HTML(value=get_next_update_div(current_block, next_epoch_block))
 
+        # TODO: Figure out the best approach to showing the per competition rewards.
         gr.Label(
             value={
                 f"{c.namespace}/{c.name} ({c.commit[0:8]}) · (τ{round(c.emission, 2):,})": c.incentive
@@ -85,19 +91,23 @@ def main():
         with gr.Accordion("Evaluation Stats"):
            gr.HTML(EVALUATION_HEADER)
            show_stale = gr.Checkbox(label="Show Stale", interactive=True)
-
-
-
-
-
-
-
-
+           competition_leaderboards = []
+           # TODO: Dynamically generate per-competition leaderboards based on model_data.
+           with gr.Accordion("Finetuned SN9 competition"):
+               gr.HTML(COMPETITION_DETAILS[1])
+               competition_leaderboards.append(gr.components.Dataframe(
+                   value=utils.leaderboard_data(model_data, scores, show_stale.value),
+                   headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
+                   datatype=["markdown", "number", "number", "number", "number", "number"],
+                   elem_id="leaderboard-table",
+                   interactive=False,
+                   visible=True,
+               ))
            gr.HTML(EVALUATION_DETAILS)
            show_stale.change(
                lambda stale: utils.leaderboard_data(model_data, scores, stale),
                inputs=[show_stale],
-               outputs=
+               outputs=competition_leaderboards,
            )
 
        gr.LinePlot(
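The leaderboard added above is hard-coded for competition 1, and the in-diff TODO asks for dynamic generation. A minimal sketch of that direction, meant to live inside app.py next to the COMPETITION_DETAILS dict: it assumes (this commit does not confirm) that each model_data entry exposes a competition_id attribute and that utils.leaderboard_data accepts a pre-filtered model list.

```python
# Hypothetical sketch for the "dynamically generate per-competition leaderboards"
# TODO. Assumes model_data entries carry a competition_id attribute and that
# utils.leaderboard_data can take a pre-filtered model list; neither is
# confirmed by this commit. COMPETITION_DETAILS is the module-level dict above.
import gradio as gr

import utils


def build_competition_leaderboards(model_data, scores, show_stale_value):
    """Create one accordion and one leaderboard table per competition ID."""
    leaderboards = []
    for comp_id in sorted({c.competition_id for c in model_data}):
        comp_models = [c for c in model_data if c.competition_id == comp_id]
        with gr.Accordion(f"Competition {comp_id}"):
            gr.HTML(COMPETITION_DETAILS.get(comp_id, ""))
            leaderboards.append(
                gr.components.Dataframe(
                    value=utils.leaderboard_data(comp_models, scores, show_stale_value),
                    headers=["Name", "Win Rate", "Average Loss", "Weight", "UID", "Block"],
                    datatype=["markdown", "number", "number", "number", "number", "number"],
                    interactive=False,
                )
            )
    return leaderboards
```

Note that show_stale.change can still fan out via outputs=competition_leaderboards, but once there is more than one table, the callback must return one value per output rather than a single dataframe.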
utils.py CHANGED
@@ -148,9 +148,10 @@ def get_subnet_data(
 
 def get_wandb_runs(project: str, filters: Dict[str, Any]) -> List:
     """Get the latest runs from Wandb, retrying infinitely until we get them.
-
+
     Returns:
-        List: List of runs matching the provided filters, newest run (by creation time) first.
+        List: List of runs matching the provided filters, newest run (by creation time) first.
+    """
     while True:
         api = wandb.Api(api_key=WANDB_TOKEN)
         runs = list(
@@ -172,7 +173,7 @@ def get_scores(
     wandb_runs: List,
 ) -> Dict[int, Dict[str, Optional[float]]]:
     """Returns the most recent scores for the provided UIDs.
-
+
     Args:
         uids (List[int]): List of UIDs to get scores for.
         wandb_runs (List): List of validator runs from Wandb. Requires the runs are provided in descending order.
@@ -204,6 +205,7 @@
             "win_rate": uid_data.get("win_rate", None),
             "win_total": uid_data.get("win_total", None),
             "weight": uid_data.get("weight", None),
+            "competition_id": uid_data.get("competition_id", None),
             "fresh": is_fresh,
         }
         if len(result) == len(uids):
@@ -244,12 +246,7 @@ def get_losses_over_time(wandb_runs: List) -> pd.DataFrame:
         best_loss = math.inf
         for _, uid_data in all_uid_data.items():
             loss = uid_data.get("average_loss", math.inf)
-
-            if (
-                loss < best_loss
-                and (loss > 2.5 or timestamp > datetime.datetime(2024, 2, 12))
-                and (loss < 5 or timestamp > datetime.datetime(2024, 3, 27))
-            ):
+            if loss < best_loss:
                 best_loss = uid_data["average_loss"]
         if best_loss != math.inf:
             timestamps.append(timestamp)
@@ -386,6 +383,7 @@ def load_state_vars() -> dict[Any]:
    bt.logging.success(f"Loaded {len(model_data)} models")
    vali_runs = get_wandb_runs(
        project=VALIDATOR_WANDB_PROJECT,
+       # TODO: Update to point to the OTF vali on finetuning
        filters={"config.type": "validator", "config.uid": 238},
    )
 
@@ -427,7 +425,7 @@ def load_state_vars() -> dict[Any]:
 
 
 def test_load_state_vars():
-
+    # TODO: Change to finetuning data.
    subtensor = bt.subtensor("finney")
    metagraph = subtensor.metagraph(NETUID, lite=True)
    model_data = [
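With competition_id now recorded per UID in get_scores, the per-competition tables in app.py can be fed by splitting score rows on that field. A hedged sketch follows; the score-dict shape mirrors what get_scores builds in this commit, but the helper name and grouping logic are illustrative only.

```python
# Hypothetical helper for bucketing per-UID scores by the new competition_id
# field. The inner dict shape (win_rate, win_total, weight, competition_id,
# fresh) follows get_scores in this commit; everything else is illustrative.
from collections import defaultdict
from typing import Any, Dict, Optional


def group_scores_by_competition(
    scores: Dict[int, Dict[str, Optional[float]]],
) -> Dict[Any, Dict[int, Dict[str, Optional[float]]]]:
    """Bucket per-UID score dicts by competition_id (None if unreported)."""
    by_competition: Dict[Any, Dict[int, Dict[str, Optional[float]]]] = defaultdict(dict)
    for uid, uid_scores in scores.items():
        by_competition[uid_scores.get("competition_id")][uid] = uid_scores
    return dict(by_competition)


# Usage sketch: one leaderboard per competition.
# for comp_id, comp_scores in group_scores_by_competition(scores).items():
#     ...  # build a gr.Dataframe from comp_scores
```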