hotfix/total-completed-jobs

#4
by schampoux - opened
Files changed (4)
  1. api.py +12 -32
  2. app.py +48 -67
  3. classes.py +1 -21
  4. utils.py +37 -91
api.py CHANGED
@@ -2,20 +2,20 @@
 import atexit
 import datetime
 
-import pandas as pd
-import uvicorn
 from apscheduler.schedulers.background import BackgroundScheduler
 from fastapi import FastAPI
-
 import utils
-from classes import Metagraph, Productivity, Throughput
+import pandas as pd
+import uvicorn
+
+from classes import Productivity, ProductivityData, Throughput
+
 
 # Global variables (saves time on loading data)
 state_vars = None
 reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
 data_all = None
-data_30d = None
 data_24h = None
 
 app = FastAPI()
@@ -24,14 +24,12 @@ def load_data():
     """
     Reload the state variables
     """
-    global data_all, data_30d, data_24h, reload_timestamp
+    global data_all, data_24h, reload_timestamp
 
     utils.fetch_new_runs()
 
     data_all = utils.preload_data()
 
-    data_30d = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('30 days'))]
-
     data_24h = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('1 days'))]
     reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
@@ -63,31 +61,13 @@ def productivity_metrics():
     Get the productivity metrics
     """
 
-    result = utils.get_productivity(df_all=data_all, df_24h=data_24h, df_30d=data_30d)
+    # Unpack the metrics using the correct keys
+    result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
+    all_time = ProductivityData(**result['all_time'])
+    last_24h = ProductivityData(**result['last_24h'])
 
-    return result
-@app.get("/metagraph", response_model=Metagraph)
-def get_metagraph():
-    """
-    Get the metagraph
-    """
-
-    df_m = utils.get_metagraph()
-    df_miners = df_m.sort_values('I', ascending=False).reset_index()
-    incentives = df_miners['I'].astype(float).values
-    emissions = df_miners['E'].astype(float).values
-    identities = df_miners['identity']
-    hotkeys = df_miners['hotkey']
-    coldkeys = df_miners['coldkey']
-    trusts = df_miners['trust'].astype(float).values
-    results = {'incentives': incentives,
-               'emissions': emissions,
-               'identities': identities,
-               'hotkeys': hotkeys,
-               'coldkeys': coldkeys,
-               'trusts': trusts}
-    return results
+    return Productivity(all_time=all_time, last_24h=last_24h)
 
 @app.get("/throughput", response_model=Throughput)
 def throughput_metrics():
@@ -95,7 +75,7 @@ def throughput_metrics():
     Get the throughput metrics
     """
 
-    return utils.get_data_transferred(data_all, data_24h)
+    return Throughput(all_time=utils.get_data_transferred(data_all), last_24h=utils.get_data_transferred(data_24h))
 
 
 
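With these changes the /productivity endpoint returns only the two scalar counters per window, validated against the slimmed-down Pydantic models. A minimal sketch of how a client might consume it; the URL and printed value are illustrative, not part of the PR:

import requests

# Hypothetical host; the dashboard uses its own BASE_URL constant.
resp = requests.get('http://localhost:5001/productivity').json()
# Expected shape, per classes.Productivity / classes.ProductivityData:
# {'all_time': {'unique_folded': int, 'total_completed_jobs': int},
#  'last_24h': {'unique_folded': int, 'total_completed_jobs': int}}
print(resp['all_time']['total_completed_jobs'])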
app.py CHANGED
@@ -1,9 +1,8 @@
 import time
-
 import pandas as pd
+import streamlit as st
 import plotly.express as px
 import requests
-import streamlit as st
 
 import utils
 
@@ -18,24 +17,12 @@ Simulation duration distribution
 UPDATE_INTERVAL = 3600
 BASE_URL = 'http://143.198.21.86:5001/'
 
+
 st.title('Folding Subnet Dashboard')
 st.markdown('<br>', unsafe_allow_html=True)
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_productivity_data():
-    return requests.get(f'{BASE_URL}/productivity').json()
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_throughput_data():
-    return requests.get(f'{BASE_URL}/throughput').json()
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_metagraph_data():
-    return utils.get_metagraph()
-
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_leaderboard_data(df_m, ntop, entity_choice):
-    return utils.get_leaderboard(df_m, entity_choice=entity_choice)
 
 #### ------ PRODUCTIVITY ------
 
@@ -43,84 +30,75 @@ def fetch_leaderboard_data(df_m, ntop, entity_choice):
 st.subheader('Productivity overview')
 st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
 
-productivity_all = fetch_productivity_data()
-completed_jobs = productivity_all['all_time']['total_completed_jobs_data']
-
+productivity_all = requests.get(f'{BASE_URL}/productivity').json()
+productivity = productivity_all['all_time']
 productivity_24h = productivity_all['last_24h']
-completed_jobs = pd.DataFrame(completed_jobs)
-
-unique_folded = pd.DataFrame(productivity_all['all_time']['unique_folded_data'])
-# unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
 
-m1, m2, m3 = st.columns(3)
-m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
-m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
-m3.metric('Total simulations ran', f'{len(completed_jobs)*10:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]*10:,.0f} (24h)')
-st.markdown('<br>', unsafe_allow_html=True)
+# st.write(productivity_all)
+# # st.write(productivity)
+# st.write(productivity_24h)
 
-PROD_CHOICES = {
-    'Total jobs completed': 'total_pdbs',
-    'Unique proteins folded': 'unique_pdbs',
-}
+m1, m2 = st.columns(2)
 
-prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
-prod_choice = PROD_CHOICES[prod_choice_label]
+m1.metric('Unique proteins folded', f'{productivity.get("unique_folded", 0):,.0f}', delta=f'{productivity_24h.get("unique_folded", 0):,.0f} (24h)')
+m2.metric('Total jobs completed', f'{productivity.get("total_completed_jobs", 0):,.0f}', delta=f'{productivity_24h.get("total_completed_jobs", 0):,.0f} (24h)')
 
-PROD_DATA = {
-    'unique_pdbs': unique_folded,
-    'total_pdbs': completed_jobs,
-}
-df = PROD_DATA[prod_choice]
+# m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
 
-df = df.sort_values(by='last_event_at').reset_index()
+# st.markdown('<br>', unsafe_allow_html=True)
 
-# Create a cumulative count column
-df['cumulative_jobs'] = df.index + 1
+# time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
 
-# Plot the cumulative jobs over time
-st.plotly_chart(
-    px.line(df, x='last_event_at', y='cumulative_jobs',
-            labels={'last_event_at': 'Time', 'cumulative_jobs': prod_choice_label}).update_traces(fill='tozeroy'),
-    use_container_width=True,
-)
+# PROD_CHOICES = {
+#     'Unique proteins folded': 'unique_pdbs',
+#     'Total simulations': 'total_pdbs',
+#     'Total simulation steps': 'total_md_steps',
+# }
+# prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
+# prod_choice = PROD_CHOICES[prod_choice_label]
+# steps_running_total = time_binned_data[prod_choice].sum().cumsum()
+# st.plotly_chart(
+#     # add fillgradient to make it easier to see the trend
+#     px.area(steps_running_total, y=prod_choice,
+#             labels={'last_event_at':'', prod_choice: prod_choice_label},
+#     ).update_traces(fill='tozeroy'),
+#     use_container_width=True,
+# )
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ THROUGHPUT ------
 st.subheader('Throughput overview')
 
 st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
 
 MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
-throughput = fetch_throughput_data()
+throughput = requests.get(f'{BASE_URL}/throughput').json()
 
 data_transferred = throughput['all_time']
 data_transferred_24h = throughput['last_24h']
-data_df = pd.DataFrame(throughput['data'])
-data_df = data_df.sort_values(by='updated_at').reset_index()
-data_df['updated_at'] = pd.to_datetime(data_df['updated_at'])
-data_df['Total validator data sent'] = data_df['md_inputs_sum'].cumsum()
-data_df['Total received data'] = data_df['md_outputs_sum'].cumsum()
 
 m1, m2, m3 = st.columns(3)
 m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
 m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
 m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
 
-st.plotly_chart(
-    px.line(data_df, x='updated_at', y=['Total validator data sent', 'Total received data'],
-            labels={'updated_at':'Time', 'value':f'Data Transferred ({MEM_UNIT})', 'variable':'Direction'},
-    ).update_traces(fill='tozeroy').update_layout(legend=dict(
-        yanchor="top",
-        y=0.99,
-        xanchor="left",
-        x=0.01
-    )),
-    use_container_width=True,
-)
+
+# IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
+# io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
+# io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
+
+# st.plotly_chart(
+#     px.area(io_running_total, y='value', color='variable',
+#             labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
+#     ),
+#     use_container_width=True,
+# )
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ LEADERBOARD ------
 
 st.subheader('Leaderboard')
@@ -129,21 +107,24 @@ m1, m2 = st.columns(2)
 ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
 entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
 
-df_m = fetch_metagraph_data()
-df_miners = fetch_leaderboard_data(df_m, ntop=ntop, entity_choice=entity_choice)
+df_m = utils.get_metagraph(time.time()//UPDATE_INTERVAL)
+df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
+
 # hide colorbar and don't show y axis
 st.plotly_chart(
-    px.bar(df_miners.iloc[-ntop:], x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
+    px.bar(df_miners, x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
            labels={'I':'Incentive', 'trust':'Trust', 'stake':'Stake', '_index':'Rank'},
     ).update_layout(coloraxis_showscale=False, yaxis_visible=False),
     use_container_width=True,
 )
 
+
 with st.expander('Show raw metagraph data'):
     st.dataframe(df_m)
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ LOGGED RUNS ------
 
 # st.subheader('Logged runs')
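Note on the leaderboard change: app.py now calls utils.get_metagraph(time.time()//UPDATE_INTERVAL) instead of a locally cached wrapper. Because st.cache_data keys its cache on the function's arguments, passing the current time bucket makes the cached metagraph expire roughly once per UPDATE_INTERVAL seconds. A minimal sketch of that pattern, with an illustrative function name not taken from the PR:

import time
import streamlit as st

UPDATE_INTERVAL = 3600  # seconds

@st.cache_data()
def load_expensive_data(cache_bucket):
    # cache_bucket is ignored by the body; it only changes the cache key,
    # so the result is recomputed at most once per UPDATE_INTERVAL seconds.
    return {"loaded_at": time.time()}

df = load_expensive_data(time.time() // UPDATE_INTERVAL)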
classes.py CHANGED
@@ -1,23 +1,12 @@
 from pydantic import BaseModel
-from datetime import datetime
-from typing import List
-
-
-class Data(BaseModel):
-    last_event_at: List[datetime]
-    cumulative_jobs: List[int]
 
 class ProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
-    unique_folded_data: Data
-    total_completed_jobs_data: Data
 
-
 class Productivity(BaseModel):
     all_time: ProductivityData
     last_24h: ProductivityData
-    last_30d: ProductivityData
 
 class ThroughputData(BaseModel):
     validator_sent: float
@@ -25,13 +14,4 @@ class ThroughputData(BaseModel):
 
 class Throughput(BaseModel):
     all_time: ThroughputData
-    last_24h: ThroughputData
-    data: dict
-
-class Metagraph(BaseModel):
-    incentives: List[float]
-    emissions: List[float]
-    identities: List[str]
-    hotkeys: List[str]
-    coldkeys: List[str]
-    trusts: List[float]
+    last_24h: ThroughputData
utils.py CHANGED
@@ -1,13 +1,13 @@
-import json
 import os
-import time
-
-import bittensor as bt
-import numpy as np
-import pandas as pd
-import streamlit as st
 import tqdm
+import time
 import wandb
+import streamlit as st
+import pandas as pd
+import bittensor as bt
+import ast
+
+
 # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
 # TODO: Store relevant wandb data in a database for faster access
 
@@ -142,109 +142,54 @@ def get_total_md_input_sizes(run):
     return convert_unit(size_bytes, from_unit='B', to_unit=BASE_UNITS)
 
 
-
-
-def get_data_transferred(df, df_24h, unit='GB'):
-    def safe_json_loads(x):
-        try:
-            return json.loads(x)
-        except ValueError:
-            return []
-    def np_sum(x):
-        try:
-            # Flatten the list of lists and convert it to a NumPy array
-            flat_array = np.array([item for sublist in x for item in sublist])
-
-            # Use np.sum() to sum all elements in the flattened array
-            total_sum = np.sum(flat_array)
-            return total_sum
-        except TypeError:
-            return 0
-    df = df.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
-    df['md_inputs_sizes'] = df.md_inputs_sizes.apply(safe_json_loads)
-    df['response_returned_files_sizes'] = df.response_returned_files_sizes.apply(safe_json_loads)
-    df['md_inputs_sum'] = df.md_inputs_sizes.apply(np.sum)
-    df['md_outputs_sum'] = df.response_returned_files_sizes.apply(np_sum)
-    df['md_inputs_sum'] = df['md_inputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
-    df['md_outputs_sum'] = df['md_outputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
-
-    df_24h = df_24h.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
-    df_24h['md_inputs_sizes'] = df_24h.md_inputs_sizes.apply(safe_json_loads)
-    df_24h['response_returned_files_sizes'] = df_24h.response_returned_files_sizes.apply(safe_json_loads)
-    df_24h['md_inputs_sum'] = df_24h.md_inputs_sizes.apply(np.sum)
-    df_24h['md_outputs_sum'] = df_24h.response_returned_files_sizes.apply(np_sum)
-
-
-    validator_sent = np.nansum(df['md_inputs_sum'].values)
-    miner_sent = np.nansum(df['md_outputs_sum'].values)
-    validator_sent_24h = np.nansum(df_24h['md_inputs_sum'].values)
-    miner_sent_24h = np.nansum(df_24h['md_outputs_sum'].values)
+def get_data_transferred(df, unit='GB'):
 
-    return {'all_time': {
-            'validator_sent': validator_sent,
-            'miner_sent': miner_sent,
-        },
-        'last_24h': {
-            'validator_sent': convert_unit(validator_sent_24h, from_unit='B', to_unit=BASE_UNITS),
-            'miner_sent': convert_unit(miner_sent_24h, from_unit='B', to_unit=BASE_UNITS),
-        },
-        'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
-    }
-
-def calculate_productivity_data(df):
-    completed_jobs = df[df['updated_count'] == 10]
-    completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
-    unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
-    completed_jobs = completed_jobs.sort_values(by='last_event_at').reset_index()
-    completed_jobs['cumulative_jobs'] = completed_jobs.index + 1
-    unique_folded = unique_folded.sort_values(by='last_event_at').reset_index()
-    unique_folded['cumulative_jobs'] = unique_folded.index + 1
+    validator_sent = df.md_inputs_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().sum()
+    miner_sent = df.response_returned_files_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().explode().sum()
+
     return {
-        'unique_folded': len(unique_folded),
-        'total_completed_jobs': len(completed_jobs),
-        'unique_folded_data': {'last_event_at': unique_folded['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':unique_folded['cumulative_jobs'].values},
-        'total_completed_jobs_data': {'last_event_at': completed_jobs['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':completed_jobs['cumulative_jobs'].values}
+        'validator_sent': convert_unit(validator_sent, from_unit='B', to_unit=BASE_UNITS),
+        'miner_sent': convert_unit(miner_sent, from_unit='B', to_unit=BASE_UNITS),
     }
 
-def get_productivity(df_all, df_24h, df_30d):
+
+def get_productivity(df_all, df_24h):
     result = {
         'all_time': {
             'unique_folded': 0,
-            'total_completed_jobs': 0,
-            'unique_folded_data': {},
-            'total_completed_jobs_data': {}
+            'total_completed_jobs': 0
         },
         'last_24h': {
             'unique_folded': 0,
-            'total_completed_jobs': 0,
-            "unique_folded_data": {},
-            'total_completed_jobs_data': {}
-        },
-        'last_30d': {
-            'unique_folded': 0,
-            'total_completed_jobs': 0,
-            "unique_folded_data": {},
-            'total_completed_jobs_data': {}
+            'total_completed_jobs': 0
         }
     }
-
-
-
-    if df_all is not None:
-        result['all_time'].update(calculate_productivity_data(df_all))
-
-    if df_24h is not None:
-        result['last_24h'].update(calculate_productivity_data(df_24h))
-
-    if df_30d is not None:
-        result['last_30d'].update(calculate_productivity_data(df_30d))
+    if df_all is not None:
+        unique_folded_all = len(df_all.pdb_id.value_counts())
+        completed_jobs_all = len(df_all[df_all.active == False])
+
+        total_historical_run_updates = df_all.active.isna().sum()
+        total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
+
+        result['all_time'].update({
+            'unique_folded': unique_folded_all,
+            'total_completed_jobs': (completed_jobs_all + total_historical_completed_jobs).item(),
+        })
+
+    if df_24h is not None:
+        completed_jobs_24h = df_24h[df_24h['updated_count'] >= 10]
+        unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
+        result['last_24h'].update({
+            'unique_folded': len(unique_completed_jobs_24h),
+            'total_completed_jobs': len(completed_jobs_24h)
+        })
     return result
 
-def get_leaderboard(df, entity_choice='identity'):
+def get_leaderboard(df, ntop=10, entity_choice='identity'):
 
     df = df.loc[df.validator_permit==False]
     df.index = range(df.shape[0])
-    return df.groupby(entity_choice).I.sum().sort_values().reset_index()
+    return df.groupby(entity_choice).I.sum().sort_values().reset_index().tail(ntop)
 
 
 
@@ -324,7 +269,8 @@ def preload_data():
     return combined_df
 
 @st.cache_data()
-def get_metagraph():
+def get_metagraph(time):
+    print(f'Loading metagraph with time {time}')
     subtensor = bt.subtensor(network=NETWORK)
     m = subtensor.metagraph(netuid=NETUID)
     meta_cols = ['I','stake','trust','validator_trust','validator_permit','C','R','E','dividends','last_update']
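The core of the hotfix is the completed-jobs estimate in utils.get_productivity: rows whose active flag is False count as completed jobs directly, while older rows that predate the active column (active is NaN) are approximated as one completed job per 10 run updates. A toy illustration of that heuristic; the DataFrame values below are invented for the example:

import numpy as np
import pandas as pd

df_all = pd.DataFrame({
    'pdb_id': ['1abc', '1abc', '2xyz', '3def', '4ghi'],
    # True/False rows come from the current schema; NaN marks historical rows.
    'active': [False, True, False, np.nan, np.nan],
})

completed_jobs_all = len(df_all[df_all.active == False])               # 2 finished jobs
total_historical_run_updates = df_all.active.isna().sum()              # 2 historical rows
total_historical_completed_jobs = total_historical_run_updates // 10   # 0, estimated at 10 updates per pdb
print(completed_jobs_all + total_historical_completed_jobs)            # 2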