hotfix/total-completed-jobs #4 — opened by schampoux
api.py  CHANGED

@@ -2,20 +2,20 @@
 import atexit
 import datetime
 
-import pandas as pd
-import uvicorn
 from apscheduler.schedulers.background import BackgroundScheduler
 from fastapi import FastAPI
 import utils
+import pandas as pd
+import uvicorn
+
+from classes import Productivity, ProductivityData, Throughput
+
 
 # Global variables (saves time on loading data)
 state_vars = None
 reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
 data_all = None
-data_30d = None
 data_24h = None
 
 app = FastAPI()
@@ -24,14 +24,12 @@ def load_data():
     """
     Reload the state variables
    """
-    global data_all,
+    global data_all, data_24h, reload_timestamp
 
    utils.fetch_new_runs()
 
     data_all = utils.preload_data()
 
-    data_30d = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('30 days'))]
-
     data_24h = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('1 days'))]
     reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
@@ -63,31 +61,13 @@ def productivity_metrics():
     Get the productivity metrics
     """
 
-    return result
-
-@app.get("/metagraph", response_model=Metagraph)
-def get_metagraph():
-    """
-    Get the metagraph
-    """
-
-    df_m = utils.get_metagraph()
-    df_miners = df_m.sort_values('I', ascending=False).reset_index()
-    incentives = df_miners['I'].astype(float).values
-    emissions = df_miners['E'].astype(float).values
-    identities = df_miners['identity']
-    hotkeys = df_miners['hotkey']
-    coldkeys = df_miners['coldkey']
-    trusts = df_miners['trust'].astype(float).values
-    results = {'incentives': incentives,
-               'emissions': emissions,
-               'identities': identities,
-               'hotkeys': hotkeys,
-               'coldkeys': coldkeys,
-               'trusts': trusts}
-    return results
+    # Unpack the metrics using the correct keys
+    result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
+    all_time = ProductivityData(**result['all_time'])
+    last_24h = ProductivityData(**result['last_24h'])
+
+    return Productivity(all_time=all_time, last_24h=last_24h)
 
 @app.get("/throughput", response_model=Throughput)
 def throughput_metrics():
@@ -95,7 +75,7 @@ def throughput_metrics():
     Get the throughput metrics
     """
 
-    return utils.get_data_transferred(data_all, data_24h)
+    return Throughput(all_time=utils.get_data_transferred(data_all), last_24h=utils.get_data_transferred(data_24h))
 
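Note on the reworked endpoints: /productivity now returns the slimmed-down Productivity model, and /throughput wraps two get_data_transferred calls in a Throughput model. A minimal client-side sketch of the expected response shape follows; the URL is the one hard-coded in app.py and the field names come from classes.py, so this is an illustration rather than part of the PR.

import requests

BASE_URL = 'http://143.198.21.86:5001/'

# /productivity mirrors the Productivity model: two ProductivityData blocks.
productivity = requests.get(f'{BASE_URL}/productivity', timeout=10).json()
for window in ('all_time', 'last_24h'):
    block = productivity[window]
    print(window, block['unique_folded'], block['total_completed_jobs'])

# /throughput mirrors the Throughput model: validator_sent / miner_sent per window.
throughput = requests.get(f'{BASE_URL}/throughput', timeout=10).json()
print(throughput['all_time']['validator_sent'], throughput['all_time']['miner_sent'])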
app.py  CHANGED

@@ -1,9 +1,8 @@
 import time
-
 import pandas as pd
+import streamlit as st
 import plotly.express as px
 import requests
-import streamlit as st
 
 import utils
 
@@ -18,24 +17,12 @@ Simulation duration distribution
 UPDATE_INTERVAL = 3600
 BASE_URL = 'http://143.198.21.86:5001/'
 
+
 st.title('Folding Subnet Dashboard')
 st.markdown('<br>', unsafe_allow_html=True)
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_productivity_data():
-    return requests.get(f'{BASE_URL}/productivity').json()
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_throughput_data():
-    return requests.get(f'{BASE_URL}/throughput').json()
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_metagraph_data():
-    return utils.get_metagraph()
-
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_leaderboard_data(df_m, ntop, entity_choice):
-    return utils.get_leaderboard(df_m, entity_choice=entity_choice)
 
 #### ------ PRODUCTIVITY ------
 
@@ -43,84 +30,75 @@ def fetch_leaderboard_data(df_m, ntop, entity_choice):
 st.subheader('Productivity overview')
 st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
 
-productivity_all =
+productivity_all = requests.get(f'{BASE_URL}/productivity').json()
+productivity = productivity_all['all_time']
 productivity_24h = productivity_all['last_24h']
-completed_jobs = pd.DataFrame(completed_jobs)
-
-unique_folded = pd.DataFrame(productivity_all['all_time']['unique_folded_data'])
-# unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
 
-m3.metric('Total simulations ran', f'{len(completed_jobs)*10:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]*10:,.0f} (24h)')
-st.markdown('<br>', unsafe_allow_html=True)
+# st.write(productivity_all)
+# # st.write(productivity)
+# st.write(productivity_24h)
 
-    'Total jobs completed': 'total_pdbs',
-    'Unique proteins folded': 'unique_pdbs',
-}
+m1, m2 = st.columns(2)
 
-    'unique_pdbs': unique_folded,
-    'total_pdbs': completed_jobs,
-}
-df = PROD_DATA[prod_choice]
+m1.metric('Unique proteins folded', f'{productivity.get("unique_folded", 0):,.0f}', delta=f'{productivity_24h.get("unique_folded", 0):,.0f} (24h)')
+m2.metric('Total jobs completed', f'{productivity.get("total_completed_jobs", 0):,.0f}', delta=f'{productivity_24h.get("total_completed_jobs", 0):,.0f} (24h)')
 
-#
-df['cumulative_jobs'] = df.index + 1
+# m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
 
-#
-)
+# st.markdown('<br>', unsafe_allow_html=True)
+
+# time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
+
+# PROD_CHOICES = {
+#     'Unique proteins folded': 'unique_pdbs',
+#     'Total simulations': 'total_pdbs',
+#     'Total simulation steps': 'total_md_steps',
+# }
+# prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
+# prod_choice = PROD_CHOICES[prod_choice_label]
+# steps_running_total = time_binned_data[prod_choice].sum().cumsum()
+# st.plotly_chart(
+#     # add fillgradient to make it easier to see the trend
+#     px.area(steps_running_total, y=prod_choice,
+#             labels={'last_event_at':'', prod_choice: prod_choice_label},
+#     ).update_traces(fill='tozeroy'),
+#     use_container_width=True,
+# )
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ THROUGHPUT ------
 st.subheader('Throughput overview')
 
 st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
 
 MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
-throughput =
+throughput = requests.get(f'{BASE_URL}/throughput').json()
 
 data_transferred = throughput['all_time']
 data_transferred_24h = throughput['last_24h']
-data_df = pd.DataFrame(throughput['data'])
-data_df = data_df.sort_values(by='updated_at').reset_index()
-data_df['updated_at'] = pd.to_datetime(data_df['updated_at'])
-data_df['Total validator data sent'] = data_df['md_inputs_sum'].cumsum()
-data_df['Total received data'] = data_df['md_outputs_sum'].cumsum()
 
 m1, m2, m3 = st.columns(3)
 m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
 m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
 m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
 
-)
-)
+
+# IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
+# io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
+# io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
+
+# st.plotly_chart(
+#     px.area(io_running_total, y='value', color='variable',
+#             labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
+#     ),
+#     use_container_width=True,
+# )
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ LEADERBOARD ------
 
 st.subheader('Leaderboard')
@@ -129,21 +107,24 @@ m1, m2 = st.columns(2)
 ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
 entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
 
-df_m =
-df_miners =
+df_m = utils.get_metagraph(time.time()//UPDATE_INTERVAL)
+df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
+
 # hide colorbar and don't show y axis
 st.plotly_chart(
-    px.bar(df_miners
+    px.bar(df_miners, x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
           labels={'I':'Incentive', 'trust':'Trust', 'stake':'Stake', '_index':'Rank'},
    ).update_layout(coloraxis_showscale=False, yaxis_visible=False),
    use_container_width=True,
 )
 
+
 with st.expander('Show raw metagraph data'):
     st.dataframe(df_m)
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ LOGGED RUNS ------
 
 # st.subheader('Logged runs')
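Note on the cache-busting pattern above: utils.get_metagraph is decorated with @st.cache_data(), so app.py passes time.time()//UPDATE_INTERVAL purely to key the cache; the metagraph is recomputed only when the hourly bucket changes. A minimal sketch of the pattern in isolation (the function body here is a stand-in, not the dashboard's loader):

import time
import streamlit as st

UPDATE_INTERVAL = 3600  # seconds, matching app.py

@st.cache_data()
def load_snapshot(cache_bucket):
    # cache_bucket is unused in the body; it only keys the cache, so a new
    # hourly bucket value forces Streamlit to recompute the result.
    return {'loaded_at': time.time()}

snapshot = load_snapshot(time.time() // UPDATE_INTERVAL)
st.write(snapshot)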
classes.py  CHANGED

@@ -1,23 +1,12 @@
 from pydantic import BaseModel
-from datetime import datetime
-from typing import List
-
-
-class Data(BaseModel):
-    last_event_at: List[datetime]
-    cumulative_jobs: List[int]
 
 class ProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
-    unique_folded_data: Data
-    total_completed_jobs_data: Data
 
-
 class Productivity(BaseModel):
     all_time: ProductivityData
     last_24h: ProductivityData
-    last_30d: ProductivityData
 
 class ThroughputData(BaseModel):
     validator_sent: float
@@ -25,13 +14,4 @@ class ThroughputData(BaseModel):
 
 class Throughput(BaseModel):
     all_time: ThroughputData
-    last_24h: ThroughputData
-    data: dict
-
-class Metagraph(BaseModel):
-    incentives: List[float]
-    emissions: List[float]
-    identities: List[str]
-    hotkeys: List[str]
-    coldkeys: List[str]
-    trusts: List[float]
+    last_24h: ThroughputData
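Note on how the trimmed models are meant to be filled: api.py builds them directly from the dict that utils.get_productivity returns. A self-contained sketch with made-up counts (the values are illustrative only):

from pydantic import BaseModel

class ProductivityData(BaseModel):
    unique_folded: int
    total_completed_jobs: int

class Productivity(BaseModel):
    all_time: ProductivityData
    last_24h: ProductivityData

# Same shape as the get_productivity() result after this PR (values invented).
result = {
    'all_time': {'unique_folded': 120, 'total_completed_jobs': 450},
    'last_24h': {'unique_folded': 4, 'total_completed_jobs': 12},
}
prod = Productivity(all_time=ProductivityData(**result['all_time']),
                    last_24h=ProductivityData(**result['last_24h']))
print(prod.all_time.total_completed_jobs)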
utils.py  CHANGED

@@ -1,13 +1,13 @@
-import json
 import os
-import time
-
-import bittensor as bt
-import numpy as np
-import pandas as pd
-import streamlit as st
 import tqdm
+import time
 import wandb
+import streamlit as st
+import pandas as pd
+import bittensor as bt
+import ast
+
+
 # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
 # TODO: Store relevant wandb data in a database for faster access
 
@@ -142,109 +142,54 @@ def get_total_md_input_sizes(run):
     return convert_unit(size_bytes, from_unit='B', to_unit=BASE_UNITS)
 
 
-
-def get_data_transferred(df, df_24h, unit='GB'):
-    def safe_json_loads(x):
-        try:
-            return json.loads(x)
-        except ValueError:
-            return []
-    def np_sum(x):
-        try:
-            # Flatten the list of lists and convert it to a NumPy array
-            flat_array = np.array([item for sublist in x for item in sublist])
-
-            # Use np.sum() to sum all elements in the flattened array
-            total_sum = np.sum(flat_array)
-            return total_sum
-        except TypeError:
-            return 0
-    df = df.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
-    df['md_inputs_sizes'] = df.md_inputs_sizes.apply(safe_json_loads)
-    df['response_returned_files_sizes'] = df.response_returned_files_sizes.apply(safe_json_loads)
-    df['md_inputs_sum'] = df.md_inputs_sizes.apply(np.sum)
-    df['md_outputs_sum'] = df.response_returned_files_sizes.apply(np_sum)
-    df['md_inputs_sum'] = df['md_inputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
-    df['md_outputs_sum'] = df['md_outputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
-
-    df_24h = df_24h.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
-    df_24h['md_inputs_sizes'] = df_24h.md_inputs_sizes.apply(safe_json_loads)
-    df_24h['response_returned_files_sizes'] = df_24h.response_returned_files_sizes.apply(safe_json_loads)
-    df_24h['md_inputs_sum'] = df_24h.md_inputs_sizes.apply(np.sum)
-    df_24h['md_outputs_sum'] = df_24h.response_returned_files_sizes.apply(np_sum)
-
-
-    validator_sent = np.nansum(df['md_inputs_sum'].values)
-    miner_sent = np.nansum(df['md_outputs_sum'].values)
-    validator_sent_24h = np.nansum(df_24h['md_inputs_sum'].values)
-    miner_sent_24h = np.nansum(df_24h['md_outputs_sum'].values)
+def get_data_transferred(df, unit='GB'):
 
-        },
-        'last_24h': {
-            'validator_sent': convert_unit(validator_sent_24h, from_unit='B', to_unit=BASE_UNITS),
-            'miner_sent': convert_unit(miner_sent_24h, from_unit='B', to_unit=BASE_UNITS),
-        },
-        'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
-    }
-
-def calculate_productivity_data(df):
-    completed_jobs = df[df['updated_count'] == 10]
-    completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
-    unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
-    completed_jobs = completed_jobs.sort_values(by='last_event_at').reset_index()
-    completed_jobs['cumulative_jobs'] = completed_jobs.index + 1
-    unique_folded = unique_folded.sort_values(by='last_event_at').reset_index()
-    unique_folded['cumulative_jobs'] = unique_folded.index + 1
+    validator_sent = df.md_inputs_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().sum()
+    miner_sent = df.response_returned_files_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().explode().sum()
+
     return {
-        '
-        '
-        'unique_folded_data': {'last_event_at': unique_folded['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':unique_folded['cumulative_jobs'].values},
-        'total_completed_jobs_data': {'last_event_at': completed_jobs['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':completed_jobs['cumulative_jobs'].values}
+        'validator_sent': convert_unit(validator_sent, from_unit='B', to_unit=BASE_UNITS),
+        'miner_sent': convert_unit(miner_sent, from_unit='B', to_unit=BASE_UNITS),
     }
 
-
+
+def get_productivity(df_all, df_24h):
     result = {
         'all_time': {
             'unique_folded': 0,
-            'total_completed_jobs': 0
-            'unique_folded_data': {},
-            'total_completed_jobs_data': {}
+            'total_completed_jobs': 0
         },
         'last_24h': {
             'unique_folded': 0,
-            'total_completed_jobs': 0
-            "unique_folded_data": {},
-            'total_completed_jobs_data': {}
-        },
-        'last_30d': {
-            'unique_folded': 0,
-            'total_completed_jobs': 0,
-            "unique_folded_data": {},
-            'total_completed_jobs_data': {}
+            'total_completed_jobs': 0
         }
     }
+    if df_all is not None:
+        unique_folded_all = len(df_all.pdb_id.value_counts())
+        completed_jobs_all = len(df_all[df_all.active == False])
 
-
-    if df_all is not None:
-        result['all_time'].update(calculate_productivity_data(df_all))
+        total_historical_run_updates = df_all.active.isna().sum()
+        total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
 
-
+        result['all_time'].update({
+            'unique_folded': unique_folded_all,
+            'total_completed_jobs': (completed_jobs_all + total_historical_completed_jobs).item(),
+        })
 
-    if
-
+    if df_24h is not None:
+        completed_jobs_24h = df_24h[df_24h['updated_count'] >= 10]
+        unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
+        result['last_24h'].update({
+            'unique_folded': len(unique_completed_jobs_24h),
+            'total_completed_jobs': len(completed_jobs_24h)
+        })
     return result
 
-def get_leaderboard(df, entity_choice='identity'):
+def get_leaderboard(df, ntop=10, entity_choice='identity'):
 
     df = df.loc[df.validator_permit==False]
     df.index = range(df.shape[0])
-    return df.groupby(entity_choice).I.sum().sort_values().reset_index()
+    return df.groupby(entity_choice).I.sum().sort_values().reset_index().tail(ntop)
 
 
 
@@ -324,7 +269,8 @@ def preload_data():
     return combined_df
 
 @st.cache_data()
-def get_metagraph():
+def get_metagraph(time):
+    print(f'Loading metagraph with time {time}')
     subtensor = bt.subtensor(network=NETWORK)
     m = subtensor.metagraph(netuid=NETUID)
     meta_cols = ['I','stake','trust','validator_trust','validator_permit','C','R','E','dividends','last_update']
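Note on the new get_data_transferred implementation: the wandb columns store sizes as stringified lists (and lists of lists for miner responses), so the PR parses them with ast.literal_eval and flattens them with explode() before summing. A toy sketch of the same pattern on a hand-made frame (the data is invented for illustration):

import ast
import pandas as pd

# Stand-in for the run history: md_inputs_sizes holds stringified lists,
# response_returned_files_sizes holds stringified lists of lists.
df = pd.DataFrame({
    'md_inputs_sizes': ['[100, 200]', None, '[50]'],
    'response_returned_files_sizes': ['[[10, 20], [30]]', '[[5]]', None],
})

# Parse each string, flatten with explode(), then sum the resulting scalars (bytes).
validator_sent = df.md_inputs_sizes.dropna().apply(ast.literal_eval).explode().sum()
miner_sent = (df.response_returned_files_sizes.dropna()
                .apply(ast.literal_eval).explode().explode().sum())

print(validator_sent, miner_sent)  # 350 65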