hotfix/total-completed-jobs

#4
by schampoux - opened
Files changed (4)
  1. api.py +12 -32
  2. app.py +48 -67
  3. classes.py +1 -21
  4. utils.py +37 -91
api.py CHANGED
@@ -2,20 +2,20 @@
 import atexit
 import datetime
 
-import pandas as pd
-import uvicorn
 from apscheduler.schedulers.background import BackgroundScheduler
 from fastapi import FastAPI
-
 import utils
-from classes import Metagraph, Productivity, Throughput
+import pandas as pd
+import uvicorn
+
+from classes import Productivity, ProductivityData, Throughput
+
 
 # Global variables (saves time on loading data)
 state_vars = None
 reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
 data_all = None
-data_30d = None
 data_24h = None
 
 app = FastAPI()
@@ -24,14 +24,12 @@ def load_data():
     """
     Reload the state variables
     """
-    global data_all, data_30d, data_24h, reload_timestamp
+    global data_all, data_24h, reload_timestamp
 
     utils.fetch_new_runs()
 
     data_all = utils.preload_data()
 
-    data_30d = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('30 days'))]
-
     data_24h = data_all[(pd.Timestamp.now() - data_all['updated_at'].apply(lambda x: pd.Timestamp(x)) < pd.Timedelta('1 days'))]
     reload_timestamp = datetime.datetime.now().strftime('%D %T')
 
@@ -63,31 +61,13 @@ def productivity_metrics():
     Get the productivity metrics
     """
 
-    result = utils.get_productivity(df_all=data_all, df_24h=data_24h, df_30d=data_30d)
+    # Unpack the metrics using the correct keys
+    result = utils.get_productivity(df_all=data_all, df_24h=data_24h)
+    all_time = ProductivityData(**result['all_time'])
+    last_24h = ProductivityData(**result['last_24h'])
 
-    return result
-@app.get("/metagraph", response_model=Metagraph)
-def get_metagraph():
-    """
-    Get the metagraph
-    """
-
-    df_m = utils.get_metagraph()
-    df_miners = df_m.sort_values('I', ascending=False).reset_index()
-    incentives = df_miners['I'].astype(float).values
-    emissions = df_miners['E'].astype(float).values
-    identities = df_miners['identity']
-    hotkeys = df_miners['hotkey']
-    coldkeys = df_miners['coldkey']
-    trusts = df_miners['trust'].astype(float).values
-    results = {'incentives': incentives,
-               'emissions': emissions,
-               'identities': identities,
-               'hotkeys': hotkeys,
-               'coldkeys': coldkeys,
-               'trusts': trusts}
-    return results
+    return Productivity(all_time=all_time, last_24h=last_24h)
 
 @app.get("/throughput", response_model=Throughput)
 def throughput_metrics():
@@ -95,7 +75,7 @@ def throughput_metrics():
     Get the throughput metrics
     """
 
-    return utils.get_data_transferred(data_all, data_24h)
+    return Throughput(all_time=utils.get_data_transferred(data_all), last_24h=utils.get_data_transferred(data_24h))
 
 
 
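With these changes the /productivity endpoint returns only the two scalar counters per window, validated against the slimmed-down Pydantic models. A minimal sketch of how a client might consume it; the URL and printed value are illustrative, not part of the PR:

import requests

# Hypothetical host; the dashboard uses its own BASE_URL constant.
resp = requests.get('http://localhost:5001/productivity').json()
# Expected shape, per classes.Productivity / classes.ProductivityData:
# {'all_time': {'unique_folded': int, 'total_completed_jobs': int},
#  'last_24h': {'unique_folded': int, 'total_completed_jobs': int}}
print(resp['all_time']['total_completed_jobs'])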
app.py CHANGED
@@ -1,9 +1,8 @@
 import time
-
 import pandas as pd
+import streamlit as st
 import plotly.express as px
 import requests
-import streamlit as st
 
 import utils
 
@@ -18,24 +17,12 @@ Simulation duration distribution
 UPDATE_INTERVAL = 3600
 BASE_URL = 'http://143.198.21.86:5001/'
 
+
 st.title('Folding Subnet Dashboard')
 st.markdown('<br>', unsafe_allow_html=True)
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_productivity_data():
-    return requests.get(f'{BASE_URL}/productivity').json()
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_throughput_data():
-    return requests.get(f'{BASE_URL}/throughput').json()
 
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_metagraph_data():
-    return utils.get_metagraph()
-
-@st.cache_data(ttl=UPDATE_INTERVAL)
-def fetch_leaderboard_data(df_m, ntop, entity_choice):
-    return utils.get_leaderboard(df_m, entity_choice=entity_choice)
 
 #### ------ PRODUCTIVITY ------
 
@@ -43,84 +30,75 @@ def fetch_leaderboard_data(df_m, ntop, entity_choice):
 st.subheader('Productivity overview')
 st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
 
-productivity_all = fetch_productivity_data()
-completed_jobs = productivity_all['all_time']['total_completed_jobs_data']
-
+productivity_all = requests.get(f'{BASE_URL}/productivity').json()
+productivity = productivity_all['all_time']
 productivity_24h = productivity_all['last_24h']
-completed_jobs = pd.DataFrame(completed_jobs)
-
-unique_folded = pd.DataFrame(productivity_all['all_time']['unique_folded_data'])
-# unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
 
-m1, m2, m3 = st.columns(3)
-m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
-m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
-m3.metric('Total simulations ran', f'{len(completed_jobs)*10:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]*10:,.0f} (24h)')
-st.markdown('<br>', unsafe_allow_html=True)
+# st.write(productivity_all)
+# # st.write(productivity)
+# st.write(productivity_24h)
 
-PROD_CHOICES = {
-    'Total jobs completed': 'total_pdbs',
-    'Unique proteins folded': 'unique_pdbs',
-}
+m1, m2 = st.columns(2)
 
-prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
-prod_choice = PROD_CHOICES[prod_choice_label]
+m1.metric('Unique proteins folded', f'{productivity.get("unique_folded", 0):,.0f}', delta=f'{productivity_24h.get("unique_folded", 0):,.0f} (24h)')
+m2.metric('Total jobs completed', f'{productivity.get("total_completed_jobs", 0):,.0f}', delta=f'{productivity_24h.get("total_completed_jobs", 0):,.0f} (24h)')
 
-PROD_DATA = {
-    'unique_pdbs': unique_folded,
-    'total_pdbs': completed_jobs,
-}
-df = PROD_DATA[prod_choice]
+# m3.metric('Total simulation steps', f'{productivity.get("total_md_steps"):,.0f}', delta=f'{productivity_24h.get("total_md_steps"):,.0f} (24h)')
 
-df = df.sort_values(by='last_event_at').reset_index()
+# st.markdown('<br>', unsafe_allow_html=True)
 
-# Create a cumulative count column
-df['cumulative_jobs'] = df.index + 1
+# time_binned_data = df.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
 
-# Plot the cumulative jobs over time
-st.plotly_chart(
-    px.line(df, x='last_event_at', y='cumulative_jobs',
-            labels={'last_event_at': 'Time', 'cumulative_jobs': prod_choice_label}).update_traces(fill='tozeroy'),
-    use_container_width=True,
-)
+# PROD_CHOICES = {
+#     'Unique proteins folded': 'unique_pdbs',
+#     'Total simulations': 'total_pdbs',
+#     'Total simulation steps': 'total_md_steps',
+# }
+# prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
+# prod_choice = PROD_CHOICES[prod_choice_label]
+# steps_running_total = time_binned_data[prod_choice].sum().cumsum()
+# st.plotly_chart(
+#     # add fillgradient to make it easier to see the trend
+#     px.area(steps_running_total, y=prod_choice,
+#             labels={'last_event_at':'', prod_choice: prod_choice_label},
+#     ).update_traces(fill='tozeroy'),
+#     use_container_width=True,
+# )
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ THROUGHPUT ------
 st.subheader('Throughput overview')
 
 st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
 
 MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
-throughput = fetch_throughput_data()
+throughput = requests.get(f'{BASE_URL}/throughput').json()
 
 data_transferred = throughput['all_time']
 data_transferred_24h = throughput['last_24h']
-data_df = pd.DataFrame(throughput['data'])
-data_df = data_df.sort_values(by='updated_at').reset_index()
-data_df['updated_at'] = pd.to_datetime(data_df['updated_at'])
-data_df['Total validator data sent'] = data_df['md_inputs_sum'].cumsum()
-data_df['Total received data'] = data_df['md_outputs_sum'].cumsum()
 
 m1, m2, m3 = st.columns(3)
 m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
 m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
 m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
 
-st.plotly_chart(
-    px.line(data_df, x='updated_at', y=['Total validator data sent', 'Total received data'],
-            labels={'updated_at':'Time', 'value':f'Data Transferred ({MEM_UNIT})', 'variable':'Direction'},
-    ).update_traces(fill='tozeroy').update_layout(legend=dict(
-        yanchor="top",
-        y=0.99,
-        xanchor="left",
-        x=0.01
-    )),
-    use_container_width=True,
-)
+
+# IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
+# io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
+# io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
+
+# st.plotly_chart(
+#     px.area(io_running_total, y='value', color='variable',
+#             labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
+#     ),
+#     use_container_width=True,
+# )
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ LEADERBOARD ------
 
 st.subheader('Leaderboard')
@@ -129,21 +107,24 @@ m1, m2 = st.columns(2)
 ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
 entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
 
-df_m = fetch_metagraph_data()
-df_miners = fetch_leaderboard_data(df_m, ntop=ntop, entity_choice=entity_choice)
+df_m = utils.get_metagraph(time.time()//UPDATE_INTERVAL)
+df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
+
 # hide colorbar and don't show y axis
 st.plotly_chart(
-    px.bar(df_miners.iloc[-ntop:], x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
+    px.bar(df_miners, x='I', color='I', hover_name=entity_choice, text=entity_choice if ntop < 20 else None,
            labels={'I':'Incentive', 'trust':'Trust', 'stake':'Stake', '_index':'Rank'},
     ).update_layout(coloraxis_showscale=False, yaxis_visible=False),
     use_container_width=True,
 )
 
+
 with st.expander('Show raw metagraph data'):
     st.dataframe(df_m)
 
 st.markdown('<br>', unsafe_allow_html=True)
 
+
 #### ------ LOGGED RUNS ------
 
 # st.subheader('Logged runs')
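Note on the leaderboard change: app.py now calls utils.get_metagraph(time.time()//UPDATE_INTERVAL) instead of a locally cached wrapper. Because st.cache_data keys its cache on the function's arguments, passing the current time bucket makes the cached metagraph expire roughly once per UPDATE_INTERVAL seconds. A minimal sketch of that pattern, with an illustrative function name not taken from the PR:

import time
import streamlit as st

UPDATE_INTERVAL = 3600  # seconds

@st.cache_data()
def load_expensive_data(cache_bucket):
    # cache_bucket is ignored by the body; it only changes the cache key,
    # so the result is recomputed at most once per UPDATE_INTERVAL seconds.
    return {"loaded_at": time.time()}

df = load_expensive_data(time.time() // UPDATE_INTERVAL)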
classes.py CHANGED
@@ -1,23 +1,12 @@
 from pydantic import BaseModel
-from datetime import datetime
-from typing import List
-
-
-class Data(BaseModel):
-    last_event_at: List[datetime]
-    cumulative_jobs: List[int]
 
 class ProductivityData(BaseModel):
     unique_folded: int
     total_completed_jobs: int
-    unique_folded_data: Data
-    total_completed_jobs_data: Data
 
-
 class Productivity(BaseModel):
     all_time: ProductivityData
     last_24h: ProductivityData
-    last_30d: ProductivityData
 
 class ThroughputData(BaseModel):
     validator_sent: float
@@ -25,13 +14,4 @@ class ThroughputData(BaseModel):
 
 class Throughput(BaseModel):
     all_time: ThroughputData
-    last_24h: ThroughputData
-    data: dict
-
-class Metagraph(BaseModel):
-    incentives: List[float]
-    emissions: List[float]
-    identities: List[str]
-    hotkeys: List[str]
-    coldkeys: List[str]
-    trusts: List[float]
+    last_24h: ThroughputData
utils.py CHANGED
@@ -1,13 +1,13 @@
-import json
 import os
-import time
-
-import bittensor as bt
-import numpy as np
-import pandas as pd
-import streamlit as st
 import tqdm
+import time
 import wandb
+import streamlit as st
+import pandas as pd
+import bittensor as bt
+import ast
+
+
 # TODO: Store the runs dataframe (as in sn1 dashboard) and top up with the ones created since the last snapshot
 # TODO: Store relevant wandb data in a database for faster access
 
@@ -142,109 +142,54 @@ def get_total_md_input_sizes(run):
     return convert_unit(size_bytes, from_unit='B', to_unit=BASE_UNITS)
 
 
-
-
-def get_data_transferred(df, df_24h, unit='GB'):
-    def safe_json_loads(x):
-        try:
-            return json.loads(x)
-        except ValueError:
-            return []
-    def np_sum(x):
-        try:
-            # Flatten the list of lists and convert it to a NumPy array
-            flat_array = np.array([item for sublist in x for item in sublist])
-
-            # Use np.sum() to sum all elements in the flattened array
-            total_sum = np.sum(flat_array)
-            return total_sum
-        except TypeError:
-            return 0
-    df = df.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
-    df['md_inputs_sizes'] = df.md_inputs_sizes.apply(safe_json_loads)
-    df['response_returned_files_sizes'] = df.response_returned_files_sizes.apply(safe_json_loads)
-    df['md_inputs_sum'] = df.md_inputs_sizes.apply(np.sum)
-    df['md_outputs_sum'] = df.response_returned_files_sizes.apply(np_sum)
-    df['md_inputs_sum'] = df['md_inputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
-    df['md_outputs_sum'] = df['md_outputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
-
-    df_24h = df_24h.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
-    df_24h['md_inputs_sizes'] = df_24h.md_inputs_sizes.apply(safe_json_loads)
-    df_24h['response_returned_files_sizes'] = df_24h.response_returned_files_sizes.apply(safe_json_loads)
-    df_24h['md_inputs_sum'] = df_24h.md_inputs_sizes.apply(np.sum)
-    df_24h['md_outputs_sum'] = df_24h.response_returned_files_sizes.apply(np_sum)
-
-
-    validator_sent = np.nansum(df['md_inputs_sum'].values)
-    miner_sent = np.nansum(df['md_outputs_sum'].values)
-    validator_sent_24h = np.nansum(df_24h['md_inputs_sum'].values)
-    miner_sent_24h = np.nansum(df_24h['md_outputs_sum'].values)
+def get_data_transferred(df, unit='GB'):
 
-    return {'all_time': {
-            'validator_sent': validator_sent,
-            'miner_sent': miner_sent,
-        },
-        'last_24h': {
-            'validator_sent': convert_unit(validator_sent_24h, from_unit='B', to_unit=BASE_UNITS),
-            'miner_sent': convert_unit(miner_sent_24h, from_unit='B', to_unit=BASE_UNITS),
-        },
-        'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
-    }
-
-def calculate_productivity_data(df):
-    completed_jobs = df[df['updated_count'] == 10]
-    completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
-    unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
-    completed_jobs = completed_jobs.sort_values(by='last_event_at').reset_index()
-    completed_jobs['cumulative_jobs'] = completed_jobs.index + 1
-    unique_folded = unique_folded.sort_values(by='last_event_at').reset_index()
-    unique_folded['cumulative_jobs'] = unique_folded.index + 1
+    validator_sent = df.md_inputs_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().sum()
+    miner_sent = df.response_returned_files_sizes.dropna().apply(lambda x: ast.literal_eval(x)).explode().explode().sum()
+
     return {
-        'unique_folded': len(unique_folded),
-        'total_completed_jobs': len(completed_jobs),
-        'unique_folded_data': {'last_event_at': unique_folded['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':unique_folded['cumulative_jobs'].values},
-        'total_completed_jobs_data': {'last_event_at': completed_jobs['last_event_at'].dt.to_pydatetime(), 'cumulative_jobs':completed_jobs['cumulative_jobs'].values}
+        'validator_sent': convert_unit(validator_sent, from_unit='B', to_unit=BASE_UNITS),
+        'miner_sent': convert_unit(miner_sent, from_unit='B', to_unit=BASE_UNITS),
     }
 
-def get_productivity(df_all, df_24h, df_30d):
+
+def get_productivity(df_all, df_24h):
     result = {
         'all_time': {
             'unique_folded': 0,
-            'total_completed_jobs': 0,
-            'unique_folded_data': {},
-            'total_completed_jobs_data': {}
+            'total_completed_jobs': 0
         },
         'last_24h': {
             'unique_folded': 0,
-            'total_completed_jobs': 0,
-            "unique_folded_data": {},
-            'total_completed_jobs_data': {}
-        },
-        'last_30d': {
-            'unique_folded': 0,
-            'total_completed_jobs': 0,
-            "unique_folded_data": {},
-            'total_completed_jobs_data': {}
+            'total_completed_jobs': 0
         }
     }
-
-
-
-    if df_all is not None:
-        result['all_time'].update(calculate_productivity_data(df_all))
-
-    if df_24h is not None:
-        result['last_24h'].update(calculate_productivity_data(df_24h))
-
-    if df_30d is not None:
-        result['last_30d'].update(calculate_productivity_data(df_30d))
+    if df_all is not None:
+        unique_folded_all = len(df_all.pdb_id.value_counts())
+        completed_jobs_all = len(df_all[df_all.active == False])
+
+        total_historical_run_updates = df_all.active.isna().sum()
+        total_historical_completed_jobs = total_historical_run_updates//10 # this is an estimate based on minimum number of updates per pdb
+
+        result['all_time'].update({
+            'unique_folded': unique_folded_all,
+            'total_completed_jobs': (completed_jobs_all + total_historical_completed_jobs).item(),
+        })
+
+    if df_24h is not None:
+        completed_jobs_24h = df_24h[df_24h['updated_count'] >= 10]
+        unique_completed_jobs_24h = completed_jobs_24h.drop_duplicates(subset=['pdb_id'], keep='first')
+        result['last_24h'].update({
+            'unique_folded': len(unique_completed_jobs_24h),
+            'total_completed_jobs': len(completed_jobs_24h)
+        })
     return result
 
-def get_leaderboard(df, entity_choice='identity'):
+def get_leaderboard(df, ntop=10, entity_choice='identity'):
 
     df = df.loc[df.validator_permit==False]
     df.index = range(df.shape[0])
-    return df.groupby(entity_choice).I.sum().sort_values().reset_index()
+    return df.groupby(entity_choice).I.sum().sort_values().reset_index().tail(ntop)
 
 
 
@@ -324,7 +269,8 @@ def preload_data():
     return combined_df
 
 @st.cache_data()
-def get_metagraph():
+def get_metagraph(time):
+    print(f'Loading metagraph with time {time}')
     subtensor = bt.subtensor(network=NETWORK)
     m = subtensor.metagraph(netuid=NETUID)
     meta_cols = ['I','stake','trust','validator_trust','validator_permit','C','R','E','dividends','last_update']
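The core of the hotfix is the completed-jobs estimate in utils.get_productivity: rows whose active flag is False count as completed jobs directly, while older rows that predate the active column (active is NaN) are approximated as one completed job per 10 run updates. A toy illustration of that heuristic; the DataFrame values below are invented for the example:

import numpy as np
import pandas as pd

df_all = pd.DataFrame({
    'pdb_id': ['1abc', '1abc', '2xyz', '3def', '4ghi'],
    # True/False rows come from the current schema; NaN marks historical rows.
    'active': [False, True, False, np.nan, np.nan],
})

completed_jobs_all = len(df_all[df_all.active == False])               # 2 finished jobs
total_historical_run_updates = df_all.active.isna().sum()              # 2 historical rows
total_historical_completed_jobs = total_historical_run_updates // 10   # 0, estimated at 10 updates per pdb
print(completed_jobs_all + total_historical_completed_jobs)            # 2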