Sarkosos committed on
Commit
328256f
·
1 Parent(s): 14285d3

Added graphs for data transferred, made the app more interaction-friendly, and added a total-simulations-completed metric

Browse files
Files changed (4) hide show
  1. api.py +1 -1
  2. app.py +39 -35
  3. classes.py +2 -1
  4. utils.py +39 -7
api.py CHANGED
@@ -75,7 +75,7 @@ def throughput_metrics():
75
  Get the throughput metrics
76
  """
77
 
78
- return Throughput(all_time=utils.get_data_transferred(data_all), last_24h=utils.get_data_transferred(data_24h))
79
 
80
 
81
 
 
75
  Get the throughput metrics
76
  """
77
 
78
+ return utils.get_data_transferred(data_all, data_24h)
79
 
80
 
81
 
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import time
 
2
  import pandas as pd
3
- import streamlit as st
4
  import plotly.express as px
5
  import requests
 
6
 
7
  import utils
8
 
@@ -17,12 +18,24 @@ Simulation duration distribution
17
  UPDATE_INTERVAL = 3600
18
  BASE_URL = 'http://143.198.21.86:5001/'
19
 
20
-
21
  st.title('Folding Subnet Dashboard')
22
  st.markdown('<br>', unsafe_allow_html=True)
23
 
 
 
 
24
 
 
 
 
25
 
 
 
 
 
 
 
 
26
 
27
  #### ------ PRODUCTIVITY ------
28
 
@@ -30,7 +43,7 @@ st.markdown('<br>', unsafe_allow_html=True)
30
  st.subheader('Productivity overview')
31
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
32
 
33
- productivity_all = requests.get(f'{BASE_URL}/productivity').json()
34
  completed_jobs = productivity_all['all_time']['total_completed_jobs']
35
 
36
  productivity_24h = productivity_all['last_24h']
@@ -40,24 +53,17 @@ completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
40
  unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
41
  unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
42
 
43
- m1, m2 = st.columns(2)
44
  m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
45
  m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
46
-
47
  st.markdown('<br>', unsafe_allow_html=True)
48
 
49
-
50
-
51
- # time_binned_data_complete = completed_jobs.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
52
- # time_binned_data_unique = unique_folded.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
53
-
54
  PROD_CHOICES = {
 
55
  'Unique proteins folded': 'unique_pdbs',
56
- 'Total simulations': 'total_pdbs',
57
  }
58
 
59
-
60
-
61
  prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
62
  prod_choice = PROD_CHOICES[prod_choice_label]
63
 
@@ -73,49 +79,49 @@ df = df.sort_values(by='last_event_at').reset_index()
73
  df['cumulative_jobs'] = df.index + 1
74
 
75
  # Plot the cumulative jobs over time
76
-
77
  st.plotly_chart(
78
- # add fillgradient to make it easier to see the trend
79
  px.line(df, x='last_event_at', y='cumulative_jobs',
80
- title='Total Jobs Completed Over Time',
81
- labels={'last_event_at': 'Time', 'cumulative_jobs': 'Total Jobs Completed'}).update_traces(fill='tozeroy'),
82
  use_container_width=True,
83
  )
84
 
85
  st.markdown('<br>', unsafe_allow_html=True)
86
 
87
-
88
  #### ------ THROUGHPUT ------
89
  st.subheader('Throughput overview')
90
 
91
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
92
 
93
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
94
- throughput = requests.get(f'{BASE_URL}/throughput').json()
95
 
96
  data_transferred = throughput['all_time']
97
  data_transferred_24h = throughput['last_24h']
 
 
 
 
 
98
 
99
  m1, m2, m3 = st.columns(3)
100
  m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
101
  m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
102
  m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
103
 
104
-
105
- # IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
106
- # io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
107
- # io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
108
-
109
- # st.plotly_chart(
110
- # px.area(io_running_total, y='value', color='variable',
111
- # labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
112
- # ),
113
- # use_container_width=True,
114
- # )
115
 
116
  st.markdown('<br>', unsafe_allow_html=True)
117
 
118
-
119
  #### ------ LEADERBOARD ------
120
 
121
  st.subheader('Leaderboard')
@@ -124,8 +130,8 @@ m1, m2 = st.columns(2)
124
  ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
125
  entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
126
 
127
- df_m = utils.get_metagraph(time.time()//UPDATE_INTERVAL)
128
- df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
129
 
130
  # hide colorbar and don't show y axis
131
  st.plotly_chart(
@@ -135,13 +141,11 @@ st.plotly_chart(
135
  use_container_width=True,
136
  )
137
 
138
-
139
  with st.expander('Show raw metagraph data'):
140
  st.dataframe(df_m)
141
 
142
  st.markdown('<br>', unsafe_allow_html=True)
143
 
144
-
145
  #### ------ LOGGED RUNS ------
146
 
147
  # st.subheader('Logged runs')
 
1
  import time
2
+
3
  import pandas as pd
 
4
  import plotly.express as px
5
  import requests
6
+ import streamlit as st
7
 
8
  import utils
9
 
 
18
  UPDATE_INTERVAL = 3600
19
  BASE_URL = 'http://143.198.21.86:5001/'
20
 
 
21
  st.title('Folding Subnet Dashboard')
22
  st.markdown('<br>', unsafe_allow_html=True)
23
 
24
+ @st.cache_data(ttl=UPDATE_INTERVAL)
25
+ def fetch_productivity_data():
26
+ return requests.get(f'{BASE_URL}/productivity').json()
27
 
28
+ @st.cache_data(ttl=UPDATE_INTERVAL)
29
+ def fetch_throughput_data():
30
+ return requests.get(f'{BASE_URL}/throughput').json()
31
 
32
+ @st.cache_data(ttl=UPDATE_INTERVAL)
33
+ def fetch_metagraph_data():
34
+ return utils.get_metagraph(time.time() // UPDATE_INTERVAL)
35
+
36
+ @st.cache_data(ttl=UPDATE_INTERVAL)
37
+ def fetch_leaderboard_data(df_m, ntop, entity_choice):
38
+ return utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
39
 
40
  #### ------ PRODUCTIVITY ------
41
 
 
43
  st.subheader('Productivity overview')
44
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
45
 
46
+ productivity_all = fetch_productivity_data()
47
  completed_jobs = productivity_all['all_time']['total_completed_jobs']
48
 
49
  productivity_24h = productivity_all['last_24h']
 
53
  unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
54
  unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
55
 
56
+ m1, m2, m3 = st.columns(3)
57
  m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
58
  m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
59
+ m3.metric('Total simulations ran', f'{len(completed_jobs)*10:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]*10:,.0f} (24h)')
60
  st.markdown('<br>', unsafe_allow_html=True)
61
 
 
 
 
 
 
62
  PROD_CHOICES = {
63
+ 'Total jobs completed': 'total_pdbs',
64
  'Unique proteins folded': 'unique_pdbs',
 
65
  }
66
 
 
 
67
  prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
68
  prod_choice = PROD_CHOICES[prod_choice_label]
69
 
 
79
  df['cumulative_jobs'] = df.index + 1
80
 
81
  # Plot the cumulative jobs over time
 
82
  st.plotly_chart(
 
83
  px.line(df, x='last_event_at', y='cumulative_jobs',
84
+ labels={'last_event_at': 'Time', 'cumulative_jobs': prod_choice_label}).update_traces(fill='tozeroy'),
 
85
  use_container_width=True,
86
  )
87
 
88
  st.markdown('<br>', unsafe_allow_html=True)
89
 
 
90
  #### ------ THROUGHPUT ------
91
  st.subheader('Throughput overview')
92
 
93
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
94
 
95
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
96
+ throughput = fetch_throughput_data()
97
 
98
  data_transferred = throughput['all_time']
99
  data_transferred_24h = throughput['last_24h']
100
+ data_df = pd.DataFrame(throughput['data'])
101
+ data_df = data_df.sort_values(by='updated_at').reset_index()
102
+ data_df['updated_at'] = pd.to_datetime(data_df['updated_at'])
103
+ data_df['Total validator data sent'] = data_df['md_inputs_sum'].cumsum()
104
+ data_df['Total received data'] = data_df['md_outputs_sum'].cumsum()
105
 
106
  m1, m2, m3 = st.columns(3)
107
  m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
108
  m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
109
  m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
110
 
111
+ st.plotly_chart(
112
+ px.line(data_df, x='updated_at', y=['Total validator data sent', 'Total received data'],
113
+ labels={'updated_at':'Time', 'value':f'Data Transferred ({MEM_UNIT})', 'variable':'Direction'},
114
+ ).update_traces(fill='tozeroy').update_layout(legend=dict(
115
+ yanchor="top",
116
+ y=0.99,
117
+ xanchor="left",
118
+ x=0.01
119
+ )),
120
+ use_container_width=True,
121
+ )
122
 
123
  st.markdown('<br>', unsafe_allow_html=True)
124
 
 
125
  #### ------ LEADERBOARD ------
126
 
127
  st.subheader('Leaderboard')
 
130
  ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
131
  entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
132
 
133
+ df_m = fetch_metagraph_data()
134
+ df_miners = fetch_leaderboard_data(df_m, ntop=ntop, entity_choice=entity_choice)
135
 
136
  # hide colorbar and don't show y axis
137
  st.plotly_chart(
 
141
  use_container_width=True,
142
  )
143
 
 
144
  with st.expander('Show raw metagraph data'):
145
  st.dataframe(df_m)
146
 
147
  st.markdown('<br>', unsafe_allow_html=True)
148
 
 
149
  #### ------ LOGGED RUNS ------
150
 
151
  # st.subheader('Logged runs')
classes.py CHANGED
@@ -19,4 +19,5 @@ class ThroughputData(BaseModel):
19
 
20
  class Throughput(BaseModel):
21
  all_time: ThroughputData
22
- last_24h: ThroughputData
 
 
19
 
20
  class Throughput(BaseModel):
21
  all_time: ThroughputData
22
+ last_24h: ThroughputData
23
+ data: dict
utils.py CHANGED
@@ -145,19 +145,51 @@ def get_total_md_input_sizes(run):
145
 
146
 
147
 
148
- def get_data_transferred(df, unit='GB'):
149
  def safe_json_loads(x):
150
  try:
151
  return json.loads(x)
152
  except ValueError:
153
  return []
154
-
155
- validator_sent = np.nansum(df.md_inputs_sizes.dropna().apply(safe_json_loads).explode().replace([np.inf, -np.inf], np.nan).values)
156
- miner_sent = np.nansum(df.response_returned_files_sizes.dropna().apply(safe_json_loads).explode().explode().replace([np.inf, -np.inf], np.nan).values)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
- return {
159
- 'validator_sent': convert_unit(validator_sent, from_unit='B', to_unit=BASE_UNITS),
160
- 'miner_sent': convert_unit(miner_sent, from_unit='B', to_unit=BASE_UNITS),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
 
163
 
 
145
 
146
 
147
 
148
+ def get_data_transferred(df, df_24h, unit='GB'):
149
  def safe_json_loads(x):
150
  try:
151
  return json.loads(x)
152
  except ValueError:
153
  return []
154
+ def np_sum(x):
155
+ try:
156
+ # Flatten the list of lists and convert it to a NumPy array
157
+ flat_array = np.array([item for sublist in x for item in sublist])
158
+
159
+ # Use np.sum() to sum all elements in the flattened array
160
+ total_sum = np.sum(flat_array)
161
+ return total_sum
162
+ except TypeError:
163
+ return 0
164
+ df = df.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
165
+ df['md_inputs_sizes'] = df.md_inputs_sizes.apply(safe_json_loads)
166
+ df['response_returned_files_sizes'] = df.response_returned_files_sizes.apply(safe_json_loads)
167
+ df['md_inputs_sum'] = df.md_inputs_sizes.apply(np.sum)
168
+ df['md_outputs_sum'] = df.response_returned_files_sizes.apply(np_sum)
169
+ df['md_inputs_sum'] = df['md_inputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
170
+ df['md_outputs_sum'] = df['md_outputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
171
 
172
+ df_24h = df_24h.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
173
+ df_24h['md_inputs_sizes'] = df_24h.md_inputs_sizes.apply(safe_json_loads)
174
+ df_24h['response_returned_files_sizes'] = df_24h.response_returned_files_sizes.apply(safe_json_loads)
175
+ df_24h['md_inputs_sum'] = df_24h.md_inputs_sizes.apply(np.sum)
176
+ df_24h['md_outputs_sum'] = df_24h.response_returned_files_sizes.apply(np_sum)
177
+
178
+
179
+ validator_sent = np.nansum(df['md_inputs_sum'].values)
180
+ miner_sent = np.nansum(df['md_outputs_sum'].values)
181
+ validator_sent_24h = np.nansum(df_24h['md_inputs_sum'].values)
182
+ miner_sent_24h = np.nansum(df_24h['md_outputs_sum'].values)
183
+
184
+ return {'all_time': {
185
+ 'validator_sent': validator_sent,
186
+ 'miner_sent': miner_sent,
187
+ },
188
+ 'last_24h': {
189
+ 'validator_sent': convert_unit(validator_sent_24h, from_unit='B', to_unit=BASE_UNITS),
190
+ 'miner_sent': convert_unit(miner_sent_24h, from_unit='B', to_unit=BASE_UNITS),
191
+ },
192
+ 'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
193
  }
194
 
195