Sarkosos committed on
Commit
328256f
·
1 Parent(s): 14285d3

Added graphs for data transferred, made the app more interaction-friendly, and added a total-simulations-completed metric

Browse files
Files changed (4) hide show
  1. api.py +1 -1
  2. app.py +39 -35
  3. classes.py +2 -1
  4. utils.py +39 -7
api.py CHANGED
@@ -75,7 +75,7 @@ def throughput_metrics():
75
  Get the throughput metrics
76
  """
77
 
78
- return Throughput(all_time=utils.get_data_transferred(data_all), last_24h=utils.get_data_transferred(data_24h))
79
 
80
 
81
 
 
75
  Get the throughput metrics
76
  """
77
 
78
+ return utils.get_data_transferred(data_all, data_24h)
79
 
80
 
81
 
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import time
 
2
  import pandas as pd
3
- import streamlit as st
4
  import plotly.express as px
5
  import requests
 
6
 
7
  import utils
8
 
@@ -17,12 +18,24 @@ Simulation duration distribution
17
  UPDATE_INTERVAL = 3600
18
  BASE_URL = 'http://143.198.21.86:5001/'
19
 
20
-
21
  st.title('Folding Subnet Dashboard')
22
  st.markdown('<br>', unsafe_allow_html=True)
23
 
 
 
 
24
 
 
 
 
25
 
 
 
 
 
 
 
 
26
 
27
  #### ------ PRODUCTIVITY ------
28
 
@@ -30,7 +43,7 @@ st.markdown('<br>', unsafe_allow_html=True)
30
  st.subheader('Productivity overview')
31
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
32
 
33
- productivity_all = requests.get(f'{BASE_URL}/productivity').json()
34
  completed_jobs = productivity_all['all_time']['total_completed_jobs']
35
 
36
  productivity_24h = productivity_all['last_24h']
@@ -40,24 +53,17 @@ completed_jobs['last_event_at'] = pd.to_datetime(completed_jobs['updated_at'])
40
  unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
41
  unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
42
 
43
- m1, m2 = st.columns(2)
44
  m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
45
  m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
46
-
47
  st.markdown('<br>', unsafe_allow_html=True)
48
 
49
-
50
-
51
- # time_binned_data_complete = completed_jobs.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
52
- # time_binned_data_unique = unique_folded.set_index('last_event_at').groupby(pd.Grouper(freq='12h'))
53
-
54
  PROD_CHOICES = {
 
55
  'Unique proteins folded': 'unique_pdbs',
56
- 'Total simulations': 'total_pdbs',
57
  }
58
 
59
-
60
-
61
  prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
62
  prod_choice = PROD_CHOICES[prod_choice_label]
63
 
@@ -73,49 +79,49 @@ df = df.sort_values(by='last_event_at').reset_index()
73
  df['cumulative_jobs'] = df.index + 1
74
 
75
  # Plot the cumulative jobs over time
76
-
77
  st.plotly_chart(
78
- # add fillgradient to make it easier to see the trend
79
  px.line(df, x='last_event_at', y='cumulative_jobs',
80
- title='Total Jobs Completed Over Time',
81
- labels={'last_event_at': 'Time', 'cumulative_jobs': 'Total Jobs Completed'}).update_traces(fill='tozeroy'),
82
  use_container_width=True,
83
  )
84
 
85
  st.markdown('<br>', unsafe_allow_html=True)
86
 
87
-
88
  #### ------ THROUGHPUT ------
89
  st.subheader('Throughput overview')
90
 
91
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
92
 
93
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
94
- throughput = requests.get(f'{BASE_URL}/throughput').json()
95
 
96
  data_transferred = throughput['all_time']
97
  data_transferred_24h = throughput['last_24h']
 
 
 
 
 
98
 
99
  m1, m2, m3 = st.columns(3)
100
  m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
101
  m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
102
  m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
103
 
104
-
105
- # IO_CHOICES = {'total_data_sent':'Sent', 'total_data_received':'Received'}
106
- # io_running_total = time_binned_data[list(IO_CHOICES.keys())].sum().rename(columns=IO_CHOICES).cumsum().melt(ignore_index=False)
107
- # io_running_total['value'] = io_running_total['value'].apply(utils.convert_unit, args=(utils.BASE_UNITS, MEM_UNIT))
108
-
109
- # st.plotly_chart(
110
- # px.area(io_running_total, y='value', color='variable',
111
- # labels={'last_event_at':'', 'value': f'Data transferred ({MEM_UNIT})', 'variable':'Direction'},
112
- # ),
113
- # use_container_width=True,
114
- # )
115
 
116
  st.markdown('<br>', unsafe_allow_html=True)
117
 
118
-
119
  #### ------ LEADERBOARD ------
120
 
121
  st.subheader('Leaderboard')
@@ -124,8 +130,8 @@ m1, m2 = st.columns(2)
124
  ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
125
  entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
126
 
127
- df_m = utils.get_metagraph(time.time()//UPDATE_INTERVAL)
128
- df_miners = utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
129
 
130
  # hide colorbar and don't show y axis
131
  st.plotly_chart(
@@ -135,13 +141,11 @@ st.plotly_chart(
135
  use_container_width=True,
136
  )
137
 
138
-
139
  with st.expander('Show raw metagraph data'):
140
  st.dataframe(df_m)
141
 
142
  st.markdown('<br>', unsafe_allow_html=True)
143
 
144
-
145
  #### ------ LOGGED RUNS ------
146
 
147
  # st.subheader('Logged runs')
 
1
  import time
2
+
3
  import pandas as pd
 
4
  import plotly.express as px
5
  import requests
6
+ import streamlit as st
7
 
8
  import utils
9
 
 
18
  UPDATE_INTERVAL = 3600
19
  BASE_URL = 'http://143.198.21.86:5001/'
20
 
 
21
  st.title('Folding Subnet Dashboard')
22
  st.markdown('<br>', unsafe_allow_html=True)
23
 
24
+ @st.cache_data(ttl=UPDATE_INTERVAL)
25
+ def fetch_productivity_data():
26
+ return requests.get(f'{BASE_URL}/productivity').json()
27
 
28
+ @st.cache_data(ttl=UPDATE_INTERVAL)
29
+ def fetch_throughput_data():
30
+ return requests.get(f'{BASE_URL}/throughput').json()
31
 
32
+ @st.cache_data(ttl=UPDATE_INTERVAL)
33
+ def fetch_metagraph_data():
34
+ return utils.get_metagraph(time.time() // UPDATE_INTERVAL)
35
+
36
+ @st.cache_data(ttl=UPDATE_INTERVAL)
37
+ def fetch_leaderboard_data(df_m, ntop, entity_choice):
38
+ return utils.get_leaderboard(df_m, ntop=ntop, entity_choice=entity_choice)
39
 
40
  #### ------ PRODUCTIVITY ------
41
 
 
43
  st.subheader('Productivity overview')
44
  st.info('Productivity metrics show how many proteins have been folded, which is the primary goal of the subnet. Metrics are estimated using weights and biases data combined with heuristics.')
45
 
46
+ productivity_all = fetch_productivity_data()
47
  completed_jobs = productivity_all['all_time']['total_completed_jobs']
48
 
49
  productivity_24h = productivity_all['last_24h']
 
53
  unique_folded = completed_jobs.drop_duplicates(subset=['pdb_id'], keep='first')
54
  unique_folded['last_event_at'] = pd.to_datetime(unique_folded['updated_at'])
55
 
56
+ m1, m2, m3 = st.columns(3)
57
  m1.metric('Unique proteins folded', f'{len(unique_folded):,.0f}', delta=f'{productivity_24h["unique_folded"]:,.0f} (24h)')
58
  m2.metric('Total jobs completed', f'{len(completed_jobs):,.0f}', delta=f'{productivity_24h["total_completed_jobs"]:,.0f} (24h)')
59
+ m3.metric('Total simulations ran', f'{len(completed_jobs)*10:,.0f}', delta=f'{productivity_24h["total_completed_jobs"]*10:,.0f} (24h)')
60
  st.markdown('<br>', unsafe_allow_html=True)
61
 
 
 
 
 
 
62
  PROD_CHOICES = {
63
+ 'Total jobs completed': 'total_pdbs',
64
  'Unique proteins folded': 'unique_pdbs',
 
65
  }
66
 
 
 
67
  prod_choice_label = st.radio('Select productivity metric', list(PROD_CHOICES.keys()), index=0, horizontal=True)
68
  prod_choice = PROD_CHOICES[prod_choice_label]
69
 
 
79
  df['cumulative_jobs'] = df.index + 1
80
 
81
  # Plot the cumulative jobs over time
 
82
  st.plotly_chart(
 
83
  px.line(df, x='last_event_at', y='cumulative_jobs',
84
+ labels={'last_event_at': 'Time', 'cumulative_jobs': prod_choice_label}).update_traces(fill='tozeroy'),
 
85
  use_container_width=True,
86
  )
87
 
88
  st.markdown('<br>', unsafe_allow_html=True)
89
 
 
90
  #### ------ THROUGHPUT ------
91
  st.subheader('Throughput overview')
92
 
93
  st.info('Throughput metrics show the total amount of data sent and received by the validators. This is a measure of the network activity and the amount of data that is being processed by the subnet.')
94
 
95
  MEM_UNIT = 'GB' #st.radio('Select memory unit', ['TB','GB', 'MB'], index=0, horizontal=True)
96
+ throughput = fetch_throughput_data()
97
 
98
  data_transferred = throughput['all_time']
99
  data_transferred_24h = throughput['last_24h']
100
+ data_df = pd.DataFrame(throughput['data'])
101
+ data_df = data_df.sort_values(by='updated_at').reset_index()
102
+ data_df['updated_at'] = pd.to_datetime(data_df['updated_at'])
103
+ data_df['Total validator data sent'] = data_df['md_inputs_sum'].cumsum()
104
+ data_df['Total received data'] = data_df['md_outputs_sum'].cumsum()
105
 
106
  m1, m2, m3 = st.columns(3)
107
  m1.metric(f'Total validator data sent ({MEM_UNIT})', f'{data_transferred["validator_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]:,.0f} (24h)')
108
  m2.metric(f'Total received data ({MEM_UNIT})', f'{data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["miner_sent"]:,.0f} (24h)')
109
  m3.metric(f'Total transferred data ({MEM_UNIT})', f'{data_transferred["validator_sent"]+data_transferred["miner_sent"]:,.0f}', delta=f'{data_transferred_24h["validator_sent"]+data_transferred_24h["miner_sent"]:,.0f} (24h)')
110
 
111
+ st.plotly_chart(
112
+ px.line(data_df, x='updated_at', y=['Total validator data sent', 'Total received data'],
113
+ labels={'updated_at':'Time', 'value':f'Data Transferred ({MEM_UNIT})', 'variable':'Direction'},
114
+ ).update_traces(fill='tozeroy').update_layout(legend=dict(
115
+ yanchor="top",
116
+ y=0.99,
117
+ xanchor="left",
118
+ x=0.01
119
+ )),
120
+ use_container_width=True,
121
+ )
122
 
123
  st.markdown('<br>', unsafe_allow_html=True)
124
 
 
125
  #### ------ LEADERBOARD ------
126
 
127
  st.subheader('Leaderboard')
 
130
  ntop = m1.slider('Number of top miners to display', value=10, min_value=3, max_value=50, step=1)
131
  entity_choice = m2.radio('Select entity', utils.ENTITY_CHOICES, index=0, horizontal=True)
132
 
133
+ df_m = fetch_metagraph_data()
134
+ df_miners = fetch_leaderboard_data(df_m, ntop=ntop, entity_choice=entity_choice)
135
 
136
  # hide colorbar and don't show y axis
137
  st.plotly_chart(
 
141
  use_container_width=True,
142
  )
143
 
 
144
  with st.expander('Show raw metagraph data'):
145
  st.dataframe(df_m)
146
 
147
  st.markdown('<br>', unsafe_allow_html=True)
148
 
 
149
  #### ------ LOGGED RUNS ------
150
 
151
  # st.subheader('Logged runs')
classes.py CHANGED
@@ -19,4 +19,5 @@ class ThroughputData(BaseModel):
19
 
20
  class Throughput(BaseModel):
21
  all_time: ThroughputData
22
- last_24h: ThroughputData
 
 
19
 
20
  class Throughput(BaseModel):
21
  all_time: ThroughputData
22
+ last_24h: ThroughputData
23
+ data: dict
utils.py CHANGED
@@ -145,19 +145,51 @@ def get_total_md_input_sizes(run):
145
 
146
 
147
 
148
- def get_data_transferred(df, unit='GB'):
149
  def safe_json_loads(x):
150
  try:
151
  return json.loads(x)
152
  except ValueError:
153
  return []
154
-
155
- validator_sent = np.nansum(df.md_inputs_sizes.dropna().apply(safe_json_loads).explode().replace([np.inf, -np.inf], np.nan).values)
156
- miner_sent = np.nansum(df.response_returned_files_sizes.dropna().apply(safe_json_loads).explode().explode().replace([np.inf, -np.inf], np.nan).values)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
- return {
159
- 'validator_sent': convert_unit(validator_sent, from_unit='B', to_unit=BASE_UNITS),
160
- 'miner_sent': convert_unit(miner_sent, from_unit='B', to_unit=BASE_UNITS),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  }
162
 
163
 
 
145
 
146
 
147
 
148
+ def get_data_transferred(df, df_24h, unit='GB'):
149
  def safe_json_loads(x):
150
  try:
151
  return json.loads(x)
152
  except ValueError:
153
  return []
154
+ def np_sum(x):
155
+ try:
156
+ # Flatten the list of lists and convert it to a NumPy array
157
+ flat_array = np.array([item for sublist in x for item in sublist])
158
+
159
+ # Use np.sum() to sum all elements in the flattened array
160
+ total_sum = np.sum(flat_array)
161
+ return total_sum
162
+ except TypeError:
163
+ return 0
164
+ df = df.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
165
+ df['md_inputs_sizes'] = df.md_inputs_sizes.apply(safe_json_loads)
166
+ df['response_returned_files_sizes'] = df.response_returned_files_sizes.apply(safe_json_loads)
167
+ df['md_inputs_sum'] = df.md_inputs_sizes.apply(np.sum)
168
+ df['md_outputs_sum'] = df.response_returned_files_sizes.apply(np_sum)
169
+ df['md_inputs_sum'] = df['md_inputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
170
+ df['md_outputs_sum'] = df['md_outputs_sum'].apply(convert_unit, from_unit='B', to_unit=BASE_UNITS)
171
 
172
+ df_24h = df_24h.dropna(subset=['md_inputs_sizes', 'response_returned_files_sizes'])
173
+ df_24h['md_inputs_sizes'] = df_24h.md_inputs_sizes.apply(safe_json_loads)
174
+ df_24h['response_returned_files_sizes'] = df_24h.response_returned_files_sizes.apply(safe_json_loads)
175
+ df_24h['md_inputs_sum'] = df_24h.md_inputs_sizes.apply(np.sum)
176
+ df_24h['md_outputs_sum'] = df_24h.response_returned_files_sizes.apply(np_sum)
177
+
178
+
179
+ validator_sent = np.nansum(df['md_inputs_sum'].values)
180
+ miner_sent = np.nansum(df['md_outputs_sum'].values)
181
+ validator_sent_24h = np.nansum(df_24h['md_inputs_sum'].values)
182
+ miner_sent_24h = np.nansum(df_24h['md_outputs_sum'].values)
183
+
184
+ return {'all_time': {
185
+ 'validator_sent': validator_sent,
186
+ 'miner_sent': miner_sent,
187
+ },
188
+ 'last_24h': {
189
+ 'validator_sent': convert_unit(validator_sent_24h, from_unit='B', to_unit=BASE_UNITS),
190
+ 'miner_sent': convert_unit(miner_sent_24h, from_unit='B', to_unit=BASE_UNITS),
191
+ },
192
+ 'data': df[['md_inputs_sum', 'md_outputs_sum', 'updated_at']].to_dict()
193
  }
194
 
195