# Hugging Face Spaces status when this file was captured: Sleeping
import streamlit as st | |
import re | |
import pandas as pd | |
import networkx as nx | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from matplotlib import cm | |
# ---- Page header: banner image followed by a centred HTML title ----
st.image("banner.png", use_column_width=True)
st.markdown(
    "<h1 style='text-align: center;'>CMR and Heart Failure Colocalisation Drug Interaction Viewer</h1>",
    unsafe_allow_html=True,
)

# ---- Introductory description shown under the title ----
app_intro = """
This interactive app allows you to explore colocalising genes between cardiovascular magnetic resonance image (CMR) traits and heart failure (HF) that have interacting drugs.
You can input multiple HGNC gene names or disease terms to filter the dataset or enter a single gene for more detailed information.
Additionally, you can visualize a protein interaction network for specific genes using STRINGdb data.
"""
st.markdown(app_intro, unsafe_allow_html=True)

# ---- Colocalisation results table ----
# Read the CSV, replace every missing value with 0, then index the rows by
# the "Gene" column so later lookups can match on the index.
annotations = (
    pd.read_csv("colocalisation_results.csv")
    .fillna(0)
    .set_index("Gene")
)

# ---- Heading for the gene/disease filter section ----
st.markdown("### View colocalising gene drug interaction results for selected genes/diseases or the entire dataset.")
def collect_genes(x):
    """Split a free-text gene list into individual gene names.

    Args:
        x: Raw text containing gene names separated by commas and/or
            whitespace (any mix, any amount).

    Returns:
        list[str]: The non-empty tokens of ``x`` in their input order.
        An empty or separator-only string yields an empty list.
    """
    # A raw-string character class covers every separator the previous
    # alternation matched, without the invalid "\s" escape in a normal string.
    # Runs of separators collapse into one split point, so only a leading or
    # trailing separator can produce an empty token; those are filtered out.
    return [token for token in re.split(r"[,\s]+", x) if token]
# Free-text box for entering several genes at once; the raw string is then
# tokenised into a list of gene names by collect_genes.
input_gene_list = st.text_input(
    "Input a list of multiple HGNC genes (enter comma separated):"
)
gene_list = collect_genes(input_gene_list)
def convert_df(df):
    """Serialise a DataFrame as UTF-8 encoded CSV bytes.

    Args:
        df: The pandas DataFrame to export.

    Returns:
        bytes: CSV text of ``df`` (header plus rows, index column omitted)
        encoded as UTF-8, suitable for a download payload.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
# Show the rows for the requested genes. This table is only rendered when
# more than one gene was entered in the multi-gene box.
if len(gene_list) > 1:
    # reset_index() returns a new frame with the "Gene" index restored as the
    # first column. Building the frame this way avoids assigning a column on
    # a filtered slice, which triggered pandas' SettingWithCopyWarning.
    df = annotations[annotations.index.isin(gene_list)].reset_index()
    # Display the filtered results
    st.dataframe(df)
    # CSV payload of the matched gene names, kept for the download button
    # below (currently disabled).
    output = df[['Gene']]
    csv = convert_df(output)
    # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')
# Search box for filtering the dataset by a disease term found in the
# "terms_drug" column.
input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):")
if input_disease:
    # Case-insensitive *literal* substring match. regex=False matters here:
    # the text comes straight from the user, and with the default regex=True
    # an input such as "(" or "a+" is parsed as a pattern and can raise
    # re.error, aborting the page. na=False treats non-text cells as
    # non-matching.
    disease_mask = annotations['terms_drug'].str.contains(
        input_disease, case=False, na=False, regex=False
    )
    # reset_index() yields a fresh frame with "Gene" as the first column, so
    # no column is assigned on a filtered slice (SettingWithCopyWarning).
    df_disease_filtered = annotations[disease_mask].reset_index()
    if not df_disease_filtered.empty:
        st.markdown(f"### Colocalisation results for disease: {input_disease}")
        # Display filtered dataframe
        st.dataframe(df_disease_filtered)
        # CSV payload kept for the download button below (currently disabled)
        csv_disease_filtered = convert_df(df_disease_filtered)
        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
    else:
        st.write(f"No results found for disease: {input_disease}")
# Display the dataset row(s) for one gene plus a link to its DrugnomeAI page.
input_gene = st.text_input("Input an individual HGNC gene:")
# The lookup below is an exact match on the index, so stray spaces around
# the typed name would otherwise report a present gene as missing.
gene_query = input_gene.strip()
if gene_query:
    # reset_index() yields a fresh frame with "Gene" as the first column, so
    # no column is assigned on a filtered slice (SettingWithCopyWarning).
    df2 = annotations[annotations.index == gene_query].reset_index()
    if not df2.empty:
        st.dataframe(df2)
        # Provide a link to the gene's DrugnomeAI page
        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={gene_query}"
        markdown_link = f"[{gene_query} druggability in DrugnomeAI]({url})"
        st.markdown(markdown_link, unsafe_allow_html=True)
    else:
        st.write("Gene not found in the dataset.")
# Render the complete, unfiltered dataset beneath its own heading.
st.markdown("### All Colocalisation Results Interacting with Drugs")
# reset_index() hands back a new frame (annotations itself is untouched)
# whose first column is the former "Gene" index, followed by the remaining
# columns in their existing order.
df_total_output = annotations.reset_index()
st.dataframe(df_total_output)
# CSV payload of the full table, kept for the download button below
# (currently disabled).
csv = convert_df(df_total_output)
# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')
# ---- Network section: centred HTML heading ----
st.markdown(
    "<h1 style='text-align: center;'>Protein Interaction Networks of Colocalising Drug Targets</h1>",
    unsafe_allow_html=True,
)

# ---- Legend explaining how the plot encodes node and edge properties ----
network_legend = """
- The colour of each node represents its degree (number of direct connections it has with other nodes).
- The size of each node represents its betweenness centrality (larger nodes play a more central role in the network, facilitating communication between other proteins).
- Node edges/connections are colour-coded by confidence of PPI (lighter colours (brighter) represent stronger interactions).
- Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
"""
st.markdown(network_legend, unsafe_allow_html=True)

# ---- Load the tab-separated STRINGdb interaction table ----
ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')

# ---- Build an undirected graph: one edge per table row ----
# Each (node1, node2, combined_score) triple becomes an edge whose "weight"
# attribute holds the combined score.
G = nx.Graph()
G.add_weighted_edges_from(
    zip(ppi_data['node1'], ppi_data['node2'], ppi_data['combined_score'])
)
def rescale(l, newmin, newmax):
    """Linearly map a collection of numbers onto the range [newmin, newmax].

    The smallest input maps to ``newmin`` and the largest to ``newmax``;
    everything in between is interpolated linearly.

    Args:
        l: Iterable of numbers (consumed once, so generators are fine).
        newmin: Lower bound of the target range.
        newmax: Upper bound of the target range.

    Returns:
        list: One rescaled value per input, in input order. An empty input
        gives an empty list. If all inputs are equal there is no spread to
        interpolate over, so every value is placed at the midpoint of the
        target range.
    """
    arr = list(l)
    if not arr:
        # min()/max() would raise ValueError on an empty sequence.
        return []

    # Compute the bounds once instead of once per element.
    lowest, highest = min(arr), max(arr)
    span = highest - lowest
    if span == 0:
        # Guard against ZeroDivisionError, e.g. a graph in which every node
        # has the same degree.
        midpoint = (newmin + newmax) / 2
        return [midpoint for _ in arr]

    return [(x - lowest) / span * (newmax - newmin) + newmin for x in arr]
# Use the plasma colormap, resampled to 12 discrete colours
graph_colormap = plt.get_cmap('plasma', 12)

# Node colour varies with degree, rescaled to [0.0, 0.9] before the
# colormap lookup
degree_of = dict(G.degree())
c = rescale([degree_of[v] for v in G], 0.0, 0.9)
c = [graph_colormap(i) for i in c]

# Node size varies with betweenness centrality - mapped to [1500, 7000]
bc = nx.betweenness_centrality(G)
s = rescale(list(bc.values()), 1500, 7000)

# Edge width and edge colour both follow the edge "weight" attribute
# directly (no inversion): larger weights give thicker edges and colours
# further along the colormap.
edge_weights = [float(G[u][v]['weight']) for u, v in G.edges]
ew = rescale(edge_weights, 0.1, 4)
ec = rescale(edge_weights, 0.1, 1)
ec = [graph_colormap(i) for i in ec]

# Adjust spring_layout parameters to bring the networks closer together
pos = nx.spring_layout(G, k=0.5)

# Genes whose "Cardiovascular_Drug" value is "Yes" get a black outline
highlighted_nodes = annotations[annotations['Cardiovascular_Drug'] == 'Yes'].index
highlighted_lookup = set(highlighted_nodes)

# Create the figure and axes explicitly so every drawing call, the colorbar
# and the final Streamlit render all target the same objects instead of
# relying on pyplot's global "current figure" state.
fig, ax = plt.subplots(figsize=(19, 9), facecolor='white')

# Draw the nodes with a black outline for highlighted ones
nx.draw_networkx_nodes(
    G,
    pos,
    ax=ax,
    node_color=c,
    node_size=s,
    edgecolors=['black' if node in highlighted_lookup else 'none' for node in G],
    linewidths=2,
)

# Draw the edges
nx.draw_networkx_edges(G, pos, ax=ax, edge_color=ec, width=ew)

# Draw node labels: white text on nodes below the median degree, black text
# otherwise. The median is the same for every node, so compute it once
# rather than inside the loop.
median_degree = np.median(list(degree_of.values()))
# s is ordered like G.nodes, so zipping pairs each node with its own size
# without a per-node list search.
for node, node_size in zip(G.nodes, s):
    x, y = pos[node]
    if degree_of[node] < median_degree:
        font_color = 'white'
        # Scale the font with the node size, capped at 10
        font_size = min(node_size * 0.01, 10)
    else:
        font_color = 'black'
        font_size = 12  # Default size for black font
    ax.text(x, y, node, fontsize=font_size, fontweight='bold', ha='center', va='center', color=font_color)

# Add a colorbar for the node colour scale. The mappable is created by hand
# and is not attached to any Axes, so the target Axes must be passed
# explicitly; newer Matplotlib releases refuse to guess it.
sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)

ax.set_axis_off()

# Display the network plot in the Streamlit app, passing the Figure itself
# rather than the pyplot module
st.pyplot(fig)