Spaces:

hlnicholls
/

CMR-HF-Coloc

Sleeping

App Files Files Community

CMR-HF-Coloc / app.py

hlnicholls

Update app.py

f2fcde1 verified 4 months ago

raw

history blame contribute delete

8.41 kB

	import streamlit as st
	import re
	import pandas as pd
	import networkx as nx
	import numpy as np
	import matplotlib.pyplot as plt
	from matplotlib import cm

	# Page banner followed by the centred application title.
	st.image("banner.png", use_column_width=True)
	app_title_html = "<h1 style='text-align: center;'>CMR and Heart Failure Colocalisation Drug Interaction Viewer</h1>"
	st.markdown(app_title_html, unsafe_allow_html=True)

	# Introductory blurb rendered directly beneath the title.
	app_description = """
	This interactive app allows you to explore colocalising genes between cardiovascular magnetic resonance image (CMR) traits and heart failure (HF) that have interacting drugs.
	You can input multiple HGNC gene names or disease terms to filter the dataset or enter a single gene for more detailed information.
	Additionally, you can visualize a protein interaction network for specific genes using STRINGdb data.
	"""
	st.markdown(app_description, unsafe_allow_html=True)

	# Read the colocalisation table, replace missing values with 0 and key the
	# rows by gene symbol so later lookups can go through the index.
	annotations = (
	    pd.read_csv("colocalisation_results.csv")
	    .fillna(0)
	    .set_index("Gene")
	)

	# Heading for the gene / disease filtering section.
	st.markdown("### View colocalising gene drug interaction results for selected genes/diseases or the entire dataset.")

	# Split the free-text gene box into individual gene symbols.
	def collect_genes(x):
	    """Return the gene symbols contained in the raw input string ``x``.

	    Symbols may be separated by commas, whitespace, or any mix of the two;
	    empty fragments (e.g. from a trailing comma) are discarded.

	    The previous pattern escaped the ``|`` characters, which in Python's
	    ``re`` syntax matches a literal pipe instead of acting as alternation,
	    so ordinary comma-separated input was never split.
	    """
	    return [token for token in re.split(r"[,\s]+", x) if token]
	# Free-text box for entering several gene symbols at once; the raw string is
	# passed through collect_genes to obtain the list used by the multi-gene filter.
	input_gene_list = st.text_input("Input a list of multiple HGNC genes (enter comma separated):")
	gene_list = collect_genes(input_gene_list)

	# Serialise a DataFrame for download; wrapped in Streamlit's data cache.
	@st.cache_data
	def convert_df(df):
	    """Return ``df`` rendered as CSV (index column omitted), UTF-8 encoded."""
	    csv_text = df.to_csv(index=False)
	    return csv_text.encode('utf-8')

	# Multi-gene view: only shown when more than one symbol was entered.
	if len(gene_list) > 1:
	    # reset_index() moves the "Gene" index back into the first column and
	    # returns a new frame. This replaces assigning a column onto a filtered
	    # slice of `annotations` (which triggers pandas' SettingWithCopyWarning)
	    # followed by a manual column reorder.
	    df = annotations[annotations.index.isin(gene_list)].reset_index()

	    # Display the filtered results
	    st.dataframe(df)

	    # CSV of the matched gene symbols; the download button that would serve
	    # it is currently commented out.
	    output = df[['Gene']]
	    csv = convert_df(output)
	    # st.download_button("Download Filtered Colocalisation Results", csv, "filtered_colocalisation_results.csv", "text/csv", key='download-csv')

	# Search box for filtering rows by a disease name found in the drug terms.
	input_disease = st.text_input("Input a disease name to search in drug terms (partial match allowed):")

	if input_disease:
	    # regex=False treats the input as a literal, case-insensitive substring.
	    # With the default regex matching, user text containing characters such
	    # as "(" or "[" raises re.error and aborts the script run.
	    disease_mask = annotations['terms_drug'].str.contains(
	        input_disease, case=False, na=False, regex=False
	    )
	    # reset_index() restores "Gene" as the first column on a fresh frame,
	    # avoiding column assignment on a filtered slice of `annotations`.
	    df_disease_filtered = annotations[disease_mask].reset_index()

	    if not df_disease_filtered.empty:
	        st.markdown(f"### Colocalisation results for disease: {input_disease}")

	        # Display filtered dataframe
	        st.dataframe(df_disease_filtered)

	        # CSV bytes for the (currently commented-out) download button.
	        csv_disease_filtered = convert_df(df_disease_filtered)
	        # st.download_button("Download Filtered Colocalisation Results", csv_disease_filtered, "filtered_colocalisation_disease_results.csv", "text/csv", key='download-disease-csv')
	    else:
	        st.write(f"No results found for disease: {input_disease}")

	# Detailed view for one gene entered on its own.
	input_gene = st.text_input("Input an individual HGNC gene:")

	# Ignore stray leading/trailing whitespace so that e.g. " TTN" still matches
	# the gene index instead of being reported as missing.
	gene_symbol = input_gene.strip()

	if gene_symbol:
	    # reset_index() restores "Gene" as the first column on a fresh frame,
	    # avoiding column assignment on a filtered slice of `annotations`.
	    df2 = annotations[annotations.index == gene_symbol].reset_index()

	    if not df2.empty:
	        st.dataframe(df2)

	        # Provide a link to the gene's DrugnomeAI page
	        url = f"https://astrazeneca-cgr-publications.github.io/DrugnomeAI/geneview.html?gene={gene_symbol}"
	        markdown_link = f"[{gene_symbol} druggability in DrugnomeAI]({url})"
	        st.markdown(markdown_link, unsafe_allow_html=True)
	    else:
	        st.write("Gene not found in the dataset.")

	# Show the complete table, with the gene symbol as an ordinary first column.
	st.markdown("### All Colocalisation Results Interacting with Drugs")

	# reset_index() returns a new frame whose first column is the former "Gene"
	# index, leaving `annotations` itself untouched.
	df_total_output = annotations.reset_index()

	st.dataframe(df_total_output)

	# CSV bytes for the (currently commented-out) download button.
	csv = convert_df(df_total_output)
	# st.download_button("Download Complete Colocalisation Results", csv, "complete_colocalisation_results.csv", "text/csv", key='download-all-csv')

	# Centred heading for the protein interaction network section.
	network_title_html = "<h1 style='text-align: center;'>Protein Interaction Networks of Colocalising Drug Targets</h1>"
	st.markdown(network_title_html, unsafe_allow_html=True)

	# Legend explaining how node colour, node size, edge colour and outlines
	# should be read in the plot.
	network_legend = """
	- The colour of each node represents its degree (number of direct connections it has with other nodes).
	- The size of each node represents its betweenness centrality (larger nodes play a more central role in the network, facilitating communication between other proteins).
	- Node edges/connections are colour-coded by confidence of PPI (lighter colours (brighter) represent stronger interactions).
	- Genes that interact with cardiovascular drugs are highlighted with a bold black outline.
	"""
	st.markdown(network_legend, unsafe_allow_html=True)


	# Read the STRINGdb protein-protein interaction table (tab separated).
	ppi_data = pd.read_csv("STRINGdb_data.tsv", sep='\t')

	# Build an undirected graph with one edge per table row; the row's
	# combined_score is stored as the edge's 'weight' attribute.
	G = nx.Graph()
	G.add_weighted_edges_from(
	    zip(ppi_data['node1'], ppi_data['node2'], ppi_data['combined_score'])
	)

	# Linearly map a collection of numbers onto a target range.
	def rescale(l, newmin, newmax):
	    """Rescale the values in ``l`` linearly onto ``[newmin, newmax]``.

	    Args:
	        l: Any iterable of numbers (it is materialised once, so generators
	            are accepted).
	        newmin: Value the smallest input is mapped to.
	        newmax: Value the largest input is mapped to.

	    Returns:
	        A list with one rescaled value per input, in input order. An empty
	        input gives an empty list. If every input is identical there is no
	        spread to scale, so each value maps to ``newmin`` rather than
	        dividing by zero.
	    """
	    values = list(l)
	    if not values:
	        return []

	    # Compute the bounds once instead of once per element.
	    low, high = min(values), max(values)
	    span = high - low
	    if span == 0:
	        return [newmin for _ in values]

	    return [(x - low) / span * (newmax - newmin) + newmin for x in values]

	# 12-step plasma colormap shared by node colours, edge colours and the colorbar.
	graph_colormap = plt.get_cmap('plasma', 12)

	# Node colour encodes degree, rescaled to [0.0, 0.9] before colormap lookup.
	node_degrees = [G.degree(v) for v in G]
	c = [graph_colormap(level) for level in rescale(node_degrees, 0.0, 0.9)]

	# Node size encodes betweenness centrality, mapped to the range [1500, 7000].
	bc = nx.betweenness_centrality(G)
	s = rescale(list(bc.values()), 1500, 7000)

	# Edge width and edge colour both encode the stored edge weight directly
	# (no "1 - weight" inversion is applied): widths span [0.1, 4] and the
	# colormap positions span [0.1, 1].
	edge_weights = [float(G[u][v]['weight']) for u, v in G.edges]
	ew = rescale(edge_weights, 0.1, 4)
	ec = [graph_colormap(level) for level in rescale(edge_weights, 0.1, 1)]

	# Spring layout; k=0.5 sets the preferred distance between nodes.
	pos = nx.spring_layout(G, k=0.5)

	# Genes whose "Cardiovascular_Drug" annotation is "Yes" are outlined in the plot.
	is_cardiovascular = annotations['Cardiovascular_Drug'] == 'Yes'
	highlighted_nodes = annotations.loc[is_cardiovascular].index

	# Draw the network on an explicit Figure/Axes so that every call targets the
	# same canvas instead of relying on pyplot's implicit "current" figure.
	fig, ax = plt.subplots(figsize=(19, 9), facecolor='white')

	# Nodes: genes in `highlighted_nodes` get a black outline, others none.
	node_outlines = ['black' if node in highlighted_nodes else 'none' for node in G]
	nx.draw_networkx_nodes(G, pos, node_color=c, node_size=s, edgecolors=node_outlines, linewidths=2, ax=ax)

	# Draw the edges
	nx.draw_networkx_edges(G, pos, edge_color=ec, width=ew, ax=ax)

	# Node labels: white text for below-median degree, black text otherwise.
	# The median and the node -> size lookup are computed once up front; doing
	# both inside the loop made labelling quadratic in the number of nodes.
	median_degree = np.median([G.degree(n) for n in G])
	size_by_node = dict(zip(G.nodes, s))
	for node, (x, y) in pos.items():
	    if G.degree(node) < median_degree:
	        font_color = 'white'
	        # Scaled from node size and capped at 10 so the text fits inside the node.
	        font_size = min(size_by_node[node] * 0.01, 10)
	    else:
	        font_color = 'black'
	        font_size = 12  # Default size for black font

	    ax.text(x, y, node, fontsize=font_size, fontweight='bold', ha='center', va='center', color=font_color)

	# Colorbar for the node colour scale. The bar spans the normalised 0-1
	# colormap range, not raw degree counts. `ax` is passed explicitly because
	# the mappable is not attached to any Axes, and Matplotlib cannot otherwise
	# determine which Axes to take the colorbar's space from.
	sm = plt.cm.ScalarMappable(cmap=graph_colormap, norm=plt.Normalize(vmin=0, vmax=1))
	sm.set_array([])
	cbar = fig.colorbar(sm, ax=ax)
	cbar.set_label('Node Degree (Higher = More Connected)', fontsize=12)

	ax.axis('off')

	# Render the Figure object (not the pyplot module) in the Streamlit app, then
	# close it so repeated script reruns do not accumulate open figures.
	st.pyplot(fig)
	plt.close(fig)