Spaces:

AI4PD
/

hexviz

App Files Community

aksell committed on Apr 5, 2023

Commit

c8fb9e8

•

1 Parent(s): 5d3313d

Expander with PDB code, file, or sequence input

Browse files

The user picks one of the three inputs: a PDB ID, an uploaded PDB file, or a sequence.
The caching of uploaded files in session storage is still confusing.
Need to figure out how to improve that.

Files changed (4) hide show

hexviz/attention.py +11 -1
hexviz/pages/🗺️Identify_Interesting_Heads.py +9 -2
hexviz/view.py +19 -15
hexviz/🧬Attention_Visualization.py +11 -4

hexviz/attention.py CHANGED Viewed

@@ -2,6 +2,7 @@ from io import StringIO
 from typing import List, Optional
 from urllib import request
 import streamlit as st
 import torch
 from Bio.PDB import PDBParser, Polypeptide, Structure
@@ -29,6 +30,15 @@ def get_pdb_file(pdb_code: str) -> Structure:
     file = StringIO(pdb_data)
     return file
 def get_chains(structure: Structure) -> List[str]:
     """
@@ -125,6 +135,7 @@ def get_attention_pairs(pdb_str: str, layer: int, head: int, chain_ids: Optional
         chains = list(structure.get_chains())
     attention_pairs = []
     for chain in chains:
         sequence = get_sequence(chain)
         attention = get_attention(sequence=sequence, model_type=model_type)
@@ -145,7 +156,6 @@ def get_attention_pairs(pdb_str: str, layer: int, head: int, chain_ids: Optional
         top_n_residues = sorted(residue_attention.items(), key=lambda x: x[1], reverse=True)[:top_n]
-        top_residues = []
         for res, attn_sum in top_n_residues:
             coord = chain[res]["CA"].coord.tolist()
             top_residues.append((attn_sum, coord, chain.id, res))

 from typing import List, Optional
 from urllib import request
+import requests
 import streamlit as st
 import torch
 from Bio.PDB import PDBParser, Polypeptide, Structure
     file = StringIO(pdb_data)
     return file
+@st.cache
+def get_pdb_from_seq(sequence: str) -> str:
+    """
+    Get structure from sequence
+    """
+    url = "https://api.esmatlas.com/foldSequence/v1/pdb/"
+    res = requests.post(url, data=sequence)
+    pdb_str = res.text
+    return pdb_str
 def get_chains(structure: Structure) -> List[str]:
     """
         chains = list(structure.get_chains())
     attention_pairs = []
+    top_residues = []
     for chain in chains:
         sequence = get_sequence(chain)
         attention = get_attention(sequence=sequence, model_type=model_type)
         top_n_residues = sorted(residue_attention.items(), key=lambda x: x[1], reverse=True)[:top_n]
         for res, attn_sum in top_n_residues:
             coord = chain[res]["CA"].coord.tolist()
             top_residues.append((attn_sum, coord, chain.id, res))

hexviz/pages/🗺️Identify_Interesting_Heads.py CHANGED Viewed

@@ -4,7 +4,7 @@ from hexviz.attention import get_attention, get_sequence, get_structure
 from hexviz.models import Model, ModelType
 from hexviz.plot import plot_tiled_heatmap
 from hexviz.view import (menu_items, select_heads_and_layers, select_model,
-                         select_pdb, select_sequence_slice)
 st.set_page_config(layout="wide", menu_items=menu_items)
 st.subheader("Find interesting heads and layers")
@@ -15,9 +15,16 @@ models = [
     Model(name=ModelType.ZymCTRL, layers=36, heads=16),
 ]
 selected_model = select_model(models)
-pdb_id = select_pdb()
 structure = get_structure(pdb_id)

 from hexviz.models import Model, ModelType
 from hexviz.plot import plot_tiled_heatmap
 from hexviz.view import (menu_items, select_heads_and_layers, select_model,
+                         select_pdb, select_protein, select_sequence_slice)
 st.set_page_config(layout="wide", menu_items=menu_items)
 st.subheader("Find interesting heads and layers")
     Model(name=ModelType.ZymCTRL, layers=36, heads=16),
 ]
+with st.expander("Input a PDB id, upload a PDB file or input a sequence"):
+    pdb_id = select_pdb()
+    uploaded_file = st.file_uploader("2.Upload PDB", type=["pdb"])
+    # TODO set max length of input sequence
+    input_sequence = st.text_area("3.Input sequence (Folded with ESMfold) Max 400 resis", "", max_chars=400)
+    pdb_str, structure, source = select_protein(pdb_id, uploaded_file, input_sequence)
+    st.write(f"Using: {source}")
 selected_model = select_model(models)
 structure = get_structure(pdb_id)

hexviz/view.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from io import StringIO
 import streamlit as st
 from Bio.PDB import PDBParser
-from hexviz.attention import get_pdb_file
 menu_items = {
     "Get Help": "https://huggingface.co/spaces/aksell/hexviz/discussions/new",
@@ -41,19 +42,16 @@ def select_model(models):
     return select_model
 def select_pdb():
-    st.sidebar.markdown(
-        """
-        Select Protein
-        ---
-        """)
     stored_pdb = st.session_state.get("pdb_id", None)
-    pdb_id = st.sidebar.text_input(
-            label="PDB ID",
             value=stored_pdb or "2FZ5")
     pdb_changed = stored_pdb != pdb_id
     if pdb_changed:
-        st.session_state.selected_chains = None
-        st.session_state.selected_chain_index = 0
         if "sequence_slice" in st.session_state:
             del st.session_state.sequence_slice
         if "uploaded_pdb_str" in st.session_state:
@@ -61,25 +59,31 @@ def select_pdb():
     st.session_state.pdb_id = pdb_id
     return pdb_id
-def select_protein(pdb_code, uploaded_file):
     # We get the pdb from 1 of 3 places:
     # 1. Cached pdb from session storage
     # 2. PDB file from uploaded file
     # 3. PDB file fetched based on the pdb_code input
     parser = PDBParser()
     if uploaded_file is not None:
-        if "pdb_str" in st.session_state:
-            del st.session_state.pdb_str
         pdb_str = uploaded_file.read().decode("utf-8")
         st.session_state["uploaded_pdb_str"] = pdb_str
-    if "uploaded_pdb_str" in st.session_state:
         pdb_str = st.session_state.uploaded_pdb_str
     else:
         file = get_pdb_file(pdb_code)
         pdb_str = file.read()
     structure = parser.get_structure(pdb_code, StringIO(pdb_str))
-    return pdb_str, structure
 def select_heads_and_layers(sidebar, model):
     sidebar.markdown(

 from io import StringIO
+import requests
 import streamlit as st
 from Bio.PDB import PDBParser
+from hexviz.attention import get_pdb_file, get_pdb_from_seq
 menu_items = {
     "Get Help": "https://huggingface.co/spaces/aksell/hexviz/discussions/new",
     return select_model
 def select_pdb():
     stored_pdb = st.session_state.get("pdb_id", None)
+    pdb_id = st.text_input(
+            label="1.PDB ID",
             value=stored_pdb or "2FZ5")
     pdb_changed = stored_pdb != pdb_id
     if pdb_changed:
+        if "selected_chains" in st.session_state:
+            del st.session_state.selected_chains
+        if "selected_chain_index" in st.session_state:
+            del st.session_state.selected_chain_index
         if "sequence_slice" in st.session_state:
             del st.session_state.sequence_slice
         if "uploaded_pdb_str" in st.session_state:
     st.session_state.pdb_id = pdb_id
     return pdb_id
+def select_protein(pdb_code, uploaded_file, input_sequence):
     # We get the pdb from 1 of 3 places:
     # 1. Cached pdb from session storage
     # 2. PDB file from uploaded file
     # 3. PDB file fetched based on the pdb_code input
     parser = PDBParser()
     if uploaded_file is not None:
         pdb_str = uploaded_file.read().decode("utf-8")
         st.session_state["uploaded_pdb_str"] = pdb_str
+        source = f"uploaded pdb file {uploaded_file.name}"
+    elif "uploaded_pdb_str" in st.session_state:
         pdb_str = st.session_state.uploaded_pdb_str
+        source = f"Uploaded file stored in cache"
+    elif input_sequence:
+        pdb_str = get_pdb_from_seq(str(input_sequence))
+        if "selected_chains" in st.session_state:
+            del st.session_state.selected_chains
+        source = f"Input sequence + ESM-fold"
     else:
         file = get_pdb_file(pdb_code)
         pdb_str = file.read()
+        source = f"PDB ID: {pdb_code}"
     structure = parser.get_structure(pdb_code, StringIO(pdb_str))
+    return pdb_str, structure, source
 def select_heads_and_layers(sidebar, model):
     sidebar.markdown(

hexviz/🧬Attention_Visualization.py CHANGED Viewed

@@ -20,11 +20,18 @@ models = [
     Model(name=ModelType.ZymCTRL, layers=36, heads=16),
 ]
-pdb_id = select_pdb()
-with st.expander("Input sequence or upload PDB file"):
-    uploaded_file = st.file_uploader("Upload PDB", type=["pdb"])
-pdb_str, structure = select_protein(pdb_id, uploaded_file)
 chains = get_chains(structure)
 selected_chains = st.sidebar.multiselect(label="Select Chain(s)", options=chains, default=st.session_state.get("selected_chains", None) or chains)

     Model(name=ModelType.ZymCTRL, layers=36, heads=16),
 ]
+with st.expander("Input a PDB id, upload a PDB file or input a sequence"):
+    pdb_id = select_pdb()
+    uploaded_file = st.file_uploader("2.Upload PDB", type=["pdb"])
+    input_sequence = st.text_area("3.Input sequence", "")
+    pdb_str, structure, source = select_protein(pdb_id, uploaded_file, input_sequence)
+    st.write(f"Visualizing: {source}")
+st.sidebar.markdown(
+    """
+    Configure visualization
+    ---
+    """)
 chains = get_chains(structure)
 selected_chains = st.sidebar.multiselect(label="Select Chain(s)", options=chains, default=st.session_state.get("selected_chains", None) or chains)