aksell committed on
Commit
894de54
1 Parent(s): 98cd95a

Add app.py with the grid protein viewer

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import stmol
3
+ import py3Dmol
4
+ import time
5
+ import re
6
+ import requests
7
+
8
# Streamlit app: reads protein sequences from a text area, asks the ESM Atlas
# folding endpoint for a PDB structure per sequence, and shows all structures
# in a single py3Dmol viewer grid.

# One-letter residue codes kept by the cleaning step; everything else is dropped.
VALID_RESIDUES = "ACDEFGHIKLMNPQRSTVWY"
MAX_SEQUENCE_LENGTH = 400
MAX_GRID_COLUMNS = 12

ESMFOLD_URL = "https://api.esmatlas.com/foldSequence/v1/pdb/"
MAX_FOLD_ATTEMPTS = 3
RETRY_DELAY_SECONDS = 0.1
# Without a timeout a stalled connection would block the whole app forever.
REQUEST_TIMEOUT_SECONDS = 120

DEFAULT_SEQUENCES = """MINDLLDISRIISGKMTLDRAEVNLTAIARQVVEEQRQAAEAKSIQLLCSTPDTNHYVFGDFDRLKQTLWNLLSNAVKFTPSGGTVELELGY
MQGDSSISSSNRMFTLCKPLTVANETSTLSTTRNSKSNKRVSKQRVNLAESPERNAPSPASIKTNETEEFSTIKTTNNEVLGYEPNYVSYDF
MSTHVSLENTLASLQATFFSLEARHTALETQLLSTRTELAATKQELVRVQAEISRADAQAQDLKAQILTLKEKADQAEVEAAAATQRAEESQ
MVLLSTGPLPILFLGPSLAELNQKYQVVSDTLLRFTNTVTFNTLKFLGSDS
MNNDEQPFIMSTSGYAGNTTSSMNSTSDFNTNNKSNTWSNRFSNFIAYFSGVGWFIGAISVIFFIIYVIVFLSRKTKPSGQKQYSRTERNNR
MEAVYSFTITETGTGTVEVTPLDRTISGADIVYPPDTACVPLTVQPVINANGTWTLGSGCTGHFSVDTTGHVNCLTGGFGAAGVHTVIYTVE
MGLTTSGGARGFCSLAVLQELVPRPELLFVIDRAFHSGKHAVDMQVVDQEGLGDGVATLLYAHQGLYTCLLQAEARLLGREWAAVPALEPNF
MGLTTSGGARGFCSLAVLQELVPRPELLFVIDRAFHSGKHAVDMQVVDQEGLGDGVATLLYAHQGLYTCLLQAEARLLGREWAAVPALEPNF
MGAAGYTGSLILAALKQNPDIAVYALNRNDEKLKDVCGQYSNLKGQVCDLSNESQVEALLSGPRKTVVNLVGPYSFYGSRVLNACIEANCHY
"""

# Compiled once; matches every character that is not an accepted residue code.
_INVALID_RESIDUE = re.compile(f"[^{VALID_RESIDUES}]")


def clean_sequences(raw_text, max_length=MAX_SEQUENCE_LENGTH):
    """Return the cleaned, non-empty sequences found in *raw_text*.

    The text is split on newlines. From every line all characters outside
    ``VALID_RESIDUES`` are removed, and the result is cut to *max_length*
    residues. Lines with no residues left are skipped.

    The cap is applied after cleaning so that it counts residues, not stray
    whitespace or punctuation in the pasted text.
    """
    cleaned = []
    for line in raw_text.split("\n"):
        residues = _INVALID_RESIDUE.sub("", line.strip())[:max_length]
        if residues:
            cleaned.append(residues)
    return cleaned


def fold_sequence(sequence):
    """Fetch the PDB text for *sequence*, or return ``None`` if every attempt fails.

    An attempt fails when the request raises, the HTTP status is not a
    success, the body is empty, or the body is the literal
    ``"INTERNAL SERVER ERROR"`` marker. Up to ``MAX_FOLD_ATTEMPTS`` attempts
    are made with a short pause between them.
    """
    for attempt in range(1, MAX_FOLD_ATTEMPTS + 1):
        try:
            response = requests.post(
                ESMFOLD_URL, data=sequence, timeout=REQUEST_TIMEOUT_SECONDS
            )
        except requests.RequestException:
            response = None

        if response is not None and response.ok:
            pdb_text = response.text
            if pdb_text and pdb_text != "INTERNAL SERVER ERROR":
                return pdb_text

        # No point in sleeping after the final attempt.
        if attempt < MAX_FOLD_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS)
    return None


def grid_shape(count, max_columns=MAX_GRID_COLUMNS):
    """Return ``(rows, columns)`` for a near-square grid with *count* cells.

    The column count is the square root of *count* rounded up, capped at
    *max_columns*; the row count is whatever is needed to fit all cells.
    A non-positive *count* yields ``(0, 0)``.
    """
    if count <= 0:
        return 0, 0
    columns = int(count ** 0.5)
    if columns * columns < count:
        columns += 1
    columns = min(columns, max_columns)
    rows = (count + columns - 1) // columns
    return rows, columns


def render_grid(pdb_strings, viewer_width):
    """Draw every entry of the non-empty list *pdb_strings* in one viewer grid."""
    grid_rows, grid_columns = grid_shape(len(pdb_strings))

    # Calculate the width and height of each (square) grid cell
    grid_cell_width = viewer_width // grid_columns
    grid_cell_height = grid_cell_width
    viewer_height = grid_rows * grid_cell_height

    xyzview = py3Dmol.view(
        width=viewer_width,
        height=viewer_height,
        linked=False,
        viewergrid=(grid_rows, grid_columns),
    )

    # Fill the grid row by row; trailing cells of the last row may stay empty.
    for index, pdb_string in enumerate(pdb_strings):
        row, col = divmod(index, grid_columns)
        xyzview.addModel(pdb_string, "pdb", viewer=(row, col))

    xyzview.setStyle({"cartoon": {"color": "spectrum"}})

    # Focus the chains we added
    xyzview.zoomTo()

    # Draw our grid!
    st.write("Showing grid")
    stmol.showmol(xyzview, height=viewer_height, width=viewer_width)


def main():
    """Build the page: read sequences, fold them, and show the structure grid."""
    st.set_page_config(layout="wide")
    st.header("Grid visualization of folded protein sequences")

    input_sequence = st.text_area(
        "Sequences separated by a newline (max 400 resis each)", DEFAULT_SEQUENCES
    )

    # Parse and clean input sequences
    sequences = clean_sequences(input_sequence)
    st.write(f"Found {len(sequences)} valid sequences")

    # Fold sequences with ESMfold
    pdb_strings = []
    for seq in sequences:
        pdb_str = fold_sequence(seq)
        if pdb_str is None:
            st.write(
                f"Failed to retrieve PDB structure after {MAX_FOLD_ATTEMPTS} retries"
            )
        else:
            pdb_strings.append(pdb_str)

    # Get the width of the viewer from the sidebar
    viewer_width = st.sidebar.number_input("Viewer Width", 100, 2000, 900)

    # An empty grid has zero columns; skip rendering instead of dividing by zero.
    if not pdb_strings:
        st.write("No structures to display")
        return

    render_grid(pdb_strings, viewer_width)


if __name__ == "__main__":
    main()