xuyingli committed on
Commit
1c470a9
1 Parent(s): 8e8d7f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -13
app.py CHANGED
@@ -8,6 +8,7 @@ from collections import Counter
8
  from tqdm import tqdm
9
  from statistics import mean
10
 
 
11
  import matplotlib.pyplot as plt
12
  import numpy as np
13
  import pandas as pd
@@ -16,6 +17,8 @@ from stmol import *
16
  import py3Dmol
17
  # from streamlit_3Dmol import component_3dmol
18
 
 
 
19
  import scipy
20
  from sklearn.model_selection import GridSearchCV, train_test_split
21
  from sklearn.decomposition import PCA
@@ -115,14 +118,17 @@ def esm_search(model, sequnce, batch_converter,top_k=5):
115
  client = Client(
116
  url=st.secrets["DB_URL"], user=st.secrets["USER"], password=st.secrets["PASSWD"])
117
 
118
- result = client.fetch("SELECT activity, distance('topK=5')(representations, " + str(token_list) + ')'+ "as dist FROM default.esm_protein_indexer_768")
 
119
  result_temp_seq = []
 
120
  for i in result:
121
- # print(result_temp_seq)
122
- result_temp_coords = i['coords']
123
  result_temp_seq.append(i['seq'])
 
 
124
 
125
- return result_temp_coords, result_temp_seq
126
 
127
  def KNN_search(sequence):
128
  model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
@@ -307,7 +313,7 @@ if 'xq' not in st.session_state:
307
 
308
  if sequence:
309
  st.write('you have entered: ', sequence)
310
- result_temp_coords, result_temp_seq = esm_search(model, sequence, esm_search,top_k=5)
311
  st.text('search result: ')
312
  # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
313
  if st.button(result_temp_seq[0]):
@@ -380,11 +386,13 @@ else:
380
 
381
  if sequence:
382
  st.write('you have entered: ', sequence)
383
- result_temp_coords, result_temp_seq = esm_search(st.session_state['xq'], sequence, st.session_state['batch'] ,top_k=1)
384
  st.text('search result (top 5): ')
385
  # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
386
- option2 = st.selectbox('top5 sequence', (result_temp_seq[0],result_temp_seq[1],result_temp_seq[2],result_temp_seq[3],result_temp_seq[4]))
387
- if option2 == result_temp_seq[0]:
 
 
388
  st.write(result_temp_seq[0])
389
  import random
390
  # print(random.randint(0,9))
@@ -393,8 +401,9 @@ else:
393
  protein = prot_str[random.randint(14,18)]
394
  xyzview = py3Dmol.view(query='pdb:'+protein)
395
  xyzview.setStyle({'stick':{'color':'spectrum'}})
396
- start[3] = showmol(xyzview, height = 500,width=800)
397
  # st.write(result_temp_seq[4])
 
398
  import random
399
  # print(random.randint(0,9))
400
  st.write(result_temp_seq[1])
@@ -403,28 +412,31 @@ else:
403
  protein = prot_str[random.randint(0,4)]
404
  xyzview = py3Dmol.view(query='pdb:'+protein)
405
  xyzview.setStyle({'stick':{'color':'spectrum'}})
406
- start[4] = showmol(xyzview, height = 500,width=800)
 
407
  st.write(result_temp_seq[2])
408
  prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
409
  # protein=st.selectbox('select protein',prot_list)
410
  protein = prot_str[random.randint(4,8)]
411
  xyzview = py3Dmol.view(query='pdb:'+protein)
412
  xyzview.setStyle({'stick':{'color':'spectrum'}})
413
- start[5] = showmol(xyzview, height = 500,width=800)
 
414
  st.write(result_temp_seq[3])
415
  prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
416
  # protein=st.selectbox('select protein',prot_list)
417
  protein = prot_str[random.randint(4,8)]
418
  xyzview = py3Dmol.view(query='pdb:'+protein)
419
  xyzview.setStyle({'stick':{'color':'spectrum'}})
420
- start[6] = showmol(xyzview, height = 500,width=800)
 
421
  st.write(result_temp_seq[4])
422
  prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
423
  # protein=st.selectbox('select protein',prot_list)
424
  protein = prot_str[random.randint(4,8)]
425
  xyzview = py3Dmol.view(query='pdb:'+protein)
426
  xyzview.setStyle({'stick':{'color':'spectrum'}})
427
- start[7] = showmol(xyzview, height = 500,width=800)
428
 
429
 
430
  elif option == 'activity prediction':
 
8
  from tqdm import tqdm
9
  from statistics import mean
10
 
11
+ import torch
12
  import matplotlib.pyplot as plt
13
  import numpy as np
14
  import pandas as pd
 
17
  import py3Dmol
18
  # from streamlit_3Dmol import component_3dmol
19
 
20
+ import esm
21
+
22
  import scipy
23
  from sklearn.model_selection import GridSearchCV, train_test_split
24
  from sklearn.decomposition import PCA
 
118
  client = Client(
119
  url=st.secrets["DB_URL"], user=st.secrets["USER"], password=st.secrets["PASSWD"])
120
 
121
+ result = client.fetch("SELECT seq, distance('topK=500')(representations, " + str(token_list) + ')'+ "as dist FROM default.esm_protein_indexer_768")
122
+
123
  result_temp_seq = []
124
+
125
  for i in result:
126
+ # result_temp_coords = i['seq']
 
127
  result_temp_seq.append(i['seq'])
128
+
129
+ result_temp_seq = list(set(result_temp_seq))
130
 
131
+ return result_temp_seq
132
 
133
  def KNN_search(sequence):
134
  model, alphabet = esm.pretrained.esm2_t33_650M_UR50D()
 
313
 
314
  if sequence:
315
  st.write('you have entered: ', sequence)
316
+ result_temp_seq = esm_search(model, sequence, esm_search,top_k=5)
317
  st.text('search result: ')
318
  # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
319
  if st.button(result_temp_seq[0]):
 
386
 
387
  if sequence:
388
  st.write('you have entered: ', sequence)
389
+ result_temp_seq = esm_search(st.session_state['xq'], sequence, st.session_state['batch'] ,top_k=10)
390
  st.text('search result (top 5): ')
391
  # tab1, tab2, tab3, tab4, = st.tabs(["Cat", "Dog", "Owl"])
392
+ tab1, tab2, tab3 , tab4, tab5 = st.tabs(['1','2','3','4','5'])
393
+ # option2 = st.radio('top5 sequence', (result_temp_seq[0],result_temp_seq[1],result_temp_seq[2],result_temp_seq[3],result_temp_seq[4]))
394
+
395
+ with tab1:
396
  st.write(result_temp_seq[0])
397
  import random
398
  # print(random.randint(0,9))
 
401
  protein = prot_str[random.randint(14,18)]
402
  xyzview = py3Dmol.view(query='pdb:'+protein)
403
  xyzview.setStyle({'stick':{'color':'spectrum'}})
404
+ showmol(xyzview, height = 500,width=800)
405
  # st.write(result_temp_seq[4])
406
+ with tab2:
407
  import random
408
  # print(random.randint(0,9))
409
  st.write(result_temp_seq[1])
 
412
  protein = prot_str[random.randint(0,4)]
413
  xyzview = py3Dmol.view(query='pdb:'+protein)
414
  xyzview.setStyle({'stick':{'color':'spectrum'}})
415
+ showmol(xyzview, height = 500,width=800)
416
+ with tab3:
417
  st.write(result_temp_seq[2])
418
  prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
419
  # protein=st.selectbox('select protein',prot_list)
420
  protein = prot_str[random.randint(4,8)]
421
  xyzview = py3Dmol.view(query='pdb:'+protein)
422
  xyzview.setStyle({'stick':{'color':'spectrum'}})
423
+ showmol(xyzview, height = 500,width=800)
424
+ with tab4:
425
  st.write(result_temp_seq[3])
426
  prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
427
  # protein=st.selectbox('select protein',prot_list)
428
  protein = prot_str[random.randint(4,8)]
429
  xyzview = py3Dmol.view(query='pdb:'+protein)
430
  xyzview.setStyle({'stick':{'color':'spectrum'}})
431
+ showmol(xyzview, height = 500,width=800)
432
+ with tab5:
433
  st.write(result_temp_seq[4])
434
  prot_str=['1A2C','1BML','1D5M','1D5X','1D5Z','1D6E','1DEE','1E9F','1FC2','1FCC','1G4U','1GZS','1HE1','1HEZ','1HQR','1HXY','1IBX','1JBU','1JWM','1JWS']
435
  # protein=st.selectbox('select protein',prot_list)
436
  protein = prot_str[random.randint(4,8)]
437
  xyzview = py3Dmol.view(query='pdb:'+protein)
438
  xyzview.setStyle({'stick':{'color':'spectrum'}})
439
+ showmol(xyzview, height = 500,width=800)
440
 
441
 
442
  elif option == 'activity prediction':