Spaces:

MISATO-dataset
/

Adaptability_protein_dynamics

Sleeping

App Files Files Community

stochasticribosome committed on Jun 1, 2023

Commit

22dca11

1 Parent(s): 41311f3

Add inference preprocessing

Browse files

Files changed (1) hide show

main.py +97 -9

main.py CHANGED Viewed

@@ -12,6 +12,9 @@ import os
 from MDmodel import GNN_MD
 import h5py
 from transformMD import GNNTransformMD
 # JavaScript functions
 resid_hover = """function(atom,viewer) {{
@@ -46,6 +49,78 @@ model = model.to('cpu')
 model.eval()
 def get_pdb(pdb_code="", filepath=""):
     try:
@@ -65,24 +140,37 @@ def get_offset(pdb):
             return int(line[22:27])
 def predict(pdb_code, pdb_file):
-    path_to_pdb = get_pdb(pdb_code=pdb_code, filepath=pdb_file)
     mdh5_file = "inference_for_md.hdf5"
     md_H5File = h5py.File(mdh5_file)
     column_names = ["x", "y", "z", "element"]
     atoms_protein = pd.DataFrame(columns = column_names)
-    cutoff = md_H5File["11GS"]["molecules_begin_atom_index"][:][-1] # cutoff defines protein atoms
-    atoms_protein["x"] = md_H5File["11GS"]["atoms_coordinates_ref"][:][:cutoff, 0]
-    atoms_protein["y"] = md_H5File["11GS"]["atoms_coordinates_ref"][:][:cutoff, 1]
-    atoms_protein["z"] = md_H5File["11GS"]["atoms_coordinates_ref"][:][:cutoff, 2]
-    atoms_protein["element"] = md_H5File["11GS"]["atoms_element"][:][:cutoff]
     item = {}
     item["scores"] = 0
-    item["id"] = "11GS"
     item["atoms_protein"] = atoms_protein
     transform = GNNTransformMD()
@@ -99,7 +187,7 @@ def predict(pdb_code, pdb_file):
     topN = 100
     topN_ind = np.argsort(adaptability)[::-1][:topN]
-    pdb = open(path_to_pdb, "r").read()
     view = py3Dmol.view(width=600, height=400)
     view.setBackgroundColor('white')
@@ -149,4 +237,4 @@ def run():
 if __name__ == "__main__":
-    run()

 from MDmodel import GNN_MD
 import h5py
 from transformMD import GNNTransformMD
+import sys
+import pytraj as pt
+import pickle
 # JavaScript functions
 resid_hover = """function(atom,viewer) {{
 model.eval()
def run_leap(fileName, path):
    """Write a tleap input script under ``path`` and run ``tleap`` on it.

    The script loads ``<path>4amb.pdb`` with the ff14SB protein and TIP3P
    water definitions and saves ``<path>exp.top`` / ``<path>exp.crd``.
    tleap's standard output is appended to ``<path>leap.out``.

    Args:
        fileName: Unused; kept so existing callers keep working.
        path: Prefix prepended verbatim to every file name, so a directory
            must be passed with its trailing separator (e.g. ``"11GS/"``).

    Raises:
        FileNotFoundError: If the ``tleap`` executable cannot be found.
        RuntimeError: If ``tleap`` exits with a non-zero status.
    """
    import subprocess  # local import: only needed where external tools are run

    leapText = """
    source leaprc.protein.ff14SB
    source leaprc.water.tip3p
    exp = loadpdb PATH4amb.pdb
    saveamberparm exp PATHexp.top PATHexp.crd
    quit
    """
    leapInput = path + "leap.in"
    with open(leapInput, "w") as outLeap:
        # Every 'PATH' placeholder in the template becomes the real prefix.
        outLeap.write(leapText.replace('PATH', path))

    # An argument list (no shell) keeps spaces or shell metacharacters in
    # ``path`` from being interpreted as part of the command line.
    with open(path + "leap.out", "a") as leapLog:
        result = subprocess.run(["tleap", "-f", leapInput], stdout=leapLog)
    if result.returncode != 0:
        raise RuntimeError(
            "tleap exited with status %d; see %sleap.out" % (result.returncode, path)
        )
def convert_to_amber_format(pdbName):
    """Build Amber topology/coordinates for ``<pdbName>.pdb`` and load them.

    Runs ``pdb4amber`` on ``<pdbName>.pdb``, then ``run_leap`` to produce
    ``exp.top`` / ``exp.crd`` inside the working directory ``<pdbName>/``,
    and finally writes the resulting structure back out as a PDB file in
    that directory.

    Args:
        pdbName: Path of the input PDB file without the ``.pdb`` extension.

    Returns:
        A pytraj trajectory iterator over ``<pdbName>/exp.crd``.

    Raises:
        FileNotFoundError: If the ``pdb4amber`` executable cannot be found.
        RuntimeError: If ``pdb4amber`` exits with a non-zero status.
    """
    import subprocess  # local import: only needed where external tools are run

    fileName, path = pdbName + '.pdb', pdbName + '/'
    # All intermediate files are written below ``path``; make sure it exists.
    os.makedirs(path, exist_ok=True)

    # Argument list instead of a shell string: the name may come from an
    # uploaded file and must not be interpreted by a shell.
    result = subprocess.run([
        "pdb4amber", "-i", fileName, "-p", "-y",
        "-o", path + "4amb.pdb",
        "-l", path + "pdb4amber_protein.log",
    ])
    if result.returncode != 0:
        raise RuntimeError(
            "pdb4amber exited with status %d for %s" % (result.returncode, fileName)
        )

    run_leap(fileName, path)
    traj = pt.iterload(path + 'exp.crd', top=path + 'exp.top')

    # Use only the base name so a ``pdbName`` that contains directories does
    # not get its directory part appended to ``path`` a second time.
    outPdb = path + os.path.basename(fileName)
    pt.write_traj(outPdb, traj, overwrite=True)
    print(outPdb+' was created. Please always use this file for inspection because the coordinates might get translated during amber file generation and thus might vary from the input pdb file.')
    # Loaded again (as the original code did) so the caller receives an
    # iterator that has not been consumed by write_traj above.
    return pt.iterload(path + 'exp.crd', top=path + 'exp.top')
def get_maps(mapPath):
    """Load the four pickled lookup tables stored in directory ``mapPath``.

    Args:
        mapPath: Directory holding the ``*.pickle`` map files. It may be
            given with or without a trailing path separator.

    Returns:
        Tuple ``(residueMap, nameMap, typeMap, elementMap)`` in that order,
        each being whatever object the corresponding pickle file contains.

    Raises:
        FileNotFoundError: If one of the map files is missing.
    """
    def _load(name):
        # os.path.join tolerates a missing trailing separator on mapPath,
        # and the context manager closes the file handle after loading.
        # NOTE: unpickling can execute arbitrary code; only point mapPath
        # at trusted map files.
        with open(os.path.join(mapPath, name), 'rb') as handle:
            return pickle.load(handle)

    residueMap = _load('atoms_residue_map_generate.pickle')
    nameMap = _load('atoms_name_map_generate.pickle')
    typeMap = _load('atoms_type_map_generate.pickle')
    elementMap = _load('map_atomType_element_numbers.pickle')
    return residueMap, nameMap, typeMap, elementMap
def get_residues_atomwise(residues):
    """Expand per-residue entries into one entry per atom.

    Args:
        residues: Iterable of ``(name, nAtoms)`` pairs.

    Returns:
        A list in which each ``name`` is repeated ``nAtoms`` times, in the
        order the residues were given. Residues with ``nAtoms`` of zero
        contribute nothing.
    """
    return [name for name, nAtoms in residues for _ in range(nAtoms)]
def get_begin_atom_index(traj):
    """Return the cumulative atom offsets of the molecules in ``traj``.

    Args:
        traj: Object whose ``top.mols`` yields molecules exposing ``n_atoms``.

    Returns:
        A list starting with 0 followed by the running total of atoms after
        each molecule, so it has one more entry than there are molecules.
    """
    from itertools import accumulate  # local import keeps the block self-contained

    natoms = [m.n_atoms for m in traj.top.mols]
    # Running sum of the molecule sizes, prefixed with the offset of the first one.
    molecule_begin_atom_index = [0, *accumulate(natoms)]
    print('molecule begin atom index', molecule_begin_atom_index, natoms)
    return molecule_begin_atom_index
def get_traj_info(traj, mapPath):
    """Gather the per-atom data of ``traj`` that is later written to HDF5.

    Atom types and residue names are translated through the lookup tables
    loaded by ``get_maps(mapPath)``; element values are looked up from the
    translated atom types.

    Args:
        traj: Trajectory exposing ``xyz`` and a topology ``top`` with
            ``atoms``, ``residues`` and ``mols``.
        mapPath: Location of the pickled map files, passed to ``get_maps``.

    Returns:
        Tuple of (coordinates of the first frame, element values, mapped
        atom types, atomic numbers, mapped residue value per atom, molecule
        begin atom indices).
    """
    all_frames = traj.xyz
    residue_lookup, _name_lookup, type_lookup, element_lookup = get_maps(mapPath)

    # Translate each atom's type first; elements are derived from that result.
    type_ids = []
    for atom in traj.top.atoms:
        type_ids.append(type_lookup[atom.type])
    element_ids = [element_lookup[type_id] for type_id in type_ids]
    atomic_nums = [atom.atomic_number for atom in traj.top.atoms]

    begin_index = get_begin_atom_index(traj)

    # One (mapped residue, atom count) pair per residue, then expanded per atom.
    per_residue = []
    for residue in traj.top.residues:
        per_residue.append((residue_lookup[residue.name], residue.n_atoms))
    per_atom_residue = get_residues_atomwise(per_residue)

    first_frame = all_frames[0]
    return first_frame, element_ids, type_ids, atomic_nums, per_atom_residue, begin_index
def write_h5_info(outName, struct, atoms_type, atoms_number, atoms_residue, atoms_element, molecules_begin_atom_index, atoms_coordinates_ref):
    """Write the data of one structure into a fresh HDF5 file.

    An existing regular file at ``outName`` is deleted first. The new file
    holds a single group named ``struct`` containing gzip-compressed
    datasets: five stored as 64-bit integers and the reference coordinates
    stored as 64-bit floats.

    Args:
        outName: Path of the HDF5 file to create.
        struct: Name of the group that receives the datasets.
        atoms_type, atoms_number, atoms_residue, atoms_element,
        molecules_begin_atom_index: Data for the integer datasets.
        atoms_coordinates_ref: Data for the float coordinate dataset.
    """
    if os.path.isfile(outName):
        os.remove(outName)

    # Dataset name -> data, listed in the order the datasets are created.
    integer_datasets = (
        ('atoms_residue', atoms_residue),
        ('molecules_begin_atom_index', molecules_begin_atom_index),
        ('atoms_type', atoms_type),
        ('atoms_number', atoms_number),
        ('atoms_element', atoms_element),
    )

    with h5py.File(outName, 'w') as h5_out:
        group = h5_out.create_group(struct)
        for label, values in integer_datasets:
            group.create_dataset(label, data=values, compression="gzip", dtype='i8')
        group.create_dataset(
            'atoms_coordinates_ref',
            data=atoms_coordinates_ref,
            compression="gzip",
            dtype='f8',
        )
def preprocess(pdbid: str = None, ouputfile: str = "inference_for_md.hdf5", mask: str = "!@H=", mappath: str = "/maps/"):
    """Convert ``<pdbid>.pdb`` into the HDF5 file used for inference.

    The structure is converted with ``convert_to_amber_format``, restricted
    to the atoms selected by ``mask``, described with ``get_traj_info`` and
    stored by ``write_h5_info`` in a group named ``pdbid``.

    Args:
        pdbid: Input PDB path without the ``.pdb`` extension; also used as
            the HDF5 group name.
        ouputfile: Path of the HDF5 file to write.
        mask: Atom selection applied as ``traj[mask]``; the default
            presumably drops hydrogens (Amber mask syntax) - confirm.
        mappath: Location of the pickled map files.
    """
    full_traj = convert_to_amber_format(pdbid)
    selection = full_traj[mask]

    (
        coords_ref,
        element_ids,
        type_ids,
        atomic_nums,
        residue_ids,
        mol_begin_index,
    ) = get_traj_info(selection, mappath)

    write_h5_info(
        ouputfile,
        pdbid,
        atoms_type=type_ids,
        atoms_number=atomic_nums,
        atoms_residue=residue_ids,
        atoms_element=element_ids,
        molecules_begin_atom_index=mol_begin_index,
        atoms_coordinates_ref=coords_ref,
    )
 def get_pdb(pdb_code="", filepath=""):
     try:
             return int(line[22:27])
def get_pdbid_from_filename(filename: str):
    """Return ``filename`` with its final extension removed.

    ``"11GS.pdb"`` gives ``"11GS"``. Any directory part is kept, and only
    the last extension is stripped, so appending ``".pdb"`` to the result
    gives back the original ``.pdb`` path even when a directory name
    contains a dot or the path starts with ``"./"``.

    Args:
        filename: A path string (or ``os.PathLike``), or an object that
            exposes the path as ``.name`` - ``predict`` reads
            ``pdb_file.name`` from the same object it passes in here.

    Returns:
        The path without its trailing extension, as a string.
    """
    if isinstance(filename, (str, os.PathLike)):
        path = os.fspath(filename)
    else:
        # Uploaded-file wrapper: its on-disk location is stored in ``.name``.
        path = filename.name
    return os.path.splitext(path)[0]
 def predict(pdb_code, pdb_file):
+    #path_to_pdb = get_pdb(pdb_code=pdb_code, filepath=pdb_file)
+    #pdb = open(path_to_pdb, "r").read()
+    # switch to misato env if not running from container
+    pdbid = get_pdbid_from_filename(pdb_file)
     mdh5_file = "inference_for_md.hdf5"
+    mappath = "/maps"
+    mask = "!@H="
+    preprocess(pdbid=pdbid, ouputfile=mdh5_file, mask=mask, mappath=mappath)
     md_H5File = h5py.File(mdh5_file)
     column_names = ["x", "y", "z", "element"]
     atoms_protein = pd.DataFrame(columns = column_names)
+    cutoff = md_H5File[pdbid]["molecules_begin_atom_index"][:][-1] # cutoff defines protein atoms
+    atoms_protein["x"] = md_H5File[pdbid]["atoms_coordinates_ref"][:][:cutoff, 0]
+    atoms_protein["y"] = md_H5File[pdbid]["atoms_coordinates_ref"][:][:cutoff, 1]
+    atoms_protein["z"] = md_H5File[pdbid]["atoms_coordinates_ref"][:][:cutoff, 2]
+    atoms_protein["element"] = md_H5File[pdbid]["atoms_element"][:][:cutoff]
     item = {}
     item["scores"] = 0
+    item["id"] = pdbid
     item["atoms_protein"] = atoms_protein
     transform = GNNTransformMD()
     topN = 100
     topN_ind = np.argsort(adaptability)[::-1][:topN]
+    pdb = open(pdb_file.name, "r").read()
     view = py3Dmol.view(width=600, height=400)
     view.setBackgroundColor('white')
 if __name__ == "__main__":
+    run()