aboutsummaryrefslogtreecommitdiff
path: root/plip/structure/preparation.py
diff options
context:
space:
mode:
Diffstat (limited to 'plip/structure/preparation.py')
-rw-r--r--plip/structure/preparation.py2315
1 files changed, 0 insertions, 2315 deletions
diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
deleted file mode 100644
index 32083c7..0000000
--- a/plip/structure/preparation.py
+++ /dev/null
@@ -1,2315 +0,0 @@
-import itertools
-import os
-import re
-import tempfile
-from collections import namedtuple
-from operator import itemgetter
-
-import numpy as np
-from openbabel import pybel
-
-from plip.basic import config, logger
-from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name
-from plip.basic.supplemental import (
- cluster_doubles,
- is_lig,
- normalize_vector,
- vector,
- ring_is_planar,
-)
-from plip.basic.supplemental import (
- extract_pdbid,
- read_pdb,
- create_folder_if_not_exists,
- canonicalize,
-)
-from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
-from plip.basic.supplemental import (
- whichchain,
- whichrestype,
- whichresnumber,
- euclidean3d,
- int32_to_negative,
-)
-from plip.structure.detection import (
- halogen,
- pication,
- water_bridges,
- metal_complexation,
-)
-from plip.structure.detection import (
- hydrophobic_interactions,
- pistacking,
- hbonds,
- saltbridge,
-)
-
-logger = logger.get_logger()
-
-
-class PDBParser:
- def __init__(self, pdbpath, as_string):
- self.as_string = as_string
- self.pdbpath = pdbpath
- self.num_fixed_lines = 0
- self.covlinkage = namedtuple(
- "covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2"
- )
- (
- self.proteinmap,
- self.modres,
- self.covalent,
- self.altconformations,
- self.corrected_pdb,
- ) = self.parse_pdb()
-
- def parse_pdb(self):
- """Extracts additional information from PDB files.
- I. When reading in a PDB file, OpenBabel numbers ATOMS and HETATOMS continously.
- In PDB files, TER records are also counted, leading to a different numbering system.
- This functions reads in a PDB file and provides a mapping as a dictionary.
- II. Additionally, it returns a list of modified residues.
- III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified
- IV. Alternative conformations
- """
- if self.as_string:
- fil = self.pdbpath.rstrip("\n").split(
- "\n"
- ) # Removing trailing newline character
- else:
- f = read(self.pdbpath)
- fil = f.readlines()
- f.close()
- corrected_lines = []
- i, j = 0, 0 # idx and PDB numbering
- d = {}
- modres = set()
- covalent = []
- alt = []
- previous_ter = False
-
- # Standard without fixing
- if not config.NOFIX:
- if not config.PLUGIN_MODE:
- lastnum = 0 # Atom numbering (has to be consecutive)
- other_models = False
- for line in fil:
- if (
- not other_models
- ): # Only consider the first model in an NRM structure
- corrected_line, newnum = self.fix_pdbline(line, lastnum)
- if corrected_line is not None:
- if corrected_line.startswith("MODEL"):
- try: # Get number of MODEL (1,2,3)
- model_num = int(corrected_line[10:14])
- if model_num > 1: # MODEL 2,3,4 etc.
- other_models = True
- except ValueError:
- logger.debug(
- f"ignoring invalid MODEL entry: {corrected_line}"
- )
- corrected_lines.append(corrected_line)
- lastnum = newnum
- corrected_pdb = "".join(corrected_lines)
- else:
- corrected_pdb = self.pdbpath
- corrected_lines = fil
- else:
- corrected_pdb = self.pdbpath
- corrected_lines = fil
-
- for line in corrected_lines:
- if line.startswith(("ATOM", "HETATM")):
- # Retrieve alternate conformations
- atomid, location = int(line[6:11]), line[16]
- location = "A" if location == " " else location
- if location != "A":
- alt.append(atomid)
-
- if not previous_ter:
- i += 1
- j += 1
- else:
- i += 1
- j += 2
- d[i] = j
- previous_ter = False
- # Numbering Changes at TER records
- if line.startswith("TER"):
- previous_ter = True
- # Get modified residues
- if line.startswith("MODRES"):
- modres.add(line[12:15].strip())
- # Get covalent linkages between ligands
- if line.startswith("LINK"):
- covalent.append(self.get_linkage(line))
- return d, modres, covalent, alt, corrected_pdb
-
- def fix_pdbline(self, pdbline, lastnum):
- """Fix a PDB line if information is missing."""
- pdbqt_conversion = {
- "HD": "H",
- "HS": "H",
- "NA": "N",
- "NS": "N",
- "OA": "O",
- "OS": "O",
- "SA": "S",
- }
- fixed = False
- new_num = 0
- forbidden_characters = "[^a-zA-Z0-9_]"
- pdbline = pdbline.strip("\n")
- # Some MD / Docking tools produce empty lines, leading to segfaults
- if len(pdbline.strip()) == 0:
- self.num_fixed_lines += 1
- return None, lastnum
- if len(pdbline) > 100: # Should be 80 long
- self.num_fixed_lines += 1
- return None, lastnum
- # TER Entries also have continuing numbering, consider them as well
- if pdbline.startswith("TER"):
- new_num = lastnum + 1
- if pdbline.startswith("ATOM"):
- new_num = lastnum + 1
- current_num = int(pdbline[6:11])
- resnum = pdbline[22:27].strip()
- resname = pdbline[17:21].strip()
- # Invalid residue number
- try:
- int(resnum)
- except ValueError:
- pdbline = pdbline[:22] + " 0 " + pdbline[27:]
- fixed = True
- # Invalid characters in residue name
- if re.match(forbidden_characters, resname.strip()):
- pdbline = pdbline[:17] + "UNK " + pdbline[21:]
- fixed = True
- if lastnum + 1 != current_num:
- pdbline = (
- pdbline[:6]
- + (5 - len(str(new_num))) * " "
- + str(new_num)
- + " "
- + pdbline[12:]
- )
- fixed = True
- # No chain assigned
- if pdbline[21] == " ":
- pdbline = pdbline[:21] + "A" + pdbline[22:]
- fixed = True
- if pdbline.endswith("H"):
- self.num_fixed_lines += 1
- return None, lastnum
- # Sometimes, converted PDB structures contain PDBQT atom types. Fix that.
- for pdbqttype in pdbqt_conversion:
- if pdbline.strip().endswith(pdbqttype):
- pdbline = (
- pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + "\n"
- )
- self.num_fixed_lines += 1
- if pdbline.startswith("HETATM"):
- new_num = lastnum + 1
- try:
- current_num = int(pdbline[6:11])
- except ValueError:
- current_num = None
- logger.debug(f"invalid HETATM entry: {pdbline}")
- if lastnum + 1 != current_num:
- pdbline = (
- pdbline[:6]
- + (5 - len(str(new_num))) * " "
- + str(new_num)
- + " "
- + pdbline[12:]
- )
- fixed = True
- # No chain assigned or number assigned as chain
- if pdbline[21] == " ":
- pdbline = pdbline[:21] + "Z" + pdbline[22:]
- fixed = True
- # No residue number assigned
- if pdbline[23:26] == " ":
- pdbline = pdbline[:23] + "999" + pdbline[26:]
- fixed = True
- # Non-standard Ligand Names
- ligname = pdbline[17:21].strip()
- if len(ligname) > 3:
- pdbline = pdbline[:17] + ligname[:3] + " " + pdbline[21:]
- fixed = True
- if re.match(forbidden_characters, ligname.strip()):
- pdbline = pdbline[:17] + "LIG " + pdbline[21:]
- fixed = True
- if len(ligname.strip()) == 0:
- pdbline = pdbline[:17] + "LIG " + pdbline[21:]
- fixed = True
- if pdbline.endswith("H"):
- self.num_fixed_lines += 1
- return None, lastnum
- # Sometimes, converted PDB structures contain PDBQT atom types. Fix that.
- for pdbqttype in pdbqt_conversion:
- if pdbline.strip().endswith(pdbqttype):
- pdbline = (
- pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + " "
- )
- self.num_fixed_lines += 1
- self.num_fixed_lines += 1 if fixed else 0
- return pdbline + "\n", max(new_num, lastnum)
-
- def get_linkage(self, line):
- """Get the linkage information from a LINK entry PDB line."""
- conf1, id1, chain1, pos1 = (
- line[16].strip(),
- line[17:20].strip(),
- line[21].strip(),
- int(line[22:26]),
- )
- conf2, id2, chain2, pos2 = (
- line[46].strip(),
- line[47:50].strip(),
- line[51].strip(),
- int(line[52:56]),
- )
- return self.covlinkage(
- id1=id1,
- chain1=chain1,
- pos1=pos1,
- conf1=conf1,
- id2=id2,
- chain2=chain2,
- pos2=pos2,
- conf2=conf2,
- )
-
-
-class LigandFinder:
- def __init__(self, proteincomplex, altconf, modres, covalent, mapper):
- self.lignames_all = None
- self.lignames_kept = None
- self.water = None
- self.proteincomplex = proteincomplex
- self.altconformations = altconf
- self.modresidues = modres
- self.covalent = covalent
- self.mapper = mapper
- self.ligands = self.getligs()
- self.excluded = sorted(
- list(self.lignames_all.difference(set(self.lignames_kept)))
- )
-
- def getpeptides(self, chain):
- """If peptide ligand chains are defined via the command line options,
- try to extract the underlying ligand formed by all residues in the
- given chain without water
- """
- all_from_chain = [
- o
- for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol)
- if o.GetChain() == chain
- ] # All residues from chain
- if len(all_from_chain) == 0:
- return None
- else:
- non_water = [o for o in all_from_chain if not o.GetResidueProperty(9)]
- ligand = self.extract_ligand(non_water)
- return ligand
-
- def getligs(self):
- """Get all ligands from a PDB file and prepare them for analysis.
- Returns all non-empty ligands.
- """
-
- if config.PEPTIDES == [] and config.INTRA is None:
- # Extract small molecule ligands (default)
- ligands = []
-
- # Filter for ligands using lists
- ligand_residues, self.lignames_all, self.water = self.filter_for_ligands()
-
- all_res_dict = {
- (a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues
- }
- self.lignames_kept = list(set([a.GetName() for a in ligand_residues]))
-
- if not config.BREAKCOMPOSITE:
- # Update register of covalent links with those between DNA/RNA subunits
- self.covalent += nucleotide_linkage(all_res_dict)
- # Find fragment linked by covalent bonds
- res_kmers = self.identify_kmers(all_res_dict)
- else:
- res_kmers = [[a,] for a in ligand_residues]
- logger.debug(
- f"{len(res_kmers)} ligand kmer(s) detected for closer inspection"
- )
- for (
- kmer
- ) in (
- res_kmers
- ): # iterate over all ligands and extract molecules + information
- if len(kmer) > config.MAX_COMPOSITE_LENGTH:
- logger.debug(
- f"ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)"
- )
- else:
- ligands.append(self.extract_ligand(kmer))
-
- else:
- # Extract peptides from given chains
- self.water = [
- o
- for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol)
- if o.GetResidueProperty(9)
- ]
- if config.PEPTIDES:
- peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES]
- elif config.INTRA is not None:
- peptide_ligands = [
- self.getpeptides(config.INTRA),
- ]
-
- ligands = [p for p in peptide_ligands if p is not None]
- self.covalent, self.lignames_kept, self.lignames_all = [], [], set()
-
- return [lig for lig in ligands if len(lig.mol.atoms) != 0]
-
- def extract_ligand(self, kmer):
- """Extract the ligand by copying atoms and bonds and assign all information necessary for later steps."""
- data = namedtuple(
- "ligand",
- "mol hetid chain position water members longname type atomorder can_to_pdb",
- )
- members = [
- (res.GetName(), res.GetChain(), int32_to_negative(res.GetNum()))
- for res in kmer
- ]
- members = sort_members_by_importance(members)
- rname, rchain, rnum = members[0]
- logger.debug(
- f"finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements"
- )
- names = [x[0] for x in members]
- longname = "-".join([x[0] for x in members])
-
- if config.PEPTIDES:
- ligtype = "PEPTIDE"
- elif config.INTRA is not None:
- ligtype = "INTRA"
- else:
- # Classify a ligand by its HETID(s)
- ligtype = classify_by_name(names)
- logger.debug(f"ligand classified as {ligtype}")
-
- hetatoms = dict()
- for obresidue in kmer:
- cur_hetatoms = {
- obatom.GetIdx(): obatom
- for obatom in pybel.ob.OBResidueAtomIter(obresidue)
- if obatom.GetAtomicNum() != 1
- }
- if not config.ALTLOC:
- # Remove alternative conformations (standard -> True)
- ids_to_remove = [
- atom_id
- for atom_id in hetatoms.keys()
- if self.mapper.mapid(atom_id, mtype="protein", to="internal")
- in self.altconformations
- ]
- for atom_id in ids_to_remove:
- del cur_hetatoms[atom_id]
- hetatoms.update(cur_hetatoms)
- logger.debug(f"hetero atoms determined (n={len(hetatoms)})")
-
- lig = pybel.ob.OBMol() # new ligand mol
- neighbours = dict()
- for obatom in hetatoms.values(): # iterate over atom objects
- idx = obatom.GetIdx()
- lig.AddAtom(obatom)
- # ids of all neighbours of obatom
- neighbours[idx] = set(
- [
- neighbour_atom.GetIdx()
- for neighbour_atom in pybel.ob.OBAtomAtomIter(obatom)
- ]
- ) & set(hetatoms.keys())
- logger.debug(f"atom neighbours mapped")
-
- ##############################################################
- # map the old atom idx of OBMol to the new idx of the ligand #
- ##############################################################
-
- newidx = dict(
- zip(
- hetatoms.keys(),
- [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)],
- )
- )
- mapold = dict(zip(newidx.values(), newidx))
- # copy the bonds
- for obatom in hetatoms:
- for neighbour_atom in neighbours[obatom]:
- bond = hetatoms[obatom].GetBond(hetatoms[neighbour_atom])
- lig.AddBond(newidx[obatom], newidx[neighbour_atom], bond.GetBondOrder())
- lig = pybel.Molecule(lig)
-
- # For kmers, the representative ids are chosen (first residue of kmer)
- lig.data.update({"Name": rname, "Chain": rchain, "ResNr": rnum})
-
- # Check if a negative residue number is represented as a 32 bit integer
- if rnum > 10 ** 5:
- rnum = int32_to_negative(rnum)
-
- lig.title = ":".join((rname, rchain, str(rnum)))
- self.mapper.ligandmaps[lig.title] = mapold
-
- logger.debug("renumerated molecule generated")
-
- if not config.NOPDBCANMAP:
- atomorder = canonicalize(lig)
- else:
- atomorder = None
-
- can_to_pdb = {}
- if atomorder is not None:
- can_to_pdb = {atomorder[key - 1]: mapold[key] for key in mapold}
-
- ligand = data(
- mol=lig,
- hetid=rname,
- chain=rchain,
- position=rnum,
- water=self.water,
- members=members,
- longname=longname,
- type=ligtype,
- atomorder=atomorder,
- can_to_pdb=can_to_pdb,
- )
- return ligand
-
- def is_het_residue(self, obres):
- """Given an OBResidue, determines if the residue is indeed a possible ligand
- in the PDB file"""
- if not obres.GetResidueProperty(0):
- # If the residue is NOT amino (0)
- # It can be amino_nucleo, coenzme, ion, nucleo, protein, purine, pyrimidine, solvent
- # In these cases, it is a ligand candidate
- return True
- else:
- # Here, the residue is classified as amino
- # Amino acids can still be ligands, so we check for HETATM entries
- # Only residues with at least one HETATM entry are processed as ligands
- het_atoms = []
- for atm in pybel.ob.OBResidueAtomIter(obres):
- het_atoms.append(obres.IsHetAtom(atm))
- if True in het_atoms:
- return True
- return False
-
- def filter_for_ligands(self):
- """Given an OpenBabel Molecule, get all ligands, their names, and water"""
-
- candidates1 = [
- o
- for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol)
- if not o.GetResidueProperty(9) and self.is_het_residue(o)
- ]
-
- if config.DNARECEPTOR: # If DNA is the receptor, don't consider DNA as a ligand
- candidates1 = [
- res
- for res in candidates1
- if res.GetName() not in config.DNA + config.RNA
- ]
- all_lignames = set([a.GetName() for a in candidates1])
-
- water = [
- o
- for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol)
- if o.GetResidueProperty(9)
- ]
- # Filter out non-ligands
- if not config.KEEPMOD: # Keep modified residues as ligands
- candidates2 = [
- a
- for a in candidates1
- if is_lig(a.GetName()) and a.GetName() not in self.modresidues
- ]
- else:
- candidates2 = [a for a in candidates1 if is_lig(a.GetName())]
- logger.debug(f"{len(candidates2)} ligand(s) after first filtering step")
-
- ############################################
- # Filtering by counting and artifacts list #
- ############################################
- artifacts = []
- unique_ligs = set(a.GetName() for a in candidates2)
- for ulig in unique_ligs:
- # Discard if appearing 15 times or more and is possible artifact
- if (
- ulig in config.biolip_list
- and [a.GetName() for a in candidates2].count(ulig) >= 15
- ):
- artifacts.append(ulig)
-
- selected_ligands = [a for a in candidates2 if a.GetName() not in artifacts]
-
- return selected_ligands, all_lignames, water
-
- def identify_kmers(self, residues):
- """Using the covalent linkage information, find out which fragments/subunits form a ligand."""
-
- # Remove all those not considered by ligands and pairings including alternate conformations
- ligdoubles = [
- [(link.id1, link.chain1, link.pos1), (link.id2, link.chain2, link.pos2)]
- for link in [
- c
- for c in self.covalent
- if c.id1 in self.lignames_kept
- and c.id2 in self.lignames_kept
- and c.conf1 in ["A", ""]
- and c.conf2 in ["A", ""]
- and (c.id1, c.chain1, c.pos1) in residues
- and (c.id2, c.chain2, c.pos2) in residues
- ]
- ]
- kmers = cluster_doubles(ligdoubles)
- if not kmers: # No ligand kmers, just normal independent ligands
- return [[residues[res]] for res in residues]
-
- else:
- # res_kmers contains clusters of covalently bound ligand residues (kmer ligands)
- res_kmers = [[residues[res] for res in kmer] for kmer in kmers]
-
- # In this case, add other ligands which are not part of a kmer
- in_kmer = []
- for res_kmer in res_kmers:
- for res in res_kmer:
- in_kmer.append((res.GetName(), res.GetChain(), res.GetNum()))
- for res in residues:
- if res not in in_kmer:
- newres = [
- residues[res],
- ]
- res_kmers.append(newres)
- return res_kmers
-
-
-class Mapper:
- """Provides functions for mapping atom IDs in the correct way"""
-
- def __init__(self):
- self.proteinmap = (
- None # Map internal atom IDs of protein residues to original PDB Atom IDs
- )
- self.ligandmaps = (
- {}
- ) # Map IDs of new ligand molecules to internal IDs (or PDB IDs?)
- self.original_structure = None
-
- def mapid(
- self, idx, mtype, bsid=None, to="original"
- ): # Mapping to original IDs is standard for ligands
- if mtype == "reversed": # Needed to map internal ID back to original protein ID
- return self.reversed_proteinmap[idx]
- if mtype == "protein":
- return self.proteinmap[idx]
- elif mtype == "ligand":
- if to == "internal":
- return self.ligandmaps[bsid][idx]
- elif to == "original":
- return self.proteinmap[self.ligandmaps[bsid][idx]]
-
- def id_to_atom(self, idx):
- """Returns the atom for a given original ligand ID.
- To do this, the ID is mapped to the protein first and then the atom returned.
- """
- mapped_idx = self.mapid(idx, "reversed")
- return pybel.Atom(self.original_structure.GetAtom(mapped_idx))
-
-
-class Mol:
- def __init__(self, altconf, mapper, mtype, bsid):
- self.mtype = mtype
- self.bsid = bsid
- self.rings = None
- self.hydroph_atoms = None
- self.charged = None
- self.hbond_don_atom_pairs = None
- self.hbond_acc_atoms = None
- self.altconf = altconf
- self.Mapper = mapper
-
- def hydrophobic_atoms(self, all_atoms):
- """Select all carbon atoms which have only carbons and/or hydrogens as direct neighbors."""
- atom_set = []
- data = namedtuple("hydrophobic", "atom orig_atom orig_idx")
- atm = [
- a
- for a in all_atoms
- if a.atomicnum == 6
- and set(
- [natom.GetAtomicNum() for natom in pybel.ob.OBAtomAtomIter(a.OBAtom)]
- ).issubset({1, 6})
- ]
- for atom in atm:
- orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid)
- orig_atom = self.Mapper.id_to_atom(orig_idx)
- if atom.idx not in self.altconf:
- atom_set.append(data(atom=atom, orig_atom=orig_atom, orig_idx=orig_idx))
- return atom_set
-
- def find_hba(self, all_atoms):
- """Find all possible hydrogen bond acceptors"""
- data = namedtuple("hbondacceptor", "a a_orig_atom a_orig_idx type")
- a_set = []
- for atom in filter(lambda at: at.OBAtom.IsHbondAcceptor(), all_atoms):
- if (
- atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf
- ): # Exclude halogen atoms
- a_orig_idx = self.Mapper.mapid(
- atom.idx, mtype=self.mtype, bsid=self.bsid
- )
- a_orig_atom = self.Mapper.id_to_atom(a_orig_idx)
- a_set.append(
- data(
- a=atom,
- a_orig_atom=a_orig_atom,
- a_orig_idx=a_orig_idx,
- type="regular",
- )
- )
- a_set = sorted(a_set, key=lambda x: x.a_orig_idx)
- return a_set
-
- def find_hbd(self, all_atoms, hydroph_atoms):
- """Find all possible strong and weak hydrogen bonds donors (all hydrophobic C-H pairings)"""
- donor_pairs = []
- data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type")
- for donor in [
- a
- for a in all_atoms
- if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf
- ]:
- in_ring = False
- if not in_ring:
- for adj_atom in [
- a
- for a in pybel.ob.OBAtomAtomIter(donor.OBAtom)
- if a.IsHbondDonorH()
- ]:
- d_orig_idx = self.Mapper.mapid(
- donor.idx, mtype=self.mtype, bsid=self.bsid
- )
- d_orig_atom = self.Mapper.id_to_atom(d_orig_idx)
- donor_pairs.append(
- data(
- d=donor,
- d_orig_atom=d_orig_atom,
- d_orig_idx=d_orig_idx,
- h=pybel.Atom(adj_atom),
- type="regular",
- )
- )
- for carbon in hydroph_atoms:
- for adj_atom in [
- a
- for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom)
- if a.GetAtomicNum() == 1
- ]:
- d_orig_idx = self.Mapper.mapid(
- carbon.atom.idx, mtype=self.mtype, bsid=self.bsid
- )
- d_orig_atom = self.Mapper.id_to_atom(d_orig_idx)
- donor_pairs.append(
- data(
- d=carbon,
- d_orig_atom=d_orig_atom,
- d_orig_idx=d_orig_idx,
- h=pybel.Atom(adj_atom),
- type="weak",
- )
- )
- donor_pairs = sorted(donor_pairs, key=lambda x: (x.d_orig_idx, x.h.idx))
- return donor_pairs
-
- def find_rings(self, mol, all_atoms):
- """Find rings and return only aromatic.
- Rings have to be sufficiently planar OR be detected by OpenBabel as aromatic."""
- data = namedtuple(
- "aromatic_ring", "atoms orig_atoms atoms_orig_idx normal obj center type"
- )
- rings = []
- aromatic_amino = ["TYR", "TRP", "HIS", "PHE"]
- ring_candidates = mol.OBMol.GetSSSR()
- logger.debug(f"number of aromatic ring candidates: {len(ring_candidates)}")
- # Check here first for ligand rings not being detected as aromatic by Babel and check for planarity
- for ring in ring_candidates:
- r_atoms = [a for a in all_atoms if ring.IsMember(a.OBAtom)]
- r_atoms = sorted(r_atoms, key=lambda x: x.idx)
- if 4 < len(r_atoms) <= 6:
- res = list(set([whichrestype(a) for a in r_atoms]))
- # re-sort ring atoms for only ligands, because HETATM numbering is not canonical in OpenBabel
- if res[0] == "UNL":
- ligand_orig_idx = [
- self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms
- ]
- sort_order = np.argsort(np.array(ligand_orig_idx))
- r_atoms = [r_atoms[i] for i in sort_order]
- if (
- ring.IsAromatic()
- or res[0] in aromatic_amino
- or ring_is_planar(ring, r_atoms)
- ):
- # Causes segfault with OpenBabel 2.3.2, so deactivated
- # typ = ring.GetType() if not ring.GetType() == '' else 'unknown'
- # Alternative typing
- ring_type = "%s-membered" % len(r_atoms)
- ring_atms = [
- r_atoms[a].coords for a in [0, 2, 4]
- ] # Probe atoms for normals, assuming planarity
- ringv1 = vector(ring_atms[0], ring_atms[1])
- ringv2 = vector(ring_atms[2], ring_atms[0])
- atoms_orig_idx = [
- self.Mapper.mapid(r_atom.idx, mtype=self.mtype, bsid=self.bsid)
- for r_atom in r_atoms
- ]
- orig_atoms = [self.Mapper.id_to_atom(idx) for idx in atoms_orig_idx]
- rings.append(
- data(
- atoms=r_atoms,
- orig_atoms=orig_atoms,
- atoms_orig_idx=atoms_orig_idx,
- normal=normalize_vector(np.cross(ringv1, ringv2)),
- obj=ring,
- center=centroid([ra.coords for ra in r_atoms]),
- type=ring_type,
- )
- )
- return rings
-
- def get_hydrophobic_atoms(self):
- return self.hydroph_atoms
-
- def get_hba(self):
- return self.hbond_acc_atoms
-
- def get_hbd(self):
- return [
- don_pair
- for don_pair in self.hbond_don_atom_pairs
- if don_pair.type == "regular"
- ]
-
- def get_weak_hbd(self):
- return [
- don_pair
- for don_pair in self.hbond_don_atom_pairs
- if don_pair.type == "weak"
- ]
-
- def get_pos_charged(self):
- return [charge for charge in self.charged if charge.type == "positive"]
-
- def get_neg_charged(self):
- return [charge for charge in self.charged if charge.type == "negative"]
-
-
-class PLInteraction:
- """Class to store a ligand, a protein and their interactions."""
-
- def __init__(self, lig_obj, bs_obj, protcomplex):
- """Detect all interactions when initializing"""
- self.ligand = lig_obj
- self.lig_members = lig_obj.members
- self.pdbid = protcomplex.pymol_name
- self.bindingsite = bs_obj
- self.Mapper = protcomplex.Mapper
- self.output_path = protcomplex.output_path
- self.altconf = protcomplex.altconf
- # #@todo Refactor code to combine different directionality
-
- self.saltbridge_lneg = saltbridge(
- self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True
- )
- self.saltbridge_pneg = saltbridge(
- self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False
- )
-
- self.all_hbonds_ldon = hbonds(
- self.bindingsite.get_hba(), self.ligand.get_hbd(), False, "strong"
- )
- self.all_hbonds_pdon = hbonds(
- self.ligand.get_hba(), self.bindingsite.get_hbd(), True, "strong"
- )
-
- self.hbonds_ldon = self.refine_hbonds_ldon(
- self.all_hbonds_ldon, self.saltbridge_lneg, self.saltbridge_pneg
- )
- self.hbonds_pdon = self.refine_hbonds_pdon(
- self.all_hbonds_pdon, self.saltbridge_lneg, self.saltbridge_pneg
- )
-
- self.pistacking = pistacking(self.bindingsite.rings, self.ligand.rings)
-
- self.all_pi_cation_laro = pication(
- self.ligand.rings, self.bindingsite.get_pos_charged(), True
- )
- self.pication_paro = pication(
- self.bindingsite.rings, self.ligand.get_pos_charged(), False
- )
-
- self.pication_laro = self.refine_pi_cation_laro(
- self.all_pi_cation_laro, self.pistacking
- )
-
- self.all_hydrophobic_contacts = hydrophobic_interactions(
- self.bindingsite.get_hydrophobic_atoms(),
- self.ligand.get_hydrophobic_atoms(),
- )
- self.hydrophobic_contacts = self.refine_hydrophobic(
- self.all_hydrophobic_contacts, self.pistacking
- )
- self.halogen_bonds = halogen(
- self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don
- )
- self.water_bridges = water_bridges(
- self.bindingsite.get_hba(),
- self.ligand.get_hba(),
- self.bindingsite.get_hbd(),
- self.ligand.get_hbd(),
- self.ligand.water,
- )
-
- self.water_bridges = self.refine_water_bridges(
- self.water_bridges, self.hbonds_ldon, self.hbonds_pdon
- )
-
- self.metal_complexes = metal_complexation(
- self.ligand.metals,
- self.ligand.metal_binding,
- self.bindingsite.metal_binding,
- )
-
- self.all_itypes = self.saltbridge_lneg + self.saltbridge_pneg + self.hbonds_pdon
- self.all_itypes = (
- self.all_itypes
- + self.hbonds_ldon
- + self.pistacking
- + self.pication_laro
- + self.pication_paro
- )
- self.all_itypes = (
- self.all_itypes
- + self.hydrophobic_contacts
- + self.halogen_bonds
- + self.water_bridges
- )
- self.all_itypes = self.all_itypes + self.metal_complexes
-
- self.no_interactions = all(len(i) == 0 for i in self.all_itypes)
- (
- self.unpaired_hba,
- self.unpaired_hbd,
- self.unpaired_hal,
- ) = self.find_unpaired_ligand()
- self.unpaired_hba_orig_idx = [
- self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid)
- for atom in self.unpaired_hba
- ]
- self.unpaired_hbd_orig_idx = [
- self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid)
- for atom in self.unpaired_hbd
- ]
- self.unpaired_hal_orig_idx = [
- self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid)
- for atom in self.unpaired_hal
- ]
- self.num_unpaired_hba, self.num_unpaired_hbd = (
- len(self.unpaired_hba),
- len(self.unpaired_hbd),
- )
- self.num_unpaired_hal = len(self.unpaired_hal)
-
- # Exclude empty chains (coming from ligand as a target, from metal complexes)
- self.interacting_chains = sorted(
- list(
- set(
- [
- i.reschain
- for i in self.all_itypes
- if i.reschain not in [" ", None]
- ]
- )
- )
- )
-
- # Get all interacting residues, excluding ligand and water molecules
- self.interacting_res = list(
- set(
- [
- "".join([str(i.resnr), str(i.reschain)])
- for i in self.all_itypes
- if i.restype not in ["LIG", "HOH"]
- ]
- )
- )
- if len(self.interacting_res) != 0:
- logger.info(
- f"ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}"
- )
- interactions_list = []
- num_saltbridges = len(self.saltbridge_lneg + self.saltbridge_pneg)
- num_hbonds = len(self.hbonds_ldon + self.hbonds_pdon)
- num_pication = len(self.pication_laro + self.pication_paro)
- num_pistack = len(self.pistacking)
- num_halogen = len(self.halogen_bonds)
- num_waterbridges = len(self.water_bridges)
- if num_saltbridges != 0:
- interactions_list.append("%i salt bridge(s)" % num_saltbridges)
- if num_hbonds != 0:
- interactions_list.append("%i hydrogen bond(s)" % num_hbonds)
- if num_pication != 0:
- interactions_list.append("%i pi-cation interaction(s)" % num_pication)
- if num_pistack != 0:
- interactions_list.append("%i pi-stacking(s)" % num_pistack)
- if num_halogen != 0:
- interactions_list.append("%i halogen bond(s)" % num_halogen)
- if num_waterbridges != 0:
- interactions_list.append("%i water bridge(s)" % num_waterbridges)
- if not len(interactions_list) == 0:
- logger.info(f"complex uses {interactions_list}")
- else:
- logger.info("no interactions for this ligand")
-
- def find_unpaired_ligand(self):
- """Identify unpaired functional in groups in ligands, involving H-Bond donors, acceptors, halogen bond donors.
- """
- unpaired_hba, unpaired_hbd, unpaired_hal = [], [], []
- # Unpaired hydrogen bond acceptors/donors in ligand (not used for hydrogen bonds/water, salt bridges/mcomplex)
- involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [
- hbond.d.idx for hbond in self.hbonds_ldon
- ]
- [
- [involved_atoms.append(atom.idx) for atom in sb.negative.atoms]
- for sb in self.saltbridge_lneg
- ]
- [
- [involved_atoms.append(atom.idx) for atom in sb.positive.atoms]
- for sb in self.saltbridge_pneg
- ]
- [involved_atoms.append(wb.a.idx) for wb in self.water_bridges if wb.protisdon]
- [
- involved_atoms.append(wb.d.idx)
- for wb in self.water_bridges
- if not wb.protisdon
- ]
- [
- involved_atoms.append(mcomplex.target.atom.idx)
- for mcomplex in self.metal_complexes
- if mcomplex.location == "ligand"
- ]
- for atom in [hba.a for hba in self.ligand.get_hba()]:
- if atom.idx not in involved_atoms:
- unpaired_hba.append(atom)
- for atom in [hbd.d for hbd in self.ligand.get_hbd()]:
- if atom.idx not in involved_atoms:
- unpaired_hbd.append(atom)
-
- # unpaired halogen bond donors in ligand (not used for the previous + halogen bonds)
- [involved_atoms.append(atom.don.x.idx) for atom in self.halogen_bonds]
- for atom in [haldon.x for haldon in self.ligand.halogenbond_don]:
- if atom.idx not in involved_atoms:
- unpaired_hal.append(atom)
-
- return unpaired_hba, unpaired_hbd, unpaired_hal
-
- def refine_hydrophobic(self, all_h, pistacks):
- """Apply several rules to reduce the number of hydrophobic interactions."""
- sel = {}
- # 1. Rings interacting via stacking can't have additional hydrophobic contacts between each other.
- for pistack, h in itertools.product(pistacks, all_h):
- h1, h2 = h.bsatom.idx, h.ligatom.idx
- brs, lrs = (
- [p1.idx for p1 in pistack.proteinring.atoms],
- [p2.idx for p2 in pistack.ligandring.atoms],
- )
- if h1 in brs and h2 in lrs:
- sel[(h1, h2)] = "EXCLUDE"
- hydroph = [h for h in all_h if not (h.bsatom.idx, h.ligatom.idx) in sel]
- sel2 = {}
- # 2. If a ligand atom interacts with several binding site atoms in the same residue,
- # keep only the one with the closest distance
- for h in hydroph:
- if not (h.ligatom.idx, h.resnr) in sel2:
- sel2[(h.ligatom.idx, h.resnr)] = h
- else:
- if sel2[(h.ligatom.idx, h.resnr)].distance > h.distance:
- sel2[(h.ligatom.idx, h.resnr)] = h
- hydroph = [h for h in sel2.values()]
- hydroph_final = []
- bsclust = {}
- # 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist
- for h in hydroph:
- if h.bsatom.idx not in bsclust:
- bsclust[h.bsatom.idx] = [
- h,
- ]
- else:
- bsclust[h.bsatom.idx].append(h)
-
- idx_to_h = {}
- for bs in [a for a in bsclust if len(bsclust[a]) == 1]:
- hydroph_final.append(bsclust[bs][0])
-
- # A list of tuples with the idx of an atom and one of its neighbours is created
- for bs in [a for a in bsclust if not len(bsclust[a]) == 1]:
- tuples = []
- all_idx = [i.ligatom.idx for i in bsclust[bs]]
- for b in bsclust[bs]:
- idx = b.ligatom.idx
- neigh = [na for na in pybel.ob.OBAtomAtomIter(b.ligatom.OBAtom)]
- for n in neigh:
- n_idx = n.GetIdx()
- if n_idx in all_idx:
- if n_idx < idx:
- tuples.append((n_idx, idx))
- else:
- tuples.append((idx, n_idx))
- idx_to_h[idx] = b
-
- tuples = list(set(tuples))
- tuples = sorted(tuples, key=itemgetter(1))
- clusters = cluster_doubles(
- tuples
- ) # Cluster connected atoms (i.e. find hydrophobic patches)
-
- for cluster in clusters:
- min_dist = float("inf")
- min_h = None
- for atm_idx in cluster:
- h = idx_to_h[atm_idx]
- if h.distance < min_dist:
- min_dist = h.distance
- min_h = h
- hydroph_final.append(min_h)
- before, reduced = len(all_h), len(hydroph_final)
- if not before == 0 and not before == reduced:
- logger.info(
- f"reduced number of hydrophobic contacts from {before} to {reduced}"
- )
- return hydroph_final
-
- def refine_hbonds_ldon(self, all_hbonds, salt_lneg, salt_pneg):
- """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds."""
- i_set = {}
- for hbond in all_hbonds:
- i_set[hbond] = False
- for salt in salt_pneg:
- protidx, ligidx = (
- [at.idx for at in salt.negative.atoms],
- [at.idx for at in salt.positive.atoms],
- )
- if hbond.d.idx in ligidx and hbond.a.idx in protidx:
- i_set[hbond] = True
- for salt in salt_lneg:
- protidx, ligidx = (
- [at.idx for at in salt.positive.atoms],
- [at.idx for at in salt.negative.atoms],
- )
- if hbond.d.idx in ligidx and hbond.a.idx in protidx:
- i_set[hbond] = True
-
- # Allow only one hydrogen bond per donor, select interaction with larger donor angle
- second_set = {}
- hbls = [k for k in i_set.keys() if not i_set[k]]
- for hbl in hbls:
- if hbl.d.idx not in second_set:
- second_set[hbl.d.idx] = (hbl.angle, hbl)
- else:
- if second_set[hbl.d.idx][0] < hbl.angle:
- second_set[hbl.d.idx] = (hbl.angle, hbl)
- return [hb[1] for hb in second_set.values()]
-
- def refine_hbonds_pdon(self, all_hbonds, salt_lneg, salt_pneg):
- """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds with
- atoms of the same group.
- """
- i_set = {}
- for hbond in all_hbonds:
- i_set[hbond] = False
- for salt in salt_lneg:
- protidx, ligidx = (
- [at.idx for at in salt.positive.atoms],
- [at.idx for at in salt.negative.atoms],
- )
- if hbond.a.idx in ligidx and hbond.d.idx in protidx:
- i_set[hbond] = True
- for salt in salt_pneg:
- protidx, ligidx = (
- [at.idx for at in salt.negative.atoms],
- [at.idx for at in salt.positive.atoms],
- )
- if hbond.a.idx in ligidx and hbond.d.idx in protidx:
- i_set[hbond] = True
-
- # Allow only one hydrogen bond per donor, select interaction with larger donor angle
- second_set = {}
- hbps = [k for k in i_set.keys() if not i_set[k]]
- for hbp in hbps:
- if hbp.d.idx not in second_set:
- second_set[hbp.d.idx] = (hbp.angle, hbp)
- else:
- if second_set[hbp.d.idx][0] < hbp.angle:
- second_set[hbp.d.idx] = (hbp.angle, hbp)
- return [hb[1] for hb in second_set.values()]
-
- def refine_pi_cation_laro(self, all_picat, stacks):
- """Just important for constellations with histidine involved. If the histidine ring is positioned in stacking
- position to an aromatic ring in the ligand, there is in most cases stacking and pi-cation interaction reported
- as histidine also carries a positive charge in the ring. For such cases, only report stacking.
- """
- i_set = []
- for picat in all_picat:
- exclude = False
- for stack in stacks:
- if (
- whichrestype(stack.proteinring.atoms[0]) == "HIS"
- and picat.ring.obj == stack.ligandring.obj
- ):
- exclude = True
- if not exclude:
- i_set.append(picat)
- return i_set
-
- def refine_water_bridges(self, wbridges, hbonds_ldon, hbonds_pdon):
- """A donor atom already forming a hydrogen bond is not allowed to form a water bridge. Each water molecule
- can only be donor for two water bridges, selecting the constellation with the omega angle closest to 110 deg."""
- donor_atoms_hbonds = [hb.d.idx for hb in hbonds_ldon + hbonds_pdon]
- wb_dict = {}
- wb_dict2 = {}
- omega = 110.0
-
- # Just one hydrogen bond per donor atom
- for wbridge in [wb for wb in wbridges if wb.d.idx not in donor_atoms_hbonds]:
- if (wbridge.water.idx, wbridge.a.idx) not in wb_dict:
- wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge
- else:
- if abs(
- omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle
- ) < abs(omega - wbridge.w_angle):
- wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge
- for wb_tuple in wb_dict:
- water, acceptor = wb_tuple
- if water not in wb_dict2:
- wb_dict2[water] = [
- (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]),
- ]
- elif len(wb_dict2[water]) == 1:
- wb_dict2[water].append(
- (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple])
- )
- wb_dict2[water] = sorted(wb_dict2[water], key=lambda x: x[0])
- else:
- if wb_dict2[water][1][0] < abs(omega - wb_dict[wb_tuple].w_angle):
- wb_dict2[water] = [
- wb_dict2[water][0],
- (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple]),
- ]
-
- filtered_wb = []
- for fwbridges in wb_dict2.values():
- [filtered_wb.append(fwb[1]) for fwb in fwbridges]
- return filtered_wb
-
-
-class BindingSite(Mol):
- def __init__(self, atoms, protcomplex, cclass, altconf, min_dist, mapper):
- """Find all relevant parts which could take part in interactions"""
- Mol.__init__(self, altconf, mapper, mtype="protein", bsid=None)
- self.complex = cclass
- self.full_mol = protcomplex
- self.all_atoms = atoms
- self.min_dist = min_dist # Minimum distance of bs res to ligand
- self.bs_res = list(
- set(
- [
- "".join([str(whichresnumber(a)), whichchain(a)])
- for a in self.all_atoms
- ]
- )
- ) # e.g. 47A
- self.rings = self.find_rings(self.full_mol, self.all_atoms)
- self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms)
- self.hbond_acc_atoms = self.find_hba(self.all_atoms)
- self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms)
- self.charged = self.find_charged(self.full_mol)
- self.halogenbond_acc = self.find_hal(self.all_atoms)
- self.metal_binding = self.find_metal_binding(self.full_mol)
-
- def find_hal(self, atoms):
- """Look for halogen bond acceptors (Y-{O|P|N|S}, with Y=C,P,S)"""
- data = namedtuple("hal_acceptor", "o o_orig_idx y y_orig_idx")
- a_set = []
- # All oxygens, nitrogen, sulfurs with neighboring carbon, phosphor, nitrogen or sulfur
- for a in [at for at in atoms if at.atomicnum in [8, 7, 16]]:
- n_atoms = [
- na
- for na in pybel.ob.OBAtomAtomIter(a.OBAtom)
- if na.GetAtomicNum() in [6, 7, 15, 16]
- ]
- if len(n_atoms) == 1: # Proximal atom
- o_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
- y_orig_idx = self.Mapper.mapid(
- n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid
- )
- a_set.append(
- data(
- o=a,
- o_orig_idx=o_orig_idx,
- y=pybel.Atom(n_atoms[0]),
- y_orig_idx=y_orig_idx,
- )
- )
- return a_set
-
- def find_charged(self, mol):
- """Looks for positive charges in arginine, histidine or lysine, for negative in aspartic and glutamic acid."""
- data = namedtuple(
- "pcharge", "atoms atoms_orig_idx type center restype resnr reschain"
- )
- a_set = []
- # Iterate through all residue, exclude those in chains defined as peptides
- for res in [
- r
- for r in pybel.ob.OBResidueIter(mol.OBMol)
- if not r.GetChain() in config.PEPTIDES
- ]:
- if config.INTRA is not None:
- if res.GetChain() != config.INTRA:
- continue
- a_contributing = []
- a_contributing_orig_idx = []
- if res.GetName() in (
- "ARG",
- "HIS",
- "LYS",
- ): # Arginine, Histidine or Lysine have charged sidechains
- for a in pybel.ob.OBResidueAtomIter(res):
- if (
- a.GetType().startswith("N")
- and res.GetAtomProperty(a, 8)
- and not self.Mapper.mapid(a.GetIdx(), mtype="protein")
- in self.altconf
- ):
- a_contributing.append(pybel.Atom(a))
- a_contributing_orig_idx.append(
- self.Mapper.mapid(a.GetIdx(), mtype="protein")
- )
- if not len(a_contributing) == 0:
- a_set.append(
- data(
- atoms=a_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="positive",
- center=centroid([ac.coords for ac in a_contributing]),
- restype=res.GetName(),
- resnr=res.GetNum(),
- reschain=res.GetChain(),
- )
- )
- if res.GetName() in ("GLU", "ASP"): # Aspartic or Glutamic Acid
- for a in pybel.ob.OBResidueAtomIter(res):
- if (
- a.GetType().startswith("O")
- and res.GetAtomProperty(a, 8)
- and not self.Mapper.mapid(a.GetIdx(), mtype="protein")
- in self.altconf
- ):
- a_contributing.append(pybel.Atom(a))
- a_contributing_orig_idx.append(
- self.Mapper.mapid(a.GetIdx(), mtype="protein")
- )
- if not len(a_contributing) == 0:
- a_set.append(
- data(
- atoms=a_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="negative",
- center=centroid([ac.coords for ac in a_contributing]),
- restype=res.GetName(),
- resnr=res.GetNum(),
- reschain=res.GetChain(),
- )
- )
- return a_set
-
- def find_metal_binding(self, mol):
- """Looks for atoms that could possibly be involved in chelating a metal ion.
- This can be any main chain oxygen atom or oxygen, nitrogen and sulfur from specific amino acids"""
- data = namedtuple(
- "metal_binding", "atom atom_orig_idx type restype resnr reschain location"
- )
- a_set = []
- for res in pybel.ob.OBResidueIter(mol.OBMol):
- restype, reschain, resnr = (
- res.GetName().upper(),
- res.GetChain(),
- res.GetNum(),
- )
- if restype in ["ASP", "GLU", "SER", "THR", "TYR"]: # Look for oxygens here
- for a in pybel.ob.OBResidueAtomIter(res):
- if (
- a.GetType().startswith("O")
- and res.GetAtomProperty(a, 8)
- and not self.Mapper.mapid(a.GetIdx(), mtype="protein")
- in self.altconf
- ):
- atom_orig_idx = self.Mapper.mapid(
- a.GetIdx(), mtype=self.mtype, bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(a),
- atom_orig_idx=atom_orig_idx,
- type="O",
- restype=restype,
- resnr=resnr,
- reschain=reschain,
- location="protein.sidechain",
- )
- )
- if restype == "HIS": # Look for nitrogen here
- for a in pybel.ob.OBResidueAtomIter(res):
- if (
- a.GetType().startswith("N")
- and res.GetAtomProperty(a, 8)
- and not self.Mapper.mapid(a.GetIdx(), mtype="protein")
- in self.altconf
- ):
- atom_orig_idx = self.Mapper.mapid(
- a.GetIdx(), mtype=self.mtype, bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(a),
- atom_orig_idx=atom_orig_idx,
- type="N",
- restype=restype,
- resnr=resnr,
- reschain=reschain,
- location="protein.sidechain",
- )
- )
- if restype == "CYS": # Look for sulfur here
- for a in pybel.ob.OBResidueAtomIter(res):
- if (
- a.GetType().startswith("S")
- and res.GetAtomProperty(a, 8)
- and not self.Mapper.mapid(a.GetIdx(), mtype="protein")
- in self.altconf
- ):
- atom_orig_idx = self.Mapper.mapid(
- a.GetIdx(), mtype=self.mtype, bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(a),
- atom_orig_idx=atom_orig_idx,
- type="S",
- restype=restype,
- resnr=resnr,
- reschain=reschain,
- location="protein.sidechain",
- )
- )
- for a in pybel.ob.OBResidueAtomIter(res): # All main chain oxygens
- if (
- a.GetType().startswith("O")
- and res.GetAtomProperty(a, 2)
- and not self.Mapper.mapid(a.GetIdx(), mtype="protein")
- in self.altconf
- and restype != "HOH"
- ):
- atom_orig_idx = self.Mapper.mapid(
- a.GetIdx(), mtype=self.mtype, bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(a),
- atom_orig_idx=atom_orig_idx,
- type="O",
- restype=res.GetName(),
- resnr=res.GetNum(),
- reschain=res.GetChain(),
- location="protein.mainchain",
- )
- )
- return a_set
-
-
-class Ligand(Mol):
- def __init__(self, cclass, ligand):
- altconf = cclass.altconf
- self.hetid, self.chain, self.position = (
- ligand.hetid,
- ligand.chain,
- ligand.position,
- )
- self.bsid = ":".join([self.hetid, self.chain, str(self.position)])
- Mol.__init__(self, altconf, cclass.Mapper, mtype="ligand", bsid=self.bsid)
- self.members = ligand.members
- self.longname = ligand.longname
- self.type = ligand.type
- self.complex = cclass
- self.molecule = ligand.mol # Pybel Molecule
- self.smiles = self.molecule.write(format="can") # SMILES String
- self.inchikey = self.molecule.write(format="inchikey")
- self.can_to_pdb = ligand.can_to_pdb
- if not len(self.smiles) == 0:
- self.smiles = self.smiles.split()[0]
- else:
- logger.warning(f"could not write SMILES for ligand {ligand}")
- self.smiles = ""
- self.heavy_atoms = self.molecule.OBMol.NumHvyAtoms() # Heavy atoms count
- self.all_atoms = self.molecule.atoms
- self.atmdict = {l.idx: l for l in self.all_atoms}
- self.rings = self.find_rings(self.molecule, self.all_atoms)
- self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms)
- self.hbond_acc_atoms = self.find_hba(self.all_atoms)
- self.num_rings = len(self.rings)
- if self.num_rings != 0:
- logger.info(f"contains {self.num_rings} aromatic ring(s)")
- descvalues = self.molecule.calcdesc()
- self.molweight, self.logp = float(descvalues["MW"]), float(descvalues["logP"])
- self.num_rot_bonds = int(self.molecule.OBMol.NumRotors())
- self.atomorder = ligand.atomorder
-
- ##########################################################
- # Special Case for hydrogen bond acceptor identification #
- ##########################################################
-
- self.inverse_mapping = {
- v: k for k, v in self.Mapper.ligandmaps[self.bsid].items()
- }
- self.pdb_to_idx_mapping = {v: k for k, v in self.Mapper.proteinmap.items()}
- self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms)
-
- ######
- donor_pairs = []
- data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type")
- for donor in self.all_atoms:
- pdbidx = self.Mapper.mapid(
- donor.idx, mtype="ligand", bsid=self.bsid, to="original"
- )
- d = cclass.atoms[self.pdb_to_idx_mapping[pdbidx]]
- if d.OBAtom.IsHbondDonor():
- for adj_atom in [
- a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH()
- ]:
- d_orig_atom = self.Mapper.id_to_atom(pdbidx)
- donor_pairs.append(
- data(
- d=donor,
- d_orig_atom=d_orig_atom,
- d_orig_idx=pdbidx,
- h=pybel.Atom(adj_atom),
- type="regular",
- )
- )
- self.hbond_don_atom_pairs = donor_pairs
- #######
-
- self.charged = self.find_charged(self.all_atoms)
- self.centroid = centroid([a.coords for a in self.all_atoms])
- self.max_dist_to_center = max(
- (euclidean3d(self.centroid, a.coords) for a in self.all_atoms)
- )
- self.water = []
- data = namedtuple("water", "oxy oxy_orig_idx")
- for hoh in ligand.water:
- oxy = None
- for at in pybel.ob.OBResidueAtomIter(hoh):
- if at.GetAtomicNum() == 8 and at.GetIdx() not in self.altconf:
- oxy = pybel.Atom(at)
- # There are some cases where there is no oxygen in a water residue, ignore those
- if (
- not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)])
- == {1}
- and oxy is not None
- ):
- if (
- euclidean3d(self.centroid, oxy.coords)
- < self.max_dist_to_center + config.BS_DIST
- ):
- oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype="protein")
- self.water.append(data(oxy=oxy, oxy_orig_idx=oxy_orig_idx))
- self.halogenbond_don = self.find_hal(self.all_atoms)
- self.metal_binding = self.find_metal_binding(self.all_atoms, self.water)
- self.metals = []
- data = namedtuple("metal", "m orig_m m_orig_idx")
- for a in [a for a in self.all_atoms if a.type.upper() in config.METAL_IONS]:
- m_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
- orig_m = self.Mapper.id_to_atom(m_orig_idx)
- self.metals.append(data(m=a, m_orig_idx=m_orig_idx, orig_m=orig_m))
- self.num_hba, self.num_hbd = (
- len(self.hbond_acc_atoms),
- len(self.hbond_don_atom_pairs),
- )
- self.num_hal = len(self.halogenbond_don)
-
- def get_canonical_num(self, atomnum):
- """Converts internal atom ID into canonical atom ID. Agrees with Canonical SMILES in XML."""
- return self.atomorder[atomnum - 1]
-
- def is_functional_group(self, atom, group):
- """Given a pybel atom, look up if it belongs to a function group"""
- n_atoms = [
- a_neighbor.GetAtomicNum()
- for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom)
- ]
-
- if group in ["quartamine", "tertamine"] and atom.atomicnum == 7: # Nitrogen
- # It's a nitrogen, so could be a protonated amine or quaternary ammonium
- if "1" not in n_atoms and len(n_atoms) == 4:
- return (
- True if group == "quartamine" else False
- ) # It's a quat. ammonium (N with 4 residues != H)
- elif atom.OBAtom.GetHyb() == 3 and len(n_atoms) >= 3:
- return (
- True if group == "tertamine" else False
- ) # It's sp3-hybridized, so could pick up an hydrogen
- else:
- return False
-
- if (
- group in ["sulfonium", "sulfonicacid", "sulfate"] and atom.atomicnum == 16
- ): # Sulfur
- if (
- "1" not in n_atoms and len(n_atoms) == 3
- ): # It's a sulfonium (S with 3 residues != H)
- return True if group == "sulfonium" else False
- elif n_atoms.count(8) == 3: # It's a sulfonate or sulfonic acid
- return True if group == "sulfonicacid" else False
- elif n_atoms.count(8) == 4: # It's a sulfate
- return True if group == "sulfate" else False
-
- if group == "phosphate" and atom.atomicnum == 15: # Phosphor
- if set(n_atoms) == {8}: # It's a phosphate
- return True
-
- if (
- group in ["carboxylate", "guanidine"] and atom.atomicnum == 6
- ): # It's a carbon atom
- if (
- n_atoms.count(8) == 2 and n_atoms.count(6) == 1
- ): # It's a carboxylate group
- return True if group == "carboxylate" else False
- elif n_atoms.count(7) == 3 and len(n_atoms) == 3: # It's a guanidine group
- nitro_partners = []
- for nitro in pybel.ob.OBAtomAtomIter(atom.OBAtom):
- nitro_partners.append(
- len(
- [
- b_neighbor
- for b_neighbor in pybel.ob.OBAtomAtomIter(nitro)
- ]
- )
- )
- if (
- min(nitro_partners) == 1
- ): # One nitrogen is only connected to the carbon, can pick up a H
- return True if group == "guanidine" else False
-
- if group == "halocarbon" and atom.atomicnum in [9, 17, 35, 53]: # Halogen atoms
- n_atoms = [
- na
- for na in pybel.ob.OBAtomAtomIter(atom.OBAtom)
- if na.GetAtomicNum() == 6
- ]
- if len(n_atoms) == 1: # Halocarbon
- return True
- else:
- return False
-
- def find_hal(self, atoms):
- """Look for halogen bond donors (X-C, with X=F, Cl, Br, I)"""
- data = namedtuple("hal_donor", "x orig_x x_orig_idx c c_orig_idx")
- a_set = []
- for a in atoms:
- if self.is_functional_group(a, "halocarbon"):
- n_atoms = [
- na
- for na in pybel.ob.OBAtomAtomIter(a.OBAtom)
- if na.GetAtomicNum() == 6
- ]
- x_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
- orig_x = self.Mapper.id_to_atom(x_orig_idx)
- c_orig_idx = [
- self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid)
- for na in n_atoms
- ]
- a_set.append(
- data(
- x=a,
- orig_x=orig_x,
- x_orig_idx=x_orig_idx,
- c=pybel.Atom(n_atoms[0]),
- c_orig_idx=c_orig_idx,
- )
- )
- if len(a_set) != 0:
- logger.info(f"ligand contains {len(a_set)} halogen atom(s)")
- return a_set
-
- def find_charged(self, all_atoms):
- """Identify all positively charged groups in a ligand. This search is not exhaustive, as the cases can be quite
- diverse. The typical cases seem to be protonated amines, quaternary ammoinium and sulfonium
- as mentioned in 'Cation-pi interactions in ligand recognition and catalysis' (Zacharias et al., 2002)).
- Identify negatively charged groups in the ligand.
- """
- data = namedtuple(
- "lcharge", "atoms orig_atoms atoms_orig_idx type center fgroup"
- )
- a_set = []
- for a in all_atoms:
- a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
- a_orig = self.Mapper.id_to_atom(a_orig_idx)
- if self.is_functional_group(a, "quartamine"):
- a_set.append(
- data(
- atoms=[a,],
- orig_atoms=[a_orig,],
- atoms_orig_idx=[a_orig_idx,],
- type="positive",
- center=list(a.coords),
- fgroup="quartamine",
- )
- )
- elif self.is_functional_group(a, "tertamine"):
- a_set.append(
- data(
- atoms=[a,],
- orig_atoms=[a_orig,],
- atoms_orig_idx=[a_orig_idx,],
- type="positive",
- center=list(a.coords),
- fgroup="tertamine",
- )
- )
- if self.is_functional_group(a, "sulfonium"):
- a_set.append(
- data(
- atoms=[a,],
- orig_atoms=[a_orig,],
- atoms_orig_idx=[a_orig_idx,],
- type="positive",
- center=list(a.coords),
- fgroup="sulfonium",
- )
- )
- if self.is_functional_group(a, "phosphate"):
- a_contributing = [
- a,
- ]
- a_contributing_orig_idx = [
- a_orig_idx,
- ]
- [
- a_contributing.append(pybel.Atom(neighbor))
- for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
- ]
- [
- a_contributing_orig_idx.append(
- self.Mapper.mapid(
- neighbor.idx, mtype=self.mtype, bsid=self.bsid
- )
- )
- for neighbor in a_contributing
- ]
- orig_contributing = [
- self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx
- ]
- a_set.append(
- data(
- atoms=a_contributing,
- orig_atoms=orig_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="negative",
- center=a.coords,
- fgroup="phosphate",
- )
- )
- if self.is_functional_group(a, "sulfonicacid"):
- a_contributing = [
- a,
- ]
- a_contributing_orig_idx = [
- a_orig_idx,
- ]
- [
- a_contributing.append(pybel.Atom(neighbor))
- for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
- if neighbor.GetAtomicNum() == 8
- ]
- [
- a_contributing_orig_idx.append(
- self.Mapper.mapid(
- neighbor.idx, mtype=self.mtype, bsid=self.bsid
- )
- )
- for neighbor in a_contributing
- ]
- orig_contributing = [
- self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx
- ]
- a_set.append(
- data(
- atoms=a_contributing,
- orig_atoms=orig_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="negative",
- center=a.coords,
- fgroup="sulfonicacid",
- )
- )
- elif self.is_functional_group(a, "sulfate"):
- a_contributing = [
- a,
- ]
- a_contributing_orig_idx = [
- a_orig_idx,
- ]
- [
- a_contributing_orig_idx.append(
- self.Mapper.mapid(
- neighbor.idx, mtype=self.mtype, bsid=self.bsid
- )
- )
- for neighbor in a_contributing
- ]
- [
- a_contributing.append(pybel.Atom(neighbor))
- for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
- ]
- orig_contributing = [
- self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx
- ]
- a_set.append(
- data(
- atoms=a_contributing,
- orig_atoms=orig_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="negative",
- center=a.coords,
- fgroup="sulfate",
- )
- )
- if self.is_functional_group(a, "carboxylate"):
- a_contributing = [
- pybel.Atom(neighbor)
- for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
- if neighbor.GetAtomicNum() == 8
- ]
- a_contributing_orig_idx = [
- self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)
- for neighbor in a_contributing
- ]
- orig_contributing = [
- self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx
- ]
- a_set.append(
- data(
- atoms=a_contributing,
- orig_atoms=orig_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="negative",
- center=centroid([a.coords for a in a_contributing]),
- fgroup="carboxylate",
- )
- )
- elif self.is_functional_group(a, "guanidine"):
- a_contributing = [
- pybel.Atom(neighbor)
- for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
- if neighbor.GetAtomicNum() == 7
- ]
- a_contributing_orig_idx = [
- self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)
- for neighbor in a_contributing
- ]
- orig_contributing = [
- self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx
- ]
- a_set.append(
- data(
- atoms=a_contributing,
- orig_atoms=orig_contributing,
- atoms_orig_idx=a_contributing_orig_idx,
- type="positive",
- center=a.coords,
- fgroup="guanidine",
- )
- )
- return a_set
-
- def find_metal_binding(self, lig_atoms, water_oxygens):
- """Looks for atoms that could possibly be involved in binding a metal ion.
- This can be any water oxygen, as well as oxygen from carboxylate, phophoryl, phenolate, alcohol;
- nitrogen from imidazole; sulfur from thiolate.
- """
- a_set = []
- data = namedtuple(
- "metal_binding",
- "atom orig_atom atom_orig_idx type fgroup restype resnr reschain location",
- )
- for oxygen in water_oxygens:
- a_set.append(
- data(
- atom=oxygen.oxy,
- atom_orig_idx=oxygen.oxy_orig_idx,
- type="O",
- fgroup="water",
- restype=whichrestype(oxygen.oxy),
- resnr=whichresnumber(oxygen.oxy),
- reschain=whichchain(oxygen.oxy),
- location="water",
- orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx),
- )
- )
- # #@todo Refactor code
- for a in lig_atoms:
- a_orig_idx = self.Mapper.mapid(a.idx, mtype="ligand", bsid=self.bsid)
- n_atoms = pybel.ob.OBAtomAtomIter(a.OBAtom) # Neighboring atoms
- # All atomic numbers of neighboring atoms
- n_atoms_atomicnum = [
- n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom)
- ]
- if a.atomicnum == 8: # Oxygen
- if (
- n_atoms_atomicnum.count("1") == 1 and len(n_atoms_atomicnum) == 2
- ): # Oxygen in alcohol (R-[O]-H)
- a_set.append(
- data(
- atom=a,
- atom_orig_idx=a_orig_idx,
- type="O",
- fgroup="alcohol",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if (
- True in [n.IsAromatic() for n in n_atoms]
- and not a.OBAtom.IsAromatic()
- ): # Phenolate oxygen
- a_set.append(
- data(
- atom=a,
- atom_orig_idx=a_orig_idx,
- type="O",
- fgroup="phenolate",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if a.atomicnum == 6: # It's a carbon atom
- if (
- n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1
- ): # It's a carboxylate group
- for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]:
- neighbor_orig_idx = self.Mapper.mapid(
- neighbor.GetIdx(), mtype="ligand", bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(neighbor),
- atom_orig_idx=neighbor_orig_idx,
- type="O",
- fgroup="carboxylate",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if a.atomicnum == 15: # It's a phosphor atom
- if n_atoms_atomicnum.count(8) >= 3: # It's a phosphoryl
- for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]:
- neighbor_orig_idx = self.Mapper.mapid(
- neighbor.GetIdx(), mtype="ligand", bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(neighbor),
- atom_orig_idx=neighbor_orig_idx,
- type="O",
- fgroup="phosphoryl",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if (
- n_atoms_atomicnum.count(8) == 2
- ): # It's another phosphor-containing group #@todo (correct name?)
- for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]:
- neighbor_orig_idx = self.Mapper.mapid(
- neighbor.GetIdx(), mtype="ligand", bsid=self.bsid
- )
- a_set.append(
- data(
- atom=pybel.Atom(neighbor),
- atom_orig_idx=neighbor_orig_idx,
- type="O",
- fgroup="phosphor.other",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if a.atomicnum == 7: # It's a nitrogen atom
- if n_atoms_atomicnum.count(6) == 2: # It's imidazole/pyrrole or similar
- a_set.append(
- data(
- atom=a,
- atom_orig_idx=a_orig_idx,
- type="N",
- fgroup="imidazole/pyrrole",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if a.atomicnum == 16: # It's a sulfur atom
- if (
- True in [n.IsAromatic() for n in n_atoms]
- and not a.OBAtom.IsAromatic()
- ): # Thiolate
- a_set.append(
- data(
- atom=a,
- atom_orig_idx=a_orig_idx,
- type="S",
- fgroup="thiolate",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
- if set(n_atoms_atomicnum) == {26}: # Sulfur in Iron sulfur cluster
- a_set.append(
- data(
- atom=a,
- atom_orig_idx=a_orig_idx,
- type="S",
- fgroup="iron-sulfur.cluster",
- restype=self.hetid,
- resnr=self.position,
- reschain=self.chain,
- location="ligand",
- orig_atom=self.Mapper.id_to_atom(a_orig_idx),
- )
- )
-
- return a_set
-
-
-class PDBComplex:
- """Contains a collection of objects associated with a PDB complex, i.e. one or several ligands and their binding
- sites as well as information about the pliprofiler between them. Provides functions to load and prepare input files
- such as PDB files.
- """
-
- def __init__(self):
- self.interaction_sets = (
- {}
- ) # Dictionary with site identifiers as keys and object as value
- self.protcomplex = None
- self.filetype = None
- self.atoms = {} # Dictionary of Pybel atoms, accessible by their idx
- self.sourcefiles = {}
- self.information = {}
- self.corrected_pdb = ""
- self._output_path = tempfile.gettempdir()
- self.pymol_name = None
- self.modres = set()
- self.resis = []
- self.altconf = [] # Atom idx of atoms with alternate conformations
- self.covalent = (
- []
- ) # Covalent linkages between ligands and protein residues/other ligands
- self.excluded = [] # Excluded ligands
- self.Mapper = Mapper()
- self.ligands = []
-
- def __str__(self):
- formatted_lig_names = [
- ":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands
- ]
- return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join(
- [lig for lig in formatted_lig_names]
- )
-
- def load_pdb(self, pdbpath, as_string=False):
- """Loads a pdb file with protein AND ligand(s), separates and prepares them.
- If specified 'as_string', the input is a PDB string instead of a path."""
- if as_string:
- self.sourcefiles["pdbcomplex.original"] = None
- self.sourcefiles["pdbcomplex"] = None
- self.sourcefiles["pdbstring"] = pdbpath
- else:
- self.sourcefiles["pdbcomplex.original"] = pdbpath
- self.sourcefiles["pdbcomplex"] = pdbpath
- self.information["pdbfixes"] = False
- pdbparser = PDBParser(
- pdbpath, as_string=as_string
- ) # Parse PDB file to find errors and get additional data
- # #@todo Refactor and rename here
- self.Mapper.proteinmap = pdbparser.proteinmap
- self.Mapper.reversed_proteinmap = {
- v: k for k, v in self.Mapper.proteinmap.items()
- }
- self.modres = pdbparser.modres
- self.covalent = pdbparser.covalent
- self.altconf = pdbparser.altconformations
- self.corrected_pdb = pdbparser.corrected_pdb
-
- if not config.PLUGIN_MODE:
- if pdbparser.num_fixed_lines > 0:
- logger.info(
- f"{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file"
- )
- # Save modified PDB file
- if not as_string:
- basename = os.path.basename(pdbpath).split(".")[0]
- else:
- basename = "from_stdin"
- pdbpath_fixed = tmpfile(
- prefix="plipfixed." + basename + "_", direc=self.output_path
- )
- create_folder_if_not_exists(self.output_path)
- self.sourcefiles["pdbcomplex"] = pdbpath_fixed
- self.corrected_pdb = re.sub(
- r"[^\x00-\x7F]+", " ", self.corrected_pdb
- ) # Strip non-unicode chars
- if (
- not config.NOFIXFILE
- ): # Only write to file if this option is not activated
- with open(pdbpath_fixed, "w") as f:
- f.write(self.corrected_pdb)
- self.information["pdbfixes"] = True
-
- if not as_string:
- self.sourcefiles["filename"] = os.path.basename(
- self.sourcefiles["pdbcomplex"]
- )
- self.protcomplex, self.filetype = read_pdb(self.corrected_pdb, as_string=True)
-
- # Update the model in the Mapper class instance
- self.Mapper.original_structure = self.protcomplex.OBMol
- logger.info("PDB structure successfully read")
-
- # Determine (temporary) PyMOL Name from Filename
- self.pymol_name = pdbpath.split("/")[-1].split(".")[0] + "-Protein"
- # Replace characters causing problems in PyMOL
- self.pymol_name = (
- self.pymol_name.replace(" ", "")
- .replace("(", "")
- .replace(")", "")
- .replace("-", "_")
- )
- # But if possible, name it after PDBID in Header
- if "HEADER" in self.protcomplex.data: # If the PDB file has a proper header
- potential_name = self.protcomplex.data["HEADER"][56:60].lower()
- if extract_pdbid(potential_name) != "UnknownProtein":
- self.pymol_name = potential_name
- logger.debug(f"PyMOL name set as: {self.pymol_name}")
-
- # Extract and prepare ligands
- ligandfinder = LigandFinder(
- self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper
- )
- self.ligands = ligandfinder.ligands
- self.excluded = ligandfinder.excluded
-
- # decide whether to add polar hydrogens
- if not config.NOHYDRO:
- if not as_string:
- basename = os.path.basename(pdbpath).split(".")[0]
- else:
- basename = "from_stdin"
- self.protcomplex.OBMol.AddPolarHydrogens()
- output_path = os.path.join(self._output_path, f"{basename}_protonated.pdb")
- self.protcomplex.write("pdb", output_path, overwrite=True)
- logger.info(f"protonated structure written to {output_path}")
- else:
- logger.warning(
- "no polar hydrogens will be assigned (make sure your structure contains hydrogens)"
- )
-
- for atm in self.protcomplex:
- self.atoms[atm.idx] = atm
-
- if len(self.excluded) != 0:
- logger.info(f"excluded molecules as ligands: {self.excluded}")
-
- if config.DNARECEPTOR:
- self.resis = [
- obres
- for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol)
- if obres.GetName() in config.DNA + config.RNA
- ]
- else:
- self.resis = [
- obres
- for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol)
- if obres.GetResidueProperty(0)
- ]
-
- num_ligs = len(self.ligands)
- if num_ligs == 1:
- logger.info("analyzing one ligand")
- elif num_ligs > 1:
- logger.info(f"analyzing {num_ligs} ligands")
- else:
- logger.info(f"structure contains no ligands")
-
- def analyze(self):
- """Triggers analysis of all complexes in structure"""
- for ligand in self.ligands:
- self.characterize_complex(ligand)
-
- def characterize_complex(self, ligand):
- """Handles all basic functions for characterizing the interactions for one ligand"""
-
- single_sites = []
- for member in ligand.members:
- single_sites.append(":".join([str(x) for x in member]))
- site = " + ".join(single_sites)
- site = site if not len(site) > 20 else site[:20] + "..."
- longname = (
- ligand.longname
- if not len(ligand.longname) > 20
- else ligand.longname[:20] + "..."
- )
- ligtype = "unspecified type" if ligand.type == "UNSPECIFIED" else ligand.type
- ligtext = f"{longname} [{ligtype}] -- {site}"
- logger.info(f"processing ligand {ligtext}")
- if ligtype == "PEPTIDE":
- logger.info(
- f"chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode"
- )
- if ligtype == "INTRA":
- logger.info(f"chain {ligand.chain} will be processed in [INTRA-CHAIN] mode")
- any_in_biolip = (
- len(set([x[0] for x in ligand.members]).intersection(config.biolip_list))
- != 0
- )
-
- if (
- ligtype
- not in ["POLYMER", "DNA", "ION", "DNA+ION", "RNA+ION", "SMALLMOLECULE+ION"]
- and any_in_biolip
- ):
- logger.info("may be biologically irrelevant")
-
- lig_obj = Ligand(self, ligand)
- cutoff = lig_obj.max_dist_to_center + config.BS_DIST
- bs_res = self.extract_bs(cutoff, lig_obj.centroid, self.resis)
- # Get a list of all atoms belonging to the binding site, search by idx
- bs_atoms = [
- self.atoms[idx]
- for idx in [
- i
- for i in self.atoms.keys()
- if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res
- ]
- if idx in self.Mapper.proteinmap
- and self.Mapper.mapid(idx, mtype="protein") not in self.altconf
- ]
- if ligand.type == "PEPTIDE":
- # If peptide, don't consider the peptide chain as part of the protein binding site
- bs_atoms = [
- a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain
- ]
- if ligand.type == "INTRA":
- # Interactions within the chain
- bs_atoms = [
- a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain
- ]
- bs_atoms_refined = []
-
- # Create hash with BSRES -> (MINDIST_TO_LIG, AA_TYPE)
- # and refine binding site atom selection with exact threshold
- min_dist = {}
- for r in bs_atoms:
- bs_res_id = "".join([str(whichresnumber(r)), whichchain(r)])
- for l in ligand.mol.atoms:
- distance = euclidean3d(r.coords, l.coords)
- if bs_res_id not in min_dist:
- min_dist[bs_res_id] = (distance, whichrestype(r))
- elif min_dist[bs_res_id][0] > distance:
- min_dist[bs_res_id] = (distance, whichrestype(r))
- if distance <= config.BS_DIST and r not in bs_atoms_refined:
- bs_atoms_refined.append(r)
- num_bs_atoms = len(bs_atoms_refined)
- logger.info(
- f"binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})"
- )
-
- bs_obj = BindingSite(
- bs_atoms_refined,
- self.protcomplex,
- self,
- self.altconf,
- min_dist,
- self.Mapper,
- )
- pli_obj = PLInteraction(lig_obj, bs_obj, self)
- self.interaction_sets[ligand.mol.title] = pli_obj
-
- def extract_bs(self, cutoff, ligcentroid, resis):
- """Return list of ids from residues belonging to the binding site"""
- return [
- obres.GetIdx()
- for obres in resis
- if self.res_belongs_to_bs(obres, cutoff, ligcentroid)
- ]
-
- def res_belongs_to_bs(self, res, cutoff, ligcentroid):
- """Check for each residue if its centroid is within a certain distance to the ligand centroid.
- Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)"""
- rescentroid = centroid(
- [(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]
- )
- # Check geometry
- near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
- # Check chain membership
- restricted_chain = True if res.GetChain() in config.PEPTIDES else False
- return near_enough and not restricted_chain
-
- def get_atom(self, idx):
- return self.atoms[idx]
-
- @property
- def output_path(self):
- return self._output_path
-
- @output_path.setter
- def output_path(self, path):
- self._output_path = tilde_expansion(path)