aboutsummaryrefslogtreecommitdiff
path: root/plip/basic
diff options
context:
space:
mode:
Diffstat (limited to 'plip/basic')
-rw-r--r--plip/basic/__init__.py0
-rw-r--r--plip/basic/config.py625
-rw-r--r--plip/basic/logger.py21
-rw-r--r--plip/basic/parallel.py52
-rw-r--r--plip/basic/remote.py95
-rw-r--r--plip/basic/supplemental.py434
6 files changed, 0 insertions, 1227 deletions
diff --git a/plip/basic/__init__.py b/plip/basic/__init__.py
deleted file mode 100644
index e69de29..0000000
--- a/plip/basic/__init__.py
+++ /dev/null
diff --git a/plip/basic/config.py b/plip/basic/config.py
deleted file mode 100644
index 91103fa..0000000
--- a/plip/basic/config.py
+++ /dev/null
@@ -1,625 +0,0 @@
-__version__ = "2.1.3"
-__maintainer__ = "PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai"
-
-import logging
-
-DEFAULT_LOG_LEVEL = logging.INFO
-VERBOSE = False # Set verbose mode
-QUIET = False # Set verbose mode
-SILENT = False # Set verbose mode
-MAXTHREADS = 1 # Maximum number of main threads for binding site visualization
-XML = False
-TXT = False
-PICS = False
-PYMOL = False
-STDOUT = False
-RAWSTRING = False # use raw strings for input / output
-OUTPATH = "./"
-BASEPATH = "./"
-BREAKCOMPOSITE = False # Break up composite ligands with covalent bonds
-ALTLOC = False # Consider alternate locations
-PLUGIN_MODE = False # Special mode for PLIP in Plugins (e.g. PyMOL)
-NOFIX = False # Turn off fixing of errors in PDB files
-NOFIXFILE = False # Turn off writing to files for fixed PDB structures
-PEPTIDES = [] # Definition which chains should be considered as peptide ligands
-INTRA = None
-KEEPMOD = False
-DNARECEPTOR = False
-OUTPUTFILENAME = "report" # Naming for the TXT and XML report files
-NOPDBCANMAP = False # Skip calculation of mapping canonical atom order: PDB atom order
-NOHYDRO = False # Do not add hydrogen bonds (in case already present in the structure)
-
-# Configuration file for Protein-Ligand Interaction Profiler (PLIP)
-# Set thresholds for detection of interactions
-
-# Thresholds for detection (global variables)
-BS_DIST = 7.5 # Determines maximum distance to include binding site residues
-AROMATIC_PLANARITY = (
- 5.0 # Determines allowed deviation from planarity in aromatic rings
-)
-MIN_DIST = 0.5 # Minimum distance for all distance thresholds
-# Some distance thresholds were extended (max. 1.0A) if too restrictive too account for low-quality structures
-HYDROPH_DIST_MAX = 4.0 # Distance cutoff for detection of hydrophobic contacts
-HBOND_DIST_MAX = 4.1 # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A
-HBOND_DON_ANGLE_MIN = (
- 100 # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10
-)
-PISTACK_DIST_MAX = (
- 5.5 # Max. distance for parallel or offset pistacking (McGaughey, 1998)
-)
-PISTACK_ANG_DEV = (
- 30 # Max. Deviation from parallel or perpendicular orientation (in degrees)
-)
-PISTACK_OFFSET_MAX = 2.0 # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A)
-PICATION_DIST_MAX = 6.0 # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999)
-SALTBRIDGE_DIST_MAX = 5.5 # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5
-HALOGEN_DIST_MAX = 4.0 # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5
-HALOGEN_ACC_ANGLE = (
- 120 # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger)
-)
-HALOGEN_DON_ANGLE = (
- 165 # Optimal donor angle (Halogen bonds in biological molecules., Auffinger)
-)
-HALOGEN_ANGLE_DEV = 30 # Max. deviation from optimal angle
-WATER_BRIDGE_MINDIST = (
- 2.5 # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1
-)
-WATER_BRIDGE_MAXDIST = (
- 4.1 # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5
-)
-WATER_BRIDGE_OMEGA_MIN = 71 # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9
-WATER_BRIDGE_OMEGA_MAX = 140 # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005)
-WATER_BRIDGE_THETA_MIN = 100 # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005)
-METAL_DIST_MAX = (
- 3.0 # Max. distance between metal ion and interacting atom (Harding, 2001)
-)
-
-# Other thresholds
-MAX_COMPOSITE_LENGTH = 200 # Filter out ligands with more than 200 fragments
-
-#########
-# Names #
-#########
-
-# Names of RNA and DNA residues to be considered (detection by name)
-RNA = ["U", "A", "C", "G"]
-DNA = ["DT", "DA", "DC", "DG"]
-
-#############
-# Whitelist #
-#############
-
-# Metal cations which can be complexed
-
-METAL_IONS = [
- "CA",
- "CO",
- "MG",
- "MN",
- "FE",
- "CU",
- "ZN",
- "FE2",
- "FE3",
- "FE4",
- "LI",
- "NA",
- "K",
- "RB",
- "SR",
- "CS",
- "BA",
- "CR",
- "NI",
- "FE1",
- "NI",
- "RU",
- "RU1",
- "RH",
- "RH1",
- "PD",
- "AG",
- "CD",
- "LA",
- "W",
- "W1",
- "OS",
- "IR",
- "PT",
- "PT1",
- "AU",
- "HG",
- "CE",
- "PR",
- "SM",
- "EU",
- "GD",
- "TB",
- "YB",
- "LU",
- "AL",
- "GA",
- "IN",
- "SB",
- "TL",
- "PB",
-]
-
-##############
-# Blacklists #
-##############
-
-# Other Ions/Atoms (not yet supported)
-anions = ["CL", "IOD", "BR"]
-other = ["MO", "RE", "HO"]
-UNSUPPORTED = anions + other
-
-# BioLiP list of suspicious ligands from http://zhanglab.ccmb.med.umich.edu/BioLiP/ligand_list (2014-07-10)
-# Add ligands here to get warnings for possible artifacts.
-biolip_list = [
- "ACE",
- "HEX",
- "TMA",
- "SOH",
- "P25",
- "CCN",
- "PR",
- "PTN",
- "NO3",
- "TCN",
- "BU1",
- "BCN",
- "CB3",
- "HCS",
- "NBN",
- "SO2",
- "MO6",
- "MOH",
- "CAC",
- "MLT",
- "KR",
- "6PH",
- "MOS",
- "UNL",
- "MO3",
- "SR",
- "CD3",
- "PB",
- "ACM",
- "LUT",
- "PMS",
- "OF3",
- "SCN",
- "DHB",
- "E4N",
- "13P",
- "3PG",
- "CYC",
- "NC",
- "BEN",
- "NAO",
- "PHQ",
- "EPE",
- "BME",
- "TB",
- "ETE",
- "EU",
- "OES",
- "EAP",
- "ETX",
- "BEZ",
- "5AD",
- "OC2",
- "OLA",
- "GD3",
- "CIT",
- "DVT",
- "OC6",
- "MW1",
- "OC3",
- "SRT",
- "LCO",
- "BNZ",
- "PPV",
- "STE",
- "PEG",
- "RU",
- "PGE",
- "MPO",
- "B3P",
- "OGA",
- "IPA",
- "LU",
- "EDO",
- "MAC",
- "9PE",
- "IPH",
- "MBN",
- "C1O",
- "1PE",
- "YF3",
- "PEF",
- "GD",
- "8PE",
- "DKA",
- "RB",
- "YB",
- "GGD",
- "SE4",
- "LHG",
- "SMO",
- "DGD",
- "CMO",
- "MLI",
- "MW2",
- "DTT",
- "DOD",
- "7PH",
- "PBM",
- "AU",
- "FOR",
- "PSC",
- "TG1",
- "KAI",
- "1PG",
- "DGA",
- "IR",
- "PE4",
- "VO4",
- "ACN",
- "AG",
- "MO4",
- "OCL",
- "6UL",
- "CHT",
- "RHD",
- "CPS",
- "IR3",
- "OC4",
- "MTE",
- "HGC",
- "CR",
- "PC1",
- "HC4",
- "TEA",
- "BOG",
- "PEO",
- "PE5",
- "144",
- "IUM",
- "LMG",
- "SQU",
- "MMC",
- "GOL",
- "NVP",
- "AU3",
- "3PH",
- "PT4",
- "PGO",
- "ICT",
- "OCM",
- "BCR",
- "PG4",
- "L4P",
- "OPC",
- "OXM",
- "SQD",
- "PQ9",
- "BAM",
- "PI",
- "PL9",
- "P6G",
- "IRI",
- "15P",
- "MAE",
- "MBO",
- "FMT",
- "L1P",
- "DUD",
- "PGV",
- "CD1",
- "P33",
- "DTU",
- "XAT",
- "CD",
- "THE",
- "U1",
- "NA",
- "MW3",
- "BHG",
- "Y1",
- "OCT",
- "BET",
- "MPD",
- "HTO",
- "IBM",
- "D01",
- "HAI",
- "HED",
- "CAD",
- "CUZ",
- "TLA",
- "SO4",
- "OC5",
- "ETF",
- "MRD",
- "PT",
- "PHB",
- "URE",
- "MLA",
- "TGL",
- "PLM",
- "NET",
- "LAC",
- "AUC",
- "UNX",
- "GA",
- "DMS",
- "MO2",
- "LA",
- "NI",
- "TE",
- "THJ",
- "NHE",
- "HAE",
- "MO1",
- "DAO",
- "3PE",
- "LMU",
- "DHJ",
- "FLC",
- "SAL",
- "GAI",
- "ORO",
- "HEZ",
- "TAM",
- "TRA",
- "NEX",
- "CXS",
- "LCP",
- "HOH",
- "OCN",
- "PER",
- "ACY",
- "MH2",
- "ARS",
- "12P",
- "L3P",
- "PUT",
- "IN",
- "CS",
- "NAW",
- "SB",
- "GUN",
- "SX",
- "CON",
- "C2O",
- "EMC",
- "BO4",
- "BNG",
- "MN5",
- "__O",
- "K",
- "CYN",
- "H2S",
- "MH3",
- "YT3",
- "P22",
- "KO4",
- "1AG",
- "CE",
- "IPL",
- "PG6",
- "MO5",
- "F09",
- "HO",
- "AL",
- "TRS",
- "EOH",
- "GCP",
- "MSE",
- "AKR",
- "NCO",
- "PO4",
- "L2P",
- "LDA",
- "SIN",
- "DMI",
- "SM",
- "DTD",
- "SGM",
- "DIO",
- "PPI",
- "DDQ",
- "DPO",
- "HCA",
- "CO5",
- "PD",
- "OS",
- "OH",
- "NA6",
- "NAG",
- "W",
- "ENC",
- "NA5",
- "LI1",
- "P4C",
- "GLV",
- "DMF",
- "ACT",
- "BTB",
- "6PL",
- "BGL",
- "OF1",
- "N8E",
- "LMT",
- "THM",
- "EU3",
- "PGR",
- "NA2",
- "FOL",
- "543",
- "_CP",
- "PEK",
- "NSP",
- "PEE",
- "OCO",
- "CHD",
- "CO2",
- "TBU",
- "UMQ",
- "MES",
- "NH4",
- "CD5",
- "HTG",
- "DEP",
- "OC1",
- "KDO",
- "2PE",
- "PE3",
- "IOD",
- "NDG",
- "CL",
- "HG",
- "F",
- "XE",
- "TL",
- "BA",
- "LI",
- "BR",
- "TAU",
- "TCA",
- "SPD",
- "SPM",
- "SAR",
- "SUC",
- "PAM",
- "SPH",
- "BE7",
- "P4G",
- "OLC",
- "OLB",
- "LFA",
- "D10",
- "D12",
- "DD9",
- "HP6",
- "R16",
- "PX4",
- "TRD",
- "UND",
- "FTT",
- "MYR",
- "RG1",
- "IMD",
- "DMN",
- "KEN",
- "C14",
- "UPL",
- "CMJ",
- "ULI",
- "MYS",
- "TWT",
- "M2M",
- "P15",
- "PG0",
- "PEU",
- "AE3",
- "TOE",
- "ME2",
- "PE8",
- "6JZ",
- "7PE",
- "P3G",
- "7PG",
- "PG5",
- "16P",
- "XPE",
- "PGF",
- "AE4",
- "7E8",
- "7E9",
- "MVC",
- "TAR",
- "DMR",
- "LMR",
- "NER",
- "02U",
- "NGZ",
- "LXB",
- "A2G",
- "BM3",
- "NAA",
- "NGA",
- "LXZ",
- "PX6",
- "PA8",
- "LPP",
- "PX2",
- "MYY",
- "PX8",
- "PD7",
- "XP4",
- "XPA",
- "PEV",
- "6PE",
- "PEX",
- "PEH",
- "PTY",
- "YB2",
- "PGT",
- "CN3",
- "AGA",
- "DGG",
- "CD4",
- "CN6",
- "CDL",
- "PG8",
- "MGE",
- "DTV",
- "L44",
- "L2C",
- "4AG",
- "B3H",
- "1EM",
- "DDR",
- "I42",
- "CNS",
- "PC7",
- "HGP",
- "PC8",
- "HGX",
- "LIO",
- "PLD",
- "PC2",
- "PCF",
- "MC3",
- "P1O",
- "PLC",
- "PC6",
- "HSH",
- "BXC",
- "HSG",
- "DPG",
- "2DP",
- "POV",
- "PCW",
- "GVT",
- "CE9",
- "CXE",
- "C10",
- "CE1",
- "SPJ",
- "SPZ",
- "SPK",
- "SPW",
- "HT3",
- "HTH",
- "2OP",
- "3NI",
- "BO3",
- "DET",
- "D1D",
- "SWE",
- "SOG",
-]
diff --git a/plip/basic/logger.py b/plip/basic/logger.py
deleted file mode 100644
index ca65411..0000000
--- a/plip/basic/logger.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import inspect
-import logging
-
-
-def get_logger():
- """
- Configures a base logger and returns a module-specific sub-logger of the calling module.
- """
- frame = inspect.stack()[1]
- module_name = inspect.getmodule(frame[0]).__name__
- if module_name != '__main__':
- logger = logging.getLogger(module_name)
- if not logger.parent.handlers:
- ch = logging.StreamHandler()
- formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s')
- ch.setFormatter(formatter)
- logger.parent.addHandler(ch)
- else:
- logger = logging.getLogger('plip')
-
- return logger
diff --git a/plip/basic/parallel.py b/plip/basic/parallel.py
deleted file mode 100644
index 1f71943..0000000
--- a/plip/basic/parallel.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import itertools
-import multiprocessing
-from builtins import zip
-from functools import partial
-
-from numpy import asarray
-
-
-class SubProcessError(Exception):
- def __init__(self, e, exitcode=1):
- self.exitcode = exitcode
- super(SubProcessError, self).__init__(e)
-
- pass
-
-
-def universal_worker(input_pair):
- """This is a wrapper function expecting a tiplet of function, single
- argument, dict of keyword arguments. The provided function is called
- with the appropriate arguments."""
- function, arg, kwargs = input_pair
- return function(arg, **kwargs)
-
-
-def pool_args(function, sequence, kwargs):
- """Return a single iterator of n elements of lists of length 3, given a sequence of len n."""
- return zip(itertools.repeat(function), sequence, itertools.repeat(kwargs))
-
-
-def parallel_fn(f):
- """Simple wrapper function, returning a parallel version of the given function f.
- The function f must have one argument and may have an arbitray number of
- keyword arguments. """
-
- def simple_parallel(func, sequence, **args):
- """ f takes an element of sequence as input and the keyword args in **args"""
- if 'processes' in args:
- processes = args.get('processes')
- del args['processes']
- else:
- processes = multiprocessing.cpu_count()
-
- pool = multiprocessing.Pool(processes) # depends on available cores
-
- result = pool.map_async(universal_worker, pool_args(func, sequence, args))
- pool.close()
- pool.join()
- cleaned = [x for x in result.get() if x is not None] # getting results
- cleaned = asarray(cleaned)
- return cleaned
-
- return partial(simple_parallel, f)
diff --git a/plip/basic/remote.py b/plip/basic/remote.py
deleted file mode 100644
index 0c67f6f..0000000
--- a/plip/basic/remote.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from collections import namedtuple
-
-hbonds_info = namedtuple('hbonds_info', 'ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id')
-hydrophobic_info = namedtuple('hydrophobic_info', 'bs_ids lig_ids pairs_ids')
-halogen_info = namedtuple('halogen_info', 'don_id acc_id')
-pistack_info = namedtuple('pistack_info', 'proteinring_atoms, proteinring_center ligandring_atoms '
- 'ligandring_center type')
-pication_info = namedtuple('pication_info', 'ring_center charge_center ring_atoms charge_atoms, protcharged')
-sbridge_info = namedtuple('sbridge_info', 'positive_atoms negative_atoms positive_center negative_center protispos')
-wbridge_info = namedtuple('wbridge_info', 'don_id acc_id water_id protisdon')
-metal_info = namedtuple('metal_info', 'metal_id, target_id location')
-
-
-class VisualizerData:
- """Contains all information on a complex relevant for visualization. Can be pickled"""
-
- def __init__(self, mol, site):
- pcomp = mol
- pli = mol.interaction_sets[site]
- ligand = pli.ligand
-
- # General Information
- self.lig_members = sorted(pli.ligand.members)
- self.sourcefile = pcomp.sourcefiles['pdbcomplex']
- self.corrected_pdb = pcomp.corrected_pdb
- self.pdbid = mol.pymol_name
- self.hetid = ligand.hetid
- self.ligandtype = ligand.type
- self.chain = ligand.chain if not ligand.chain == "0" else "" # #@todo Fix this
- self.position = str(ligand.position)
- self.uid = ":".join([self.hetid, self.chain, self.position])
- self.outpath = mol.output_path
- self.metal_ids = [x.m_orig_idx for x in pli.ligand.metals]
- self.unpaired_hba_idx = pli.unpaired_hba_orig_idx
- self.unpaired_hbd_idx = pli.unpaired_hbd_orig_idx
- self.unpaired_hal_idx = pli.unpaired_hal_orig_idx
-
- # Information on Interactions
-
- # Hydrophobic Contacts
- # Contains IDs of contributing binding site, ligand atoms and the pairings
- hydroph_pairs_id = [(h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts]
- self.hydrophobic_contacts = hydrophobic_info(bs_ids=[hp[0] for hp in hydroph_pairs_id],
- lig_ids=[hp[1] for hp in hydroph_pairs_id],
- pairs_ids=hydroph_pairs_id)
-
- # Hydrogen Bonds
- # #@todo Don't use indices, simplify this code here
- hbonds_ldon, hbonds_pdon = pli.hbonds_ldon, pli.hbonds_pdon
- hbonds_ldon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon]
- hbonds_pdon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon]
- self.hbonds = hbonds_info(ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon],
- lig_don_id=[hb[1] for hb in hbonds_ldon_id],
- prot_acc_id=[hb[0] for hb in hbonds_ldon_id],
- pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon],
- prot_don_id=[hb[1] for hb in hbonds_pdon_id],
- lig_acc_id=[hb[0] for hb in hbonds_pdon_id])
-
- # Halogen Bonds
- self.halogen_bonds = [halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx)
- for h in pli.halogen_bonds]
-
- # Pistacking
- self.pistacking = [pistack_info(proteinring_atoms=pistack.proteinring.atoms_orig_idx,
- proteinring_center=pistack.proteinring.center,
- ligandring_atoms=pistack.ligandring.atoms_orig_idx,
- ligandring_center=pistack.ligandring.center,
- type=pistack.type) for pistack in pli.pistacking]
-
- # Pi-cation interactions
- self.pication = [pication_info(ring_center=picat.ring.center,
- charge_center=picat.charge.center,
- ring_atoms=picat.ring.atoms_orig_idx,
- charge_atoms=picat.charge.atoms_orig_idx,
- protcharged=picat.protcharged)
- for picat in pli.pication_paro + pli.pication_laro]
-
- # Salt Bridges
- self.saltbridges = [sbridge_info(positive_atoms=sbridge.positive.atoms_orig_idx,
- negative_atoms=sbridge.negative.atoms_orig_idx,
- positive_center=sbridge.positive.center,
- negative_center=sbridge.negative.center,
- protispos=sbridge.protispos)
- for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg]
-
- # Water Bridgese('wbridge_info', 'don_id acc_id water_id protisdon')
- self.waterbridges = [wbridge_info(don_id=wbridge.d_orig_idx,
- acc_id=wbridge.a_orig_idx,
- water_id=wbridge.water_orig_idx,
- protisdon=wbridge.protisdon) for wbridge in pli.water_bridges]
-
- # Metal Complexes
- self.metal_complexes = [metal_info(metal_id=metalc.metal_orig_idx,
- target_id=metalc.target_orig_idx,
- location=metalc.location) for metalc in pli.metal_complexes]
diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py
deleted file mode 100644
index 6b3ef7b..0000000
--- a/plip/basic/supplemental.py
+++ /dev/null
@@ -1,434 +0,0 @@
-import gzip
-import itertools
-import os
-import platform
-import re
-import subprocess
-import sys
-import tempfile
-import zipfile
-from collections import namedtuple
-
-import numpy as np
-from openbabel import pybel
-from openbabel.pybel import Atom
-
-from plip.basic import config, logger
-
-logger = logger.get_logger()
-
-# Windows and MacOS
-if os.name != 'nt' and platform.system() != 'Darwin': # Resource module not available for Windows
- import resource
-
-# Settings
-np.seterr(all='ignore') # No runtime warnings
-
-
-def tmpfile(prefix, direc):
- """Returns the path to a newly created temporary file."""
- return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc)
-
-
-def is_lig(hetid):
- """Checks if a PDB compound can be excluded as a small molecule ligand"""
- h = hetid.upper()
- return not (h == 'HOH' or h in config.UNSUPPORTED)
-
-
-def extract_pdbid(string):
- """Use regular expressions to get a PDB ID from a string"""
- p = re.compile("[0-9][0-9a-z]{3}")
- m = p.search(string.lower())
- try:
- return m.group()
- except AttributeError:
- return "UnknownProtein"
-
-
-def whichrestype(atom):
- """Returns the residue name of an Pybel or OpenBabel atom."""
- atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom
- return atom.GetResidue().GetName() if atom.GetResidue() is not None else None
-
-
-def whichresnumber(atom):
- """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file)."""
- atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom
- return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None
-
-
-def whichchain(atom):
- """Returns the residue number of an PyBel or OpenBabel atom."""
- atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom
- return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None
-
-
-#########################
-# Mathematical operations
-#########################
-
-
-def euclidean3d(v1, v2):
- """Faster implementation of euclidean distance for the 3D case."""
- if not len(v1) == 3 and len(v2) == 3:
- return None
- return np.sqrt((v1[0] - v2[0]) ** 2 + (v1[1] - v2[1]) ** 2 + (v1[2] - v2[2]) ** 2)
-
-
-def vector(p1, p2):
- """Vector from p1 to p2.
- :param p1: coordinates of point p1
- :param p2: coordinates of point p2
- :returns : numpy array with vector coordinates
- """
- return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))])
-
-
-def vecangle(v1, v2, deg=True):
- """Calculate the angle between two vectors
- :param v1: coordinates of vector v1
- :param v2: coordinates of vector v2
- :param deg: whether to return degrees or radians
- :returns : angle in degree or rad
- """
- if np.array_equal(v1, v2):
- return 0.0
- dm = np.dot(v1, v2)
- cm = np.linalg.norm(v1) * np.linalg.norm(v2)
- angle = np.arccos(dm / cm) # Round here to prevent floating point errors
- return np.degrees([angle, ])[0] if deg else angle
-
-
-def normalize_vector(v):
- """Take a vector and return the normalized vector
- :param v: a vector v
- :returns : normalized vector v
- """
- norm = np.linalg.norm(v)
- return v / norm if not norm == 0 else v
-
-
-def centroid(coo):
- """Calculates the centroid from a 3D point cloud and returns the coordinates
- :param coo: Array of coordinate arrays
- :returns : centroid coordinates as list
- """
- return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo]))))
-
-
-def projection(pnormal1, ppoint, tpoint):
- """Calculates the centroid from a 3D point cloud and returns the coordinates
- :param pnormal1: normal of plane
- :param ppoint: coordinates of point in the plane
- :param tpoint: coordinates of point to be projected
- :returns : coordinates of point orthogonally projected on the plane
- """
- # Choose the plane normal pointing to the point to be projected
- pnormal2 = [coo * (-1) for coo in pnormal1]
- d1 = euclidean3d(tpoint, pnormal1 + ppoint)
- d2 = euclidean3d(tpoint, pnormal2 + ppoint)
- pnormal = pnormal1 if d1 < d2 else pnormal2
- # Calculate the projection of tpoint to the plane
- sn = -np.dot(pnormal, vector(ppoint, tpoint))
- sd = np.dot(pnormal, pnormal)
- sb = sn / sd
- return [c1 + c2 for c1, c2 in zip(tpoint, [sb * pn for pn in pnormal])]
-
-
-def cluster_doubles(double_list):
- """Given a list of doubles, they are clustered if they share one element
- :param double_list: list of doubles
- :returns : list of clusters (tuples)
- """
- location = {} # hashtable of which cluster each element is in
- clusters = []
- # Go through each double
- for t in double_list:
- a, b = t[0], t[1]
- # If they both are already in different clusters, merge the clusters
- if a in location and b in location:
- if location[a] != location[b]:
- if location[a] < location[b]:
- clusters[location[a]] = clusters[location[a]].union(clusters[location[b]]) # Merge clusters
- clusters = clusters[:location[b]] + clusters[location[b] + 1:]
- else:
- clusters[location[b]] = clusters[location[b]].union(clusters[location[a]]) # Merge clusters
- clusters = clusters[:location[a]] + clusters[location[a] + 1:]
- # Rebuild index of locations for each element as they have changed now
- location = {}
- for i, cluster in enumerate(clusters):
- for c in cluster:
- location[c] = i
- else:
- # If a is already in a cluster, add b to that cluster
- if a in location:
- clusters[location[a]].add(b)
- location[b] = location[a]
- # If b is already in a cluster, add a to that cluster
- if b in location:
- clusters[location[b]].add(a)
- location[a] = location[b]
- # If neither a nor b is in any cluster, create a new one with a and b
- if not (b in location and a in location):
- clusters.append(set(t))
- location[a] = len(clusters) - 1
- location[b] = len(clusters) - 1
- return map(tuple, clusters)
-
-
-#################
-# File operations
-#################
-
-def tilde_expansion(folder_path):
- """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>."""
- return os.path.expanduser(folder_path) if '~' in folder_path else folder_path
-
-
-def folder_exists(folder_path):
- """Checks if a folder exists"""
- return os.path.exists(folder_path)
-
-
-def create_folder_if_not_exists(folder_path):
- """Creates a folder if it does not exists."""
- folder_path = tilde_expansion(folder_path)
- folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path
- direc = os.path.dirname(folder_path)
- if not folder_exists(direc):
- os.makedirs(direc)
-
-
-def cmd_exists(c):
- return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
-
-
-################
-# PyMOL-specific
-################
-
-
-def initialize_pymol(options):
- """Initializes PyMOL"""
- import pymol
- # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround)
- pymol.finish_launching(args=['pymol', options, '-K'])
- pymol.cmd.reinitialize()
-
-
-def start_pymol(quiet=False, options='-p', run=False):
- """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output.
- Command line options can be passed as the second argument."""
- import pymol
- pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:]
- if run:
- initialize_pymol(options)
- if quiet:
- pymol.cmd.feedback('disable', 'all', 'everything')
-
-
-def nucleotide_linkage(residues):
- """Support for DNA/RNA ligands by finding missing covalent linkages to stitch DNA/RNA together."""
-
- nuc_covalent = []
- #######################################
- # Basic support for RNA/DNA as ligand #
- #######################################
- nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU']
- dna_rna = {} # Dictionary of DNA/RNA residues by chain
- covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
- # Create missing covlinkage entries for DNA/RNA
- for ligand in residues:
- resname, chain, pos = ligand
- if resname in nucleotides:
- if chain not in dna_rna:
- dna_rna[chain] = [(resname, pos), ]
- else:
- dna_rna[chain].append((resname, pos))
- for chain in dna_rna:
- nuc_list = dna_rna[chain]
- for i, nucleotide in enumerate(nuc_list):
- if not i == len(nuc_list) - 1:
- name, pos = nucleotide
- nextnucleotide = nuc_list[i + 1]
- nextname, nextpos = nextnucleotide
- newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='',
- id2=nextname, chain2=chain, pos2=nextpos, conf2='')
- nuc_covalent.append(newlink)
-
- return nuc_covalent
-
-
-def ring_is_planar(ring, r_atoms):
- """Given a set of ring atoms, check if the ring is sufficiently planar
- to be considered aromatic"""
- normals = []
- for a in r_atoms:
- adj = pybel.ob.OBAtomAtomIter(a.OBAtom)
- # Check for neighboring atoms in the ring
- n_coords = [pybel.Atom(neigh).coords for neigh in adj if ring.IsMember(neigh)]
- vec1, vec2 = vector(a.coords, n_coords[0]), vector(a.coords, n_coords[1])
- normals.append(np.cross(vec1, vec2))
- # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less
- for n1, n2 in itertools.product(normals, repeat=2):
- arom_angle = vecangle(n1, n2)
- if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]):
- return False
- return True
-
-
-def classify_by_name(names):
- """Classify a (composite) ligand by the HETID(s)"""
- if len(names) > 3: # Polymer
- if len(set(config.RNA).intersection(set(names))) != 0:
- ligtype = 'RNA'
- elif len(set(config.DNA).intersection(set(names))) != 0:
- ligtype = 'DNA'
- else:
- ligtype = "POLYMER"
- else:
- ligtype = 'SMALLMOLECULE'
-
- for name in names:
- if name in config.METAL_IONS:
- if len(names) == 1:
- ligtype = 'ION'
- else:
- if "ION" not in ligtype:
- ligtype += '+ION'
- return ligtype
-
-
-def sort_members_by_importance(members):
- """Sort the members of a composite ligand according to two criteria:
- 1. Split up in main and ion group. Ion groups are located behind the main group.
- 2. Within each group, sort by chain and position."""
- main = [x for x in members if x[0] not in config.METAL_IONS]
- ion = [x for x in members if x[0] in config.METAL_IONS]
- sorted_main = sorted(main, key=lambda x: (x[1], x[2]))
- sorted_main = sorted(main, key=lambda x: (x[1], x[2]))
- sorted_ion = sorted(ion, key=lambda x: (x[1], x[2]))
- return sorted_main + sorted_ion
-
-
-def get_isomorphisms(reference, lig):
- """Get all isomorphisms of the ligand."""
- query = pybel.ob.CompileMoleculeQuery(reference.OBMol)
- mappr = pybel.ob.OBIsomorphismMapper.GetInstance(query)
- if all:
- isomorphs = pybel.ob.vvpairUIntUInt()
- mappr.MapAll(lig.OBMol, isomorphs)
- else:
- isomorphs = pybel.ob.vpairUIntUInt()
- mappr.MapFirst(lig.OBMol, isomorphs)
- isomorphs = [isomorphs]
- logger.debug(f'number of isomorphisms: {len(isomorphs)}')
- # @todo Check which isomorphism to take
- return isomorphs
-
-
-def canonicalize(lig, preserve_bond_order=False):
- """Get the canonical atom order for the ligand."""
- atomorder = None
- # Get canonical atom order
-
- lig = pybel.ob.OBMol(lig.OBMol)
- if not preserve_bond_order:
- for bond in pybel.ob.OBMolBondIter(lig):
- if bond.GetBondOrder() != 1:
- bond.SetBondOrder(1)
- lig.DeleteData(pybel.ob.StereoData)
- lig = pybel.Molecule(lig)
- testcan = lig.write(format='can')
- try:
- pybel.readstring('can', testcan)
- reference = pybel.readstring('can', testcan)
- except IOError:
- testcan, reference = '', ''
- if testcan != '':
- reference.removeh()
- isomorphs = get_isomorphisms(reference, lig) # isomorphs now holds all isomorphisms within the molecule
- if not len(isomorphs) == 0:
- smi_dict = {}
- smi_to_can = isomorphs[0]
- for x in smi_to_can:
- smi_dict[int(x[1]) + 1] = int(x[0]) + 1
- atomorder = [smi_dict[x + 1] for x in range(len(lig.atoms))]
- else:
- atomorder = None
- return atomorder
-
-
-def int32_to_negative(int32):
- """Checks if a suspicious number (e.g. ligand position) is in fact a negative number represented as a
- 32 bit integer and returns the actual number.
- """
- dct = {}
- if int32 == 4294967295: # Special case in some structures (note, this is just a workaround)
- return -1
- for i in range(-1000, -1):
- dct[np.uint32(i)] = i
- if int32 in dct:
- return dct[int32]
- else:
- return int32
-
-
-def read_pdb(pdbfname, as_string=False):
- """Reads a given PDB file and returns a Pybel Molecule."""
- pybel.ob.obErrorLog.StopLogging() # Suppress all OpenBabel warnings
- if os.name != 'nt': # Resource module not available for Windows
- maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1]
- resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize))
- sys.setrecursionlimit(10 ** 5) # increase Python recursion limit
- return readmol(pdbfname, as_string=as_string)
-
-
-def read(fil):
- """Returns a file handler and detects gzipped files."""
- if os.path.splitext(fil)[-1] == '.gz':
- return gzip.open(fil, 'rb')
- elif os.path.splitext(fil)[-1] == '.zip':
- zf = zipfile.ZipFile(fil, 'r')
- return zf.open(zf.infolist()[0].filename)
- else:
- return open(fil, 'r')
-
-
-def readmol(path, as_string=False):
- """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type.
- In contrast to the standard Pybel implementation, the file is closed properly."""
- supported_formats = ['pdb']
- # Fix for Windows-generated files: Remove carriage return characters
- if "\r" in path and as_string:
- path = path.replace('\r', '')
-
- for sformat in supported_formats:
- obc = pybel.ob.OBConversion()
- obc.SetInFormat(sformat)
- logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel')
-
- # Read molecules with single bond information
- if as_string:
- try:
- mymol = pybel.readstring(sformat, path)
- except IOError:
- logger.error('no valid file format provided')
- sys.exit(1)
- else:
- read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None})
- try:
- mymol = next(read_file)
- except StopIteration:
- logger.error('file contains no valid molecules')
- sys.exit(1)
-
- logger.debug('molecule successfully read')
-
- # Assign multiple bonds
- mymol.OBMol.PerceiveBondOrders()
- return mymol, sformat
-
- logger.error('no valid file format provided')
- sys.exit(1)