From 4be08f7bdd77991e9e453c1cda863c3f20c338d5 Mon Sep 17 00:00:00 2001 From: Navan Chauhan Date: Thu, 2 Jul 2020 20:48:33 +0530 Subject: initial commit --- plip/basic/__init__.py | 0 plip/basic/config.py | 121 +++++++++++++ plip/basic/logger.py | 21 +++ plip/basic/parallel.py | 52 ++++++ plip/basic/remote.py | 95 ++++++++++ plip/basic/supplemental.py | 434 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 723 insertions(+) create mode 100644 plip/basic/__init__.py create mode 100644 plip/basic/config.py create mode 100644 plip/basic/logger.py create mode 100644 plip/basic/parallel.py create mode 100644 plip/basic/remote.py create mode 100644 plip/basic/supplemental.py (limited to 'plip/basic') diff --git a/plip/basic/__init__.py b/plip/basic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plip/basic/config.py b/plip/basic/config.py new file mode 100644 index 0000000..a7468cf --- /dev/null +++ b/plip/basic/config.py @@ -0,0 +1,121 @@ +__version__ = '2.1.0-beta' +__maintainer__ = 'PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai' + +import logging + +DEFAULT_LOG_LEVEL = logging.INFO +VERBOSE = False # Set verbose mode +QUIET = False # Set verbose mode +SILENT = False # Set verbose mode +MAXTHREADS = 1 # Maximum number of main threads for binding site visualization +XML = False +TXT = False +PICS = False +PYMOL = False +STDOUT = False +RAWSTRING = False # use raw strings for input / output +OUTPATH = './' +BASEPATH = './' +BREAKCOMPOSITE = False # Break up composite ligands with covalent bonds +ALTLOC = False # Consider alternate locations +PLUGIN_MODE = False # Special mode for PLIP in Plugins (e.g. PyMOL) +NOFIX = False # Turn off fixing of errors in PDB files +NOFIXFILE = False # Turn off writing to files for fixed PDB structures +PEPTIDES = [] # Definition which chains should be considered as peptide ligands +INTRA = None +KEEPMOD = False +DNARECEPTOR = False +OUTPUTFILENAME = "report" # Naming for the TXT and XML report files +NOPDBCANMAP = False # Skip calculation of mapping canonical atom order: PDB atom order +NOHYDRO = False # Do not add hydrogen bonds (in case already present in the structure) + +# Configuration file for Protein-Ligand Interaction Profiler (PLIP) +# Set thresholds for detection of interactions + +# Thresholds for detection (global variables) +BS_DIST = 7.5 # Determines maximum distance to include binding site residues +AROMATIC_PLANARITY = 5.0 # Determines allowed deviation from planarity in aromatic rings +MIN_DIST = 0.5 # Minimum distance for all distance thresholds +# Some distance thresholds were extended (max. 1.0A) if too restrictive too account for low-quality structures +HYDROPH_DIST_MAX = 4.0 # Distance cutoff for detection of hydrophobic contacts +HBOND_DIST_MAX = 4.1 # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A +HBOND_DON_ANGLE_MIN = 100 # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 +PISTACK_DIST_MAX = 5.5 # Max. distance for parallel or offset pistacking (McGaughey, 1998) +PISTACK_ANG_DEV = 30 # Max. Deviation from parallel or perpendicular orientation (in degrees) +PISTACK_OFFSET_MAX = 2.0 # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A) +PICATION_DIST_MAX = 6.0 # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999) +SALTBRIDGE_DIST_MAX = 5.5 # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5 +HALOGEN_DIST_MAX = 4.0 # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5 +HALOGEN_ACC_ANGLE = 120 # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) +HALOGEN_DON_ANGLE = 165 # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) +HALOGEN_ANGLE_DEV = 30 # Max. deviation from optimal angle +WATER_BRIDGE_MINDIST = 2.5 # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 +WATER_BRIDGE_MAXDIST = 4.1 # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 +WATER_BRIDGE_OMEGA_MIN = 71 # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9 +WATER_BRIDGE_OMEGA_MAX = 140 # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) +WATER_BRIDGE_THETA_MIN = 100 # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005) +METAL_DIST_MAX = 3.0 # Max. distance between metal ion and interacting atom (Harding, 2001) + +# Other thresholds +MAX_COMPOSITE_LENGTH = 200 # Filter out ligands with more than 200 fragments + +######### +# Names # +######### + +# Names of RNA and DNA residues to be considered (detection by name) +RNA = ['U', 'A', 'C', 'G'] +DNA = ['DT', 'DA', 'DC', 'DG'] + +############# +# Whitelist # +############# + +# Metal cations which can be complexed + +METAL_IONS = ['CA', 'CO', 'MG', 'MN', 'FE', 'CU', 'ZN', 'FE2', 'FE3', 'FE4', 'LI', 'NA', 'K', 'RB', 'SR', 'CS', 'BA', + 'CR', 'NI', 'FE1', 'NI', 'RU', 'RU1', 'RH', 'RH1', 'PD', 'AG', 'CD', 'LA', 'W', 'W1', 'OS', 'IR', 'PT', + 'PT1', 'AU', 'HG', 'CE', 'PR', 'SM', 'EU', 'GD', 'TB', 'YB', 'LU', 'AL', 'GA', 'IN', 'SB', 'TL', 'PB'] + +############## +# Blacklists # +############## + +# Other Ions/Atoms (not yet supported) +anions = ['CL', 'IOD', 'BR'] +other = ['MO', 'RE', 'HO'] +UNSUPPORTED = anions + other + +# BioLiP list of suspicious ligands from http://zhanglab.ccmb.med.umich.edu/BioLiP/ligand_list (2014-07-10) +# Add ligands here to get warnings for possible artifacts. +biolip_list = ['ACE', 'HEX', 'TMA', 'SOH', 'P25', 'CCN', 'PR', 'PTN', 'NO3', 'TCN', 'BU1', 'BCN', 'CB3', 'HCS', 'NBN', + 'SO2', 'MO6', 'MOH', 'CAC', 'MLT', 'KR', '6PH', 'MOS', 'UNL', 'MO3', 'SR', 'CD3', 'PB', 'ACM', 'LUT', + 'PMS', 'OF3', 'SCN', 'DHB', 'E4N', '13P', '3PG', 'CYC', 'NC', 'BEN', 'NAO', 'PHQ', 'EPE', 'BME', 'TB', + 'ETE', 'EU', 'OES', 'EAP', 'ETX', 'BEZ', '5AD', 'OC2', 'OLA', 'GD3', 'CIT', 'DVT', 'OC6', 'MW1', 'OC3', + 'SRT', 'LCO', 'BNZ', 'PPV', 'STE', 'PEG', 'RU', 'PGE', 'MPO', 'B3P', 'OGA', 'IPA', 'LU', 'EDO', 'MAC', + '9PE', 'IPH', 'MBN', 'C1O', '1PE', 'YF3', 'PEF', 'GD', '8PE', 'DKA', 'RB', 'YB', 'GGD', 'SE4', 'LHG', + 'SMO', 'DGD', 'CMO', 'MLI', 'MW2', 'DTT', 'DOD', '7PH', 'PBM', 'AU', 'FOR', 'PSC', 'TG1', 'KAI', '1PG', + 'DGA', 'IR', 'PE4', 'VO4', 'ACN', 'AG', 'MO4', 'OCL', '6UL', 'CHT', 'RHD', 'CPS', 'IR3', 'OC4', 'MTE', + 'HGC', 'CR', 'PC1', 'HC4', 'TEA', 'BOG', 'PEO', 'PE5', '144', 'IUM', 'LMG', 'SQU', 'MMC', 'GOL', 'NVP', + 'AU3', '3PH', 'PT4', 'PGO', 'ICT', 'OCM', 'BCR', 'PG4', 'L4P', 'OPC', 'OXM', 'SQD', 'PQ9', 'BAM', 'PI', + 'PL9', 'P6G', 'IRI', '15P', 'MAE', 'MBO', 'FMT', 'L1P', 'DUD', 'PGV', 'CD1', 'P33', 'DTU', 'XAT', 'CD', + 'THE', 'U1', 'NA', 'MW3', 'BHG', 'Y1', 'OCT', 'BET', 'MPD', 'HTO', 'IBM', 'D01', 'HAI', 'HED', 'CAD', + 'CUZ', 'TLA', 'SO4', 'OC5', 'ETF', 'MRD', 'PT', 'PHB', 'URE', 'MLA', 'TGL', 'PLM', 'NET', 'LAC', 'AUC', + 'UNX', 'GA', 'DMS', 'MO2', 'LA', 'NI', 'TE', 'THJ', 'NHE', 'HAE', 'MO1', 'DAO', '3PE', 'LMU', 'DHJ', + 'FLC', 'SAL', 'GAI', 'ORO', 'HEZ', 'TAM', 'TRA', 'NEX', 'CXS', 'LCP', 'HOH', 'OCN', 'PER', 'ACY', 'MH2', + 'ARS', '12P', 'L3P', 'PUT', 'IN', 'CS', 'NAW', 'SB', 'GUN', 'SX', 'CON', 'C2O', 'EMC', 'BO4', 'BNG', + 'MN5', '__O', 'K', 'CYN', 'H2S', 'MH3', 'YT3', 'P22', 'KO4', '1AG', 'CE', 'IPL', 'PG6', 'MO5', 'F09', + 'HO', 'AL', 'TRS', 'EOH', 'GCP', 'MSE', 'AKR', 'NCO', 'PO4', 'L2P', 'LDA', 'SIN', 'DMI', 'SM', 'DTD', + 'SGM', 'DIO', 'PPI', 'DDQ', 'DPO', 'HCA', 'CO5', 'PD', 'OS', 'OH', 'NA6', 'NAG', 'W', 'ENC', 'NA5', + 'LI1', 'P4C', 'GLV', 'DMF', 'ACT', 'BTB', '6PL', 'BGL', 'OF1', 'N8E', 'LMT', 'THM', 'EU3', 'PGR', 'NA2', + 'FOL', '543', '_CP', 'PEK', 'NSP', 'PEE', 'OCO', 'CHD', 'CO2', 'TBU', 'UMQ', 'MES', 'NH4', 'CD5', 'HTG', + 'DEP', 'OC1', 'KDO', '2PE', 'PE3', 'IOD', 'NDG', 'CL', 'HG', 'F', 'XE', 'TL', 'BA', 'LI', 'BR', 'TAU', + 'TCA', 'SPD', 'SPM', 'SAR', 'SUC', 'PAM', 'SPH', 'BE7', 'P4G', 'OLC', 'OLB', 'LFA', 'D10', 'D12', 'DD9', + 'HP6', 'R16', 'PX4', 'TRD', 'UND', 'FTT', 'MYR', 'RG1', 'IMD', 'DMN', 'KEN', 'C14', 'UPL', 'CMJ', 'ULI', + 'MYS', 'TWT', 'M2M', 'P15', 'PG0', 'PEU', 'AE3', 'TOE', 'ME2', 'PE8', '6JZ', '7PE', 'P3G', '7PG', 'PG5', + '16P', 'XPE', 'PGF', 'AE4', '7E8', '7E9', 'MVC', 'TAR', 'DMR', 'LMR', 'NER', '02U', 'NGZ', 'LXB', 'A2G', + 'BM3', 'NAA', 'NGA', 'LXZ', 'PX6', 'PA8', 'LPP', 'PX2', 'MYY', 'PX8', 'PD7', 'XP4', 'XPA', 'PEV', '6PE', + 'PEX', 'PEH', 'PTY', 'YB2', 'PGT', 'CN3', 'AGA', 'DGG', 'CD4', 'CN6', 'CDL', 'PG8', 'MGE', 'DTV', 'L44', + 'L2C', '4AG', 'B3H', '1EM', 'DDR', 'I42', 'CNS', 'PC7', 'HGP', 'PC8', 'HGX', 'LIO', 'PLD', 'PC2', 'PCF', + 'MC3', 'P1O', 'PLC', 'PC6', 'HSH', 'BXC', 'HSG', 'DPG', '2DP', 'POV', 'PCW', 'GVT', 'CE9', 'CXE', 'C10', + 'CE1', 'SPJ', 'SPZ', 'SPK', 'SPW', 'HT3', 'HTH', '2OP', '3NI', 'BO3', 'DET', 'D1D', 'SWE', 'SOG'] diff --git a/plip/basic/logger.py b/plip/basic/logger.py new file mode 100644 index 0000000..ca65411 --- /dev/null +++ b/plip/basic/logger.py @@ -0,0 +1,21 @@ +import inspect +import logging + + +def get_logger(): + """ + Configures a base logger and returns a module-specific sub-logger of the calling module. + """ + frame = inspect.stack()[1] + module_name = inspect.getmodule(frame[0]).__name__ + if module_name != '__main__': + logger = logging.getLogger(module_name) + if not logger.parent.handlers: + ch = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s') + ch.setFormatter(formatter) + logger.parent.addHandler(ch) + else: + logger = logging.getLogger('plip') + + return logger diff --git a/plip/basic/parallel.py b/plip/basic/parallel.py new file mode 100644 index 0000000..1f71943 --- /dev/null +++ b/plip/basic/parallel.py @@ -0,0 +1,52 @@ +import itertools +import multiprocessing +from builtins import zip +from functools import partial + +from numpy import asarray + + +class SubProcessError(Exception): + def __init__(self, e, exitcode=1): + self.exitcode = exitcode + super(SubProcessError, self).__init__(e) + + pass + + +def universal_worker(input_pair): + """This is a wrapper function expecting a tiplet of function, single + argument, dict of keyword arguments. The provided function is called + with the appropriate arguments.""" + function, arg, kwargs = input_pair + return function(arg, **kwargs) + + +def pool_args(function, sequence, kwargs): + """Return a single iterator of n elements of lists of length 3, given a sequence of len n.""" + return zip(itertools.repeat(function), sequence, itertools.repeat(kwargs)) + + +def parallel_fn(f): + """Simple wrapper function, returning a parallel version of the given function f. + The function f must have one argument and may have an arbitray number of + keyword arguments. """ + + def simple_parallel(func, sequence, **args): + """ f takes an element of sequence as input and the keyword args in **args""" + if 'processes' in args: + processes = args.get('processes') + del args['processes'] + else: + processes = multiprocessing.cpu_count() + + pool = multiprocessing.Pool(processes) # depends on available cores + + result = pool.map_async(universal_worker, pool_args(func, sequence, args)) + pool.close() + pool.join() + cleaned = [x for x in result.get() if x is not None] # getting results + cleaned = asarray(cleaned) + return cleaned + + return partial(simple_parallel, f) diff --git a/plip/basic/remote.py b/plip/basic/remote.py new file mode 100644 index 0000000..0c67f6f --- /dev/null +++ b/plip/basic/remote.py @@ -0,0 +1,95 @@ +from collections import namedtuple + +hbonds_info = namedtuple('hbonds_info', 'ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id') +hydrophobic_info = namedtuple('hydrophobic_info', 'bs_ids lig_ids pairs_ids') +halogen_info = namedtuple('halogen_info', 'don_id acc_id') +pistack_info = namedtuple('pistack_info', 'proteinring_atoms, proteinring_center ligandring_atoms ' + 'ligandring_center type') +pication_info = namedtuple('pication_info', 'ring_center charge_center ring_atoms charge_atoms, protcharged') +sbridge_info = namedtuple('sbridge_info', 'positive_atoms negative_atoms positive_center negative_center protispos') +wbridge_info = namedtuple('wbridge_info', 'don_id acc_id water_id protisdon') +metal_info = namedtuple('metal_info', 'metal_id, target_id location') + + +class VisualizerData: + """Contains all information on a complex relevant for visualization. Can be pickled""" + + def __init__(self, mol, site): + pcomp = mol + pli = mol.interaction_sets[site] + ligand = pli.ligand + + # General Information + self.lig_members = sorted(pli.ligand.members) + self.sourcefile = pcomp.sourcefiles['pdbcomplex'] + self.corrected_pdb = pcomp.corrected_pdb + self.pdbid = mol.pymol_name + self.hetid = ligand.hetid + self.ligandtype = ligand.type + self.chain = ligand.chain if not ligand.chain == "0" else "" # #@todo Fix this + self.position = str(ligand.position) + self.uid = ":".join([self.hetid, self.chain, self.position]) + self.outpath = mol.output_path + self.metal_ids = [x.m_orig_idx for x in pli.ligand.metals] + self.unpaired_hba_idx = pli.unpaired_hba_orig_idx + self.unpaired_hbd_idx = pli.unpaired_hbd_orig_idx + self.unpaired_hal_idx = pli.unpaired_hal_orig_idx + + # Information on Interactions + + # Hydrophobic Contacts + # Contains IDs of contributing binding site, ligand atoms and the pairings + hydroph_pairs_id = [(h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts] + self.hydrophobic_contacts = hydrophobic_info(bs_ids=[hp[0] for hp in hydroph_pairs_id], + lig_ids=[hp[1] for hp in hydroph_pairs_id], + pairs_ids=hydroph_pairs_id) + + # Hydrogen Bonds + # #@todo Don't use indices, simplify this code here + hbonds_ldon, hbonds_pdon = pli.hbonds_ldon, pli.hbonds_pdon + hbonds_ldon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon] + hbonds_pdon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon] + self.hbonds = hbonds_info(ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], + lig_don_id=[hb[1] for hb in hbonds_ldon_id], + prot_acc_id=[hb[0] for hb in hbonds_ldon_id], + pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], + prot_don_id=[hb[1] for hb in hbonds_pdon_id], + lig_acc_id=[hb[0] for hb in hbonds_pdon_id]) + + # Halogen Bonds + self.halogen_bonds = [halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) + for h in pli.halogen_bonds] + + # Pistacking + self.pistacking = [pistack_info(proteinring_atoms=pistack.proteinring.atoms_orig_idx, + proteinring_center=pistack.proteinring.center, + ligandring_atoms=pistack.ligandring.atoms_orig_idx, + ligandring_center=pistack.ligandring.center, + type=pistack.type) for pistack in pli.pistacking] + + # Pi-cation interactions + self.pication = [pication_info(ring_center=picat.ring.center, + charge_center=picat.charge.center, + ring_atoms=picat.ring.atoms_orig_idx, + charge_atoms=picat.charge.atoms_orig_idx, + protcharged=picat.protcharged) + for picat in pli.pication_paro + pli.pication_laro] + + # Salt Bridges + self.saltbridges = [sbridge_info(positive_atoms=sbridge.positive.atoms_orig_idx, + negative_atoms=sbridge.negative.atoms_orig_idx, + positive_center=sbridge.positive.center, + negative_center=sbridge.negative.center, + protispos=sbridge.protispos) + for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg] + + # Water Bridgese('wbridge_info', 'don_id acc_id water_id protisdon') + self.waterbridges = [wbridge_info(don_id=wbridge.d_orig_idx, + acc_id=wbridge.a_orig_idx, + water_id=wbridge.water_orig_idx, + protisdon=wbridge.protisdon) for wbridge in pli.water_bridges] + + # Metal Complexes + self.metal_complexes = [metal_info(metal_id=metalc.metal_orig_idx, + target_id=metalc.target_orig_idx, + location=metalc.location) for metalc in pli.metal_complexes] diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py new file mode 100644 index 0000000..6b3ef7b --- /dev/null +++ b/plip/basic/supplemental.py @@ -0,0 +1,434 @@ +import gzip +import itertools +import os +import platform +import re +import subprocess +import sys +import tempfile +import zipfile +from collections import namedtuple + +import numpy as np +from openbabel import pybel +from openbabel.pybel import Atom + +from plip.basic import config, logger + +logger = logger.get_logger() + +# Windows and MacOS +if os.name != 'nt' and platform.system() != 'Darwin': # Resource module not available for Windows + import resource + +# Settings +np.seterr(all='ignore') # No runtime warnings + + +def tmpfile(prefix, direc): + """Returns the path to a newly created temporary file.""" + return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc) + + +def is_lig(hetid): + """Checks if a PDB compound can be excluded as a small molecule ligand""" + h = hetid.upper() + return not (h == 'HOH' or h in config.UNSUPPORTED) + + +def extract_pdbid(string): + """Use regular expressions to get a PDB ID from a string""" + p = re.compile("[0-9][0-9a-z]{3}") + m = p.search(string.lower()) + try: + return m.group() + except AttributeError: + return "UnknownProtein" + + +def whichrestype(atom): + """Returns the residue name of an Pybel or OpenBabel atom.""" + atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom + return atom.GetResidue().GetName() if atom.GetResidue() is not None else None + + +def whichresnumber(atom): + """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file).""" + atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom + return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None + + +def whichchain(atom): + """Returns the residue number of an PyBel or OpenBabel atom.""" + atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom + return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None + + +######################### +# Mathematical operations +######################### + + +def euclidean3d(v1, v2): + """Faster implementation of euclidean distance for the 3D case.""" + if not len(v1) == 3 and len(v2) == 3: + return None + return np.sqrt((v1[0] - v2[0]) ** 2 + (v1[1] - v2[1]) ** 2 + (v1[2] - v2[2]) ** 2) + + +def vector(p1, p2): + """Vector from p1 to p2. + :param p1: coordinates of point p1 + :param p2: coordinates of point p2 + :returns : numpy array with vector coordinates + """ + return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))]) + + +def vecangle(v1, v2, deg=True): + """Calculate the angle between two vectors + :param v1: coordinates of vector v1 + :param v2: coordinates of vector v2 + :param deg: whether to return degrees or radians + :returns : angle in degree or rad + """ + if np.array_equal(v1, v2): + return 0.0 + dm = np.dot(v1, v2) + cm = np.linalg.norm(v1) * np.linalg.norm(v2) + angle = np.arccos(dm / cm) # Round here to prevent floating point errors + return np.degrees([angle, ])[0] if deg else angle + + +def normalize_vector(v): + """Take a vector and return the normalized vector + :param v: a vector v + :returns : normalized vector v + """ + norm = np.linalg.norm(v) + return v / norm if not norm == 0 else v + + +def centroid(coo): + """Calculates the centroid from a 3D point cloud and returns the coordinates + :param coo: Array of coordinate arrays + :returns : centroid coordinates as list + """ + return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])))) + + +def projection(pnormal1, ppoint, tpoint): + """Calculates the centroid from a 3D point cloud and returns the coordinates + :param pnormal1: normal of plane + :param ppoint: coordinates of point in the plane + :param tpoint: coordinates of point to be projected + :returns : coordinates of point orthogonally projected on the plane + """ + # Choose the plane normal pointing to the point to be projected + pnormal2 = [coo * (-1) for coo in pnormal1] + d1 = euclidean3d(tpoint, pnormal1 + ppoint) + d2 = euclidean3d(tpoint, pnormal2 + ppoint) + pnormal = pnormal1 if d1 < d2 else pnormal2 + # Calculate the projection of tpoint to the plane + sn = -np.dot(pnormal, vector(ppoint, tpoint)) + sd = np.dot(pnormal, pnormal) + sb = sn / sd + return [c1 + c2 for c1, c2 in zip(tpoint, [sb * pn for pn in pnormal])] + + +def cluster_doubles(double_list): + """Given a list of doubles, they are clustered if they share one element + :param double_list: list of doubles + :returns : list of clusters (tuples) + """ + location = {} # hashtable of which cluster each element is in + clusters = [] + # Go through each double + for t in double_list: + a, b = t[0], t[1] + # If they both are already in different clusters, merge the clusters + if a in location and b in location: + if location[a] != location[b]: + if location[a] < location[b]: + clusters[location[a]] = clusters[location[a]].union(clusters[location[b]]) # Merge clusters + clusters = clusters[:location[b]] + clusters[location[b] + 1:] + else: + clusters[location[b]] = clusters[location[b]].union(clusters[location[a]]) # Merge clusters + clusters = clusters[:location[a]] + clusters[location[a] + 1:] + # Rebuild index of locations for each element as they have changed now + location = {} + for i, cluster in enumerate(clusters): + for c in cluster: + location[c] = i + else: + # If a is already in a cluster, add b to that cluster + if a in location: + clusters[location[a]].add(b) + location[b] = location[a] + # If b is already in a cluster, add a to that cluster + if b in location: + clusters[location[b]].add(a) + location[a] = location[b] + # If neither a nor b is in any cluster, create a new one with a and b + if not (b in location and a in location): + clusters.append(set(t)) + location[a] = len(clusters) - 1 + location[b] = len(clusters) - 1 + return map(tuple, clusters) + + +################# +# File operations +################# + +def tilde_expansion(folder_path): + """Tilde expansion, i.e. converts '~' in paths into .""" + return os.path.expanduser(folder_path) if '~' in folder_path else folder_path + + +def folder_exists(folder_path): + """Checks if a folder exists""" + return os.path.exists(folder_path) + + +def create_folder_if_not_exists(folder_path): + """Creates a folder if it does not exists.""" + folder_path = tilde_expansion(folder_path) + folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path + direc = os.path.dirname(folder_path) + if not folder_exists(direc): + os.makedirs(direc) + + +def cmd_exists(c): + return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 + + +################ +# PyMOL-specific +################ + + +def initialize_pymol(options): + """Initializes PyMOL""" + import pymol + # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround) + pymol.finish_launching(args=['pymol', options, '-K']) + pymol.cmd.reinitialize() + + +def start_pymol(quiet=False, options='-p', run=False): + """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output. + Command line options can be passed as the second argument.""" + import pymol + pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:] + if run: + initialize_pymol(options) + if quiet: + pymol.cmd.feedback('disable', 'all', 'everything') + + +def nucleotide_linkage(residues): + """Support for DNA/RNA ligands by finding missing covalent linkages to stitch DNA/RNA together.""" + + nuc_covalent = [] + ####################################### + # Basic support for RNA/DNA as ligand # + ####################################### + nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'] + dna_rna = {} # Dictionary of DNA/RNA residues by chain + covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") + # Create missing covlinkage entries for DNA/RNA + for ligand in residues: + resname, chain, pos = ligand + if resname in nucleotides: + if chain not in dna_rna: + dna_rna[chain] = [(resname, pos), ] + else: + dna_rna[chain].append((resname, pos)) + for chain in dna_rna: + nuc_list = dna_rna[chain] + for i, nucleotide in enumerate(nuc_list): + if not i == len(nuc_list) - 1: + name, pos = nucleotide + nextnucleotide = nuc_list[i + 1] + nextname, nextpos = nextnucleotide + newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='', + id2=nextname, chain2=chain, pos2=nextpos, conf2='') + nuc_covalent.append(newlink) + + return nuc_covalent + + +def ring_is_planar(ring, r_atoms): + """Given a set of ring atoms, check if the ring is sufficiently planar + to be considered aromatic""" + normals = [] + for a in r_atoms: + adj = pybel.ob.OBAtomAtomIter(a.OBAtom) + # Check for neighboring atoms in the ring + n_coords = [pybel.Atom(neigh).coords for neigh in adj if ring.IsMember(neigh)] + vec1, vec2 = vector(a.coords, n_coords[0]), vector(a.coords, n_coords[1]) + normals.append(np.cross(vec1, vec2)) + # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less + for n1, n2 in itertools.product(normals, repeat=2): + arom_angle = vecangle(n1, n2) + if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]): + return False + return True + + +def classify_by_name(names): + """Classify a (composite) ligand by the HETID(s)""" + if len(names) > 3: # Polymer + if len(set(config.RNA).intersection(set(names))) != 0: + ligtype = 'RNA' + elif len(set(config.DNA).intersection(set(names))) != 0: + ligtype = 'DNA' + else: + ligtype = "POLYMER" + else: + ligtype = 'SMALLMOLECULE' + + for name in names: + if name in config.METAL_IONS: + if len(names) == 1: + ligtype = 'ION' + else: + if "ION" not in ligtype: + ligtype += '+ION' + return ligtype + + +def sort_members_by_importance(members): + """Sort the members of a composite ligand according to two criteria: + 1. Split up in main and ion group. Ion groups are located behind the main group. + 2. Within each group, sort by chain and position.""" + main = [x for x in members if x[0] not in config.METAL_IONS] + ion = [x for x in members if x[0] in config.METAL_IONS] + sorted_main = sorted(main, key=lambda x: (x[1], x[2])) + sorted_main = sorted(main, key=lambda x: (x[1], x[2])) + sorted_ion = sorted(ion, key=lambda x: (x[1], x[2])) + return sorted_main + sorted_ion + + +def get_isomorphisms(reference, lig): + """Get all isomorphisms of the ligand.""" + query = pybel.ob.CompileMoleculeQuery(reference.OBMol) + mappr = pybel.ob.OBIsomorphismMapper.GetInstance(query) + if all: + isomorphs = pybel.ob.vvpairUIntUInt() + mappr.MapAll(lig.OBMol, isomorphs) + else: + isomorphs = pybel.ob.vpairUIntUInt() + mappr.MapFirst(lig.OBMol, isomorphs) + isomorphs = [isomorphs] + logger.debug(f'number of isomorphisms: {len(isomorphs)}') + # @todo Check which isomorphism to take + return isomorphs + + +def canonicalize(lig, preserve_bond_order=False): + """Get the canonical atom order for the ligand.""" + atomorder = None + # Get canonical atom order + + lig = pybel.ob.OBMol(lig.OBMol) + if not preserve_bond_order: + for bond in pybel.ob.OBMolBondIter(lig): + if bond.GetBondOrder() != 1: + bond.SetBondOrder(1) + lig.DeleteData(pybel.ob.StereoData) + lig = pybel.Molecule(lig) + testcan = lig.write(format='can') + try: + pybel.readstring('can', testcan) + reference = pybel.readstring('can', testcan) + except IOError: + testcan, reference = '', '' + if testcan != '': + reference.removeh() + isomorphs = get_isomorphisms(reference, lig) # isomorphs now holds all isomorphisms within the molecule + if not len(isomorphs) == 0: + smi_dict = {} + smi_to_can = isomorphs[0] + for x in smi_to_can: + smi_dict[int(x[1]) + 1] = int(x[0]) + 1 + atomorder = [smi_dict[x + 1] for x in range(len(lig.atoms))] + else: + atomorder = None + return atomorder + + +def int32_to_negative(int32): + """Checks if a suspicious number (e.g. ligand position) is in fact a negative number represented as a + 32 bit integer and returns the actual number. + """ + dct = {} + if int32 == 4294967295: # Special case in some structures (note, this is just a workaround) + return -1 + for i in range(-1000, -1): + dct[np.uint32(i)] = i + if int32 in dct: + return dct[int32] + else: + return int32 + + +def read_pdb(pdbfname, as_string=False): + """Reads a given PDB file and returns a Pybel Molecule.""" + pybel.ob.obErrorLog.StopLogging() # Suppress all OpenBabel warnings + if os.name != 'nt': # Resource module not available for Windows + maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1] + resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize)) + sys.setrecursionlimit(10 ** 5) # increase Python recursion limit + return readmol(pdbfname, as_string=as_string) + + +def read(fil): + """Returns a file handler and detects gzipped files.""" + if os.path.splitext(fil)[-1] == '.gz': + return gzip.open(fil, 'rb') + elif os.path.splitext(fil)[-1] == '.zip': + zf = zipfile.ZipFile(fil, 'r') + return zf.open(zf.infolist()[0].filename) + else: + return open(fil, 'r') + + +def readmol(path, as_string=False): + """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type. + In contrast to the standard Pybel implementation, the file is closed properly.""" + supported_formats = ['pdb'] + # Fix for Windows-generated files: Remove carriage return characters + if "\r" in path and as_string: + path = path.replace('\r', '') + + for sformat in supported_formats: + obc = pybel.ob.OBConversion() + obc.SetInFormat(sformat) + logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel') + + # Read molecules with single bond information + if as_string: + try: + mymol = pybel.readstring(sformat, path) + except IOError: + logger.error('no valid file format provided') + sys.exit(1) + else: + read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None}) + try: + mymol = next(read_file) + except StopIteration: + logger.error('file contains no valid molecules') + sys.exit(1) + + logger.debug('molecule successfully read') + + # Assign multiple bonds + mymol.OBMol.PerceiveBondOrders() + return mymol, sformat + + logger.error('no valid file format provided') + sys.exit(1) -- cgit v1.2.3