diff options
Diffstat (limited to 'plip/basic')
-rw-r--r-- | plip/basic/__init__.py | 0 | ||||
-rw-r--r-- | plip/basic/config.py | 625 | ||||
-rw-r--r-- | plip/basic/logger.py | 21 | ||||
-rw-r--r-- | plip/basic/parallel.py | 52 | ||||
-rw-r--r-- | plip/basic/remote.py | 95 | ||||
-rw-r--r-- | plip/basic/supplemental.py | 434 |
6 files changed, 0 insertions, 1227 deletions
diff --git a/plip/basic/__init__.py b/plip/basic/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/plip/basic/__init__.py +++ /dev/null diff --git a/plip/basic/config.py b/plip/basic/config.py deleted file mode 100644 index 91103fa..0000000 --- a/plip/basic/config.py +++ /dev/null @@ -1,625 +0,0 @@ -__version__ = "2.1.3" -__maintainer__ = "PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai" - -import logging - -DEFAULT_LOG_LEVEL = logging.INFO -VERBOSE = False # Set verbose mode -QUIET = False # Set verbose mode -SILENT = False # Set verbose mode -MAXTHREADS = 1 # Maximum number of main threads for binding site visualization -XML = False -TXT = False -PICS = False -PYMOL = False -STDOUT = False -RAWSTRING = False # use raw strings for input / output -OUTPATH = "./" -BASEPATH = "./" -BREAKCOMPOSITE = False # Break up composite ligands with covalent bonds -ALTLOC = False # Consider alternate locations -PLUGIN_MODE = False # Special mode for PLIP in Plugins (e.g. PyMOL) -NOFIX = False # Turn off fixing of errors in PDB files -NOFIXFILE = False # Turn off writing to files for fixed PDB structures -PEPTIDES = [] # Definition which chains should be considered as peptide ligands -INTRA = None -KEEPMOD = False -DNARECEPTOR = False -OUTPUTFILENAME = "report" # Naming for the TXT and XML report files -NOPDBCANMAP = False # Skip calculation of mapping canonical atom order: PDB atom order -NOHYDRO = False # Do not add hydrogen bonds (in case already present in the structure) - -# Configuration file for Protein-Ligand Interaction Profiler (PLIP) -# Set thresholds for detection of interactions - -# Thresholds for detection (global variables) -BS_DIST = 7.5 # Determines maximum distance to include binding site residues -AROMATIC_PLANARITY = ( - 5.0 # Determines allowed deviation from planarity in aromatic rings -) -MIN_DIST = 0.5 # Minimum distance for all distance thresholds -# Some distance thresholds were extended (max. 1.0A) if too restrictive too account for low-quality structures -HYDROPH_DIST_MAX = 4.0 # Distance cutoff for detection of hydrophobic contacts -HBOND_DIST_MAX = 4.1 # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A -HBOND_DON_ANGLE_MIN = ( - 100 # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 -) -PISTACK_DIST_MAX = ( - 5.5 # Max. distance for parallel or offset pistacking (McGaughey, 1998) -) -PISTACK_ANG_DEV = ( - 30 # Max. Deviation from parallel or perpendicular orientation (in degrees) -) -PISTACK_OFFSET_MAX = 2.0 # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A) -PICATION_DIST_MAX = 6.0 # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999) -SALTBRIDGE_DIST_MAX = 5.5 # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5 -HALOGEN_DIST_MAX = 4.0 # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5 -HALOGEN_ACC_ANGLE = ( - 120 # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) -) -HALOGEN_DON_ANGLE = ( - 165 # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) -) -HALOGEN_ANGLE_DEV = 30 # Max. deviation from optimal angle -WATER_BRIDGE_MINDIST = ( - 2.5 # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 -) -WATER_BRIDGE_MAXDIST = ( - 4.1 # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 -) -WATER_BRIDGE_OMEGA_MIN = 71 # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9 -WATER_BRIDGE_OMEGA_MAX = 140 # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) -WATER_BRIDGE_THETA_MIN = 100 # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005) -METAL_DIST_MAX = ( - 3.0 # Max. distance between metal ion and interacting atom (Harding, 2001) -) - -# Other thresholds -MAX_COMPOSITE_LENGTH = 200 # Filter out ligands with more than 200 fragments - -######### -# Names # -######### - -# Names of RNA and DNA residues to be considered (detection by name) -RNA = ["U", "A", "C", "G"] -DNA = ["DT", "DA", "DC", "DG"] - -############# -# Whitelist # -############# - -# Metal cations which can be complexed - -METAL_IONS = [ - "CA", - "CO", - "MG", - "MN", - "FE", - "CU", - "ZN", - "FE2", - "FE3", - "FE4", - "LI", - "NA", - "K", - "RB", - "SR", - "CS", - "BA", - "CR", - "NI", - "FE1", - "NI", - "RU", - "RU1", - "RH", - "RH1", - "PD", - "AG", - "CD", - "LA", - "W", - "W1", - "OS", - "IR", - "PT", - "PT1", - "AU", - "HG", - "CE", - "PR", - "SM", - "EU", - "GD", - "TB", - "YB", - "LU", - "AL", - "GA", - "IN", - "SB", - "TL", - "PB", -] - -############## -# Blacklists # -############## - -# Other Ions/Atoms (not yet supported) -anions = ["CL", "IOD", "BR"] -other = ["MO", "RE", "HO"] -UNSUPPORTED = anions + other - -# BioLiP list of suspicious ligands from http://zhanglab.ccmb.med.umich.edu/BioLiP/ligand_list (2014-07-10) -# Add ligands here to get warnings for possible artifacts. -biolip_list = [ - "ACE", - "HEX", - "TMA", - "SOH", - "P25", - "CCN", - "PR", - "PTN", - "NO3", - "TCN", - "BU1", - "BCN", - "CB3", - "HCS", - "NBN", - "SO2", - "MO6", - "MOH", - "CAC", - "MLT", - "KR", - "6PH", - "MOS", - "UNL", - "MO3", - "SR", - "CD3", - "PB", - "ACM", - "LUT", - "PMS", - "OF3", - "SCN", - "DHB", - "E4N", - "13P", - "3PG", - "CYC", - "NC", - "BEN", - "NAO", - "PHQ", - "EPE", - "BME", - "TB", - "ETE", - "EU", - "OES", - "EAP", - "ETX", - "BEZ", - "5AD", - "OC2", - "OLA", - "GD3", - "CIT", - "DVT", - "OC6", - "MW1", - "OC3", - "SRT", - "LCO", - "BNZ", - "PPV", - "STE", - "PEG", - "RU", - "PGE", - "MPO", - "B3P", - "OGA", - "IPA", - "LU", - "EDO", - "MAC", - "9PE", - "IPH", - "MBN", - "C1O", - "1PE", - "YF3", - "PEF", - "GD", - "8PE", - "DKA", - "RB", - "YB", - "GGD", - "SE4", - "LHG", - "SMO", - "DGD", - "CMO", - "MLI", - "MW2", - "DTT", - "DOD", - "7PH", - "PBM", - "AU", - "FOR", - "PSC", - "TG1", - "KAI", - "1PG", - "DGA", - "IR", - "PE4", - "VO4", - "ACN", - "AG", - "MO4", - "OCL", - "6UL", - "CHT", - "RHD", - "CPS", - "IR3", - "OC4", - "MTE", - "HGC", - "CR", - "PC1", - "HC4", - "TEA", - "BOG", - "PEO", - "PE5", - "144", - "IUM", - "LMG", - "SQU", - "MMC", - "GOL", - "NVP", - "AU3", - "3PH", - "PT4", - "PGO", - "ICT", - "OCM", - "BCR", - "PG4", - "L4P", - "OPC", - "OXM", - "SQD", - "PQ9", - "BAM", - "PI", - "PL9", - "P6G", - "IRI", - "15P", - "MAE", - "MBO", - "FMT", - "L1P", - "DUD", - "PGV", - "CD1", - "P33", - "DTU", - "XAT", - "CD", - "THE", - "U1", - "NA", - "MW3", - "BHG", - "Y1", - "OCT", - "BET", - "MPD", - "HTO", - "IBM", - "D01", - "HAI", - "HED", - "CAD", - "CUZ", - "TLA", - "SO4", - "OC5", - "ETF", - "MRD", - "PT", - "PHB", - "URE", - "MLA", - "TGL", - "PLM", - "NET", - "LAC", - "AUC", - "UNX", - "GA", - "DMS", - "MO2", - "LA", - "NI", - "TE", - "THJ", - "NHE", - "HAE", - "MO1", - "DAO", - "3PE", - "LMU", - "DHJ", - "FLC", - "SAL", - "GAI", - "ORO", - "HEZ", - "TAM", - "TRA", - "NEX", - "CXS", - "LCP", - "HOH", - "OCN", - "PER", - "ACY", - "MH2", - "ARS", - "12P", - "L3P", - "PUT", - "IN", - "CS", - "NAW", - "SB", - "GUN", - "SX", - "CON", - "C2O", - "EMC", - "BO4", - "BNG", - "MN5", - "__O", - "K", - "CYN", - "H2S", - "MH3", - "YT3", - "P22", - "KO4", - "1AG", - "CE", - "IPL", - "PG6", - "MO5", - "F09", - "HO", - "AL", - "TRS", - "EOH", - "GCP", - "MSE", - "AKR", - "NCO", - "PO4", - "L2P", - "LDA", - "SIN", - "DMI", - "SM", - "DTD", - "SGM", - "DIO", - "PPI", - "DDQ", - "DPO", - "HCA", - "CO5", - "PD", - "OS", - "OH", - "NA6", - "NAG", - "W", - "ENC", - "NA5", - "LI1", - "P4C", - "GLV", - "DMF", - "ACT", - "BTB", - "6PL", - "BGL", - "OF1", - "N8E", - "LMT", - "THM", - "EU3", - "PGR", - "NA2", - "FOL", - "543", - "_CP", - "PEK", - "NSP", - "PEE", - "OCO", - "CHD", - "CO2", - "TBU", - "UMQ", - "MES", - "NH4", - "CD5", - "HTG", - "DEP", - "OC1", - "KDO", - "2PE", - "PE3", - "IOD", - "NDG", - "CL", - "HG", - "F", - "XE", - "TL", - "BA", - "LI", - "BR", - "TAU", - "TCA", - "SPD", - "SPM", - "SAR", - "SUC", - "PAM", - "SPH", - "BE7", - "P4G", - "OLC", - "OLB", - "LFA", - "D10", - "D12", - "DD9", - "HP6", - "R16", - "PX4", - "TRD", - "UND", - "FTT", - "MYR", - "RG1", - "IMD", - "DMN", - "KEN", - "C14", - "UPL", - "CMJ", - "ULI", - "MYS", - "TWT", - "M2M", - "P15", - "PG0", - "PEU", - "AE3", - "TOE", - "ME2", - "PE8", - "6JZ", - "7PE", - "P3G", - "7PG", - "PG5", - "16P", - "XPE", - "PGF", - "AE4", - "7E8", - "7E9", - "MVC", - "TAR", - "DMR", - "LMR", - "NER", - "02U", - "NGZ", - "LXB", - "A2G", - "BM3", - "NAA", - "NGA", - "LXZ", - "PX6", - "PA8", - "LPP", - "PX2", - "MYY", - "PX8", - "PD7", - "XP4", - "XPA", - "PEV", - "6PE", - "PEX", - "PEH", - "PTY", - "YB2", - "PGT", - "CN3", - "AGA", - "DGG", - "CD4", - "CN6", - "CDL", - "PG8", - "MGE", - "DTV", - "L44", - "L2C", - "4AG", - "B3H", - "1EM", - "DDR", - "I42", - "CNS", - "PC7", - "HGP", - "PC8", - "HGX", - "LIO", - "PLD", - "PC2", - "PCF", - "MC3", - "P1O", - "PLC", - "PC6", - "HSH", - "BXC", - "HSG", - "DPG", - "2DP", - "POV", - "PCW", - "GVT", - "CE9", - "CXE", - "C10", - "CE1", - "SPJ", - "SPZ", - "SPK", - "SPW", - "HT3", - "HTH", - "2OP", - "3NI", - "BO3", - "DET", - "D1D", - "SWE", - "SOG", -] diff --git a/plip/basic/logger.py b/plip/basic/logger.py deleted file mode 100644 index ca65411..0000000 --- a/plip/basic/logger.py +++ /dev/null @@ -1,21 +0,0 @@ -import inspect -import logging - - -def get_logger(): - """ - Configures a base logger and returns a module-specific sub-logger of the calling module. - """ - frame = inspect.stack()[1] - module_name = inspect.getmodule(frame[0]).__name__ - if module_name != '__main__': - logger = logging.getLogger(module_name) - if not logger.parent.handlers: - ch = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s') - ch.setFormatter(formatter) - logger.parent.addHandler(ch) - else: - logger = logging.getLogger('plip') - - return logger diff --git a/plip/basic/parallel.py b/plip/basic/parallel.py deleted file mode 100644 index 1f71943..0000000 --- a/plip/basic/parallel.py +++ /dev/null @@ -1,52 +0,0 @@ -import itertools -import multiprocessing -from builtins import zip -from functools import partial - -from numpy import asarray - - -class SubProcessError(Exception): - def __init__(self, e, exitcode=1): - self.exitcode = exitcode - super(SubProcessError, self).__init__(e) - - pass - - -def universal_worker(input_pair): - """This is a wrapper function expecting a tiplet of function, single - argument, dict of keyword arguments. The provided function is called - with the appropriate arguments.""" - function, arg, kwargs = input_pair - return function(arg, **kwargs) - - -def pool_args(function, sequence, kwargs): - """Return a single iterator of n elements of lists of length 3, given a sequence of len n.""" - return zip(itertools.repeat(function), sequence, itertools.repeat(kwargs)) - - -def parallel_fn(f): - """Simple wrapper function, returning a parallel version of the given function f. - The function f must have one argument and may have an arbitray number of - keyword arguments. """ - - def simple_parallel(func, sequence, **args): - """ f takes an element of sequence as input and the keyword args in **args""" - if 'processes' in args: - processes = args.get('processes') - del args['processes'] - else: - processes = multiprocessing.cpu_count() - - pool = multiprocessing.Pool(processes) # depends on available cores - - result = pool.map_async(universal_worker, pool_args(func, sequence, args)) - pool.close() - pool.join() - cleaned = [x for x in result.get() if x is not None] # getting results - cleaned = asarray(cleaned) - return cleaned - - return partial(simple_parallel, f) diff --git a/plip/basic/remote.py b/plip/basic/remote.py deleted file mode 100644 index 0c67f6f..0000000 --- a/plip/basic/remote.py +++ /dev/null @@ -1,95 +0,0 @@ -from collections import namedtuple - -hbonds_info = namedtuple('hbonds_info', 'ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id') -hydrophobic_info = namedtuple('hydrophobic_info', 'bs_ids lig_ids pairs_ids') -halogen_info = namedtuple('halogen_info', 'don_id acc_id') -pistack_info = namedtuple('pistack_info', 'proteinring_atoms, proteinring_center ligandring_atoms ' - 'ligandring_center type') -pication_info = namedtuple('pication_info', 'ring_center charge_center ring_atoms charge_atoms, protcharged') -sbridge_info = namedtuple('sbridge_info', 'positive_atoms negative_atoms positive_center negative_center protispos') -wbridge_info = namedtuple('wbridge_info', 'don_id acc_id water_id protisdon') -metal_info = namedtuple('metal_info', 'metal_id, target_id location') - - -class VisualizerData: - """Contains all information on a complex relevant for visualization. Can be pickled""" - - def __init__(self, mol, site): - pcomp = mol - pli = mol.interaction_sets[site] - ligand = pli.ligand - - # General Information - self.lig_members = sorted(pli.ligand.members) - self.sourcefile = pcomp.sourcefiles['pdbcomplex'] - self.corrected_pdb = pcomp.corrected_pdb - self.pdbid = mol.pymol_name - self.hetid = ligand.hetid - self.ligandtype = ligand.type - self.chain = ligand.chain if not ligand.chain == "0" else "" # #@todo Fix this - self.position = str(ligand.position) - self.uid = ":".join([self.hetid, self.chain, self.position]) - self.outpath = mol.output_path - self.metal_ids = [x.m_orig_idx for x in pli.ligand.metals] - self.unpaired_hba_idx = pli.unpaired_hba_orig_idx - self.unpaired_hbd_idx = pli.unpaired_hbd_orig_idx - self.unpaired_hal_idx = pli.unpaired_hal_orig_idx - - # Information on Interactions - - # Hydrophobic Contacts - # Contains IDs of contributing binding site, ligand atoms and the pairings - hydroph_pairs_id = [(h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts] - self.hydrophobic_contacts = hydrophobic_info(bs_ids=[hp[0] for hp in hydroph_pairs_id], - lig_ids=[hp[1] for hp in hydroph_pairs_id], - pairs_ids=hydroph_pairs_id) - - # Hydrogen Bonds - # #@todo Don't use indices, simplify this code here - hbonds_ldon, hbonds_pdon = pli.hbonds_ldon, pli.hbonds_pdon - hbonds_ldon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon] - hbonds_pdon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon] - self.hbonds = hbonds_info(ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], - lig_don_id=[hb[1] for hb in hbonds_ldon_id], - prot_acc_id=[hb[0] for hb in hbonds_ldon_id], - pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], - prot_don_id=[hb[1] for hb in hbonds_pdon_id], - lig_acc_id=[hb[0] for hb in hbonds_pdon_id]) - - # Halogen Bonds - self.halogen_bonds = [halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) - for h in pli.halogen_bonds] - - # Pistacking - self.pistacking = [pistack_info(proteinring_atoms=pistack.proteinring.atoms_orig_idx, - proteinring_center=pistack.proteinring.center, - ligandring_atoms=pistack.ligandring.atoms_orig_idx, - ligandring_center=pistack.ligandring.center, - type=pistack.type) for pistack in pli.pistacking] - - # Pi-cation interactions - self.pication = [pication_info(ring_center=picat.ring.center, - charge_center=picat.charge.center, - ring_atoms=picat.ring.atoms_orig_idx, - charge_atoms=picat.charge.atoms_orig_idx, - protcharged=picat.protcharged) - for picat in pli.pication_paro + pli.pication_laro] - - # Salt Bridges - self.saltbridges = [sbridge_info(positive_atoms=sbridge.positive.atoms_orig_idx, - negative_atoms=sbridge.negative.atoms_orig_idx, - positive_center=sbridge.positive.center, - negative_center=sbridge.negative.center, - protispos=sbridge.protispos) - for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg] - - # Water Bridgese('wbridge_info', 'don_id acc_id water_id protisdon') - self.waterbridges = [wbridge_info(don_id=wbridge.d_orig_idx, - acc_id=wbridge.a_orig_idx, - water_id=wbridge.water_orig_idx, - protisdon=wbridge.protisdon) for wbridge in pli.water_bridges] - - # Metal Complexes - self.metal_complexes = [metal_info(metal_id=metalc.metal_orig_idx, - target_id=metalc.target_orig_idx, - location=metalc.location) for metalc in pli.metal_complexes] diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py deleted file mode 100644 index 6b3ef7b..0000000 --- a/plip/basic/supplemental.py +++ /dev/null @@ -1,434 +0,0 @@ -import gzip -import itertools -import os -import platform -import re -import subprocess -import sys -import tempfile -import zipfile -from collections import namedtuple - -import numpy as np -from openbabel import pybel -from openbabel.pybel import Atom - -from plip.basic import config, logger - -logger = logger.get_logger() - -# Windows and MacOS -if os.name != 'nt' and platform.system() != 'Darwin': # Resource module not available for Windows - import resource - -# Settings -np.seterr(all='ignore') # No runtime warnings - - -def tmpfile(prefix, direc): - """Returns the path to a newly created temporary file.""" - return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc) - - -def is_lig(hetid): - """Checks if a PDB compound can be excluded as a small molecule ligand""" - h = hetid.upper() - return not (h == 'HOH' or h in config.UNSUPPORTED) - - -def extract_pdbid(string): - """Use regular expressions to get a PDB ID from a string""" - p = re.compile("[0-9][0-9a-z]{3}") - m = p.search(string.lower()) - try: - return m.group() - except AttributeError: - return "UnknownProtein" - - -def whichrestype(atom): - """Returns the residue name of an Pybel or OpenBabel atom.""" - atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom - return atom.GetResidue().GetName() if atom.GetResidue() is not None else None - - -def whichresnumber(atom): - """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file).""" - atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom - return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None - - -def whichchain(atom): - """Returns the residue number of an PyBel or OpenBabel atom.""" - atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom - return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None - - -######################### -# Mathematical operations -######################### - - -def euclidean3d(v1, v2): - """Faster implementation of euclidean distance for the 3D case.""" - if not len(v1) == 3 and len(v2) == 3: - return None - return np.sqrt((v1[0] - v2[0]) ** 2 + (v1[1] - v2[1]) ** 2 + (v1[2] - v2[2]) ** 2) - - -def vector(p1, p2): - """Vector from p1 to p2. - :param p1: coordinates of point p1 - :param p2: coordinates of point p2 - :returns : numpy array with vector coordinates - """ - return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))]) - - -def vecangle(v1, v2, deg=True): - """Calculate the angle between two vectors - :param v1: coordinates of vector v1 - :param v2: coordinates of vector v2 - :param deg: whether to return degrees or radians - :returns : angle in degree or rad - """ - if np.array_equal(v1, v2): - return 0.0 - dm = np.dot(v1, v2) - cm = np.linalg.norm(v1) * np.linalg.norm(v2) - angle = np.arccos(dm / cm) # Round here to prevent floating point errors - return np.degrees([angle, ])[0] if deg else angle - - -def normalize_vector(v): - """Take a vector and return the normalized vector - :param v: a vector v - :returns : normalized vector v - """ - norm = np.linalg.norm(v) - return v / norm if not norm == 0 else v - - -def centroid(coo): - """Calculates the centroid from a 3D point cloud and returns the coordinates - :param coo: Array of coordinate arrays - :returns : centroid coordinates as list - """ - return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])))) - - -def projection(pnormal1, ppoint, tpoint): - """Calculates the centroid from a 3D point cloud and returns the coordinates - :param pnormal1: normal of plane - :param ppoint: coordinates of point in the plane - :param tpoint: coordinates of point to be projected - :returns : coordinates of point orthogonally projected on the plane - """ - # Choose the plane normal pointing to the point to be projected - pnormal2 = [coo * (-1) for coo in pnormal1] - d1 = euclidean3d(tpoint, pnormal1 + ppoint) - d2 = euclidean3d(tpoint, pnormal2 + ppoint) - pnormal = pnormal1 if d1 < d2 else pnormal2 - # Calculate the projection of tpoint to the plane - sn = -np.dot(pnormal, vector(ppoint, tpoint)) - sd = np.dot(pnormal, pnormal) - sb = sn / sd - return [c1 + c2 for c1, c2 in zip(tpoint, [sb * pn for pn in pnormal])] - - -def cluster_doubles(double_list): - """Given a list of doubles, they are clustered if they share one element - :param double_list: list of doubles - :returns : list of clusters (tuples) - """ - location = {} # hashtable of which cluster each element is in - clusters = [] - # Go through each double - for t in double_list: - a, b = t[0], t[1] - # If they both are already in different clusters, merge the clusters - if a in location and b in location: - if location[a] != location[b]: - if location[a] < location[b]: - clusters[location[a]] = clusters[location[a]].union(clusters[location[b]]) # Merge clusters - clusters = clusters[:location[b]] + clusters[location[b] + 1:] - else: - clusters[location[b]] = clusters[location[b]].union(clusters[location[a]]) # Merge clusters - clusters = clusters[:location[a]] + clusters[location[a] + 1:] - # Rebuild index of locations for each element as they have changed now - location = {} - for i, cluster in enumerate(clusters): - for c in cluster: - location[c] = i - else: - # If a is already in a cluster, add b to that cluster - if a in location: - clusters[location[a]].add(b) - location[b] = location[a] - # If b is already in a cluster, add a to that cluster - if b in location: - clusters[location[b]].add(a) - location[a] = location[b] - # If neither a nor b is in any cluster, create a new one with a and b - if not (b in location and a in location): - clusters.append(set(t)) - location[a] = len(clusters) - 1 - location[b] = len(clusters) - 1 - return map(tuple, clusters) - - -################# -# File operations -################# - -def tilde_expansion(folder_path): - """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>.""" - return os.path.expanduser(folder_path) if '~' in folder_path else folder_path - - -def folder_exists(folder_path): - """Checks if a folder exists""" - return os.path.exists(folder_path) - - -def create_folder_if_not_exists(folder_path): - """Creates a folder if it does not exists.""" - folder_path = tilde_expansion(folder_path) - folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path - direc = os.path.dirname(folder_path) - if not folder_exists(direc): - os.makedirs(direc) - - -def cmd_exists(c): - return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 - - -################ -# PyMOL-specific -################ - - -def initialize_pymol(options): - """Initializes PyMOL""" - import pymol - # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround) - pymol.finish_launching(args=['pymol', options, '-K']) - pymol.cmd.reinitialize() - - -def start_pymol(quiet=False, options='-p', run=False): - """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output. - Command line options can be passed as the second argument.""" - import pymol - pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:] - if run: - initialize_pymol(options) - if quiet: - pymol.cmd.feedback('disable', 'all', 'everything') - - -def nucleotide_linkage(residues): - """Support for DNA/RNA ligands by finding missing covalent linkages to stitch DNA/RNA together.""" - - nuc_covalent = [] - ####################################### - # Basic support for RNA/DNA as ligand # - ####################################### - nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'] - dna_rna = {} # Dictionary of DNA/RNA residues by chain - covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") - # Create missing covlinkage entries for DNA/RNA - for ligand in residues: - resname, chain, pos = ligand - if resname in nucleotides: - if chain not in dna_rna: - dna_rna[chain] = [(resname, pos), ] - else: - dna_rna[chain].append((resname, pos)) - for chain in dna_rna: - nuc_list = dna_rna[chain] - for i, nucleotide in enumerate(nuc_list): - if not i == len(nuc_list) - 1: - name, pos = nucleotide - nextnucleotide = nuc_list[i + 1] - nextname, nextpos = nextnucleotide - newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='', - id2=nextname, chain2=chain, pos2=nextpos, conf2='') - nuc_covalent.append(newlink) - - return nuc_covalent - - -def ring_is_planar(ring, r_atoms): - """Given a set of ring atoms, check if the ring is sufficiently planar - to be considered aromatic""" - normals = [] - for a in r_atoms: - adj = pybel.ob.OBAtomAtomIter(a.OBAtom) - # Check for neighboring atoms in the ring - n_coords = [pybel.Atom(neigh).coords for neigh in adj if ring.IsMember(neigh)] - vec1, vec2 = vector(a.coords, n_coords[0]), vector(a.coords, n_coords[1]) - normals.append(np.cross(vec1, vec2)) - # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less - for n1, n2 in itertools.product(normals, repeat=2): - arom_angle = vecangle(n1, n2) - if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]): - return False - return True - - -def classify_by_name(names): - """Classify a (composite) ligand by the HETID(s)""" - if len(names) > 3: # Polymer - if len(set(config.RNA).intersection(set(names))) != 0: - ligtype = 'RNA' - elif len(set(config.DNA).intersection(set(names))) != 0: - ligtype = 'DNA' - else: - ligtype = "POLYMER" - else: - ligtype = 'SMALLMOLECULE' - - for name in names: - if name in config.METAL_IONS: - if len(names) == 1: - ligtype = 'ION' - else: - if "ION" not in ligtype: - ligtype += '+ION' - return ligtype - - -def sort_members_by_importance(members): - """Sort the members of a composite ligand according to two criteria: - 1. Split up in main and ion group. Ion groups are located behind the main group. - 2. Within each group, sort by chain and position.""" - main = [x for x in members if x[0] not in config.METAL_IONS] - ion = [x for x in members if x[0] in config.METAL_IONS] - sorted_main = sorted(main, key=lambda x: (x[1], x[2])) - sorted_main = sorted(main, key=lambda x: (x[1], x[2])) - sorted_ion = sorted(ion, key=lambda x: (x[1], x[2])) - return sorted_main + sorted_ion - - -def get_isomorphisms(reference, lig): - """Get all isomorphisms of the ligand.""" - query = pybel.ob.CompileMoleculeQuery(reference.OBMol) - mappr = pybel.ob.OBIsomorphismMapper.GetInstance(query) - if all: - isomorphs = pybel.ob.vvpairUIntUInt() - mappr.MapAll(lig.OBMol, isomorphs) - else: - isomorphs = pybel.ob.vpairUIntUInt() - mappr.MapFirst(lig.OBMol, isomorphs) - isomorphs = [isomorphs] - logger.debug(f'number of isomorphisms: {len(isomorphs)}') - # @todo Check which isomorphism to take - return isomorphs - - -def canonicalize(lig, preserve_bond_order=False): - """Get the canonical atom order for the ligand.""" - atomorder = None - # Get canonical atom order - - lig = pybel.ob.OBMol(lig.OBMol) - if not preserve_bond_order: - for bond in pybel.ob.OBMolBondIter(lig): - if bond.GetBondOrder() != 1: - bond.SetBondOrder(1) - lig.DeleteData(pybel.ob.StereoData) - lig = pybel.Molecule(lig) - testcan = lig.write(format='can') - try: - pybel.readstring('can', testcan) - reference = pybel.readstring('can', testcan) - except IOError: - testcan, reference = '', '' - if testcan != '': - reference.removeh() - isomorphs = get_isomorphisms(reference, lig) # isomorphs now holds all isomorphisms within the molecule - if not len(isomorphs) == 0: - smi_dict = {} - smi_to_can = isomorphs[0] - for x in smi_to_can: - smi_dict[int(x[1]) + 1] = int(x[0]) + 1 - atomorder = [smi_dict[x + 1] for x in range(len(lig.atoms))] - else: - atomorder = None - return atomorder - - -def int32_to_negative(int32): - """Checks if a suspicious number (e.g. ligand position) is in fact a negative number represented as a - 32 bit integer and returns the actual number. - """ - dct = {} - if int32 == 4294967295: # Special case in some structures (note, this is just a workaround) - return -1 - for i in range(-1000, -1): - dct[np.uint32(i)] = i - if int32 in dct: - return dct[int32] - else: - return int32 - - -def read_pdb(pdbfname, as_string=False): - """Reads a given PDB file and returns a Pybel Molecule.""" - pybel.ob.obErrorLog.StopLogging() # Suppress all OpenBabel warnings - if os.name != 'nt': # Resource module not available for Windows - maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1] - resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize)) - sys.setrecursionlimit(10 ** 5) # increase Python recursion limit - return readmol(pdbfname, as_string=as_string) - - -def read(fil): - """Returns a file handler and detects gzipped files.""" - if os.path.splitext(fil)[-1] == '.gz': - return gzip.open(fil, 'rb') - elif os.path.splitext(fil)[-1] == '.zip': - zf = zipfile.ZipFile(fil, 'r') - return zf.open(zf.infolist()[0].filename) - else: - return open(fil, 'r') - - -def readmol(path, as_string=False): - """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type. - In contrast to the standard Pybel implementation, the file is closed properly.""" - supported_formats = ['pdb'] - # Fix for Windows-generated files: Remove carriage return characters - if "\r" in path and as_string: - path = path.replace('\r', '') - - for sformat in supported_formats: - obc = pybel.ob.OBConversion() - obc.SetInFormat(sformat) - logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel') - - # Read molecules with single bond information - if as_string: - try: - mymol = pybel.readstring(sformat, path) - except IOError: - logger.error('no valid file format provided') - sys.exit(1) - else: - read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None}) - try: - mymol = next(read_file) - except StopIteration: - logger.error('file contains no valid molecules') - sys.exit(1) - - logger.debug('molecule successfully read') - - # Assign multiple bonds - mymol.OBMol.PerceiveBondOrders() - return mymol, sformat - - logger.error('no valid file format provided') - sys.exit(1) |