diff options
Diffstat (limited to 'plip/basic')
| -rw-r--r-- | plip/basic/__init__.py | 0 | ||||
| -rw-r--r-- | plip/basic/config.py | 625 | ||||
| -rw-r--r-- | plip/basic/logger.py | 21 | ||||
| -rw-r--r-- | plip/basic/parallel.py | 52 | ||||
| -rw-r--r-- | plip/basic/remote.py | 95 | ||||
| -rw-r--r-- | plip/basic/supplemental.py | 434 | 
6 files changed, 0 insertions, 1227 deletions
diff --git a/plip/basic/__init__.py b/plip/basic/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/plip/basic/__init__.py +++ /dev/null diff --git a/plip/basic/config.py b/plip/basic/config.py deleted file mode 100644 index 91103fa..0000000 --- a/plip/basic/config.py +++ /dev/null @@ -1,625 +0,0 @@ -__version__ = "2.1.3" -__maintainer__ = "PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai" - -import logging - -DEFAULT_LOG_LEVEL = logging.INFO -VERBOSE = False  # Set verbose mode -QUIET = False  # Set verbose mode -SILENT = False  # Set verbose mode -MAXTHREADS = 1  # Maximum number of main threads for binding site visualization -XML = False -TXT = False -PICS = False -PYMOL = False -STDOUT = False -RAWSTRING = False  # use raw strings for input / output -OUTPATH = "./" -BASEPATH = "./" -BREAKCOMPOSITE = False  # Break up composite ligands with covalent bonds -ALTLOC = False  # Consider alternate locations -PLUGIN_MODE = False  # Special mode for PLIP in Plugins (e.g. PyMOL) -NOFIX = False  # Turn off fixing of errors in PDB files -NOFIXFILE = False  # Turn off writing to files for fixed PDB structures -PEPTIDES = []  # Definition which chains should be considered as peptide ligands -INTRA = None -KEEPMOD = False -DNARECEPTOR = False -OUTPUTFILENAME = "report"  # Naming for the TXT and XML report files -NOPDBCANMAP = False  # Skip calculation of mapping canonical atom order: PDB atom order -NOHYDRO = False  # Do not add hydrogen bonds (in case already present in the structure) - -# Configuration file for Protein-Ligand Interaction Profiler (PLIP) -# Set thresholds for detection of interactions - -# Thresholds for detection (global variables) -BS_DIST = 7.5  # Determines maximum distance to include binding site residues -AROMATIC_PLANARITY = ( -    5.0  # Determines allowed deviation from planarity in aromatic rings -) -MIN_DIST = 0.5  # Minimum distance for all distance thresholds -# Some distance thresholds were extended (max. 1.0A) if too restrictive too account for low-quality structures -HYDROPH_DIST_MAX = 4.0  # Distance cutoff for detection of hydrophobic contacts -HBOND_DIST_MAX = 4.1  # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A -HBOND_DON_ANGLE_MIN = ( -    100  # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 -) -PISTACK_DIST_MAX = ( -    5.5  # Max. distance for parallel or offset pistacking (McGaughey, 1998) -) -PISTACK_ANG_DEV = ( -    30  # Max. Deviation from parallel or perpendicular orientation (in degrees) -) -PISTACK_OFFSET_MAX = 2.0  # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A) -PICATION_DIST_MAX = 6.0  # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999) -SALTBRIDGE_DIST_MAX = 5.5  # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5 -HALOGEN_DIST_MAX = 4.0  # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5 -HALOGEN_ACC_ANGLE = ( -    120  # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) -) -HALOGEN_DON_ANGLE = ( -    165  # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) -) -HALOGEN_ANGLE_DEV = 30  # Max. deviation from optimal angle -WATER_BRIDGE_MINDIST = ( -    2.5  # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 -) -WATER_BRIDGE_MAXDIST = ( -    4.1  # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 -) -WATER_BRIDGE_OMEGA_MIN = 71  # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9 -WATER_BRIDGE_OMEGA_MAX = 140  # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) -WATER_BRIDGE_THETA_MIN = 100  # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005) -METAL_DIST_MAX = ( -    3.0  # Max. distance between metal ion and interacting atom (Harding, 2001) -) - -# Other thresholds -MAX_COMPOSITE_LENGTH = 200  # Filter out ligands with more than 200 fragments - -######### -# Names # -######### - -# Names of RNA and DNA residues to be considered (detection by name) -RNA = ["U", "A", "C", "G"] -DNA = ["DT", "DA", "DC", "DG"] - -############# -# Whitelist # -############# - -# Metal cations which can be complexed - -METAL_IONS = [ -    "CA", -    "CO", -    "MG", -    "MN", -    "FE", -    "CU", -    "ZN", -    "FE2", -    "FE3", -    "FE4", -    "LI", -    "NA", -    "K", -    "RB", -    "SR", -    "CS", -    "BA", -    "CR", -    "NI", -    "FE1", -    "NI", -    "RU", -    "RU1", -    "RH", -    "RH1", -    "PD", -    "AG", -    "CD", -    "LA", -    "W", -    "W1", -    "OS", -    "IR", -    "PT", -    "PT1", -    "AU", -    "HG", -    "CE", -    "PR", -    "SM", -    "EU", -    "GD", -    "TB", -    "YB", -    "LU", -    "AL", -    "GA", -    "IN", -    "SB", -    "TL", -    "PB", -] - -############## -# Blacklists # -############## - -# Other Ions/Atoms (not yet supported) -anions = ["CL", "IOD", "BR"] -other = ["MO", "RE", "HO"] -UNSUPPORTED = anions + other - -# BioLiP list of suspicious ligands from http://zhanglab.ccmb.med.umich.edu/BioLiP/ligand_list (2014-07-10) -# Add ligands here to get warnings for possible artifacts. -biolip_list = [ -    "ACE", -    "HEX", -    "TMA", -    "SOH", -    "P25", -    "CCN", -    "PR", -    "PTN", -    "NO3", -    "TCN", -    "BU1", -    "BCN", -    "CB3", -    "HCS", -    "NBN", -    "SO2", -    "MO6", -    "MOH", -    "CAC", -    "MLT", -    "KR", -    "6PH", -    "MOS", -    "UNL", -    "MO3", -    "SR", -    "CD3", -    "PB", -    "ACM", -    "LUT", -    "PMS", -    "OF3", -    "SCN", -    "DHB", -    "E4N", -    "13P", -    "3PG", -    "CYC", -    "NC", -    "BEN", -    "NAO", -    "PHQ", -    "EPE", -    "BME", -    "TB", -    "ETE", -    "EU", -    "OES", -    "EAP", -    "ETX", -    "BEZ", -    "5AD", -    "OC2", -    "OLA", -    "GD3", -    "CIT", -    "DVT", -    "OC6", -    "MW1", -    "OC3", -    "SRT", -    "LCO", -    "BNZ", -    "PPV", -    "STE", -    "PEG", -    "RU", -    "PGE", -    "MPO", -    "B3P", -    "OGA", -    "IPA", -    "LU", -    "EDO", -    "MAC", -    "9PE", -    "IPH", -    "MBN", -    "C1O", -    "1PE", -    "YF3", -    "PEF", -    "GD", -    "8PE", -    "DKA", -    "RB", -    "YB", -    "GGD", -    "SE4", -    "LHG", -    "SMO", -    "DGD", -    "CMO", -    "MLI", -    "MW2", -    "DTT", -    "DOD", -    "7PH", -    "PBM", -    "AU", -    "FOR", -    "PSC", -    "TG1", -    "KAI", -    "1PG", -    "DGA", -    "IR", -    "PE4", -    "VO4", -    "ACN", -    "AG", -    "MO4", -    "OCL", -    "6UL", -    "CHT", -    "RHD", -    "CPS", -    "IR3", -    "OC4", -    "MTE", -    "HGC", -    "CR", -    "PC1", -    "HC4", -    "TEA", -    "BOG", -    "PEO", -    "PE5", -    "144", -    "IUM", -    "LMG", -    "SQU", -    "MMC", -    "GOL", -    "NVP", -    "AU3", -    "3PH", -    "PT4", -    "PGO", -    "ICT", -    "OCM", -    "BCR", -    "PG4", -    "L4P", -    "OPC", -    "OXM", -    "SQD", -    "PQ9", -    "BAM", -    "PI", -    "PL9", -    "P6G", -    "IRI", -    "15P", -    "MAE", -    "MBO", -    "FMT", -    "L1P", -    "DUD", -    "PGV", -    "CD1", -    "P33", -    "DTU", -    "XAT", -    "CD", -    "THE", -    "U1", -    "NA", -    "MW3", -    "BHG", -    "Y1", -    "OCT", -    "BET", -    "MPD", -    "HTO", -    "IBM", -    "D01", -    "HAI", -    "HED", -    "CAD", -    "CUZ", -    "TLA", -    "SO4", -    "OC5", -    "ETF", -    "MRD", -    "PT", -    "PHB", -    "URE", -    "MLA", -    "TGL", -    "PLM", -    "NET", -    "LAC", -    "AUC", -    "UNX", -    "GA", -    "DMS", -    "MO2", -    "LA", -    "NI", -    "TE", -    "THJ", -    "NHE", -    "HAE", -    "MO1", -    "DAO", -    "3PE", -    "LMU", -    "DHJ", -    "FLC", -    "SAL", -    "GAI", -    "ORO", -    "HEZ", -    "TAM", -    "TRA", -    "NEX", -    "CXS", -    "LCP", -    "HOH", -    "OCN", -    "PER", -    "ACY", -    "MH2", -    "ARS", -    "12P", -    "L3P", -    "PUT", -    "IN", -    "CS", -    "NAW", -    "SB", -    "GUN", -    "SX", -    "CON", -    "C2O", -    "EMC", -    "BO4", -    "BNG", -    "MN5", -    "__O", -    "K", -    "CYN", -    "H2S", -    "MH3", -    "YT3", -    "P22", -    "KO4", -    "1AG", -    "CE", -    "IPL", -    "PG6", -    "MO5", -    "F09", -    "HO", -    "AL", -    "TRS", -    "EOH", -    "GCP", -    "MSE", -    "AKR", -    "NCO", -    "PO4", -    "L2P", -    "LDA", -    "SIN", -    "DMI", -    "SM", -    "DTD", -    "SGM", -    "DIO", -    "PPI", -    "DDQ", -    "DPO", -    "HCA", -    "CO5", -    "PD", -    "OS", -    "OH", -    "NA6", -    "NAG", -    "W", -    "ENC", -    "NA5", -    "LI1", -    "P4C", -    "GLV", -    "DMF", -    "ACT", -    "BTB", -    "6PL", -    "BGL", -    "OF1", -    "N8E", -    "LMT", -    "THM", -    "EU3", -    "PGR", -    "NA2", -    "FOL", -    "543", -    "_CP", -    "PEK", -    "NSP", -    "PEE", -    "OCO", -    "CHD", -    "CO2", -    "TBU", -    "UMQ", -    "MES", -    "NH4", -    "CD5", -    "HTG", -    "DEP", -    "OC1", -    "KDO", -    "2PE", -    "PE3", -    "IOD", -    "NDG", -    "CL", -    "HG", -    "F", -    "XE", -    "TL", -    "BA", -    "LI", -    "BR", -    "TAU", -    "TCA", -    "SPD", -    "SPM", -    "SAR", -    "SUC", -    "PAM", -    "SPH", -    "BE7", -    "P4G", -    "OLC", -    "OLB", -    "LFA", -    "D10", -    "D12", -    "DD9", -    "HP6", -    "R16", -    "PX4", -    "TRD", -    "UND", -    "FTT", -    "MYR", -    "RG1", -    "IMD", -    "DMN", -    "KEN", -    "C14", -    "UPL", -    "CMJ", -    "ULI", -    "MYS", -    "TWT", -    "M2M", -    "P15", -    "PG0", -    "PEU", -    "AE3", -    "TOE", -    "ME2", -    "PE8", -    "6JZ", -    "7PE", -    "P3G", -    "7PG", -    "PG5", -    "16P", -    "XPE", -    "PGF", -    "AE4", -    "7E8", -    "7E9", -    "MVC", -    "TAR", -    "DMR", -    "LMR", -    "NER", -    "02U", -    "NGZ", -    "LXB", -    "A2G", -    "BM3", -    "NAA", -    "NGA", -    "LXZ", -    "PX6", -    "PA8", -    "LPP", -    "PX2", -    "MYY", -    "PX8", -    "PD7", -    "XP4", -    "XPA", -    "PEV", -    "6PE", -    "PEX", -    "PEH", -    "PTY", -    "YB2", -    "PGT", -    "CN3", -    "AGA", -    "DGG", -    "CD4", -    "CN6", -    "CDL", -    "PG8", -    "MGE", -    "DTV", -    "L44", -    "L2C", -    "4AG", -    "B3H", -    "1EM", -    "DDR", -    "I42", -    "CNS", -    "PC7", -    "HGP", -    "PC8", -    "HGX", -    "LIO", -    "PLD", -    "PC2", -    "PCF", -    "MC3", -    "P1O", -    "PLC", -    "PC6", -    "HSH", -    "BXC", -    "HSG", -    "DPG", -    "2DP", -    "POV", -    "PCW", -    "GVT", -    "CE9", -    "CXE", -    "C10", -    "CE1", -    "SPJ", -    "SPZ", -    "SPK", -    "SPW", -    "HT3", -    "HTH", -    "2OP", -    "3NI", -    "BO3", -    "DET", -    "D1D", -    "SWE", -    "SOG", -] diff --git a/plip/basic/logger.py b/plip/basic/logger.py deleted file mode 100644 index ca65411..0000000 --- a/plip/basic/logger.py +++ /dev/null @@ -1,21 +0,0 @@ -import inspect -import logging - - -def get_logger(): -    """ -    Configures a base logger and returns a module-specific sub-logger of the calling module. -    """ -    frame = inspect.stack()[1] -    module_name = inspect.getmodule(frame[0]).__name__ -    if module_name != '__main__': -        logger = logging.getLogger(module_name) -        if not logger.parent.handlers: -            ch = logging.StreamHandler() -            formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s') -            ch.setFormatter(formatter) -            logger.parent.addHandler(ch) -    else: -        logger = logging.getLogger('plip') - -    return logger diff --git a/plip/basic/parallel.py b/plip/basic/parallel.py deleted file mode 100644 index 1f71943..0000000 --- a/plip/basic/parallel.py +++ /dev/null @@ -1,52 +0,0 @@ -import itertools -import multiprocessing -from builtins import zip -from functools import partial - -from numpy import asarray - - -class SubProcessError(Exception): -    def __init__(self, e, exitcode=1): -        self.exitcode = exitcode -        super(SubProcessError, self).__init__(e) - -    pass - - -def universal_worker(input_pair): -    """This is a wrapper function expecting a tiplet of function, single -       argument, dict of keyword arguments. The provided function is called -       with the appropriate arguments.""" -    function, arg, kwargs = input_pair -    return function(arg, **kwargs) - - -def pool_args(function, sequence, kwargs): -    """Return a single iterator of n elements of lists of length 3, given a sequence of len n.""" -    return zip(itertools.repeat(function), sequence, itertools.repeat(kwargs)) - - -def parallel_fn(f): -    """Simple wrapper function, returning a parallel version of the given function f. -       The function f must have one argument and may have an arbitray number of -       keyword arguments. """ - -    def simple_parallel(func, sequence, **args): -        """ f takes an element of sequence as input and the keyword args in **args""" -        if 'processes' in args: -            processes = args.get('processes') -            del args['processes'] -        else: -            processes = multiprocessing.cpu_count() - -        pool = multiprocessing.Pool(processes)  # depends on available cores - -        result = pool.map_async(universal_worker, pool_args(func, sequence, args)) -        pool.close() -        pool.join() -        cleaned = [x for x in result.get() if x is not None]  # getting results -        cleaned = asarray(cleaned) -        return cleaned - -    return partial(simple_parallel, f) diff --git a/plip/basic/remote.py b/plip/basic/remote.py deleted file mode 100644 index 0c67f6f..0000000 --- a/plip/basic/remote.py +++ /dev/null @@ -1,95 +0,0 @@ -from collections import namedtuple - -hbonds_info = namedtuple('hbonds_info', 'ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id') -hydrophobic_info = namedtuple('hydrophobic_info', 'bs_ids lig_ids pairs_ids') -halogen_info = namedtuple('halogen_info', 'don_id acc_id') -pistack_info = namedtuple('pistack_info', 'proteinring_atoms, proteinring_center ligandring_atoms ' -                                          'ligandring_center type') -pication_info = namedtuple('pication_info', 'ring_center charge_center ring_atoms charge_atoms, protcharged') -sbridge_info = namedtuple('sbridge_info', 'positive_atoms negative_atoms positive_center negative_center protispos') -wbridge_info = namedtuple('wbridge_info', 'don_id acc_id water_id protisdon') -metal_info = namedtuple('metal_info', 'metal_id, target_id location') - - -class VisualizerData: -    """Contains all information on a complex relevant for visualization. Can be pickled""" - -    def __init__(self, mol, site): -        pcomp = mol -        pli = mol.interaction_sets[site] -        ligand = pli.ligand - -        # General Information -        self.lig_members = sorted(pli.ligand.members) -        self.sourcefile = pcomp.sourcefiles['pdbcomplex'] -        self.corrected_pdb = pcomp.corrected_pdb -        self.pdbid = mol.pymol_name -        self.hetid = ligand.hetid -        self.ligandtype = ligand.type -        self.chain = ligand.chain if not ligand.chain == "0" else ""  # #@todo Fix this -        self.position = str(ligand.position) -        self.uid = ":".join([self.hetid, self.chain, self.position]) -        self.outpath = mol.output_path -        self.metal_ids = [x.m_orig_idx for x in pli.ligand.metals] -        self.unpaired_hba_idx = pli.unpaired_hba_orig_idx -        self.unpaired_hbd_idx = pli.unpaired_hbd_orig_idx -        self.unpaired_hal_idx = pli.unpaired_hal_orig_idx - -        # Information on Interactions - -        # Hydrophobic Contacts -        # Contains IDs of contributing binding site, ligand atoms and the pairings -        hydroph_pairs_id = [(h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts] -        self.hydrophobic_contacts = hydrophobic_info(bs_ids=[hp[0] for hp in hydroph_pairs_id], -                                                     lig_ids=[hp[1] for hp in hydroph_pairs_id], -                                                     pairs_ids=hydroph_pairs_id) - -        # Hydrogen Bonds -        # #@todo Don't use indices, simplify this code here -        hbonds_ldon, hbonds_pdon = pli.hbonds_ldon, pli.hbonds_pdon -        hbonds_ldon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon] -        hbonds_pdon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon] -        self.hbonds = hbonds_info(ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], -                                  lig_don_id=[hb[1] for hb in hbonds_ldon_id], -                                  prot_acc_id=[hb[0] for hb in hbonds_ldon_id], -                                  pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], -                                  prot_don_id=[hb[1] for hb in hbonds_pdon_id], -                                  lig_acc_id=[hb[0] for hb in hbonds_pdon_id]) - -        # Halogen Bonds -        self.halogen_bonds = [halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) -                              for h in pli.halogen_bonds] - -        # Pistacking -        self.pistacking = [pistack_info(proteinring_atoms=pistack.proteinring.atoms_orig_idx, -                                        proteinring_center=pistack.proteinring.center, -                                        ligandring_atoms=pistack.ligandring.atoms_orig_idx, -                                        ligandring_center=pistack.ligandring.center, -                                        type=pistack.type) for pistack in pli.pistacking] - -        # Pi-cation interactions -        self.pication = [pication_info(ring_center=picat.ring.center, -                                       charge_center=picat.charge.center, -                                       ring_atoms=picat.ring.atoms_orig_idx, -                                       charge_atoms=picat.charge.atoms_orig_idx, -                                       protcharged=picat.protcharged) -                         for picat in pli.pication_paro + pli.pication_laro] - -        # Salt Bridges -        self.saltbridges = [sbridge_info(positive_atoms=sbridge.positive.atoms_orig_idx, -                                         negative_atoms=sbridge.negative.atoms_orig_idx, -                                         positive_center=sbridge.positive.center, -                                         negative_center=sbridge.negative.center, -                                         protispos=sbridge.protispos) -                            for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg] - -        # Water Bridgese('wbridge_info', 'don_id acc_id water_id protisdon') -        self.waterbridges = [wbridge_info(don_id=wbridge.d_orig_idx, -                                          acc_id=wbridge.a_orig_idx, -                                          water_id=wbridge.water_orig_idx, -                                          protisdon=wbridge.protisdon) for wbridge in pli.water_bridges] - -        # Metal Complexes -        self.metal_complexes = [metal_info(metal_id=metalc.metal_orig_idx, -                                           target_id=metalc.target_orig_idx, -                                           location=metalc.location) for metalc in pli.metal_complexes] diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py deleted file mode 100644 index 6b3ef7b..0000000 --- a/plip/basic/supplemental.py +++ /dev/null @@ -1,434 +0,0 @@ -import gzip -import itertools -import os -import platform -import re -import subprocess -import sys -import tempfile -import zipfile -from collections import namedtuple - -import numpy as np -from openbabel import pybel -from openbabel.pybel import Atom - -from plip.basic import config, logger - -logger = logger.get_logger() - -# Windows and MacOS -if os.name != 'nt' and platform.system() != 'Darwin':  # Resource module not available for Windows -    import resource - -# Settings -np.seterr(all='ignore')  # No runtime warnings - - -def tmpfile(prefix, direc): -    """Returns the path to a newly created temporary file.""" -    return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc) - - -def is_lig(hetid): -    """Checks if a PDB compound can be excluded as a small molecule ligand""" -    h = hetid.upper() -    return not (h == 'HOH' or h in config.UNSUPPORTED) - - -def extract_pdbid(string): -    """Use regular expressions to get a PDB ID from a string""" -    p = re.compile("[0-9][0-9a-z]{3}") -    m = p.search(string.lower()) -    try: -        return m.group() -    except AttributeError: -        return "UnknownProtein" - - -def whichrestype(atom): -    """Returns the residue name of an Pybel or OpenBabel atom.""" -    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom -    return atom.GetResidue().GetName() if atom.GetResidue() is not None else None - - -def whichresnumber(atom): -    """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file).""" -    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom -    return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None - - -def whichchain(atom): -    """Returns the residue number of an PyBel or OpenBabel atom.""" -    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom -    return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None - - -######################### -# Mathematical operations -######################### - - -def euclidean3d(v1, v2): -    """Faster implementation of euclidean distance for the 3D case.""" -    if not len(v1) == 3 and len(v2) == 3: -        return None -    return np.sqrt((v1[0] - v2[0]) ** 2 + (v1[1] - v2[1]) ** 2 + (v1[2] - v2[2]) ** 2) - - -def vector(p1, p2): -    """Vector from p1 to p2. -    :param p1: coordinates of point p1 -    :param p2: coordinates of point p2 -    :returns : numpy array with vector coordinates -    """ -    return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))]) - - -def vecangle(v1, v2, deg=True): -    """Calculate the angle between two vectors -    :param v1: coordinates of vector v1 -    :param v2: coordinates of vector v2 -    :param deg: whether to return degrees or radians -    :returns : angle in degree or rad -    """ -    if np.array_equal(v1, v2): -        return 0.0 -    dm = np.dot(v1, v2) -    cm = np.linalg.norm(v1) * np.linalg.norm(v2) -    angle = np.arccos(dm / cm)  # Round here to prevent floating point errors -    return np.degrees([angle, ])[0] if deg else angle - - -def normalize_vector(v): -    """Take a vector and return the normalized vector -    :param v: a vector v -    :returns : normalized vector v -    """ -    norm = np.linalg.norm(v) -    return v / norm if not norm == 0 else v - - -def centroid(coo): -    """Calculates the centroid from a 3D point cloud and returns the coordinates -    :param coo: Array of coordinate arrays -    :returns : centroid coordinates as list -    """ -    return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])))) - - -def projection(pnormal1, ppoint, tpoint): -    """Calculates the centroid from a 3D point cloud and returns the coordinates -    :param pnormal1: normal of plane -    :param ppoint: coordinates of point in the plane -    :param tpoint: coordinates of point to be projected -    :returns : coordinates of point orthogonally projected on the plane -    """ -    # Choose the plane normal pointing to the point to be projected -    pnormal2 = [coo * (-1) for coo in pnormal1] -    d1 = euclidean3d(tpoint, pnormal1 + ppoint) -    d2 = euclidean3d(tpoint, pnormal2 + ppoint) -    pnormal = pnormal1 if d1 < d2 else pnormal2 -    # Calculate the projection of tpoint to the plane -    sn = -np.dot(pnormal, vector(ppoint, tpoint)) -    sd = np.dot(pnormal, pnormal) -    sb = sn / sd -    return [c1 + c2 for c1, c2 in zip(tpoint, [sb * pn for pn in pnormal])] - - -def cluster_doubles(double_list): -    """Given a list of doubles, they are clustered if they share one element -    :param double_list: list of doubles -    :returns : list of clusters (tuples) -    """ -    location = {}  # hashtable of which cluster each element is in -    clusters = [] -    # Go through each double -    for t in double_list: -        a, b = t[0], t[1] -        # If they both are already in different clusters, merge the clusters -        if a in location and b in location: -            if location[a] != location[b]: -                if location[a] < location[b]: -                    clusters[location[a]] = clusters[location[a]].union(clusters[location[b]])  # Merge clusters -                    clusters = clusters[:location[b]] + clusters[location[b] + 1:] -                else: -                    clusters[location[b]] = clusters[location[b]].union(clusters[location[a]])  # Merge clusters -                    clusters = clusters[:location[a]] + clusters[location[a] + 1:] -                # Rebuild index of locations for each element as they have changed now -                location = {} -                for i, cluster in enumerate(clusters): -                    for c in cluster: -                        location[c] = i -        else: -            # If a is already in a cluster, add b to that cluster -            if a in location: -                clusters[location[a]].add(b) -                location[b] = location[a] -            # If b is already in a cluster, add a to that cluster -            if b in location: -                clusters[location[b]].add(a) -                location[a] = location[b] -            # If neither a nor b is in any cluster, create a new one with a and b -            if not (b in location and a in location): -                clusters.append(set(t)) -                location[a] = len(clusters) - 1 -                location[b] = len(clusters) - 1 -    return map(tuple, clusters) - - -################# -# File operations -################# - -def tilde_expansion(folder_path): -    """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>.""" -    return os.path.expanduser(folder_path) if '~' in folder_path else folder_path - - -def folder_exists(folder_path): -    """Checks if a folder exists""" -    return os.path.exists(folder_path) - - -def create_folder_if_not_exists(folder_path): -    """Creates a folder if it does not exists.""" -    folder_path = tilde_expansion(folder_path) -    folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path -    direc = os.path.dirname(folder_path) -    if not folder_exists(direc): -        os.makedirs(direc) - - -def cmd_exists(c): -    return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 - - -################ -# PyMOL-specific -################ - - -def initialize_pymol(options): -    """Initializes PyMOL""" -    import pymol -    # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround) -    pymol.finish_launching(args=['pymol', options, '-K']) -    pymol.cmd.reinitialize() - - -def start_pymol(quiet=False, options='-p', run=False): -    """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output. -    Command line options can be passed as the second argument.""" -    import pymol -    pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:] -    if run: -        initialize_pymol(options) -    if quiet: -        pymol.cmd.feedback('disable', 'all', 'everything') - - -def nucleotide_linkage(residues): -    """Support for DNA/RNA ligands by finding missing covalent linkages to stitch DNA/RNA together.""" - -    nuc_covalent = [] -    ####################################### -    # Basic support for RNA/DNA as ligand # -    ####################################### -    nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'] -    dna_rna = {}  # Dictionary of DNA/RNA residues by chain -    covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") -    # Create missing covlinkage entries for DNA/RNA -    for ligand in residues: -        resname, chain, pos = ligand -        if resname in nucleotides: -            if chain not in dna_rna: -                dna_rna[chain] = [(resname, pos), ] -            else: -                dna_rna[chain].append((resname, pos)) -    for chain in dna_rna: -        nuc_list = dna_rna[chain] -        for i, nucleotide in enumerate(nuc_list): -            if not i == len(nuc_list) - 1: -                name, pos = nucleotide -                nextnucleotide = nuc_list[i + 1] -                nextname, nextpos = nextnucleotide -                newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='', -                                     id2=nextname, chain2=chain, pos2=nextpos, conf2='') -                nuc_covalent.append(newlink) - -    return nuc_covalent - - -def ring_is_planar(ring, r_atoms): -    """Given a set of ring atoms, check if the ring is sufficiently planar -    to be considered aromatic""" -    normals = [] -    for a in r_atoms: -        adj = pybel.ob.OBAtomAtomIter(a.OBAtom) -        # Check for neighboring atoms in the ring -        n_coords = [pybel.Atom(neigh).coords for neigh in adj if ring.IsMember(neigh)] -        vec1, vec2 = vector(a.coords, n_coords[0]), vector(a.coords, n_coords[1]) -        normals.append(np.cross(vec1, vec2)) -    # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less -    for n1, n2 in itertools.product(normals, repeat=2): -        arom_angle = vecangle(n1, n2) -        if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]): -            return False -    return True - - -def classify_by_name(names): -    """Classify a (composite) ligand by the HETID(s)""" -    if len(names) > 3:  # Polymer -        if len(set(config.RNA).intersection(set(names))) != 0: -            ligtype = 'RNA' -        elif len(set(config.DNA).intersection(set(names))) != 0: -            ligtype = 'DNA' -        else: -            ligtype = "POLYMER" -    else: -        ligtype = 'SMALLMOLECULE' - -    for name in names: -        if name in config.METAL_IONS: -            if len(names) == 1: -                ligtype = 'ION' -            else: -                if "ION" not in ligtype: -                    ligtype += '+ION' -    return ligtype - - -def sort_members_by_importance(members): -    """Sort the members of a composite ligand according to two criteria: -    1. Split up in main and ion group. Ion groups are located behind the main group. -    2. Within each group, sort by chain and position.""" -    main = [x for x in members if x[0] not in config.METAL_IONS] -    ion = [x for x in members if x[0] in config.METAL_IONS] -    sorted_main = sorted(main, key=lambda x: (x[1], x[2])) -    sorted_main = sorted(main, key=lambda x: (x[1], x[2])) -    sorted_ion = sorted(ion, key=lambda x: (x[1], x[2])) -    return sorted_main + sorted_ion - - -def get_isomorphisms(reference, lig): -    """Get all isomorphisms of the ligand.""" -    query = pybel.ob.CompileMoleculeQuery(reference.OBMol) -    mappr = pybel.ob.OBIsomorphismMapper.GetInstance(query) -    if all: -        isomorphs = pybel.ob.vvpairUIntUInt() -        mappr.MapAll(lig.OBMol, isomorphs) -    else: -        isomorphs = pybel.ob.vpairUIntUInt() -        mappr.MapFirst(lig.OBMol, isomorphs) -        isomorphs = [isomorphs] -    logger.debug(f'number of isomorphisms: {len(isomorphs)}') -    # @todo Check which isomorphism to take -    return isomorphs - - -def canonicalize(lig, preserve_bond_order=False): -    """Get the canonical atom order for the ligand.""" -    atomorder = None -    # Get canonical atom order - -    lig = pybel.ob.OBMol(lig.OBMol) -    if not preserve_bond_order: -        for bond in pybel.ob.OBMolBondIter(lig): -            if bond.GetBondOrder() != 1: -                bond.SetBondOrder(1) -    lig.DeleteData(pybel.ob.StereoData) -    lig = pybel.Molecule(lig) -    testcan = lig.write(format='can') -    try: -        pybel.readstring('can', testcan) -        reference = pybel.readstring('can', testcan) -    except IOError: -        testcan, reference = '', '' -    if testcan != '': -        reference.removeh() -        isomorphs = get_isomorphisms(reference, lig)  # isomorphs now holds all isomorphisms within the molecule -        if not len(isomorphs) == 0: -            smi_dict = {} -            smi_to_can = isomorphs[0] -            for x in smi_to_can: -                smi_dict[int(x[1]) + 1] = int(x[0]) + 1 -            atomorder = [smi_dict[x + 1] for x in range(len(lig.atoms))] -        else: -            atomorder = None -    return atomorder - - -def int32_to_negative(int32): -    """Checks if a suspicious number (e.g. ligand position) is in fact a negative number represented as a -    32 bit integer and returns the actual number. -    """ -    dct = {} -    if int32 == 4294967295:  # Special case in some structures (note, this is just a workaround) -        return -1 -    for i in range(-1000, -1): -        dct[np.uint32(i)] = i -    if int32 in dct: -        return dct[int32] -    else: -        return int32 - - -def read_pdb(pdbfname, as_string=False): -    """Reads a given PDB file and returns a Pybel Molecule.""" -    pybel.ob.obErrorLog.StopLogging()  # Suppress all OpenBabel warnings -    if os.name != 'nt':  # Resource module not available for Windows -        maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1] -        resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize)) -    sys.setrecursionlimit(10 ** 5)  # increase Python recursion limit -    return readmol(pdbfname, as_string=as_string) - - -def read(fil): -    """Returns a file handler and detects gzipped files.""" -    if os.path.splitext(fil)[-1] == '.gz': -        return gzip.open(fil, 'rb') -    elif os.path.splitext(fil)[-1] == '.zip': -        zf = zipfile.ZipFile(fil, 'r') -        return zf.open(zf.infolist()[0].filename) -    else: -        return open(fil, 'r') - - -def readmol(path, as_string=False): -    """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type. -    In contrast to the standard Pybel implementation, the file is closed properly.""" -    supported_formats = ['pdb'] -    # Fix for Windows-generated files: Remove carriage return characters -    if "\r" in path and as_string: -        path = path.replace('\r', '') - -    for sformat in supported_formats: -        obc = pybel.ob.OBConversion() -        obc.SetInFormat(sformat) -        logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel') - -        # Read molecules with single bond information -        if as_string: -            try: -                mymol = pybel.readstring(sformat, path) -            except IOError: -                logger.error('no valid file format provided') -                sys.exit(1) -        else: -            read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None}) -            try: -                mymol = next(read_file) -            except StopIteration: -                logger.error('file contains no valid molecules') -                sys.exit(1) - -        logger.debug('molecule successfully read') - -        # Assign multiple bonds -        mymol.OBMol.PerceiveBondOrders() -        return mymol, sformat - -    logger.error('no valid file format provided') -    sys.exit(1)  | 
