diff options
Diffstat (limited to 'plip/basic')
| -rw-r--r-- | plip/basic/config.py | 606 | ||||
| -rw-r--r-- | plip/basic/logger.py | 8 | ||||
| -rw-r--r-- | plip/basic/parallel.py | 6 | ||||
| -rw-r--r-- | plip/basic/remote.py | 132 | ||||
| -rw-r--r-- | plip/basic/supplemental.py | 156 | 
5 files changed, 753 insertions, 155 deletions
| diff --git a/plip/basic/config.py b/plip/basic/config.py index a7468cf..3770d44 100644 --- a/plip/basic/config.py +++ b/plip/basic/config.py @@ -1,5 +1,5 @@ -__version__ = '2.1.0-beta' -__maintainer__ = 'PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai' +__version__ = "2.1.0-beta" +__maintainer__ = "PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai"  import logging @@ -14,8 +14,8 @@ PICS = False  PYMOL = False  STDOUT = False  RAWSTRING = False  # use raw strings for input / output -OUTPATH = './' -BASEPATH = './' +OUTPATH = "./" +BASEPATH = "./"  BREAKCOMPOSITE = False  # Break up composite ligands with covalent bonds  ALTLOC = False  # Consider alternate locations  PLUGIN_MODE = False  # Special mode for PLIP in Plugins (e.g. PyMOL) @@ -34,27 +34,45 @@ NOHYDRO = False  # Do not add hydrogen bonds (in case already present in the str  # Thresholds for detection (global variables)  BS_DIST = 7.5  # Determines maximum distance to include binding site residues -AROMATIC_PLANARITY = 5.0  # Determines allowed deviation from planarity in aromatic rings +AROMATIC_PLANARITY = ( +    5.0  # Determines allowed deviation from planarity in aromatic rings +)  MIN_DIST = 0.5  # Minimum distance for all distance thresholds  # Some distance thresholds were extended (max. 1.0A) if too restrictive too account for low-quality structures  HYDROPH_DIST_MAX = 4.0  # Distance cutoff for detection of hydrophobic contacts  HBOND_DIST_MAX = 4.1  # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A -HBOND_DON_ANGLE_MIN = 100  # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 -PISTACK_DIST_MAX = 5.5  # Max. distance for parallel or offset pistacking (McGaughey, 1998) -PISTACK_ANG_DEV = 30  # Max. Deviation from parallel or perpendicular orientation (in degrees) +HBOND_DON_ANGLE_MIN = ( +    100  # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 +) +PISTACK_DIST_MAX = ( +    5.5  # Max. distance for parallel or offset pistacking (McGaughey, 1998) +) +PISTACK_ANG_DEV = ( +    30  # Max. Deviation from parallel or perpendicular orientation (in degrees) +)  PISTACK_OFFSET_MAX = 2.0  # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A)  PICATION_DIST_MAX = 6.0  # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999)  SALTBRIDGE_DIST_MAX = 5.5  # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5  HALOGEN_DIST_MAX = 4.0  # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5 -HALOGEN_ACC_ANGLE = 120  # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) -HALOGEN_DON_ANGLE = 165  # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) +HALOGEN_ACC_ANGLE = ( +    120  # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) +) +HALOGEN_DON_ANGLE = ( +    165  # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) +)  HALOGEN_ANGLE_DEV = 30  # Max. deviation from optimal angle -WATER_BRIDGE_MINDIST = 2.5  # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 -WATER_BRIDGE_MAXDIST = 4.1  # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 +WATER_BRIDGE_MINDIST = ( +    2.5  # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 +) +WATER_BRIDGE_MAXDIST = ( +    4.1  # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 +)  WATER_BRIDGE_OMEGA_MIN = 71  # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9  WATER_BRIDGE_OMEGA_MAX = 140  # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005)  WATER_BRIDGE_THETA_MIN = 100  # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005) -METAL_DIST_MAX = 3.0  # Max. distance between metal ion and interacting atom (Harding, 2001) +METAL_DIST_MAX = ( +    3.0  # Max. distance between metal ion and interacting atom (Harding, 2001) +)  # Other thresholds  MAX_COMPOSITE_LENGTH = 200  # Filter out ligands with more than 200 fragments @@ -64,8 +82,8 @@ MAX_COMPOSITE_LENGTH = 200  # Filter out ligands with more than 200 fragments  #########  # Names of RNA and DNA residues to be considered (detection by name) -RNA = ['U', 'A', 'C', 'G'] -DNA = ['DT', 'DA', 'DC', 'DG'] +RNA = ["U", "A", "C", "G"] +DNA = ["DT", "DA", "DC", "DG"]  #############  # Whitelist # @@ -73,49 +91,535 @@ DNA = ['DT', 'DA', 'DC', 'DG']  # Metal cations which can be complexed -METAL_IONS = ['CA', 'CO', 'MG', 'MN', 'FE', 'CU', 'ZN', 'FE2', 'FE3', 'FE4', 'LI', 'NA', 'K', 'RB', 'SR', 'CS', 'BA', -              'CR', 'NI', 'FE1', 'NI', 'RU', 'RU1', 'RH', 'RH1', 'PD', 'AG', 'CD', 'LA', 'W', 'W1', 'OS', 'IR', 'PT', -              'PT1', 'AU', 'HG', 'CE', 'PR', 'SM', 'EU', 'GD', 'TB', 'YB', 'LU', 'AL', 'GA', 'IN', 'SB', 'TL', 'PB'] +METAL_IONS = [ +    "CA", +    "CO", +    "MG", +    "MN", +    "FE", +    "CU", +    "ZN", +    "FE2", +    "FE3", +    "FE4", +    "LI", +    "NA", +    "K", +    "RB", +    "SR", +    "CS", +    "BA", +    "CR", +    "NI", +    "FE1", +    "NI", +    "RU", +    "RU1", +    "RH", +    "RH1", +    "PD", +    "AG", +    "CD", +    "LA", +    "W", +    "W1", +    "OS", +    "IR", +    "PT", +    "PT1", +    "AU", +    "HG", +    "CE", +    "PR", +    "SM", +    "EU", +    "GD", +    "TB", +    "YB", +    "LU", +    "AL", +    "GA", +    "IN", +    "SB", +    "TL", +    "PB", +]  ##############  # Blacklists #  ##############  # Other Ions/Atoms (not yet supported) -anions = ['CL', 'IOD', 'BR'] -other = ['MO', 'RE', 'HO'] +anions = ["CL", "IOD", "BR"] +other = ["MO", "RE", "HO"]  UNSUPPORTED = anions + other  # BioLiP list of suspicious ligands from http://zhanglab.ccmb.med.umich.edu/BioLiP/ligand_list (2014-07-10)  # Add ligands here to get warnings for possible artifacts. -biolip_list = ['ACE', 'HEX', 'TMA', 'SOH', 'P25', 'CCN', 'PR', 'PTN', 'NO3', 'TCN', 'BU1', 'BCN', 'CB3', 'HCS', 'NBN', -               'SO2', 'MO6', 'MOH', 'CAC', 'MLT', 'KR', '6PH', 'MOS', 'UNL', 'MO3', 'SR', 'CD3', 'PB', 'ACM', 'LUT', -               'PMS', 'OF3', 'SCN', 'DHB', 'E4N', '13P', '3PG', 'CYC', 'NC', 'BEN', 'NAO', 'PHQ', 'EPE', 'BME', 'TB', -               'ETE', 'EU', 'OES', 'EAP', 'ETX', 'BEZ', '5AD', 'OC2', 'OLA', 'GD3', 'CIT', 'DVT', 'OC6', 'MW1', 'OC3', -               'SRT', 'LCO', 'BNZ', 'PPV', 'STE', 'PEG', 'RU', 'PGE', 'MPO', 'B3P', 'OGA', 'IPA', 'LU', 'EDO', 'MAC', -               '9PE', 'IPH', 'MBN', 'C1O', '1PE', 'YF3', 'PEF', 'GD', '8PE', 'DKA', 'RB', 'YB', 'GGD', 'SE4', 'LHG', -               'SMO', 'DGD', 'CMO', 'MLI', 'MW2', 'DTT', 'DOD', '7PH', 'PBM', 'AU', 'FOR', 'PSC', 'TG1', 'KAI', '1PG', -               'DGA', 'IR', 'PE4', 'VO4', 'ACN', 'AG', 'MO4', 'OCL', '6UL', 'CHT', 'RHD', 'CPS', 'IR3', 'OC4', 'MTE', -               'HGC', 'CR', 'PC1', 'HC4', 'TEA', 'BOG', 'PEO', 'PE5', '144', 'IUM', 'LMG', 'SQU', 'MMC', 'GOL', 'NVP', -               'AU3', '3PH', 'PT4', 'PGO', 'ICT', 'OCM', 'BCR', 'PG4', 'L4P', 'OPC', 'OXM', 'SQD', 'PQ9', 'BAM', 'PI', -               'PL9', 'P6G', 'IRI', '15P', 'MAE', 'MBO', 'FMT', 'L1P', 'DUD', 'PGV', 'CD1', 'P33', 'DTU', 'XAT', 'CD', -               'THE', 'U1', 'NA', 'MW3', 'BHG', 'Y1', 'OCT', 'BET', 'MPD', 'HTO', 'IBM', 'D01', 'HAI', 'HED', 'CAD', -               'CUZ', 'TLA', 'SO4', 'OC5', 'ETF', 'MRD', 'PT', 'PHB', 'URE', 'MLA', 'TGL', 'PLM', 'NET', 'LAC', 'AUC', -               'UNX', 'GA', 'DMS', 'MO2', 'LA', 'NI', 'TE', 'THJ', 'NHE', 'HAE', 'MO1', 'DAO', '3PE', 'LMU', 'DHJ', -               'FLC', 'SAL', 'GAI', 'ORO', 'HEZ', 'TAM', 'TRA', 'NEX', 'CXS', 'LCP', 'HOH', 'OCN', 'PER', 'ACY', 'MH2', -               'ARS', '12P', 'L3P', 'PUT', 'IN', 'CS', 'NAW', 'SB', 'GUN', 'SX', 'CON', 'C2O', 'EMC', 'BO4', 'BNG', -               'MN5', '__O', 'K', 'CYN', 'H2S', 'MH3', 'YT3', 'P22', 'KO4', '1AG', 'CE', 'IPL', 'PG6', 'MO5', 'F09', -               'HO', 'AL', 'TRS', 'EOH', 'GCP', 'MSE', 'AKR', 'NCO', 'PO4', 'L2P', 'LDA', 'SIN', 'DMI', 'SM', 'DTD', -               'SGM', 'DIO', 'PPI', 'DDQ', 'DPO', 'HCA', 'CO5', 'PD', 'OS', 'OH', 'NA6', 'NAG', 'W', 'ENC', 'NA5', -               'LI1', 'P4C', 'GLV', 'DMF', 'ACT', 'BTB', '6PL', 'BGL', 'OF1', 'N8E', 'LMT', 'THM', 'EU3', 'PGR', 'NA2', -               'FOL', '543', '_CP', 'PEK', 'NSP', 'PEE', 'OCO', 'CHD', 'CO2', 'TBU', 'UMQ', 'MES', 'NH4', 'CD5', 'HTG', -               'DEP', 'OC1', 'KDO', '2PE', 'PE3', 'IOD', 'NDG', 'CL', 'HG', 'F', 'XE', 'TL', 'BA', 'LI', 'BR', 'TAU', -               'TCA', 'SPD', 'SPM', 'SAR', 'SUC', 'PAM', 'SPH', 'BE7', 'P4G', 'OLC', 'OLB', 'LFA', 'D10', 'D12', 'DD9', -               'HP6', 'R16', 'PX4', 'TRD', 'UND', 'FTT', 'MYR', 'RG1', 'IMD', 'DMN', 'KEN', 'C14', 'UPL', 'CMJ', 'ULI', -               'MYS', 'TWT', 'M2M', 'P15', 'PG0', 'PEU', 'AE3', 'TOE', 'ME2', 'PE8', '6JZ', '7PE', 'P3G', '7PG', 'PG5', -               '16P', 'XPE', 'PGF', 'AE4', '7E8', '7E9', 'MVC', 'TAR', 'DMR', 'LMR', 'NER', '02U', 'NGZ', 'LXB', 'A2G', -               'BM3', 'NAA', 'NGA', 'LXZ', 'PX6', 'PA8', 'LPP', 'PX2', 'MYY', 'PX8', 'PD7', 'XP4', 'XPA', 'PEV', '6PE', -               'PEX', 'PEH', 'PTY', 'YB2', 'PGT', 'CN3', 'AGA', 'DGG', 'CD4', 'CN6', 'CDL', 'PG8', 'MGE', 'DTV', 'L44', -               'L2C', '4AG', 'B3H', '1EM', 'DDR', 'I42', 'CNS', 'PC7', 'HGP', 'PC8', 'HGX', 'LIO', 'PLD', 'PC2', 'PCF', -               'MC3', 'P1O', 'PLC', 'PC6', 'HSH', 'BXC', 'HSG', 'DPG', '2DP', 'POV', 'PCW', 'GVT', 'CE9', 'CXE', 'C10', -               'CE1', 'SPJ', 'SPZ', 'SPK', 'SPW', 'HT3', 'HTH', '2OP', '3NI', 'BO3', 'DET', 'D1D', 'SWE', 'SOG'] +biolip_list = [ +    "ACE", +    "HEX", +    "TMA", +    "SOH", +    "P25", +    "CCN", +    "PR", +    "PTN", +    "NO3", +    "TCN", +    "BU1", +    "BCN", +    "CB3", +    "HCS", +    "NBN", +    "SO2", +    "MO6", +    "MOH", +    "CAC", +    "MLT", +    "KR", +    "6PH", +    "MOS", +    "UNL", +    "MO3", +    "SR", +    "CD3", +    "PB", +    "ACM", +    "LUT", +    "PMS", +    "OF3", +    "SCN", +    "DHB", +    "E4N", +    "13P", +    "3PG", +    "CYC", +    "NC", +    "BEN", +    "NAO", +    "PHQ", +    "EPE", +    "BME", +    "TB", +    "ETE", +    "EU", +    "OES", +    "EAP", +    "ETX", +    "BEZ", +    "5AD", +    "OC2", +    "OLA", +    "GD3", +    "CIT", +    "DVT", +    "OC6", +    "MW1", +    "OC3", +    "SRT", +    "LCO", +    "BNZ", +    "PPV", +    "STE", +    "PEG", +    "RU", +    "PGE", +    "MPO", +    "B3P", +    "OGA", +    "IPA", +    "LU", +    "EDO", +    "MAC", +    "9PE", +    "IPH", +    "MBN", +    "C1O", +    "1PE", +    "YF3", +    "PEF", +    "GD", +    "8PE", +    "DKA", +    "RB", +    "YB", +    "GGD", +    "SE4", +    "LHG", +    "SMO", +    "DGD", +    "CMO", +    "MLI", +    "MW2", +    "DTT", +    "DOD", +    "7PH", +    "PBM", +    "AU", +    "FOR", +    "PSC", +    "TG1", +    "KAI", +    "1PG", +    "DGA", +    "IR", +    "PE4", +    "VO4", +    "ACN", +    "AG", +    "MO4", +    "OCL", +    "6UL", +    "CHT", +    "RHD", +    "CPS", +    "IR3", +    "OC4", +    "MTE", +    "HGC", +    "CR", +    "PC1", +    "HC4", +    "TEA", +    "BOG", +    "PEO", +    "PE5", +    "144", +    "IUM", +    "LMG", +    "SQU", +    "MMC", +    "GOL", +    "NVP", +    "AU3", +    "3PH", +    "PT4", +    "PGO", +    "ICT", +    "OCM", +    "BCR", +    "PG4", +    "L4P", +    "OPC", +    "OXM", +    "SQD", +    "PQ9", +    "BAM", +    "PI", +    "PL9", +    "P6G", +    "IRI", +    "15P", +    "MAE", +    "MBO", +    "FMT", +    "L1P", +    "DUD", +    "PGV", +    "CD1", +    "P33", +    "DTU", +    "XAT", +    "CD", +    "THE", +    "U1", +    "NA", +    "MW3", +    "BHG", +    "Y1", +    "OCT", +    "BET", +    "MPD", +    "HTO", +    "IBM", +    "D01", +    "HAI", +    "HED", +    "CAD", +    "CUZ", +    "TLA", +    "SO4", +    "OC5", +    "ETF", +    "MRD", +    "PT", +    "PHB", +    "URE", +    "MLA", +    "TGL", +    "PLM", +    "NET", +    "LAC", +    "AUC", +    "UNX", +    "GA", +    "DMS", +    "MO2", +    "LA", +    "NI", +    "TE", +    "THJ", +    "NHE", +    "HAE", +    "MO1", +    "DAO", +    "3PE", +    "LMU", +    "DHJ", +    "FLC", +    "SAL", +    "GAI", +    "ORO", +    "HEZ", +    "TAM", +    "TRA", +    "NEX", +    "CXS", +    "LCP", +    "HOH", +    "OCN", +    "PER", +    "ACY", +    "MH2", +    "ARS", +    "12P", +    "L3P", +    "PUT", +    "IN", +    "CS", +    "NAW", +    "SB", +    "GUN", +    "SX", +    "CON", +    "C2O", +    "EMC", +    "BO4", +    "BNG", +    "MN5", +    "__O", +    "K", +    "CYN", +    "H2S", +    "MH3", +    "YT3", +    "P22", +    "KO4", +    "1AG", +    "CE", +    "IPL", +    "PG6", +    "MO5", +    "F09", +    "HO", +    "AL", +    "TRS", +    "EOH", +    "GCP", +    "MSE", +    "AKR", +    "NCO", +    "PO4", +    "L2P", +    "LDA", +    "SIN", +    "DMI", +    "SM", +    "DTD", +    "SGM", +    "DIO", +    "PPI", +    "DDQ", +    "DPO", +    "HCA", +    "CO5", +    "PD", +    "OS", +    "OH", +    "NA6", +    "NAG", +    "W", +    "ENC", +    "NA5", +    "LI1", +    "P4C", +    "GLV", +    "DMF", +    "ACT", +    "BTB", +    "6PL", +    "BGL", +    "OF1", +    "N8E", +    "LMT", +    "THM", +    "EU3", +    "PGR", +    "NA2", +    "FOL", +    "543", +    "_CP", +    "PEK", +    "NSP", +    "PEE", +    "OCO", +    "CHD", +    "CO2", +    "TBU", +    "UMQ", +    "MES", +    "NH4", +    "CD5", +    "HTG", +    "DEP", +    "OC1", +    "KDO", +    "2PE", +    "PE3", +    "IOD", +    "NDG", +    "CL", +    "HG", +    "F", +    "XE", +    "TL", +    "BA", +    "LI", +    "BR", +    "TAU", +    "TCA", +    "SPD", +    "SPM", +    "SAR", +    "SUC", +    "PAM", +    "SPH", +    "BE7", +    "P4G", +    "OLC", +    "OLB", +    "LFA", +    "D10", +    "D12", +    "DD9", +    "HP6", +    "R16", +    "PX4", +    "TRD", +    "UND", +    "FTT", +    "MYR", +    "RG1", +    "IMD", +    "DMN", +    "KEN", +    "C14", +    "UPL", +    "CMJ", +    "ULI", +    "MYS", +    "TWT", +    "M2M", +    "P15", +    "PG0", +    "PEU", +    "AE3", +    "TOE", +    "ME2", +    "PE8", +    "6JZ", +    "7PE", +    "P3G", +    "7PG", +    "PG5", +    "16P", +    "XPE", +    "PGF", +    "AE4", +    "7E8", +    "7E9", +    "MVC", +    "TAR", +    "DMR", +    "LMR", +    "NER", +    "02U", +    "NGZ", +    "LXB", +    "A2G", +    "BM3", +    "NAA", +    "NGA", +    "LXZ", +    "PX6", +    "PA8", +    "LPP", +    "PX2", +    "MYY", +    "PX8", +    "PD7", +    "XP4", +    "XPA", +    "PEV", +    "6PE", +    "PEX", +    "PEH", +    "PTY", +    "YB2", +    "PGT", +    "CN3", +    "AGA", +    "DGG", +    "CD4", +    "CN6", +    "CDL", +    "PG8", +    "MGE", +    "DTV", +    "L44", +    "L2C", +    "4AG", +    "B3H", +    "1EM", +    "DDR", +    "I42", +    "CNS", +    "PC7", +    "HGP", +    "PC8", +    "HGX", +    "LIO", +    "PLD", +    "PC2", +    "PCF", +    "MC3", +    "P1O", +    "PLC", +    "PC6", +    "HSH", +    "BXC", +    "HSG", +    "DPG", +    "2DP", +    "POV", +    "PCW", +    "GVT", +    "CE9", +    "CXE", +    "C10", +    "CE1", +    "SPJ", +    "SPZ", +    "SPK", +    "SPW", +    "HT3", +    "HTH", +    "2OP", +    "3NI", +    "BO3", +    "DET", +    "D1D", +    "SWE", +    "SOG", +] diff --git a/plip/basic/logger.py b/plip/basic/logger.py index ca65411..f15650e 100644 --- a/plip/basic/logger.py +++ b/plip/basic/logger.py @@ -8,14 +8,16 @@ def get_logger():      """      frame = inspect.stack()[1]      module_name = inspect.getmodule(frame[0]).__name__ -    if module_name != '__main__': +    if module_name != "__main__":          logger = logging.getLogger(module_name)          if not logger.parent.handlers:              ch = logging.StreamHandler() -            formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s') +            formatter = logging.Formatter( +                "%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s" +            )              ch.setFormatter(formatter)              logger.parent.addHandler(ch)      else: -        logger = logging.getLogger('plip') +        logger = logging.getLogger("plip")      return logger diff --git a/plip/basic/parallel.py b/plip/basic/parallel.py index 1f71943..cd0b93b 100644 --- a/plip/basic/parallel.py +++ b/plip/basic/parallel.py @@ -34,9 +34,9 @@ def parallel_fn(f):      def simple_parallel(func, sequence, **args):          """ f takes an element of sequence as input and the keyword args in **args""" -        if 'processes' in args: -            processes = args.get('processes') -            del args['processes'] +        if "processes" in args: +            processes = args.get("processes") +            del args["processes"]          else:              processes = multiprocessing.cpu_count() diff --git a/plip/basic/remote.py b/plip/basic/remote.py index 0c67f6f..18de4b7 100644 --- a/plip/basic/remote.py +++ b/plip/basic/remote.py @@ -1,14 +1,23 @@  from collections import namedtuple -hbonds_info = namedtuple('hbonds_info', 'ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id') -hydrophobic_info = namedtuple('hydrophobic_info', 'bs_ids lig_ids pairs_ids') -halogen_info = namedtuple('halogen_info', 'don_id acc_id') -pistack_info = namedtuple('pistack_info', 'proteinring_atoms, proteinring_center ligandring_atoms ' -                                          'ligandring_center type') -pication_info = namedtuple('pication_info', 'ring_center charge_center ring_atoms charge_atoms, protcharged') -sbridge_info = namedtuple('sbridge_info', 'positive_atoms negative_atoms positive_center negative_center protispos') -wbridge_info = namedtuple('wbridge_info', 'don_id acc_id water_id protisdon') -metal_info = namedtuple('metal_info', 'metal_id, target_id location') +hbonds_info = namedtuple( +    "hbonds_info", "ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id" +) +hydrophobic_info = namedtuple("hydrophobic_info", "bs_ids lig_ids pairs_ids") +halogen_info = namedtuple("halogen_info", "don_id acc_id") +pistack_info = namedtuple( +    "pistack_info", +    "proteinring_atoms, proteinring_center ligandring_atoms " "ligandring_center type", +) +pication_info = namedtuple( +    "pication_info", "ring_center charge_center ring_atoms charge_atoms, protcharged" +) +sbridge_info = namedtuple( +    "sbridge_info", +    "positive_atoms negative_atoms positive_center negative_center protispos", +) +wbridge_info = namedtuple("wbridge_info", "don_id acc_id water_id protisdon") +metal_info = namedtuple("metal_info", "metal_id, target_id location")  class VisualizerData: @@ -21,7 +30,7 @@ class VisualizerData:          # General Information          self.lig_members = sorted(pli.ligand.members) -        self.sourcefile = pcomp.sourcefiles['pdbcomplex'] +        self.sourcefile = pcomp.sourcefiles["pdbcomplex"]          self.corrected_pdb = pcomp.corrected_pdb          self.pdbid = mol.pymol_name          self.hetid = ligand.hetid @@ -39,57 +48,88 @@ class VisualizerData:          # Hydrophobic Contacts          # Contains IDs of contributing binding site, ligand atoms and the pairings -        hydroph_pairs_id = [(h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts] -        self.hydrophobic_contacts = hydrophobic_info(bs_ids=[hp[0] for hp in hydroph_pairs_id], -                                                     lig_ids=[hp[1] for hp in hydroph_pairs_id], -                                                     pairs_ids=hydroph_pairs_id) +        hydroph_pairs_id = [ +            (h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts +        ] +        self.hydrophobic_contacts = hydrophobic_info( +            bs_ids=[hp[0] for hp in hydroph_pairs_id], +            lig_ids=[hp[1] for hp in hydroph_pairs_id], +            pairs_ids=hydroph_pairs_id, +        )          # Hydrogen Bonds          # #@todo Don't use indices, simplify this code here          hbonds_ldon, hbonds_pdon = pli.hbonds_ldon, pli.hbonds_pdon          hbonds_ldon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon]          hbonds_pdon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon] -        self.hbonds = hbonds_info(ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], -                                  lig_don_id=[hb[1] for hb in hbonds_ldon_id], -                                  prot_acc_id=[hb[0] for hb in hbonds_ldon_id], -                                  pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], -                                  prot_don_id=[hb[1] for hb in hbonds_pdon_id], -                                  lig_acc_id=[hb[0] for hb in hbonds_pdon_id]) +        self.hbonds = hbonds_info( +            ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], +            lig_don_id=[hb[1] for hb in hbonds_ldon_id], +            prot_acc_id=[hb[0] for hb in hbonds_ldon_id], +            pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], +            prot_don_id=[hb[1] for hb in hbonds_pdon_id], +            lig_acc_id=[hb[0] for hb in hbonds_pdon_id], +        )          # Halogen Bonds -        self.halogen_bonds = [halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) -                              for h in pli.halogen_bonds] +        self.halogen_bonds = [ +            halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) +            for h in pli.halogen_bonds +        ]          # Pistacking -        self.pistacking = [pistack_info(proteinring_atoms=pistack.proteinring.atoms_orig_idx, -                                        proteinring_center=pistack.proteinring.center, -                                        ligandring_atoms=pistack.ligandring.atoms_orig_idx, -                                        ligandring_center=pistack.ligandring.center, -                                        type=pistack.type) for pistack in pli.pistacking] +        self.pistacking = [ +            pistack_info( +                proteinring_atoms=pistack.proteinring.atoms_orig_idx, +                proteinring_center=pistack.proteinring.center, +                ligandring_atoms=pistack.ligandring.atoms_orig_idx, +                ligandring_center=pistack.ligandring.center, +                type=pistack.type, +            ) +            for pistack in pli.pistacking +        ]          # Pi-cation interactions -        self.pication = [pication_info(ring_center=picat.ring.center, -                                       charge_center=picat.charge.center, -                                       ring_atoms=picat.ring.atoms_orig_idx, -                                       charge_atoms=picat.charge.atoms_orig_idx, -                                       protcharged=picat.protcharged) -                         for picat in pli.pication_paro + pli.pication_laro] +        self.pication = [ +            pication_info( +                ring_center=picat.ring.center, +                charge_center=picat.charge.center, +                ring_atoms=picat.ring.atoms_orig_idx, +                charge_atoms=picat.charge.atoms_orig_idx, +                protcharged=picat.protcharged, +            ) +            for picat in pli.pication_paro + pli.pication_laro +        ]          # Salt Bridges -        self.saltbridges = [sbridge_info(positive_atoms=sbridge.positive.atoms_orig_idx, -                                         negative_atoms=sbridge.negative.atoms_orig_idx, -                                         positive_center=sbridge.positive.center, -                                         negative_center=sbridge.negative.center, -                                         protispos=sbridge.protispos) -                            for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg] +        self.saltbridges = [ +            sbridge_info( +                positive_atoms=sbridge.positive.atoms_orig_idx, +                negative_atoms=sbridge.negative.atoms_orig_idx, +                positive_center=sbridge.positive.center, +                negative_center=sbridge.negative.center, +                protispos=sbridge.protispos, +            ) +            for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg +        ]          # Water Bridgese('wbridge_info', 'don_id acc_id water_id protisdon') -        self.waterbridges = [wbridge_info(don_id=wbridge.d_orig_idx, -                                          acc_id=wbridge.a_orig_idx, -                                          water_id=wbridge.water_orig_idx, -                                          protisdon=wbridge.protisdon) for wbridge in pli.water_bridges] +        self.waterbridges = [ +            wbridge_info( +                don_id=wbridge.d_orig_idx, +                acc_id=wbridge.a_orig_idx, +                water_id=wbridge.water_orig_idx, +                protisdon=wbridge.protisdon, +            ) +            for wbridge in pli.water_bridges +        ]          # Metal Complexes -        self.metal_complexes = [metal_info(metal_id=metalc.metal_orig_idx, -                                           target_id=metalc.target_orig_idx, -                                           location=metalc.location) for metalc in pli.metal_complexes] +        self.metal_complexes = [ +            metal_info( +                metal_id=metalc.metal_orig_idx, +                target_id=metalc.target_orig_idx, +                location=metalc.location, +            ) +            for metalc in pli.metal_complexes +        ] diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py index 6b3ef7b..7164497 100644 --- a/plip/basic/supplemental.py +++ b/plip/basic/supplemental.py @@ -18,22 +18,24 @@ from plip.basic import config, logger  logger = logger.get_logger()  # Windows and MacOS -if os.name != 'nt' and platform.system() != 'Darwin':  # Resource module not available for Windows +if ( +    os.name != "nt" and platform.system() != "Darwin" +):  # Resource module not available for Windows      import resource  # Settings -np.seterr(all='ignore')  # No runtime warnings +np.seterr(all="ignore")  # No runtime warnings  def tmpfile(prefix, direc):      """Returns the path to a newly created temporary file.""" -    return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc) +    return tempfile.mktemp(prefix=prefix, suffix=".pdb", dir=direc)  def is_lig(hetid):      """Checks if a PDB compound can be excluded as a small molecule ligand"""      h = hetid.upper() -    return not (h == 'HOH' or h in config.UNSUPPORTED) +    return not (h == "HOH" or h in config.UNSUPPORTED)  def extract_pdbid(string): @@ -48,19 +50,25 @@ def extract_pdbid(string):  def whichrestype(atom):      """Returns the residue name of an Pybel or OpenBabel atom.""" -    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom +    atom = ( +        atom if not isinstance(atom, Atom) else atom.OBAtom +    )  # Convert to OpenBabel Atom      return atom.GetResidue().GetName() if atom.GetResidue() is not None else None  def whichresnumber(atom):      """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file).""" -    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom +    atom = ( +        atom if not isinstance(atom, Atom) else atom.OBAtom +    )  # Convert to OpenBabel Atom      return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None  def whichchain(atom):      """Returns the residue number of an PyBel or OpenBabel atom.""" -    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom +    atom = ( +        atom if not isinstance(atom, Atom) else atom.OBAtom +    )  # Convert to OpenBabel Atom      return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None @@ -82,7 +90,11 @@ def vector(p1, p2):      :param p2: coordinates of point p2      :returns : numpy array with vector coordinates      """ -    return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))]) +    return ( +        None +        if len(p1) != len(p2) +        else np.array([p2[i] - p1[i] for i in range(len(p1))]) +    )  def vecangle(v1, v2, deg=True): @@ -97,7 +109,7 @@ def vecangle(v1, v2, deg=True):      dm = np.dot(v1, v2)      cm = np.linalg.norm(v1) * np.linalg.norm(v2)      angle = np.arccos(dm / cm)  # Round here to prevent floating point errors -    return np.degrees([angle, ])[0] if deg else angle +    return np.degrees([angle,])[0] if deg else angle  def normalize_vector(v): @@ -114,7 +126,12 @@ def centroid(coo):      :param coo: Array of coordinate arrays      :returns : centroid coordinates as list      """ -    return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])))) +    return list( +        map( +            np.mean, +            (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])), +        ) +    )  def projection(pnormal1, ppoint, tpoint): @@ -150,11 +167,15 @@ def cluster_doubles(double_list):          if a in location and b in location:              if location[a] != location[b]:                  if location[a] < location[b]: -                    clusters[location[a]] = clusters[location[a]].union(clusters[location[b]])  # Merge clusters -                    clusters = clusters[:location[b]] + clusters[location[b] + 1:] +                    clusters[location[a]] = clusters[location[a]].union( +                        clusters[location[b]] +                    )  # Merge clusters +                    clusters = clusters[: location[b]] + clusters[location[b] + 1 :]                  else: -                    clusters[location[b]] = clusters[location[b]].union(clusters[location[a]])  # Merge clusters -                    clusters = clusters[:location[a]] + clusters[location[a] + 1:] +                    clusters[location[b]] = clusters[location[b]].union( +                        clusters[location[a]] +                    )  # Merge clusters +                    clusters = clusters[: location[a]] + clusters[location[a] + 1 :]                  # Rebuild index of locations for each element as they have changed now                  location = {}                  for i, cluster in enumerate(clusters): @@ -181,9 +202,10 @@ def cluster_doubles(double_list):  # File operations  ################# +  def tilde_expansion(folder_path):      """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>.""" -    return os.path.expanduser(folder_path) if '~' in folder_path else folder_path +    return os.path.expanduser(folder_path) if "~" in folder_path else folder_path  def folder_exists(folder_path): @@ -194,14 +216,21 @@ def folder_exists(folder_path):  def create_folder_if_not_exists(folder_path):      """Creates a folder if it does not exists."""      folder_path = tilde_expansion(folder_path) -    folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path +    folder_path = ( +        "".join([folder_path, "/"]) if not folder_path[-1] == "/" else folder_path +    )      direc = os.path.dirname(folder_path)      if not folder_exists(direc):          os.makedirs(direc)  def cmd_exists(c): -    return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 +    return ( +        subprocess.call( +            "type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE +        ) +        == 0 +    )  ################ @@ -212,20 +241,22 @@ def cmd_exists(c):  def initialize_pymol(options):      """Initializes PyMOL"""      import pymol +      # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround) -    pymol.finish_launching(args=['pymol', options, '-K']) +    pymol.finish_launching(args=["pymol", options, "-K"])      pymol.cmd.reinitialize() -def start_pymol(quiet=False, options='-p', run=False): +def start_pymol(quiet=False, options="-p", run=False):      """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output.      Command line options can be passed as the second argument."""      import pymol -    pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:] + +    pymol.pymol_argv = ["pymol", "%s" % options] + sys.argv[1:]      if run:          initialize_pymol(options)      if quiet: -        pymol.cmd.feedback('disable', 'all', 'everything') +        pymol.cmd.feedback("disable", "all", "everything")  def nucleotide_linkage(residues): @@ -235,7 +266,7 @@ def nucleotide_linkage(residues):      #######################################      # Basic support for RNA/DNA as ligand #      ####################################### -    nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'] +    nucleotides = ["A", "C", "T", "G", "U", "DA", "DC", "DT", "DG", "DU"]      dna_rna = {}  # Dictionary of DNA/RNA residues by chain      covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")      # Create missing covlinkage entries for DNA/RNA @@ -243,7 +274,9 @@ def nucleotide_linkage(residues):          resname, chain, pos = ligand          if resname in nucleotides:              if chain not in dna_rna: -                dna_rna[chain] = [(resname, pos), ] +                dna_rna[chain] = [ +                    (resname, pos), +                ]              else:                  dna_rna[chain].append((resname, pos))      for chain in dna_rna: @@ -253,8 +286,16 @@ def nucleotide_linkage(residues):                  name, pos = nucleotide                  nextnucleotide = nuc_list[i + 1]                  nextname, nextpos = nextnucleotide -                newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='', -                                     id2=nextname, chain2=chain, pos2=nextpos, conf2='') +                newlink = covlinkage( +                    id1=name, +                    chain1=chain, +                    pos1=pos, +                    conf1="", +                    id2=nextname, +                    chain2=chain, +                    pos2=nextpos, +                    conf2="", +                )                  nuc_covalent.append(newlink)      return nuc_covalent @@ -273,7 +314,12 @@ def ring_is_planar(ring, r_atoms):      # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less      for n1, n2 in itertools.product(normals, repeat=2):          arom_angle = vecangle(n1, n2) -        if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]): +        if all( +            [ +                arom_angle > config.AROMATIC_PLANARITY, +                arom_angle < 180.0 - config.AROMATIC_PLANARITY, +            ] +        ):              return False      return True @@ -282,21 +328,21 @@ def classify_by_name(names):      """Classify a (composite) ligand by the HETID(s)"""      if len(names) > 3:  # Polymer          if len(set(config.RNA).intersection(set(names))) != 0: -            ligtype = 'RNA' +            ligtype = "RNA"          elif len(set(config.DNA).intersection(set(names))) != 0: -            ligtype = 'DNA' +            ligtype = "DNA"          else:              ligtype = "POLYMER"      else: -        ligtype = 'SMALLMOLECULE' +        ligtype = "SMALLMOLECULE"      for name in names:          if name in config.METAL_IONS:              if len(names) == 1: -                ligtype = 'ION' +                ligtype = "ION"              else:                  if "ION" not in ligtype: -                    ligtype += '+ION' +                    ligtype += "+ION"      return ligtype @@ -323,7 +369,7 @@ def get_isomorphisms(reference, lig):          isomorphs = pybel.ob.vpairUIntUInt()          mappr.MapFirst(lig.OBMol, isomorphs)          isomorphs = [isomorphs] -    logger.debug(f'number of isomorphisms: {len(isomorphs)}') +    logger.debug(f"number of isomorphisms: {len(isomorphs)}")      # @todo Check which isomorphism to take      return isomorphs @@ -340,15 +386,17 @@ def canonicalize(lig, preserve_bond_order=False):                  bond.SetBondOrder(1)      lig.DeleteData(pybel.ob.StereoData)      lig = pybel.Molecule(lig) -    testcan = lig.write(format='can') +    testcan = lig.write(format="can")      try: -        pybel.readstring('can', testcan) -        reference = pybel.readstring('can', testcan) +        pybel.readstring("can", testcan) +        reference = pybel.readstring("can", testcan)      except IOError: -        testcan, reference = '', '' -    if testcan != '': +        testcan, reference = "", "" +    if testcan != "":          reference.removeh() -        isomorphs = get_isomorphisms(reference, lig)  # isomorphs now holds all isomorphisms within the molecule +        isomorphs = get_isomorphisms( +            reference, lig +        )  # isomorphs now holds all isomorphisms within the molecule          if not len(isomorphs) == 0:              smi_dict = {}              smi_to_can = isomorphs[0] @@ -365,7 +413,9 @@ def int32_to_negative(int32):      32 bit integer and returns the actual number.      """      dct = {} -    if int32 == 4294967295:  # Special case in some structures (note, this is just a workaround) +    if ( +        int32 == 4294967295 +    ):  # Special case in some structures (note, this is just a workaround)          return -1      for i in range(-1000, -1):          dct[np.uint32(i)] = i @@ -378,7 +428,7 @@ def int32_to_negative(int32):  def read_pdb(pdbfname, as_string=False):      """Reads a given PDB file and returns a Pybel Molecule."""      pybel.ob.obErrorLog.StopLogging()  # Suppress all OpenBabel warnings -    if os.name != 'nt':  # Resource module not available for Windows +    if os.name != "nt":  # Resource module not available for Windows          maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1]          resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize))      sys.setrecursionlimit(10 ** 5)  # increase Python recursion limit @@ -387,48 +437,50 @@ def read_pdb(pdbfname, as_string=False):  def read(fil):      """Returns a file handler and detects gzipped files.""" -    if os.path.splitext(fil)[-1] == '.gz': -        return gzip.open(fil, 'rb') -    elif os.path.splitext(fil)[-1] == '.zip': -        zf = zipfile.ZipFile(fil, 'r') +    if os.path.splitext(fil)[-1] == ".gz": +        return gzip.open(fil, "rb") +    elif os.path.splitext(fil)[-1] == ".zip": +        zf = zipfile.ZipFile(fil, "r")          return zf.open(zf.infolist()[0].filename)      else: -        return open(fil, 'r') +        return open(fil, "r")  def readmol(path, as_string=False):      """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type.      In contrast to the standard Pybel implementation, the file is closed properly.""" -    supported_formats = ['pdb'] +    supported_formats = ["pdb"]      # Fix for Windows-generated files: Remove carriage return characters      if "\r" in path and as_string: -        path = path.replace('\r', '') +        path = path.replace("\r", "")      for sformat in supported_formats:          obc = pybel.ob.OBConversion()          obc.SetInFormat(sformat) -        logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel') +        logger.debug( +            f"detected {sformat} as format, trying to read file with OpenBabel" +        )          # Read molecules with single bond information          if as_string:              try:                  mymol = pybel.readstring(sformat, path)              except IOError: -                logger.error('no valid file format provided') +                logger.error("no valid file format provided")                  sys.exit(1)          else:              read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None})              try:                  mymol = next(read_file)              except StopIteration: -                logger.error('file contains no valid molecules') +                logger.error("file contains no valid molecules")                  sys.exit(1) -        logger.debug('molecule successfully read') +        logger.debug("molecule successfully read")          # Assign multiple bonds          mymol.OBMol.PerceiveBondOrders()          return mymol, sformat -    logger.error('no valid file format provided') +    logger.error("no valid file format provided")      sys.exit(1) | 
