diff options
28 files changed, 4631 insertions, 1917 deletions
diff --git a/plip/basic/config.py b/plip/basic/config.py index a7468cf..3770d44 100644 --- a/plip/basic/config.py +++ b/plip/basic/config.py @@ -1,5 +1,5 @@ -__version__ = '2.1.0-beta' -__maintainer__ = 'PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai' +__version__ = "2.1.0-beta" +__maintainer__ = "PharmAI GmbH (2020) - www.pharm.ai - hello@pharm.ai" import logging @@ -14,8 +14,8 @@ PICS = False PYMOL = False STDOUT = False RAWSTRING = False # use raw strings for input / output -OUTPATH = './' -BASEPATH = './' +OUTPATH = "./" +BASEPATH = "./" BREAKCOMPOSITE = False # Break up composite ligands with covalent bonds ALTLOC = False # Consider alternate locations PLUGIN_MODE = False # Special mode for PLIP in Plugins (e.g. PyMOL) @@ -34,27 +34,45 @@ NOHYDRO = False # Do not add hydrogen bonds (in case already present in the str # Thresholds for detection (global variables) BS_DIST = 7.5 # Determines maximum distance to include binding site residues -AROMATIC_PLANARITY = 5.0 # Determines allowed deviation from planarity in aromatic rings +AROMATIC_PLANARITY = ( + 5.0 # Determines allowed deviation from planarity in aromatic rings +) MIN_DIST = 0.5 # Minimum distance for all distance thresholds # Some distance thresholds were extended (max. 1.0A) if too restrictive too account for low-quality structures HYDROPH_DIST_MAX = 4.0 # Distance cutoff for detection of hydrophobic contacts HBOND_DIST_MAX = 4.1 # Max. distance between hydrogen bond donor and acceptor (Hubbard & Haider, 2001) + 0.6 A -HBOND_DON_ANGLE_MIN = 100 # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 -PISTACK_DIST_MAX = 5.5 # Max. distance for parallel or offset pistacking (McGaughey, 1998) -PISTACK_ANG_DEV = 30 # Max. Deviation from parallel or perpendicular orientation (in degrees) +HBOND_DON_ANGLE_MIN = ( + 100 # Min. angle at the hydrogen bond donor (Hubbard & Haider, 2001) + 10 +) +PISTACK_DIST_MAX = ( + 5.5 # Max. distance for parallel or offset pistacking (McGaughey, 1998) +) +PISTACK_ANG_DEV = ( + 30 # Max. Deviation from parallel or perpendicular orientation (in degrees) +) PISTACK_OFFSET_MAX = 2.0 # Maximum offset of the two rings (corresponds to the radius of benzene + 0.5 A) PICATION_DIST_MAX = 6.0 # Max. distance between charged atom and aromatic ring center (Gallivan and Dougherty, 1999) SALTBRIDGE_DIST_MAX = 5.5 # Max. distance between centers of charge for salt bridges (Barlow and Thornton, 1983) + 1.5 HALOGEN_DIST_MAX = 4.0 # Max. distance between oxy. and halogen (Halogen bonds in biological molecules., Auffinger)+0.5 -HALOGEN_ACC_ANGLE = 120 # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) -HALOGEN_DON_ANGLE = 165 # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) +HALOGEN_ACC_ANGLE = ( + 120 # Optimal acceptor angle (Halogen bonds in biological molecules., Auffinger) +) +HALOGEN_DON_ANGLE = ( + 165 # Optimal donor angle (Halogen bonds in biological molecules., Auffinger) +) HALOGEN_ANGLE_DEV = 30 # Max. deviation from optimal angle -WATER_BRIDGE_MINDIST = 2.5 # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 -WATER_BRIDGE_MAXDIST = 4.1 # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 +WATER_BRIDGE_MINDIST = ( + 2.5 # Min. distance between water oxygen and polar atom (Jiang et al., 2005) -0.1 +) +WATER_BRIDGE_MAXDIST = ( + 4.1 # Max. distance between water oxygen and polar atom (Jiang et al., 2005) +0.5 +) WATER_BRIDGE_OMEGA_MIN = 71 # Min. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) - 9 WATER_BRIDGE_OMEGA_MAX = 140 # Max. angle between acceptor, water oxygen and donor hydrogen (Jiang et al., 2005) WATER_BRIDGE_THETA_MIN = 100 # Min. angle between water oxygen, donor hydrogen and donor atom (Jiang et al., 2005) -METAL_DIST_MAX = 3.0 # Max. distance between metal ion and interacting atom (Harding, 2001) +METAL_DIST_MAX = ( + 3.0 # Max. distance between metal ion and interacting atom (Harding, 2001) +) # Other thresholds MAX_COMPOSITE_LENGTH = 200 # Filter out ligands with more than 200 fragments @@ -64,8 +82,8 @@ MAX_COMPOSITE_LENGTH = 200 # Filter out ligands with more than 200 fragments ######### # Names of RNA and DNA residues to be considered (detection by name) -RNA = ['U', 'A', 'C', 'G'] -DNA = ['DT', 'DA', 'DC', 'DG'] +RNA = ["U", "A", "C", "G"] +DNA = ["DT", "DA", "DC", "DG"] ############# # Whitelist # @@ -73,49 +91,535 @@ DNA = ['DT', 'DA', 'DC', 'DG'] # Metal cations which can be complexed -METAL_IONS = ['CA', 'CO', 'MG', 'MN', 'FE', 'CU', 'ZN', 'FE2', 'FE3', 'FE4', 'LI', 'NA', 'K', 'RB', 'SR', 'CS', 'BA', - 'CR', 'NI', 'FE1', 'NI', 'RU', 'RU1', 'RH', 'RH1', 'PD', 'AG', 'CD', 'LA', 'W', 'W1', 'OS', 'IR', 'PT', - 'PT1', 'AU', 'HG', 'CE', 'PR', 'SM', 'EU', 'GD', 'TB', 'YB', 'LU', 'AL', 'GA', 'IN', 'SB', 'TL', 'PB'] +METAL_IONS = [ + "CA", + "CO", + "MG", + "MN", + "FE", + "CU", + "ZN", + "FE2", + "FE3", + "FE4", + "LI", + "NA", + "K", + "RB", + "SR", + "CS", + "BA", + "CR", + "NI", + "FE1", + "NI", + "RU", + "RU1", + "RH", + "RH1", + "PD", + "AG", + "CD", + "LA", + "W", + "W1", + "OS", + "IR", + "PT", + "PT1", + "AU", + "HG", + "CE", + "PR", + "SM", + "EU", + "GD", + "TB", + "YB", + "LU", + "AL", + "GA", + "IN", + "SB", + "TL", + "PB", +] ############## # Blacklists # ############## # Other Ions/Atoms (not yet supported) -anions = ['CL', 'IOD', 'BR'] -other = ['MO', 'RE', 'HO'] +anions = ["CL", "IOD", "BR"] +other = ["MO", "RE", "HO"] UNSUPPORTED = anions + other # BioLiP list of suspicious ligands from http://zhanglab.ccmb.med.umich.edu/BioLiP/ligand_list (2014-07-10) # Add ligands here to get warnings for possible artifacts. -biolip_list = ['ACE', 'HEX', 'TMA', 'SOH', 'P25', 'CCN', 'PR', 'PTN', 'NO3', 'TCN', 'BU1', 'BCN', 'CB3', 'HCS', 'NBN', - 'SO2', 'MO6', 'MOH', 'CAC', 'MLT', 'KR', '6PH', 'MOS', 'UNL', 'MO3', 'SR', 'CD3', 'PB', 'ACM', 'LUT', - 'PMS', 'OF3', 'SCN', 'DHB', 'E4N', '13P', '3PG', 'CYC', 'NC', 'BEN', 'NAO', 'PHQ', 'EPE', 'BME', 'TB', - 'ETE', 'EU', 'OES', 'EAP', 'ETX', 'BEZ', '5AD', 'OC2', 'OLA', 'GD3', 'CIT', 'DVT', 'OC6', 'MW1', 'OC3', - 'SRT', 'LCO', 'BNZ', 'PPV', 'STE', 'PEG', 'RU', 'PGE', 'MPO', 'B3P', 'OGA', 'IPA', 'LU', 'EDO', 'MAC', - '9PE', 'IPH', 'MBN', 'C1O', '1PE', 'YF3', 'PEF', 'GD', '8PE', 'DKA', 'RB', 'YB', 'GGD', 'SE4', 'LHG', - 'SMO', 'DGD', 'CMO', 'MLI', 'MW2', 'DTT', 'DOD', '7PH', 'PBM', 'AU', 'FOR', 'PSC', 'TG1', 'KAI', '1PG', - 'DGA', 'IR', 'PE4', 'VO4', 'ACN', 'AG', 'MO4', 'OCL', '6UL', 'CHT', 'RHD', 'CPS', 'IR3', 'OC4', 'MTE', - 'HGC', 'CR', 'PC1', 'HC4', 'TEA', 'BOG', 'PEO', 'PE5', '144', 'IUM', 'LMG', 'SQU', 'MMC', 'GOL', 'NVP', - 'AU3', '3PH', 'PT4', 'PGO', 'ICT', 'OCM', 'BCR', 'PG4', 'L4P', 'OPC', 'OXM', 'SQD', 'PQ9', 'BAM', 'PI', - 'PL9', 'P6G', 'IRI', '15P', 'MAE', 'MBO', 'FMT', 'L1P', 'DUD', 'PGV', 'CD1', 'P33', 'DTU', 'XAT', 'CD', - 'THE', 'U1', 'NA', 'MW3', 'BHG', 'Y1', 'OCT', 'BET', 'MPD', 'HTO', 'IBM', 'D01', 'HAI', 'HED', 'CAD', - 'CUZ', 'TLA', 'SO4', 'OC5', 'ETF', 'MRD', 'PT', 'PHB', 'URE', 'MLA', 'TGL', 'PLM', 'NET', 'LAC', 'AUC', - 'UNX', 'GA', 'DMS', 'MO2', 'LA', 'NI', 'TE', 'THJ', 'NHE', 'HAE', 'MO1', 'DAO', '3PE', 'LMU', 'DHJ', - 'FLC', 'SAL', 'GAI', 'ORO', 'HEZ', 'TAM', 'TRA', 'NEX', 'CXS', 'LCP', 'HOH', 'OCN', 'PER', 'ACY', 'MH2', - 'ARS', '12P', 'L3P', 'PUT', 'IN', 'CS', 'NAW', 'SB', 'GUN', 'SX', 'CON', 'C2O', 'EMC', 'BO4', 'BNG', - 'MN5', '__O', 'K', 'CYN', 'H2S', 'MH3', 'YT3', 'P22', 'KO4', '1AG', 'CE', 'IPL', 'PG6', 'MO5', 'F09', - 'HO', 'AL', 'TRS', 'EOH', 'GCP', 'MSE', 'AKR', 'NCO', 'PO4', 'L2P', 'LDA', 'SIN', 'DMI', 'SM', 'DTD', - 'SGM', 'DIO', 'PPI', 'DDQ', 'DPO', 'HCA', 'CO5', 'PD', 'OS', 'OH', 'NA6', 'NAG', 'W', 'ENC', 'NA5', - 'LI1', 'P4C', 'GLV', 'DMF', 'ACT', 'BTB', '6PL', 'BGL', 'OF1', 'N8E', 'LMT', 'THM', 'EU3', 'PGR', 'NA2', - 'FOL', '543', '_CP', 'PEK', 'NSP', 'PEE', 'OCO', 'CHD', 'CO2', 'TBU', 'UMQ', 'MES', 'NH4', 'CD5', 'HTG', - 'DEP', 'OC1', 'KDO', '2PE', 'PE3', 'IOD', 'NDG', 'CL', 'HG', 'F', 'XE', 'TL', 'BA', 'LI', 'BR', 'TAU', - 'TCA', 'SPD', 'SPM', 'SAR', 'SUC', 'PAM', 'SPH', 'BE7', 'P4G', 'OLC', 'OLB', 'LFA', 'D10', 'D12', 'DD9', - 'HP6', 'R16', 'PX4', 'TRD', 'UND', 'FTT', 'MYR', 'RG1', 'IMD', 'DMN', 'KEN', 'C14', 'UPL', 'CMJ', 'ULI', - 'MYS', 'TWT', 'M2M', 'P15', 'PG0', 'PEU', 'AE3', 'TOE', 'ME2', 'PE8', '6JZ', '7PE', 'P3G', '7PG', 'PG5', - '16P', 'XPE', 'PGF', 'AE4', '7E8', '7E9', 'MVC', 'TAR', 'DMR', 'LMR', 'NER', '02U', 'NGZ', 'LXB', 'A2G', - 'BM3', 'NAA', 'NGA', 'LXZ', 'PX6', 'PA8', 'LPP', 'PX2', 'MYY', 'PX8', 'PD7', 'XP4', 'XPA', 'PEV', '6PE', - 'PEX', 'PEH', 'PTY', 'YB2', 'PGT', 'CN3', 'AGA', 'DGG', 'CD4', 'CN6', 'CDL', 'PG8', 'MGE', 'DTV', 'L44', - 'L2C', '4AG', 'B3H', '1EM', 'DDR', 'I42', 'CNS', 'PC7', 'HGP', 'PC8', 'HGX', 'LIO', 'PLD', 'PC2', 'PCF', - 'MC3', 'P1O', 'PLC', 'PC6', 'HSH', 'BXC', 'HSG', 'DPG', '2DP', 'POV', 'PCW', 'GVT', 'CE9', 'CXE', 'C10', - 'CE1', 'SPJ', 'SPZ', 'SPK', 'SPW', 'HT3', 'HTH', '2OP', '3NI', 'BO3', 'DET', 'D1D', 'SWE', 'SOG'] +biolip_list = [ + "ACE", + "HEX", + "TMA", + "SOH", + "P25", + "CCN", + "PR", + "PTN", + "NO3", + "TCN", + "BU1", + "BCN", + "CB3", + "HCS", + "NBN", + "SO2", + "MO6", + "MOH", + "CAC", + "MLT", + "KR", + "6PH", + "MOS", + "UNL", + "MO3", + "SR", + "CD3", + "PB", + "ACM", + "LUT", + "PMS", + "OF3", + "SCN", + "DHB", + "E4N", + "13P", + "3PG", + "CYC", + "NC", + "BEN", + "NAO", + "PHQ", + "EPE", + "BME", + "TB", + "ETE", + "EU", + "OES", + "EAP", + "ETX", + "BEZ", + "5AD", + "OC2", + "OLA", + "GD3", + "CIT", + "DVT", + "OC6", + "MW1", + "OC3", + "SRT", + "LCO", + "BNZ", + "PPV", + "STE", + "PEG", + "RU", + "PGE", + "MPO", + "B3P", + "OGA", + "IPA", + "LU", + "EDO", + "MAC", + "9PE", + "IPH", + "MBN", + "C1O", + "1PE", + "YF3", + "PEF", + "GD", + "8PE", + "DKA", + "RB", + "YB", + "GGD", + "SE4", + "LHG", + "SMO", + "DGD", + "CMO", + "MLI", + "MW2", + "DTT", + "DOD", + "7PH", + "PBM", + "AU", + "FOR", + "PSC", + "TG1", + "KAI", + "1PG", + "DGA", + "IR", + "PE4", + "VO4", + "ACN", + "AG", + "MO4", + "OCL", + "6UL", + "CHT", + "RHD", + "CPS", + "IR3", + "OC4", + "MTE", + "HGC", + "CR", + "PC1", + "HC4", + "TEA", + "BOG", + "PEO", + "PE5", + "144", + "IUM", + "LMG", + "SQU", + "MMC", + "GOL", + "NVP", + "AU3", + "3PH", + "PT4", + "PGO", + "ICT", + "OCM", + "BCR", + "PG4", + "L4P", + "OPC", + "OXM", + "SQD", + "PQ9", + "BAM", + "PI", + "PL9", + "P6G", + "IRI", + "15P", + "MAE", + "MBO", + "FMT", + "L1P", + "DUD", + "PGV", + "CD1", + "P33", + "DTU", + "XAT", + "CD", + "THE", + "U1", + "NA", + "MW3", + "BHG", + "Y1", + "OCT", + "BET", + "MPD", + "HTO", + "IBM", + "D01", + "HAI", + "HED", + "CAD", + "CUZ", + "TLA", + "SO4", + "OC5", + "ETF", + "MRD", + "PT", + "PHB", + "URE", + "MLA", + "TGL", + "PLM", + "NET", + "LAC", + "AUC", + "UNX", + "GA", + "DMS", + "MO2", + "LA", + "NI", + "TE", + "THJ", + "NHE", + "HAE", + "MO1", + "DAO", + "3PE", + "LMU", + "DHJ", + "FLC", + "SAL", + "GAI", + "ORO", + "HEZ", + "TAM", + "TRA", + "NEX", + "CXS", + "LCP", + "HOH", + "OCN", + "PER", + "ACY", + "MH2", + "ARS", + "12P", + "L3P", + "PUT", + "IN", + "CS", + "NAW", + "SB", + "GUN", + "SX", + "CON", + "C2O", + "EMC", + "BO4", + "BNG", + "MN5", + "__O", + "K", + "CYN", + "H2S", + "MH3", + "YT3", + "P22", + "KO4", + "1AG", + "CE", + "IPL", + "PG6", + "MO5", + "F09", + "HO", + "AL", + "TRS", + "EOH", + "GCP", + "MSE", + "AKR", + "NCO", + "PO4", + "L2P", + "LDA", + "SIN", + "DMI", + "SM", + "DTD", + "SGM", + "DIO", + "PPI", + "DDQ", + "DPO", + "HCA", + "CO5", + "PD", + "OS", + "OH", + "NA6", + "NAG", + "W", + "ENC", + "NA5", + "LI1", + "P4C", + "GLV", + "DMF", + "ACT", + "BTB", + "6PL", + "BGL", + "OF1", + "N8E", + "LMT", + "THM", + "EU3", + "PGR", + "NA2", + "FOL", + "543", + "_CP", + "PEK", + "NSP", + "PEE", + "OCO", + "CHD", + "CO2", + "TBU", + "UMQ", + "MES", + "NH4", + "CD5", + "HTG", + "DEP", + "OC1", + "KDO", + "2PE", + "PE3", + "IOD", + "NDG", + "CL", + "HG", + "F", + "XE", + "TL", + "BA", + "LI", + "BR", + "TAU", + "TCA", + "SPD", + "SPM", + "SAR", + "SUC", + "PAM", + "SPH", + "BE7", + "P4G", + "OLC", + "OLB", + "LFA", + "D10", + "D12", + "DD9", + "HP6", + "R16", + "PX4", + "TRD", + "UND", + "FTT", + "MYR", + "RG1", + "IMD", + "DMN", + "KEN", + "C14", + "UPL", + "CMJ", + "ULI", + "MYS", + "TWT", + "M2M", + "P15", + "PG0", + "PEU", + "AE3", + "TOE", + "ME2", + "PE8", + "6JZ", + "7PE", + "P3G", + "7PG", + "PG5", + "16P", + "XPE", + "PGF", + "AE4", + "7E8", + "7E9", + "MVC", + "TAR", + "DMR", + "LMR", + "NER", + "02U", + "NGZ", + "LXB", + "A2G", + "BM3", + "NAA", + "NGA", + "LXZ", + "PX6", + "PA8", + "LPP", + "PX2", + "MYY", + "PX8", + "PD7", + "XP4", + "XPA", + "PEV", + "6PE", + "PEX", + "PEH", + "PTY", + "YB2", + "PGT", + "CN3", + "AGA", + "DGG", + "CD4", + "CN6", + "CDL", + "PG8", + "MGE", + "DTV", + "L44", + "L2C", + "4AG", + "B3H", + "1EM", + "DDR", + "I42", + "CNS", + "PC7", + "HGP", + "PC8", + "HGX", + "LIO", + "PLD", + "PC2", + "PCF", + "MC3", + "P1O", + "PLC", + "PC6", + "HSH", + "BXC", + "HSG", + "DPG", + "2DP", + "POV", + "PCW", + "GVT", + "CE9", + "CXE", + "C10", + "CE1", + "SPJ", + "SPZ", + "SPK", + "SPW", + "HT3", + "HTH", + "2OP", + "3NI", + "BO3", + "DET", + "D1D", + "SWE", + "SOG", +] diff --git a/plip/basic/logger.py b/plip/basic/logger.py index ca65411..f15650e 100644 --- a/plip/basic/logger.py +++ b/plip/basic/logger.py @@ -8,14 +8,16 @@ def get_logger(): """ frame = inspect.stack()[1] module_name = inspect.getmodule(frame[0]).__name__ - if module_name != '__main__': + if module_name != "__main__": logger = logging.getLogger(module_name) if not logger.parent.handlers: ch = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s') + formatter = logging.Formatter( + "%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(name)s: %(message)s" + ) ch.setFormatter(formatter) logger.parent.addHandler(ch) else: - logger = logging.getLogger('plip') + logger = logging.getLogger("plip") return logger diff --git a/plip/basic/parallel.py b/plip/basic/parallel.py index 1f71943..cd0b93b 100644 --- a/plip/basic/parallel.py +++ b/plip/basic/parallel.py @@ -34,9 +34,9 @@ def parallel_fn(f): def simple_parallel(func, sequence, **args): """ f takes an element of sequence as input and the keyword args in **args""" - if 'processes' in args: - processes = args.get('processes') - del args['processes'] + if "processes" in args: + processes = args.get("processes") + del args["processes"] else: processes = multiprocessing.cpu_count() diff --git a/plip/basic/remote.py b/plip/basic/remote.py index 0c67f6f..18de4b7 100644 --- a/plip/basic/remote.py +++ b/plip/basic/remote.py @@ -1,14 +1,23 @@ from collections import namedtuple -hbonds_info = namedtuple('hbonds_info', 'ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id') -hydrophobic_info = namedtuple('hydrophobic_info', 'bs_ids lig_ids pairs_ids') -halogen_info = namedtuple('halogen_info', 'don_id acc_id') -pistack_info = namedtuple('pistack_info', 'proteinring_atoms, proteinring_center ligandring_atoms ' - 'ligandring_center type') -pication_info = namedtuple('pication_info', 'ring_center charge_center ring_atoms charge_atoms, protcharged') -sbridge_info = namedtuple('sbridge_info', 'positive_atoms negative_atoms positive_center negative_center protispos') -wbridge_info = namedtuple('wbridge_info', 'don_id acc_id water_id protisdon') -metal_info = namedtuple('metal_info', 'metal_id, target_id location') +hbonds_info = namedtuple( + "hbonds_info", "ldon_id lig_don_id prot_acc_id pdon_id prot_don_id lig_acc_id" +) +hydrophobic_info = namedtuple("hydrophobic_info", "bs_ids lig_ids pairs_ids") +halogen_info = namedtuple("halogen_info", "don_id acc_id") +pistack_info = namedtuple( + "pistack_info", + "proteinring_atoms, proteinring_center ligandring_atoms " "ligandring_center type", +) +pication_info = namedtuple( + "pication_info", "ring_center charge_center ring_atoms charge_atoms, protcharged" +) +sbridge_info = namedtuple( + "sbridge_info", + "positive_atoms negative_atoms positive_center negative_center protispos", +) +wbridge_info = namedtuple("wbridge_info", "don_id acc_id water_id protisdon") +metal_info = namedtuple("metal_info", "metal_id, target_id location") class VisualizerData: @@ -21,7 +30,7 @@ class VisualizerData: # General Information self.lig_members = sorted(pli.ligand.members) - self.sourcefile = pcomp.sourcefiles['pdbcomplex'] + self.sourcefile = pcomp.sourcefiles["pdbcomplex"] self.corrected_pdb = pcomp.corrected_pdb self.pdbid = mol.pymol_name self.hetid = ligand.hetid @@ -39,57 +48,88 @@ class VisualizerData: # Hydrophobic Contacts # Contains IDs of contributing binding site, ligand atoms and the pairings - hydroph_pairs_id = [(h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts] - self.hydrophobic_contacts = hydrophobic_info(bs_ids=[hp[0] for hp in hydroph_pairs_id], - lig_ids=[hp[1] for hp in hydroph_pairs_id], - pairs_ids=hydroph_pairs_id) + hydroph_pairs_id = [ + (h.bsatom_orig_idx, h.ligatom_orig_idx) for h in pli.hydrophobic_contacts + ] + self.hydrophobic_contacts = hydrophobic_info( + bs_ids=[hp[0] for hp in hydroph_pairs_id], + lig_ids=[hp[1] for hp in hydroph_pairs_id], + pairs_ids=hydroph_pairs_id, + ) # Hydrogen Bonds # #@todo Don't use indices, simplify this code here hbonds_ldon, hbonds_pdon = pli.hbonds_ldon, pli.hbonds_pdon hbonds_ldon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon] hbonds_pdon_id = [(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon] - self.hbonds = hbonds_info(ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], - lig_don_id=[hb[1] for hb in hbonds_ldon_id], - prot_acc_id=[hb[0] for hb in hbonds_ldon_id], - pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], - prot_don_id=[hb[1] for hb in hbonds_pdon_id], - lig_acc_id=[hb[0] for hb in hbonds_pdon_id]) + self.hbonds = hbonds_info( + ldon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_ldon], + lig_don_id=[hb[1] for hb in hbonds_ldon_id], + prot_acc_id=[hb[0] for hb in hbonds_ldon_id], + pdon_id=[(hb.a_orig_idx, hb.d_orig_idx) for hb in hbonds_pdon], + prot_don_id=[hb[1] for hb in hbonds_pdon_id], + lig_acc_id=[hb[0] for hb in hbonds_pdon_id], + ) # Halogen Bonds - self.halogen_bonds = [halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) - for h in pli.halogen_bonds] + self.halogen_bonds = [ + halogen_info(don_id=h.don_orig_idx, acc_id=h.acc_orig_idx) + for h in pli.halogen_bonds + ] # Pistacking - self.pistacking = [pistack_info(proteinring_atoms=pistack.proteinring.atoms_orig_idx, - proteinring_center=pistack.proteinring.center, - ligandring_atoms=pistack.ligandring.atoms_orig_idx, - ligandring_center=pistack.ligandring.center, - type=pistack.type) for pistack in pli.pistacking] + self.pistacking = [ + pistack_info( + proteinring_atoms=pistack.proteinring.atoms_orig_idx, + proteinring_center=pistack.proteinring.center, + ligandring_atoms=pistack.ligandring.atoms_orig_idx, + ligandring_center=pistack.ligandring.center, + type=pistack.type, + ) + for pistack in pli.pistacking + ] # Pi-cation interactions - self.pication = [pication_info(ring_center=picat.ring.center, - charge_center=picat.charge.center, - ring_atoms=picat.ring.atoms_orig_idx, - charge_atoms=picat.charge.atoms_orig_idx, - protcharged=picat.protcharged) - for picat in pli.pication_paro + pli.pication_laro] + self.pication = [ + pication_info( + ring_center=picat.ring.center, + charge_center=picat.charge.center, + ring_atoms=picat.ring.atoms_orig_idx, + charge_atoms=picat.charge.atoms_orig_idx, + protcharged=picat.protcharged, + ) + for picat in pli.pication_paro + pli.pication_laro + ] # Salt Bridges - self.saltbridges = [sbridge_info(positive_atoms=sbridge.positive.atoms_orig_idx, - negative_atoms=sbridge.negative.atoms_orig_idx, - positive_center=sbridge.positive.center, - negative_center=sbridge.negative.center, - protispos=sbridge.protispos) - for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg] + self.saltbridges = [ + sbridge_info( + positive_atoms=sbridge.positive.atoms_orig_idx, + negative_atoms=sbridge.negative.atoms_orig_idx, + positive_center=sbridge.positive.center, + negative_center=sbridge.negative.center, + protispos=sbridge.protispos, + ) + for sbridge in pli.saltbridge_lneg + pli.saltbridge_pneg + ] # Water Bridgese('wbridge_info', 'don_id acc_id water_id protisdon') - self.waterbridges = [wbridge_info(don_id=wbridge.d_orig_idx, - acc_id=wbridge.a_orig_idx, - water_id=wbridge.water_orig_idx, - protisdon=wbridge.protisdon) for wbridge in pli.water_bridges] + self.waterbridges = [ + wbridge_info( + don_id=wbridge.d_orig_idx, + acc_id=wbridge.a_orig_idx, + water_id=wbridge.water_orig_idx, + protisdon=wbridge.protisdon, + ) + for wbridge in pli.water_bridges + ] # Metal Complexes - self.metal_complexes = [metal_info(metal_id=metalc.metal_orig_idx, - target_id=metalc.target_orig_idx, - location=metalc.location) for metalc in pli.metal_complexes] + self.metal_complexes = [ + metal_info( + metal_id=metalc.metal_orig_idx, + target_id=metalc.target_orig_idx, + location=metalc.location, + ) + for metalc in pli.metal_complexes + ] diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py index 6b3ef7b..7164497 100644 --- a/plip/basic/supplemental.py +++ b/plip/basic/supplemental.py @@ -18,22 +18,24 @@ from plip.basic import config, logger logger = logger.get_logger() # Windows and MacOS -if os.name != 'nt' and platform.system() != 'Darwin': # Resource module not available for Windows +if ( + os.name != "nt" and platform.system() != "Darwin" +): # Resource module not available for Windows import resource # Settings -np.seterr(all='ignore') # No runtime warnings +np.seterr(all="ignore") # No runtime warnings def tmpfile(prefix, direc): """Returns the path to a newly created temporary file.""" - return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc) + return tempfile.mktemp(prefix=prefix, suffix=".pdb", dir=direc) def is_lig(hetid): """Checks if a PDB compound can be excluded as a small molecule ligand""" h = hetid.upper() - return not (h == 'HOH' or h in config.UNSUPPORTED) + return not (h == "HOH" or h in config.UNSUPPORTED) def extract_pdbid(string): @@ -48,19 +50,25 @@ def extract_pdbid(string): def whichrestype(atom): """Returns the residue name of an Pybel or OpenBabel atom.""" - atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom + atom = ( + atom if not isinstance(atom, Atom) else atom.OBAtom + ) # Convert to OpenBabel Atom return atom.GetResidue().GetName() if atom.GetResidue() is not None else None def whichresnumber(atom): """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file).""" - atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom + atom = ( + atom if not isinstance(atom, Atom) else atom.OBAtom + ) # Convert to OpenBabel Atom return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None def whichchain(atom): """Returns the residue number of an PyBel or OpenBabel atom.""" - atom = atom if not isinstance(atom, Atom) else atom.OBAtom # Convert to OpenBabel Atom + atom = ( + atom if not isinstance(atom, Atom) else atom.OBAtom + ) # Convert to OpenBabel Atom return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None @@ -82,7 +90,11 @@ def vector(p1, p2): :param p2: coordinates of point p2 :returns : numpy array with vector coordinates """ - return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))]) + return ( + None + if len(p1) != len(p2) + else np.array([p2[i] - p1[i] for i in range(len(p1))]) + ) def vecangle(v1, v2, deg=True): @@ -97,7 +109,7 @@ def vecangle(v1, v2, deg=True): dm = np.dot(v1, v2) cm = np.linalg.norm(v1) * np.linalg.norm(v2) angle = np.arccos(dm / cm) # Round here to prevent floating point errors - return np.degrees([angle, ])[0] if deg else angle + return np.degrees([angle,])[0] if deg else angle def normalize_vector(v): @@ -114,7 +126,12 @@ def centroid(coo): :param coo: Array of coordinate arrays :returns : centroid coordinates as list """ - return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])))) + return list( + map( + np.mean, + (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo])), + ) + ) def projection(pnormal1, ppoint, tpoint): @@ -150,11 +167,15 @@ def cluster_doubles(double_list): if a in location and b in location: if location[a] != location[b]: if location[a] < location[b]: - clusters[location[a]] = clusters[location[a]].union(clusters[location[b]]) # Merge clusters - clusters = clusters[:location[b]] + clusters[location[b] + 1:] + clusters[location[a]] = clusters[location[a]].union( + clusters[location[b]] + ) # Merge clusters + clusters = clusters[: location[b]] + clusters[location[b] + 1 :] else: - clusters[location[b]] = clusters[location[b]].union(clusters[location[a]]) # Merge clusters - clusters = clusters[:location[a]] + clusters[location[a] + 1:] + clusters[location[b]] = clusters[location[b]].union( + clusters[location[a]] + ) # Merge clusters + clusters = clusters[: location[a]] + clusters[location[a] + 1 :] # Rebuild index of locations for each element as they have changed now location = {} for i, cluster in enumerate(clusters): @@ -181,9 +202,10 @@ def cluster_doubles(double_list): # File operations ################# + def tilde_expansion(folder_path): """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>.""" - return os.path.expanduser(folder_path) if '~' in folder_path else folder_path + return os.path.expanduser(folder_path) if "~" in folder_path else folder_path def folder_exists(folder_path): @@ -194,14 +216,21 @@ def folder_exists(folder_path): def create_folder_if_not_exists(folder_path): """Creates a folder if it does not exists.""" folder_path = tilde_expansion(folder_path) - folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path + folder_path = ( + "".join([folder_path, "/"]) if not folder_path[-1] == "/" else folder_path + ) direc = os.path.dirname(folder_path) if not folder_exists(direc): os.makedirs(direc) def cmd_exists(c): - return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0 + return ( + subprocess.call( + "type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + == 0 + ) ################ @@ -212,20 +241,22 @@ def cmd_exists(c): def initialize_pymol(options): """Initializes PyMOL""" import pymol + # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround) - pymol.finish_launching(args=['pymol', options, '-K']) + pymol.finish_launching(args=["pymol", options, "-K"]) pymol.cmd.reinitialize() -def start_pymol(quiet=False, options='-p', run=False): +def start_pymol(quiet=False, options="-p", run=False): """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output. Command line options can be passed as the second argument.""" import pymol - pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:] + + pymol.pymol_argv = ["pymol", "%s" % options] + sys.argv[1:] if run: initialize_pymol(options) if quiet: - pymol.cmd.feedback('disable', 'all', 'everything') + pymol.cmd.feedback("disable", "all", "everything") def nucleotide_linkage(residues): @@ -235,7 +266,7 @@ def nucleotide_linkage(residues): ####################################### # Basic support for RNA/DNA as ligand # ####################################### - nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU'] + nucleotides = ["A", "C", "T", "G", "U", "DA", "DC", "DT", "DG", "DU"] dna_rna = {} # Dictionary of DNA/RNA residues by chain covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") # Create missing covlinkage entries for DNA/RNA @@ -243,7 +274,9 @@ def nucleotide_linkage(residues): resname, chain, pos = ligand if resname in nucleotides: if chain not in dna_rna: - dna_rna[chain] = [(resname, pos), ] + dna_rna[chain] = [ + (resname, pos), + ] else: dna_rna[chain].append((resname, pos)) for chain in dna_rna: @@ -253,8 +286,16 @@ def nucleotide_linkage(residues): name, pos = nucleotide nextnucleotide = nuc_list[i + 1] nextname, nextpos = nextnucleotide - newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='', - id2=nextname, chain2=chain, pos2=nextpos, conf2='') + newlink = covlinkage( + id1=name, + chain1=chain, + pos1=pos, + conf1="", + id2=nextname, + chain2=chain, + pos2=nextpos, + conf2="", + ) nuc_covalent.append(newlink) return nuc_covalent @@ -273,7 +314,12 @@ def ring_is_planar(ring, r_atoms): # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less for n1, n2 in itertools.product(normals, repeat=2): arom_angle = vecangle(n1, n2) - if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]): + if all( + [ + arom_angle > config.AROMATIC_PLANARITY, + arom_angle < 180.0 - config.AROMATIC_PLANARITY, + ] + ): return False return True @@ -282,21 +328,21 @@ def classify_by_name(names): """Classify a (composite) ligand by the HETID(s)""" if len(names) > 3: # Polymer if len(set(config.RNA).intersection(set(names))) != 0: - ligtype = 'RNA' + ligtype = "RNA" elif len(set(config.DNA).intersection(set(names))) != 0: - ligtype = 'DNA' + ligtype = "DNA" else: ligtype = "POLYMER" else: - ligtype = 'SMALLMOLECULE' + ligtype = "SMALLMOLECULE" for name in names: if name in config.METAL_IONS: if len(names) == 1: - ligtype = 'ION' + ligtype = "ION" else: if "ION" not in ligtype: - ligtype += '+ION' + ligtype += "+ION" return ligtype @@ -323,7 +369,7 @@ def get_isomorphisms(reference, lig): isomorphs = pybel.ob.vpairUIntUInt() mappr.MapFirst(lig.OBMol, isomorphs) isomorphs = [isomorphs] - logger.debug(f'number of isomorphisms: {len(isomorphs)}') + logger.debug(f"number of isomorphisms: {len(isomorphs)}") # @todo Check which isomorphism to take return isomorphs @@ -340,15 +386,17 @@ def canonicalize(lig, preserve_bond_order=False): bond.SetBondOrder(1) lig.DeleteData(pybel.ob.StereoData) lig = pybel.Molecule(lig) - testcan = lig.write(format='can') + testcan = lig.write(format="can") try: - pybel.readstring('can', testcan) - reference = pybel.readstring('can', testcan) + pybel.readstring("can", testcan) + reference = pybel.readstring("can", testcan) except IOError: - testcan, reference = '', '' - if testcan != '': + testcan, reference = "", "" + if testcan != "": reference.removeh() - isomorphs = get_isomorphisms(reference, lig) # isomorphs now holds all isomorphisms within the molecule + isomorphs = get_isomorphisms( + reference, lig + ) # isomorphs now holds all isomorphisms within the molecule if not len(isomorphs) == 0: smi_dict = {} smi_to_can = isomorphs[0] @@ -365,7 +413,9 @@ def int32_to_negative(int32): 32 bit integer and returns the actual number. """ dct = {} - if int32 == 4294967295: # Special case in some structures (note, this is just a workaround) + if ( + int32 == 4294967295 + ): # Special case in some structures (note, this is just a workaround) return -1 for i in range(-1000, -1): dct[np.uint32(i)] = i @@ -378,7 +428,7 @@ def int32_to_negative(int32): def read_pdb(pdbfname, as_string=False): """Reads a given PDB file and returns a Pybel Molecule.""" pybel.ob.obErrorLog.StopLogging() # Suppress all OpenBabel warnings - if os.name != 'nt': # Resource module not available for Windows + if os.name != "nt": # Resource module not available for Windows maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1] resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize)) sys.setrecursionlimit(10 ** 5) # increase Python recursion limit @@ -387,48 +437,50 @@ def read_pdb(pdbfname, as_string=False): def read(fil): """Returns a file handler and detects gzipped files.""" - if os.path.splitext(fil)[-1] == '.gz': - return gzip.open(fil, 'rb') - elif os.path.splitext(fil)[-1] == '.zip': - zf = zipfile.ZipFile(fil, 'r') + if os.path.splitext(fil)[-1] == ".gz": + return gzip.open(fil, "rb") + elif os.path.splitext(fil)[-1] == ".zip": + zf = zipfile.ZipFile(fil, "r") return zf.open(zf.infolist()[0].filename) else: - return open(fil, 'r') + return open(fil, "r") def readmol(path, as_string=False): """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type. In contrast to the standard Pybel implementation, the file is closed properly.""" - supported_formats = ['pdb'] + supported_formats = ["pdb"] # Fix for Windows-generated files: Remove carriage return characters if "\r" in path and as_string: - path = path.replace('\r', '') + path = path.replace("\r", "") for sformat in supported_formats: obc = pybel.ob.OBConversion() obc.SetInFormat(sformat) - logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel') + logger.debug( + f"detected {sformat} as format, trying to read file with OpenBabel" + ) # Read molecules with single bond information if as_string: try: mymol = pybel.readstring(sformat, path) except IOError: - logger.error('no valid file format provided') + logger.error("no valid file format provided") sys.exit(1) else: read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None}) try: mymol = next(read_file) except StopIteration: - logger.error('file contains no valid molecules') + logger.error("file contains no valid molecules") sys.exit(1) - logger.debug('molecule successfully read') + logger.debug("molecule successfully read") # Assign multiple bonds mymol.OBMol.PerceiveBondOrders() return mymol, sformat - logger.error('no valid file format provided') + logger.error("no valid file format provided") sys.exit(1) diff --git a/plip/exchange/json.py b/plip/exchange/json.py index 5c7ad5c..14febda 100644 --- a/plip/exchange/json.py +++ b/plip/exchange/json.py @@ -1 +1 @@ -# place holder for module to add Json support
\ No newline at end of file +# place holder for module to add Json support diff --git a/plip/exchange/report.py b/plip/exchange/report.py index a559bf8..edca1f1 100644 --- a/plip/exchange/report.py +++ b/plip/exchange/report.py @@ -11,7 +11,7 @@ from plip.structure.preparation import PDBComplex class StructureReport: """Creates reports (xml or txt) for one structure/""" - def __init__(self, mol: PDBComplex, outputprefix: str = 'report'): + def __init__(self, mol: PDBComplex, outputprefix: str = "report"): self.mol = mol self.excluded = self.mol.excluded self.xmlreport = self.construct_xml_tree() @@ -22,57 +22,75 @@ class StructureReport: def construct_xml_tree(self): """Construct the basic XML tree""" - report = et.Element('report') - plipversion = et.SubElement(report, 'plipversion') + report = et.Element("report") + plipversion = et.SubElement(report, "plipversion") plipversion.text = __version__ - date_of_creation = et.SubElement(report, 'date_of_creation') + date_of_creation = et.SubElement(report, "date_of_creation") date_of_creation.text = time.strftime("%Y/%m/%d") - citation_information = et.SubElement(report, 'citation_information') - citation_information.text = "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " \ - "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi: 10.1093/nar/gkv315" + citation_information = et.SubElement(report, "citation_information") + citation_information.text = ( + "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " + "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi: 10.1093/nar/gkv315" + ) - maintainer_information = et.SubElement(report, 'maintainer_information') + maintainer_information = et.SubElement(report, "maintainer_information") maintainer_information.text = config.__maintainer__ - mode = et.SubElement(report, 'mode') + mode = et.SubElement(report, "mode") if config.DNARECEPTOR: - mode.text = 'dna_receptor' + mode.text = "dna_receptor" else: - mode.text = 'default' - pdbid = et.SubElement(report, 'pdbid') + mode.text = "default" + pdbid = et.SubElement(report, "pdbid") pdbid.text = self.mol.pymol_name.upper() - filetype = et.SubElement(report, 'filetype') + filetype = et.SubElement(report, "filetype") filetype.text = self.mol.filetype.upper() - pdbfile = et.SubElement(report, 'pdbfile') - pdbfile.text = self.mol.sourcefiles['pdbcomplex'] - pdbfixes = et.SubElement(report, 'pdbfixes') - pdbfixes.text = str(self.mol.information['pdbfixes']) - filename = et.SubElement(report, 'filename') - filename.text = str(self.mol.sourcefiles.get('filename') or None) - exligs = et.SubElement(report, 'excluded_ligands') + pdbfile = et.SubElement(report, "pdbfile") + pdbfile.text = self.mol.sourcefiles["pdbcomplex"] + pdbfixes = et.SubElement(report, "pdbfixes") + pdbfixes.text = str(self.mol.information["pdbfixes"]) + filename = et.SubElement(report, "filename") + filename.text = str(self.mol.sourcefiles.get("filename") or None) + exligs = et.SubElement(report, "excluded_ligands") for i, exlig in enumerate(self.excluded): - e = et.SubElement(exligs, 'excluded_ligand', id=str(i + 1)) + e = et.SubElement(exligs, "excluded_ligand", id=str(i + 1)) e.text = exlig - covalent = et.SubElement(report, 'covlinkages') + covalent = et.SubElement(report, "covlinkages") for i, covlinkage in enumerate(self.mol.covalent): - e = et.SubElement(covalent, 'covlinkage', id=str(i + 1)) - f1 = et.SubElement(e, 'res1') - f2 = et.SubElement(e, 'res2') - f1.text = ":".join([covlinkage.id1, covlinkage.chain1, str(covlinkage.pos1)]) - f2.text = ":".join([covlinkage.id2, covlinkage.chain2, str(covlinkage.pos2)]) + e = et.SubElement(covalent, "covlinkage", id=str(i + 1)) + f1 = et.SubElement(e, "res1") + f2 = et.SubElement(e, "res2") + f1.text = ":".join( + [covlinkage.id1, covlinkage.chain1, str(covlinkage.pos1)] + ) + f2.text = ":".join( + [covlinkage.id2, covlinkage.chain2, str(covlinkage.pos2)] + ) return report def construct_txt_file(self): """Construct the header of the txt file""" - textlines = ['Prediction of noncovalent interactions for PDB structure %s' % self.mol.pymol_name.upper(), ] + textlines = [ + "Prediction of noncovalent interactions for PDB structure %s" + % self.mol.pymol_name.upper(), + ] textlines.append("=" * len(textlines[0])) - textlines.append('Created on %s using PLIP v%s\n' % (time.strftime("%Y/%m/%d"), __version__)) - textlines.append('If you are using PLIP in your work, please cite:') - textlines.append('Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler.') - textlines.append('Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi: 10.1093/nar/gkv315\n') + textlines.append( + "Created on %s using PLIP v%s\n" % (time.strftime("%Y/%m/%d"), __version__) + ) + textlines.append("If you are using PLIP in your work, please cite:") + textlines.append( + "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler." + ) + textlines.append( + "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi: 10.1093/nar/gkv315\n" + ) if len(self.excluded) != 0: - textlines.append('Excluded molecules as ligands: %s\n' % ','.join([lig for lig in self.excluded])) + textlines.append( + "Excluded molecules as ligands: %s\n" + % ",".join([lig for lig in self.excluded]) + ) if config.DNARECEPTOR: - textlines.append('DNA/RNA in structure was chosen as the receptor part.\n') + textlines.append("DNA/RNA in structure was chosen as the receptor part.\n") return textlines def get_bindingsite_data(self): @@ -80,21 +98,24 @@ class StructureReport: for i, site in enumerate(sorted(self.mol.interaction_sets)): s = self.mol.interaction_sets[site] bindingsite = BindingSiteReport(s).generate_xml() - bindingsite.set('id', str(i + 1)) - bindingsite.set('has_interactions', 'False') + bindingsite.set("id", str(i + 1)) + bindingsite.set("has_interactions", "False") self.xmlreport.insert(i + 1, bindingsite) for itype in BindingSiteReport(s).generate_txt(): self.txtreport.append(itype) if not s.no_interactions: - bindingsite.set('has_interactions', 'True') + bindingsite.set("has_interactions", "True") else: - self.txtreport.append('No interactions detected.') + self.txtreport.append("No interactions detected.") def write_xml(self, as_string=False): """Write the XML report""" if not as_string: - et.ElementTree(self.xmlreport).write('{}/{}.xml'.format(self.outpath, self.outputprefix), pretty_print=True, - xml_declaration=True) + et.ElementTree(self.xmlreport).write( + "{}/{}.xml".format(self.outpath, self.outputprefix), + pretty_print=True, + xml_declaration=True, + ) else: output = et.tostring(self.xmlreport, pretty_print=True) if config.RAWSTRING: @@ -104,10 +125,10 @@ class StructureReport: def write_txt(self, as_string=False): """Write the TXT report""" if not as_string: - with open('{}/{}.txt'.format(self.outpath, self.outputprefix), 'w') as f: - [f.write(textline + '\n') for textline in self.txtreport] + with open("{}/{}.txt".format(self.outpath, self.outputprefix), "w") as f: + [f.write(textline + "\n") for textline in self.txtreport] else: - output = '\n'.join(self.txtreport) + output = "\n".join(self.txtreport) if config.RAWSTRING: output = repr(output) print(output) @@ -126,7 +147,9 @@ class BindingSiteReport: self.ligand = self.complex.ligand self.bindingsite = self.complex.bindingsite self.output_path = self.complex.output_path - self.bsid = ':'.join([self.ligand.hetid, self.ligand.chain, str(self.ligand.position)]) + self.bsid = ":".join( + [self.ligand.hetid, self.ligand.chain, str(self.ligand.position)] + ) self.longname = self.ligand.longname self.ligtype = self.ligand.type self.bs_res = self.bindingsite.bs_res @@ -141,163 +164,408 @@ class BindingSiteReport: ############################ self.hydrophobic_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'DIST', 'LIGCARBONIDX', - 'PROTCARBONIDX', 'LIGCOO', - 'PROTCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "DIST", + "LIGCARBONIDX", + "PROTCARBONIDX", + "LIGCOO", + "PROTCOO", + ) self.hydrophobic_info = [] for hydroph in self.complex.hydrophobic_contacts: - self.hydrophobic_info.append((hydroph.resnr, hydroph.restype, hydroph.reschain, hydroph.resnr_l, - hydroph.restype_l, hydroph.reschain_l, '%.2f' % hydroph.distance, - hydroph.ligatom_orig_idx, hydroph.bsatom_orig_idx, hydroph.ligatom.coords, - hydroph.bsatom.coords)) + self.hydrophobic_info.append( + ( + hydroph.resnr, + hydroph.restype, + hydroph.reschain, + hydroph.resnr_l, + hydroph.restype_l, + hydroph.reschain_l, + "%.2f" % hydroph.distance, + hydroph.ligatom_orig_idx, + hydroph.bsatom_orig_idx, + hydroph.ligatom.coords, + hydroph.bsatom.coords, + ) + ) ################## # HYDROGEN BONDS # ################## self.hbond_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'SIDECHAIN', 'DIST_H-A', - 'DIST_D-A', - 'DON_ANGLE', - 'PROTISDON', 'DONORIDX', 'DONORTYPE', 'ACCEPTORIDX', 'ACCEPTORTYPE', 'LIGCOO', 'PROTCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "SIDECHAIN", + "DIST_H-A", + "DIST_D-A", + "DON_ANGLE", + "PROTISDON", + "DONORIDX", + "DONORTYPE", + "ACCEPTORIDX", + "ACCEPTORTYPE", + "LIGCOO", + "PROTCOO", + ) self.hbond_info = [] for hbond in self.complex.hbonds_pdon + self.complex.hbonds_ldon: - ligatom, protatom = (hbond.a, hbond.d) if hbond.protisdon else (hbond.d, hbond.a) - self.hbond_info.append((hbond.resnr, hbond.restype, hbond.reschain, hbond.resnr_l, hbond.restype_l, - hbond.reschain_l, hbond.sidechain, - '%.2f' % hbond.distance_ah, '%.2f' % hbond.distance_ad, '%.2f' % hbond.angle, - hbond.protisdon, hbond.d_orig_idx, hbond.dtype, hbond.a_orig_idx, hbond.atype, - ligatom.coords, protatom.coords)) + ligatom, protatom = ( + (hbond.a, hbond.d) if hbond.protisdon else (hbond.d, hbond.a) + ) + self.hbond_info.append( + ( + hbond.resnr, + hbond.restype, + hbond.reschain, + hbond.resnr_l, + hbond.restype_l, + hbond.reschain_l, + hbond.sidechain, + "%.2f" % hbond.distance_ah, + "%.2f" % hbond.distance_ad, + "%.2f" % hbond.angle, + hbond.protisdon, + hbond.d_orig_idx, + hbond.dtype, + hbond.a_orig_idx, + hbond.atype, + ligatom.coords, + protatom.coords, + ) + ) ################# # WATER-BRIDGES # ################# self.waterbridge_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'DIST_A-W', 'DIST_D-W', - 'DON_ANGLE', - 'WATER_ANGLE', - 'PROTISDON', 'DONOR_IDX', 'DONORTYPE', 'ACCEPTOR_IDX', 'ACCEPTORTYPE', 'WATER_IDX', - 'LIGCOO', 'PROTCOO', 'WATERCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "DIST_A-W", + "DIST_D-W", + "DON_ANGLE", + "WATER_ANGLE", + "PROTISDON", + "DONOR_IDX", + "DONORTYPE", + "ACCEPTOR_IDX", + "ACCEPTORTYPE", + "WATER_IDX", + "LIGCOO", + "PROTCOO", + "WATERCOO", + ) # The coordinate format is an exception here, since the interaction is not only between ligand and protein self.waterbridge_info = [] for wbridge in self.complex.water_bridges: - lig, prot = (wbridge.a, wbridge.d) if wbridge.protisdon else (wbridge.d, wbridge.a) - self.waterbridge_info.append((wbridge.resnr, wbridge.restype, wbridge.reschain, wbridge.resnr_l, - wbridge.restype_l, wbridge.reschain_l, - '%.2f' % wbridge.distance_aw, '%.2f' % wbridge.distance_dw, - '%.2f' % wbridge.d_angle, '%.2f' % wbridge.w_angle, wbridge.protisdon, - wbridge.d_orig_idx, wbridge.dtype, wbridge.a_orig_idx, wbridge.atype, - wbridge.water_orig_idx, lig.coords, prot.coords, wbridge.water.coords)) + lig, prot = ( + (wbridge.a, wbridge.d) if wbridge.protisdon else (wbridge.d, wbridge.a) + ) + self.waterbridge_info.append( + ( + wbridge.resnr, + wbridge.restype, + wbridge.reschain, + wbridge.resnr_l, + wbridge.restype_l, + wbridge.reschain_l, + "%.2f" % wbridge.distance_aw, + "%.2f" % wbridge.distance_dw, + "%.2f" % wbridge.d_angle, + "%.2f" % wbridge.w_angle, + wbridge.protisdon, + wbridge.d_orig_idx, + wbridge.dtype, + wbridge.a_orig_idx, + wbridge.atype, + wbridge.water_orig_idx, + lig.coords, + prot.coords, + wbridge.water.coords, + ) + ) ################ # SALT BRIDGES # ################ self.saltbridge_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'DIST', 'PROTISPOS', - 'LIG_GROUP', - 'LIG_IDX_LIST', - 'LIGCOO', 'PROTCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "DIST", + "PROTISPOS", + "LIG_GROUP", + "LIG_IDX_LIST", + "LIGCOO", + "PROTCOO", + ) self.saltbridge_info = [] for sb in self.complex.saltbridge_lneg + self.complex.saltbridge_pneg: if sb.protispos: - group, ids = sb.negative.fgroup, [str(x) for x in sb.negative.atoms_orig_idx] - self.saltbridge_info.append((sb.resnr, sb.restype, sb.reschain, sb.resnr_l, sb.restype_l, sb.reschain_l, - '%.2f' % sb.distance, sb.protispos, - group.capitalize(), ",".join(ids), - tuple(sb.negative.center), tuple(sb.positive.center))) + group, ids = ( + sb.negative.fgroup, + [str(x) for x in sb.negative.atoms_orig_idx], + ) + self.saltbridge_info.append( + ( + sb.resnr, + sb.restype, + sb.reschain, + sb.resnr_l, + sb.restype_l, + sb.reschain_l, + "%.2f" % sb.distance, + sb.protispos, + group.capitalize(), + ",".join(ids), + tuple(sb.negative.center), + tuple(sb.positive.center), + ) + ) else: - group, ids = sb.positive.fgroup, [str(x) for x in sb.positive.atoms_orig_idx] - self.saltbridge_info.append((sb.resnr, sb.restype, sb.reschain, sb.resnr_l, sb.restype_l, sb.reschain_l, - '%.2f' % sb.distance, sb.protispos, - group.capitalize(), ",".join(ids), - tuple(sb.positive.center), tuple(sb.negative.center))) + group, ids = ( + sb.positive.fgroup, + [str(x) for x in sb.positive.atoms_orig_idx], + ) + self.saltbridge_info.append( + ( + sb.resnr, + sb.restype, + sb.reschain, + sb.resnr_l, + sb.restype_l, + sb.reschain_l, + "%.2f" % sb.distance, + sb.protispos, + group.capitalize(), + ",".join(ids), + tuple(sb.positive.center), + tuple(sb.negative.center), + ) + ) ############### # PI-STACKING # ############### self.pistacking_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'CENTDIST', 'ANGLE', 'OFFSET', - 'TYPE', - 'LIG_IDX_LIST', 'LIGCOO', 'PROTCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "CENTDIST", + "ANGLE", + "OFFSET", + "TYPE", + "LIG_IDX_LIST", + "LIGCOO", + "PROTCOO", + ) self.pistacking_info = [] for stack in self.complex.pistacking: ids = [str(x) for x in stack.ligandring.atoms_orig_idx] - self.pistacking_info.append((stack.resnr, stack.restype, stack.reschain, stack.resnr_l, stack.restype_l, - stack.reschain_l, '%.2f' % stack.distance, - '%.2f' % stack.angle, '%.2f' % stack.offset, stack.type, ",".join(ids), - tuple(stack.ligandring.center), tuple(stack.proteinring.center))) + self.pistacking_info.append( + ( + stack.resnr, + stack.restype, + stack.reschain, + stack.resnr_l, + stack.restype_l, + stack.reschain_l, + "%.2f" % stack.distance, + "%.2f" % stack.angle, + "%.2f" % stack.offset, + stack.type, + ",".join(ids), + tuple(stack.ligandring.center), + tuple(stack.proteinring.center), + ) + ) ########################## # PI-CATION INTERACTIONS # ########################## self.pication_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'DIST', 'OFFSET', 'PROTCHARGED', - 'LIG_GROUP', - 'LIG_IDX_LIST', 'LIGCOO', 'PROTCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "DIST", + "OFFSET", + "PROTCHARGED", + "LIG_GROUP", + "LIG_IDX_LIST", + "LIGCOO", + "PROTCOO", + ) self.pication_info = [] for picat in self.complex.pication_laro + self.complex.pication_paro: if picat.protcharged: ids = [str(x) for x in picat.ring.atoms_orig_idx] - group = 'Aromatic' - self.pication_info.append((picat.resnr, picat.restype, picat.reschain, picat.resnr_l, picat.restype_l, - picat.reschain_l, '%.2f' % picat.distance, - '%.2f' % picat.offset, picat.protcharged, group, ",".join(ids), - tuple(picat.ring.center), tuple(picat.charge.center))) + group = "Aromatic" + self.pication_info.append( + ( + picat.resnr, + picat.restype, + picat.reschain, + picat.resnr_l, + picat.restype_l, + picat.reschain_l, + "%.2f" % picat.distance, + "%.2f" % picat.offset, + picat.protcharged, + group, + ",".join(ids), + tuple(picat.ring.center), + tuple(picat.charge.center), + ) + ) else: ids = [str(x) for x in picat.charge.atoms_orig_idx] group = picat.charge.fgroup - self.pication_info.append((picat.resnr, picat.restype, picat.reschain, picat.resnr_l, picat.restype_l, - picat.reschain_l, '%.2f' % picat.distance, - '%.2f' % picat.offset, picat.protcharged, group, ",".join(ids), - tuple(picat.charge.center), tuple(picat.ring.center))) + self.pication_info.append( + ( + picat.resnr, + picat.restype, + picat.reschain, + picat.resnr_l, + picat.restype_l, + picat.reschain_l, + "%.2f" % picat.distance, + "%.2f" % picat.offset, + picat.protcharged, + group, + ",".join(ids), + tuple(picat.charge.center), + tuple(picat.ring.center), + ) + ) ################# # HALOGEN BONDS # ################# self.halogen_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'SIDECHAIN', 'DIST', - 'DON_ANGLE', - 'ACC_ANGLE', - 'DON_IDX', 'DONORTYPE', 'ACC_IDX', 'ACCEPTORTYPE', 'LIGCOO', 'PROTCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "SIDECHAIN", + "DIST", + "DON_ANGLE", + "ACC_ANGLE", + "DON_IDX", + "DONORTYPE", + "ACC_IDX", + "ACCEPTORTYPE", + "LIGCOO", + "PROTCOO", + ) self.halogen_info = [] for halogen in self.complex.halogen_bonds: - self.halogen_info.append((halogen.resnr, halogen.restype, halogen.reschain, halogen.resnr_l, - halogen.restype_l, halogen.reschain_l, halogen.sidechain, - '%.2f' % halogen.distance, '%.2f' % halogen.don_angle, '%.2f' % halogen.acc_angle, - halogen.don_orig_idx, halogen.donortype, - halogen.acc_orig_idx, halogen.acctype, - halogen.acc.o.coords, halogen.don.x.coords)) + self.halogen_info.append( + ( + halogen.resnr, + halogen.restype, + halogen.reschain, + halogen.resnr_l, + halogen.restype_l, + halogen.reschain_l, + halogen.sidechain, + "%.2f" % halogen.distance, + "%.2f" % halogen.don_angle, + "%.2f" % halogen.acc_angle, + halogen.don_orig_idx, + halogen.donortype, + halogen.acc_orig_idx, + halogen.acctype, + halogen.acc.o.coords, + halogen.don.x.coords, + ) + ) ################### # METAL COMPLEXES # ################### self.metal_features = ( - 'RESNR', 'RESTYPE', 'RESCHAIN', 'RESNR_LIG', 'RESTYPE_LIG', 'RESCHAIN_LIG', 'METAL_IDX', 'METAL_TYPE', - 'TARGET_IDX', 'TARGET_TYPE', - 'COORDINATION', 'DIST', 'LOCATION', 'RMS', 'GEOMETRY', 'COMPLEXNUM', 'METALCOO', - 'TARGETCOO') + "RESNR", + "RESTYPE", + "RESCHAIN", + "RESNR_LIG", + "RESTYPE_LIG", + "RESCHAIN_LIG", + "METAL_IDX", + "METAL_TYPE", + "TARGET_IDX", + "TARGET_TYPE", + "COORDINATION", + "DIST", + "LOCATION", + "RMS", + "GEOMETRY", + "COMPLEXNUM", + "METALCOO", + "TARGETCOO", + ) self.metal_info = [] # Coordinate format here is non-standard since the interaction partner can be either ligand or protein for m in self.complex.metal_complexes: self.metal_info.append( - (m.resnr, m.restype, m.reschain, m.resnr_l, m.restype_l, m.reschain_l, m.metal_orig_idx, m.metal_type, - m.target_orig_idx, m.target_type, m.coordination_num, '%.2f' % m.distance, - m.location, '%.2f' % m.rms, m.geometry, str(m.complexnum), m.metal.coords, - m.target.atom.coords)) + ( + m.resnr, + m.restype, + m.reschain, + m.resnr_l, + m.restype_l, + m.reschain_l, + m.metal_orig_idx, + m.metal_type, + m.target_orig_idx, + m.target_type, + m.coordination_num, + "%.2f" % m.distance, + m.location, + "%.2f" % m.rms, + m.geometry, + str(m.complexnum), + m.metal.coords, + m.target.atom.coords, + ) + ) def write_section(self, name, features, info, f): """Provides formatting for one section (e.g. hydrogen bonds)""" if not len(info) == 0: - f.write('\n\n### %s ###\n' % name) - f.write('%s\n' % '\t'.join(features)) + f.write("\n\n### %s ###\n" % name) + f.write("%s\n" % "\t".join(features)) for line in info: - f.write('%s\n' % '\t'.join(map(str, line))) + f.write("%s\n" % "\t".join(map(str, line))) def rst_table(self, array): """Given an array, the function formats and returns and table in rST format.""" @@ -309,62 +577,77 @@ class BindingSiteReport: cell_dict[j] = [] cell_dict[j].append(val) for item in cell_dict: - cell_dict[item] = max([len(x) for x in cell_dict[item]]) + 1 # Contains adapted width for each column + cell_dict[item] = ( + max([len(x) for x in cell_dict[item]]) + 1 + ) # Contains adapted width for each column # Format top line num_cols = len(array[0]) - form = '+' + form = "+" for col in range(num_cols): - form += (cell_dict[col] + 1) * '-' - form += '+' - form += '\n' + form += (cell_dict[col] + 1) * "-" + form += "+" + form += "\n" # Format values for i, row in enumerate(array): - form += '| ' + form += "| " for j, val in enumerate(row): cell_width = cell_dict[j] - form += str(val) + (cell_width - len(val)) * ' ' + '| ' + form += str(val) + (cell_width - len(val)) * " " + "| " form.rstrip() - form += '\n' + form += "\n" # Seperation lines - form += '+' + form += "+" if i == 0: - sign = '=' + sign = "=" else: - sign = '-' + sign = "-" for col in range(num_cols): form += (cell_dict[col] + 1) * sign - form += '+' - form += '\n' + form += "+" + form += "\n" return form def generate_txt(self): """Generates an flat text report for a single binding site""" txt = [] - titletext = '%s (%s) - %s' % (self.bsid, self.longname, self.ligtype) + titletext = "%s (%s) - %s" % (self.bsid, self.longname, self.ligtype) txt.append(titletext) for i, member in enumerate(self.lig_members[1:]): - txt.append(' + %s' % ":".join(str(element) for element in member)) + txt.append(" + %s" % ":".join(str(element) for element in member)) txt.append("-" * len(titletext)) - txt.append("Interacting chain(s): %s\n" % ','.join([chain for chain in self.interacting_chains])) - for section in [['Hydrophobic Interactions', self.hydrophobic_features, self.hydrophobic_info], - ['Hydrogen Bonds', self.hbond_features, self.hbond_info], - ['Water Bridges', self.waterbridge_features, self.waterbridge_info], - ['Salt Bridges', self.saltbridge_features, self.saltbridge_info], - ['pi-Stacking', self.pistacking_features, self.pistacking_info], - ['pi-Cation Interactions', self.pication_features, self.pication_info], - ['Halogen Bonds', self.halogen_features, self.halogen_info], - ['Metal Complexes', self.metal_features, self.metal_info]]: + txt.append( + "Interacting chain(s): %s\n" + % ",".join([chain for chain in self.interacting_chains]) + ) + for section in [ + [ + "Hydrophobic Interactions", + self.hydrophobic_features, + self.hydrophobic_info, + ], + ["Hydrogen Bonds", self.hbond_features, self.hbond_info], + ["Water Bridges", self.waterbridge_features, self.waterbridge_info], + ["Salt Bridges", self.saltbridge_features, self.saltbridge_info], + ["pi-Stacking", self.pistacking_features, self.pistacking_info], + ["pi-Cation Interactions", self.pication_features, self.pication_info], + ["Halogen Bonds", self.halogen_features, self.halogen_info], + ["Metal Complexes", self.metal_features, self.metal_info], + ]: iname, features, interaction_information = section # Sort results first by res number, then by distance and finally ligand coordinates to get a unique order - interaction_information = sorted(interaction_information, key=itemgetter(0, 2, -2)) + interaction_information = sorted( + interaction_information, key=itemgetter(0, 2, -2) + ) if not len(interaction_information) == 0: - txt.append('\n**%s**' % iname) - table = [features, ] + txt.append("\n**%s**" % iname) + table = [ + features, + ] for single_contact in interaction_information: values = [] for x in single_contact: @@ -376,122 +659,185 @@ class BindingSiteReport: values.append(str(x)) table.append(values) txt.append(self.rst_table(table)) - txt.append('\n') + txt.append("\n") return txt def generate_xml(self): """Generates an XML-formatted report for a single binding site""" - report = et.Element('bindingsite') - identifiers = et.SubElement(report, 'identifiers') - longname = et.SubElement(identifiers, 'longname') - ligtype = et.SubElement(identifiers, 'ligtype') - hetid = et.SubElement(identifiers, 'hetid') - chain = et.SubElement(identifiers, 'chain') - position = et.SubElement(identifiers, 'position') - composite = et.SubElement(identifiers, 'composite') - members = et.SubElement(identifiers, 'members') - smiles = et.SubElement(identifiers, 'smiles') - inchikey = et.SubElement(identifiers, 'inchikey') + report = et.Element("bindingsite") + identifiers = et.SubElement(report, "identifiers") + longname = et.SubElement(identifiers, "longname") + ligtype = et.SubElement(identifiers, "ligtype") + hetid = et.SubElement(identifiers, "hetid") + chain = et.SubElement(identifiers, "chain") + position = et.SubElement(identifiers, "position") + composite = et.SubElement(identifiers, "composite") + members = et.SubElement(identifiers, "members") + smiles = et.SubElement(identifiers, "smiles") + inchikey = et.SubElement(identifiers, "inchikey") # Ligand properties. Number of (unpaired) functional atoms and rings. - lig_properties = et.SubElement(report, 'lig_properties') - num_heavy_atoms = et.SubElement(lig_properties, 'num_heavy_atoms') - num_hbd = et.SubElement(lig_properties, 'num_hbd') + lig_properties = et.SubElement(report, "lig_properties") + num_heavy_atoms = et.SubElement(lig_properties, "num_heavy_atoms") + num_hbd = et.SubElement(lig_properties, "num_hbd") num_hbd.text = str(self.ligand.num_hbd) - num_unpaired_hbd = et.SubElement(lig_properties, 'num_unpaired_hbd') + num_unpaired_hbd = et.SubElement(lig_properties, "num_unpaired_hbd") num_unpaired_hbd.text = str(self.complex.num_unpaired_hbd) - num_hba = et.SubElement(lig_properties, 'num_hba') + num_hba = et.SubElement(lig_properties, "num_hba") num_hba.text = str(self.ligand.num_hba) - num_unpaired_hba = et.SubElement(lig_properties, 'num_unpaired_hba') + num_unpaired_hba = et.SubElement(lig_properties, "num_unpaired_hba") num_unpaired_hba.text = str(self.complex.num_unpaired_hba) - num_hal = et.SubElement(lig_properties, 'num_hal') + num_hal = et.SubElement(lig_properties, "num_hal") num_hal.text = str(self.ligand.num_hal) - num_unpaired_hal = et.SubElement(lig_properties, 'num_unpaired_hal') + num_unpaired_hal = et.SubElement(lig_properties, "num_unpaired_hal") num_unpaired_hal.text = str(self.complex.num_unpaired_hal) - num_aromatic_rings = et.SubElement(lig_properties, 'num_aromatic_rings') + num_aromatic_rings = et.SubElement(lig_properties, "num_aromatic_rings") num_aromatic_rings.text = str(self.ligand.num_rings) - num_rot_bonds = et.SubElement(lig_properties, 'num_rotatable_bonds') + num_rot_bonds = et.SubElement(lig_properties, "num_rotatable_bonds") num_rot_bonds.text = str(self.ligand.num_rot_bonds) - molweight = et.SubElement(lig_properties, 'molweight') + molweight = et.SubElement(lig_properties, "molweight") molweight.text = str(self.ligand.molweight) - logp = et.SubElement(lig_properties, 'logp') + logp = et.SubElement(lig_properties, "logp") logp.text = str(self.ligand.logp) - ichains = et.SubElement(report, 'interacting_chains') - bsresidues = et.SubElement(report, 'bs_residues') + ichains = et.SubElement(report, "interacting_chains") + bsresidues = et.SubElement(report, "bs_residues") for i, ichain in enumerate(self.interacting_chains): - c = et.SubElement(ichains, 'interacting_chain', id=str(i + 1)) + c = et.SubElement(ichains, "interacting_chain", id=str(i + 1)) c.text = ichain for i, bsres in enumerate(self.bs_res): - contact = 'True' if bsres in self.bs_res_interacting else 'False' - distance = '%.1f' % self.min_dist[bsres][0] + contact = "True" if bsres in self.bs_res_interacting else "False" + distance = "%.1f" % self.min_dist[bsres][0] aatype = self.min_dist[bsres][1] - c = et.SubElement(bsresidues, 'bs_residue', id=str(i + 1), contact=contact, min_dist=distance, aa=aatype) + c = et.SubElement( + bsresidues, + "bs_residue", + id=str(i + 1), + contact=contact, + min_dist=distance, + aa=aatype, + ) c.text = bsres - hetid.text, chain.text, position.text = self.ligand.hetid, self.ligand.chain, str(self.ligand.position) - composite.text = 'True' if len(self.lig_members) > 1 else 'False' + hetid.text, chain.text, position.text = ( + self.ligand.hetid, + self.ligand.chain, + str(self.ligand.position), + ) + composite.text = "True" if len(self.lig_members) > 1 else "False" longname.text = self.longname ligtype.text = self.ligtype smiles.text = self.ligand.smiles inchikey.text = self.ligand.inchikey - num_heavy_atoms.text = str(self.ligand.heavy_atoms) # Number of heavy atoms in ligand + num_heavy_atoms.text = str( + self.ligand.heavy_atoms + ) # Number of heavy atoms in ligand for i, member in enumerate(self.lig_members): bsid = ":".join(str(element) for element in member) - m = et.SubElement(members, 'member', id=str(i + 1)) + m = et.SubElement(members, "member", id=str(i + 1)) m.text = bsid - interactions = et.SubElement(report, 'interactions') + interactions = et.SubElement(report, "interactions") def format_interactions(element_name, features, interaction_information): """Returns a formatted element with interaction information.""" interaction = et.Element(element_name) # Sort results first by res number, then by distance and finally ligand coordinates to get a unique order - interaction_information = sorted(interaction_information, key=itemgetter(0, 2, -2)) + interaction_information = sorted( + interaction_information, key=itemgetter(0, 2, -2) + ) for j, single_contact in enumerate(interaction_information): - if not element_name == 'metal_complexes': - new_contact = et.SubElement(interaction, element_name[:-1], id=str(j + 1)) + if not element_name == "metal_complexes": + new_contact = et.SubElement( + interaction, element_name[:-1], id=str(j + 1) + ) else: # Metal Complex[es] - new_contact = et.SubElement(interaction, element_name[:-2], id=str(j + 1)) + new_contact = et.SubElement( + interaction, element_name[:-2], id=str(j + 1) + ) for i, feature in enumerate(single_contact): # Just assign the value unless it's an atom list, use subelements in this case - if features[i] == 'LIG_IDX_LIST': + if features[i] == "LIG_IDX_LIST": feat = et.SubElement(new_contact, features[i].lower()) - for k, atm_idx in enumerate(feature.split(',')): - idx = et.SubElement(feat, 'idx', id=str(k + 1)) + for k, atm_idx in enumerate(feature.split(",")): + idx = et.SubElement(feat, "idx", id=str(k + 1)) idx.text = str(atm_idx) - elif features[i].endswith('COO'): + elif features[i].endswith("COO"): feat = et.SubElement(new_contact, features[i].lower()) xc, yc, zc = feature - xcoo = et.SubElement(feat, 'x') - xcoo.text = '%.3f' % xc - ycoo = et.SubElement(feat, 'y') - ycoo.text = '%.3f' % yc - zcoo = et.SubElement(feat, 'z') - zcoo.text = '%.3f' % zc + xcoo = et.SubElement(feat, "x") + xcoo.text = "%.3f" % xc + ycoo = et.SubElement(feat, "y") + ycoo.text = "%.3f" % yc + zcoo = et.SubElement(feat, "z") + zcoo.text = "%.3f" % zc else: feat = et.SubElement(new_contact, features[i].lower()) feat.text = str(feature) return interaction - interactions.append(format_interactions('hydrophobic_interactions', self.hydrophobic_features, - self.hydrophobic_info)) - interactions.append(format_interactions('hydrogen_bonds', self.hbond_features, self.hbond_info)) - interactions.append(format_interactions('water_bridges', self.waterbridge_features, self.waterbridge_info)) - interactions.append(format_interactions('salt_bridges', self.saltbridge_features, self.saltbridge_info)) - interactions.append(format_interactions('pi_stacks', self.pistacking_features, self.pistacking_info)) - interactions.append(format_interactions('pi_cation_interactions', self.pication_features, self.pication_info)) - interactions.append(format_interactions('halogen_bonds', self.halogen_features, self.halogen_info)) - interactions.append(format_interactions('metal_complexes', self.metal_features, self.metal_info)) + interactions.append( + format_interactions( + "hydrophobic_interactions", + self.hydrophobic_features, + self.hydrophobic_info, + ) + ) + interactions.append( + format_interactions("hydrogen_bonds", self.hbond_features, self.hbond_info) + ) + interactions.append( + format_interactions( + "water_bridges", self.waterbridge_features, self.waterbridge_info + ) + ) + interactions.append( + format_interactions( + "salt_bridges", self.saltbridge_features, self.saltbridge_info + ) + ) + interactions.append( + format_interactions( + "pi_stacks", self.pistacking_features, self.pistacking_info + ) + ) + interactions.append( + format_interactions( + "pi_cation_interactions", self.pication_features, self.pication_info + ) + ) + interactions.append( + format_interactions( + "halogen_bonds", self.halogen_features, self.halogen_info + ) + ) + interactions.append( + format_interactions("metal_complexes", self.metal_features, self.metal_info) + ) # Mappings - mappings = et.SubElement(report, 'mappings') - smiles_to_pdb = et.SubElement(mappings, 'smiles_to_pdb') # SMILES numbering to PDB file numbering (atoms) - bsid = ':'.join([self.ligand.hetid, self.ligand.chain, str(self.ligand.position)]) + mappings = et.SubElement(report, "mappings") + smiles_to_pdb = et.SubElement( + mappings, "smiles_to_pdb" + ) # SMILES numbering to PDB file numbering (atoms) + bsid = ":".join( + [self.ligand.hetid, self.ligand.chain, str(self.ligand.position)] + ) if self.ligand.atomorder is not None: - smiles_to_pdb_map = [(key, self.ligand.Mapper.mapid(self.ligand.can_to_pdb[key], - mtype='protein', bsid=bsid)) for key in - self.ligand.can_to_pdb] - smiles_to_pdb.text = ','.join([str(mapping[0]) + ':' + str(mapping[1]) for mapping in smiles_to_pdb_map]) + smiles_to_pdb_map = [ + ( + key, + self.ligand.Mapper.mapid( + self.ligand.can_to_pdb[key], mtype="protein", bsid=bsid + ), + ) + for key in self.ligand.can_to_pdb + ] + smiles_to_pdb.text = ",".join( + [ + str(mapping[0]) + ":" + str(mapping[1]) + for mapping in smiles_to_pdb_map + ] + ) else: - smiles_to_pdb.text = '' + smiles_to_pdb.text = "" return report diff --git a/plip/exchange/webservices.py b/plip/exchange/webservices.py index 0c8cd3e..61cb6f5 100644 --- a/plip/exchange/webservices.py +++ b/plip/exchange/webservices.py @@ -11,44 +11,50 @@ logger = logger.get_logger() def check_pdb_status(pdbid): """Returns the status and up-to-date entry in the PDB for a given PDB ID""" - url = 'http://www.rcsb.org/pdb/rest/idStatus?structureId=%s' % pdbid + url = "http://www.rcsb.org/pdb/rest/idStatus?structureId=%s" % pdbid xmlf = urlopen(url) xml = et.parse(xmlf) xmlf.close() status = None current_pdbid = pdbid - for df in xml.xpath('//record'): - status = df.attrib['status'] # Status of an entry can be either 'UNKWOWN', 'OBSOLETE', or 'CURRENT' - if status == 'OBSOLETE': - current_pdbid = df.attrib['replacedBy'] # Contains the up-to-date PDB ID for obsolete entries + for df in xml.xpath("//record"): + status = df.attrib[ + "status" + ] # Status of an entry can be either 'UNKWOWN', 'OBSOLETE', or 'CURRENT' + if status == "OBSOLETE": + current_pdbid = df.attrib[ + "replacedBy" + ] # Contains the up-to-date PDB ID for obsolete entries return [status, current_pdbid.lower()] def fetch_pdb(pdbid): """Get the newest entry from the RCSB server for the given PDB ID. Exits with '1' if PDB ID is invalid.""" pdbid = pdbid.lower() - logger.info(f'checking status of PDB-ID {pdbid}') + logger.info(f"checking status of PDB-ID {pdbid}") state, current_entry = check_pdb_status(pdbid) # Get state and current PDB ID - if state == 'OBSOLETE': - logger.info(f'entry is obsolete, getting {current_entry} instead') - elif state == 'CURRENT': - logger.info('entry is up-to-date') - elif state == 'UNKNOWN': - logger.error('invalid PDB-ID (entry does not exist on PDB server)') + if state == "OBSOLETE": + logger.info(f"entry is obsolete, getting {current_entry} instead") + elif state == "CURRENT": + logger.info("entry is up-to-date") + elif state == "UNKNOWN": + logger.error("invalid PDB-ID (entry does not exist on PDB server)") sys.exit(1) - logger.info('downloading file from PDB') + logger.info("downloading file from PDB") # get URL for current entry # @todo needs update to react properly on response codes of RCSB servers - pdburl = f'http://www.rcsb.org/pdb/files/{current_entry}.pdb' + pdburl = f"http://www.rcsb.org/pdb/files/{current_entry}.pdb" try: pdbfile = urlopen(pdburl).read().decode() # If no PDB file is available, a text is now shown with "We're sorry, but ..." # Could previously be distinguished by an HTTP error - if 'sorry' in pdbfile: - logger.error('no file in PDB format available from wwPDB for the given PDB ID.') + if "sorry" in pdbfile: + logger.error( + "no file in PDB format available from wwPDB for the given PDB ID." + ) sys.exit(1) except HTTPError: - logger.error('no file in PDB format available from wwPDB for the given PDB ID') + logger.error("no file in PDB format available from wwPDB for the given PDB ID") sys.exit(1) return [pdbfile, current_entry] diff --git a/plip/exchange/xml.py b/plip/exchange/xml.py index a5eee21..92399c2 100644 --- a/plip/exchange/xml.py +++ b/plip/exchange/xml.py @@ -8,16 +8,16 @@ class XMLStorage: def getdata(self, tree, location, force_string=False): """Gets XML data from a specific element and handles types.""" - found = tree.xpath('%s/text()' % location) + found = tree.xpath("%s/text()" % location) if not found: return None else: data = found[0] if force_string: return data - if data == 'True': + if data == "True": return True - elif data == 'False': + elif data == "False": return False else: try: @@ -31,22 +31,26 @@ class XMLStorage: def getcoordinates(self, tree, location): """Gets coordinates from a specific element in PLIP XML""" - return tuple(float(x) for x in tree.xpath('.//%s/*/text()' % location)) + return tuple(float(x) for x in tree.xpath(".//%s/*/text()" % location)) class Interaction(XMLStorage): """Stores information on a specific interaction type""" def __init__(self, interaction_part): - self.id = interaction_part.get('id') - self.resnr = self.getdata(interaction_part, 'resnr') - self.restype = self.getdata(interaction_part, 'restype', force_string=True) - self.reschain = self.getdata(interaction_part, 'reschain', force_string=True) - self.resnr_lig = self.getdata(interaction_part, 'resnr_lig') - self.restype_lig = self.getdata(interaction_part, 'restype_lig', force_string=True) - self.reschain_lig = self.getdata(interaction_part, 'reschain_lig', force_string=True) - self.ligcoo = self.getcoordinates(interaction_part, 'ligcoo') - self.protcoo = self.getcoordinates(interaction_part, 'protcoo') + self.id = interaction_part.get("id") + self.resnr = self.getdata(interaction_part, "resnr") + self.restype = self.getdata(interaction_part, "restype", force_string=True) + self.reschain = self.getdata(interaction_part, "reschain", force_string=True) + self.resnr_lig = self.getdata(interaction_part, "resnr_lig") + self.restype_lig = self.getdata( + interaction_part, "restype_lig", force_string=True + ) + self.reschain_lig = self.getdata( + interaction_part, "reschain_lig", force_string=True + ) + self.ligcoo = self.getcoordinates(interaction_part, "ligcoo") + self.protcoo = self.getcoordinates(interaction_part, "protcoo") class HydrophobicInteraction(Interaction): @@ -54,9 +58,9 @@ class HydrophobicInteraction(Interaction): def __init__(self, hydrophobic_part): Interaction.__init__(self, hydrophobic_part) - self.dist = self.getdata(hydrophobic_part, 'dist') - self.ligcarbonidx = self.getdata(hydrophobic_part, 'ligcarbonidx') - self.protcarbonidx = self.getdata(hydrophobic_part, 'protcarbonidx') + self.dist = self.getdata(hydrophobic_part, "dist") + self.ligcarbonidx = self.getdata(hydrophobic_part, "ligcarbonidx") + self.protcarbonidx = self.getdata(hydrophobic_part, "protcarbonidx") class HydrogenBond(Interaction): @@ -64,17 +68,17 @@ class HydrogenBond(Interaction): def __init__(self, hbond_part): Interaction.__init__(self, hbond_part) - self.sidechain = self.getdata(hbond_part, 'sidechain') - self.dist_h_a = self.getdata(hbond_part, 'dist_h-a') - self.dist_d_a = self.getdata(hbond_part, 'dist_d-a') + self.sidechain = self.getdata(hbond_part, "sidechain") + self.dist_h_a = self.getdata(hbond_part, "dist_h-a") + self.dist_d_a = self.getdata(hbond_part, "dist_d-a") self.dist = self.dist_d_a - self.don_angle = self.getdata(hbond_part, 'don_angle') - self.protisdon = self.getdata(hbond_part, 'protisdon') - self.donoridx = self.getdata(hbond_part, 'donoridx') - self.acceptoridx = self.getdata(hbond_part, 'acceptoridx') - self.donortype = self.getdata(hbond_part, 'donortype', force_string=True) - self.acceptortype = self.getdata(hbond_part, 'acceptortype', force_string=True) + self.don_angle = self.getdata(hbond_part, "don_angle") + self.protisdon = self.getdata(hbond_part, "protisdon") + self.donoridx = self.getdata(hbond_part, "donoridx") + self.acceptoridx = self.getdata(hbond_part, "acceptoridx") + self.donortype = self.getdata(hbond_part, "donortype", force_string=True) + self.acceptortype = self.getdata(hbond_part, "acceptortype", force_string=True) class WaterBridge(Interaction): @@ -82,19 +86,21 @@ class WaterBridge(Interaction): def __init__(self, wbridge_part): Interaction.__init__(self, wbridge_part) - self.dist_a_w = self.getdata(wbridge_part, 'dist_a-w') - self.dist_d_w = self.getdata(wbridge_part, 'dist_d-w') - self.don_angle = self.getdata(wbridge_part, 'don_angle') - self.water_angle = self.getdata(wbridge_part, 'water_angle') - self.protisdon = self.getdata(wbridge_part, 'protisdon') + self.dist_a_w = self.getdata(wbridge_part, "dist_a-w") + self.dist_d_w = self.getdata(wbridge_part, "dist_d-w") + self.don_angle = self.getdata(wbridge_part, "don_angle") + self.water_angle = self.getdata(wbridge_part, "water_angle") + self.protisdon = self.getdata(wbridge_part, "protisdon") self.dist = self.dist_a_w if self.protisdon else self.dist_d_w - self.donor_idx = self.getdata(wbridge_part, 'donor_idx') - self.acceptor_idx = self.getdata(wbridge_part, 'acceptor_idx') - self.donortype = self.getdata(wbridge_part, 'donortype', force_string=True) - self.acceptortype = self.getdata(wbridge_part, 'acceptortype', force_string=True) - self.water_idx = self.getdata(wbridge_part, 'water_idx') - self.watercoo = self.getcoordinates(wbridge_part, 'watercoo') + self.donor_idx = self.getdata(wbridge_part, "donor_idx") + self.acceptor_idx = self.getdata(wbridge_part, "acceptor_idx") + self.donortype = self.getdata(wbridge_part, "donortype", force_string=True) + self.acceptortype = self.getdata( + wbridge_part, "acceptortype", force_string=True + ) + self.water_idx = self.getdata(wbridge_part, "water_idx") + self.watercoo = self.getcoordinates(wbridge_part, "watercoo") class SaltBridge(Interaction): @@ -102,11 +108,12 @@ class SaltBridge(Interaction): def __init__(self, sbridge_part): Interaction.__init__(self, sbridge_part) - self.dist = self.getdata(sbridge_part, 'dist') - self.protispos = self.getdata(sbridge_part, 'protispos') - self.lig_group = self.getdata(sbridge_part, 'lig_group', force_string=True) - self.lig_idx_list = [int(tagpart.text) for tagpart in - sbridge_part.xpath('lig_idx_list/idx')] + self.dist = self.getdata(sbridge_part, "dist") + self.protispos = self.getdata(sbridge_part, "protispos") + self.lig_group = self.getdata(sbridge_part, "lig_group", force_string=True) + self.lig_idx_list = [ + int(tagpart.text) for tagpart in sbridge_part.xpath("lig_idx_list/idx") + ] class PiStacking(Interaction): @@ -114,13 +121,14 @@ class PiStacking(Interaction): def __init__(self, pistack_part): Interaction.__init__(self, pistack_part) - self.centdist = self.getdata(pistack_part, 'centdist') + self.centdist = self.getdata(pistack_part, "centdist") self.dist = self.centdist - self.angle = self.getdata(pistack_part, 'angle') - self.offset = self.getdata(pistack_part, 'offset') - self.type = self.getdata(pistack_part, 'type') - self.lig_idx_list = [int(tagpart.text) for tagpart in - pistack_part.xpath('lig_idx_list/idx')] + self.angle = self.getdata(pistack_part, "angle") + self.offset = self.getdata(pistack_part, "offset") + self.type = self.getdata(pistack_part, "type") + self.lig_idx_list = [ + int(tagpart.text) for tagpart in pistack_part.xpath("lig_idx_list/idx") + ] class PiCation(Interaction): @@ -128,11 +136,13 @@ class PiCation(Interaction): def __init__(self, pication_part): Interaction.__init__(self, pication_part) - self.dist = self.getdata(pication_part, 'dist') - self.offset = self.getdata(pication_part, 'offset') - self.protcharged = self.getdata(pication_part, 'protcharged') - self.lig_group = self.getdata(pication_part, 'lig_group') - self.lig_idx_list = [int(tag.text) for tag in pication_part.xpath('.//lig_idx_list/idx')] + self.dist = self.getdata(pication_part, "dist") + self.offset = self.getdata(pication_part, "offset") + self.protcharged = self.getdata(pication_part, "protcharged") + self.lig_group = self.getdata(pication_part, "lig_group") + self.lig_idx_list = [ + int(tag.text) for tag in pication_part.xpath(".//lig_idx_list/idx") + ] class HalogenBond(Interaction): @@ -140,14 +150,16 @@ class HalogenBond(Interaction): def __init__(self, halogen_part): Interaction.__init__(self, halogen_part) - self.dist = self.getdata(halogen_part, 'dist') - self.don_angle = self.getdata(halogen_part, 'don_angle') - self.acc_angle = self.getdata(halogen_part, 'acc_angle') - self.donortype = self.getdata(halogen_part, 'donortype', force_string=True) - self.acceptortype = self.getdata(halogen_part, 'acceptortype', force_string=True) - self.don_idx = self.getdata(halogen_part, 'don_idx') - self.acc_idx = self.getdata(halogen_part, 'acc_idx') - self.sidechain = self.getdata(halogen_part, 'sidechain') + self.dist = self.getdata(halogen_part, "dist") + self.don_angle = self.getdata(halogen_part, "don_angle") + self.acc_angle = self.getdata(halogen_part, "acc_angle") + self.donortype = self.getdata(halogen_part, "donortype", force_string=True) + self.acceptortype = self.getdata( + halogen_part, "acceptortype", force_string=True + ) + self.don_idx = self.getdata(halogen_part, "don_idx") + self.acc_idx = self.getdata(halogen_part, "acc_idx") + self.sidechain = self.getdata(halogen_part, "sidechain") class MetalComplex(Interaction): @@ -155,18 +167,22 @@ class MetalComplex(Interaction): def __init__(self, metalcomplex_part): Interaction.__init__(self, metalcomplex_part) - self.metal_idx = self.getdata(metalcomplex_part, 'metal_idx') - self.metal_type = self.getdata(metalcomplex_part, 'metal_type', force_string=True) - self.target_idx = self.getdata(metalcomplex_part, 'target_idx') - self.target_type = self.getdata(metalcomplex_part, 'target_type', force_string=True) - self.coordination = self.getdata(metalcomplex_part, 'coordination') - self.dist = self.getdata(metalcomplex_part, 'dist') - self.location = self.getdata(metalcomplex_part, 'location', force_string=True) - self.rms = self.getdata(metalcomplex_part, 'rms') - self.geometry = self.getdata(metalcomplex_part, 'geometry', force_string=True) - self.complexnum = self.getdata(metalcomplex_part, 'complexnum') - self.targetcoo = self.getcoordinates(metalcomplex_part, 'targetcoo') - self.metalcoo = self.getcoordinates(metalcomplex_part, 'metalcoo') + self.metal_idx = self.getdata(metalcomplex_part, "metal_idx") + self.metal_type = self.getdata( + metalcomplex_part, "metal_type", force_string=True + ) + self.target_idx = self.getdata(metalcomplex_part, "target_idx") + self.target_type = self.getdata( + metalcomplex_part, "target_type", force_string=True + ) + self.coordination = self.getdata(metalcomplex_part, "coordination") + self.dist = self.getdata(metalcomplex_part, "dist") + self.location = self.getdata(metalcomplex_part, "location", force_string=True) + self.rms = self.getdata(metalcomplex_part, "rms") + self.geometry = self.getdata(metalcomplex_part, "geometry", force_string=True) + self.complexnum = self.getdata(metalcomplex_part, "complexnum") + self.targetcoo = self.getcoordinates(metalcomplex_part, "targetcoo") + self.metalcoo = self.getcoordinates(metalcomplex_part, "metalcoo") class BSite(XMLStorage): @@ -175,63 +191,107 @@ class BSite(XMLStorage): def __init__(self, bindingsite, pdbid): self.bindingsite = bindingsite self.pdbid = pdbid - self.bsid = ":".join(bindingsite.xpath('identifiers/*/text()')[2:5]) + self.bsid = ":".join(bindingsite.xpath("identifiers/*/text()")[2:5]) self.uniqueid = ":".join([self.pdbid, self.bsid]) - self.hetid = self.getdata(bindingsite, 'identifiers/hetid', force_string=True) - self.longname = self.getdata(bindingsite, 'identifiers/longname', force_string=True) - self.ligtype = self.getdata(bindingsite, 'identifiers/ligtype', force_string=True) - self.smiles = self.getdata(bindingsite, 'identifiers/smiles', force_string=True) - self.inchikey = self.getdata(bindingsite, 'identifiers/inchikey', force_string=True) - self.position = self.getdata(bindingsite, 'identifiers/position') - self.chain = self.getdata(bindingsite, 'identifiers/chain', force_string=True) + self.hetid = self.getdata(bindingsite, "identifiers/hetid", force_string=True) + self.longname = self.getdata( + bindingsite, "identifiers/longname", force_string=True + ) + self.ligtype = self.getdata( + bindingsite, "identifiers/ligtype", force_string=True + ) + self.smiles = self.getdata(bindingsite, "identifiers/smiles", force_string=True) + self.inchikey = self.getdata( + bindingsite, "identifiers/inchikey", force_string=True + ) + self.position = self.getdata(bindingsite, "identifiers/position") + self.chain = self.getdata(bindingsite, "identifiers/chain", force_string=True) # Information on binding site members self.members = [] - for member in bindingsite.xpath('identifiers/members/member'): - self.members += member.xpath('text()') + for member in bindingsite.xpath("identifiers/members/member"): + self.members += member.xpath("text()") - self.composite = self.getdata(bindingsite, 'identifiers/composite') + self.composite = self.getdata(bindingsite, "identifiers/composite") # Ligand Properties - self.heavy_atoms = self.getdata(bindingsite, 'lig_properties/num_heavy_atoms') - self.hbd = self.getdata(bindingsite, 'lig_properties/num_hbd') - self.unpaired_hbd = self.getdata(bindingsite, 'lig_properties/num_unpaired_hbd') - self.hba = self.getdata(bindingsite, 'lig_properties/num_hba') - self.unpaired_hba = self.getdata(bindingsite, 'lig_properties/num_unpaired_hba') - self.hal = self.getdata(bindingsite, 'lig_properties/num_hal') - self.unpaired_hal = self.getdata(bindingsite, 'lig_properties/num_unpaired_hal') - self.molweight = self.getdata(bindingsite, 'lig_properties/molweight') - self.logp = self.getdata(bindingsite, 'lig_properties/logp') - self.rotatable_bonds = self.getdata(bindingsite, 'lig_properties/num_rotatable_bonds') - self.rings = self.getdata(bindingsite, 'lig_properties/num_aromatic_rings') + self.heavy_atoms = self.getdata(bindingsite, "lig_properties/num_heavy_atoms") + self.hbd = self.getdata(bindingsite, "lig_properties/num_hbd") + self.unpaired_hbd = self.getdata(bindingsite, "lig_properties/num_unpaired_hbd") + self.hba = self.getdata(bindingsite, "lig_properties/num_hba") + self.unpaired_hba = self.getdata(bindingsite, "lig_properties/num_unpaired_hba") + self.hal = self.getdata(bindingsite, "lig_properties/num_hal") + self.unpaired_hal = self.getdata(bindingsite, "lig_properties/num_unpaired_hal") + self.molweight = self.getdata(bindingsite, "lig_properties/molweight") + self.logp = self.getdata(bindingsite, "lig_properties/logp") + self.rotatable_bonds = self.getdata( + bindingsite, "lig_properties/num_rotatable_bonds" + ) + self.rings = self.getdata(bindingsite, "lig_properties/num_aromatic_rings") # Binding Site residues self.bs_res = [] - for tagpart in bindingsite.xpath('bs_residues/bs_residue'): + for tagpart in bindingsite.xpath("bs_residues/bs_residue"): resnumber, reschain = tagpart.text[:-1], tagpart.text[-1] - aa, contact, min_dist = tagpart.get('aa'), tagpart.get('contact'), tagpart.get('min_dist') - new_bs_res = {'resnr': int(resnumber), 'reschain': reschain, 'aa': aa, - 'contact': True if contact == 'True' else False, 'min_dist': float(min_dist)} + aa, contact, min_dist = ( + tagpart.get("aa"), + tagpart.get("contact"), + tagpart.get("min_dist"), + ) + new_bs_res = { + "resnr": int(resnumber), + "reschain": reschain, + "aa": aa, + "contact": True if contact == "True" else False, + "min_dist": float(min_dist), + } self.bs_res.append(new_bs_res) # Interacting chains self.interacting_chains = [] - for chain in bindingsite.xpath('interacting_chains/interacting_chain'): - self.interacting_chains += chain.xpath('text()') + for chain in bindingsite.xpath("interacting_chains/interacting_chain"): + self.interacting_chains += chain.xpath("text()") # Interactions - interactions = bindingsite.xpath('interactions')[0] - self.hydrophobics = [HydrophobicInteraction(x) for x in - interactions.xpath('hydrophobic_interactions/hydrophobic_interaction')] - self.hbonds = [HydrogenBond(x) for x in interactions.xpath('hydrogen_bonds/hydrogen_bond')] - self.wbridges = [WaterBridge(x) for x in interactions.xpath('water_bridges/water_bridge')] - self.sbridges = [SaltBridge(x) for x in interactions.xpath('salt_bridges/salt_bridge')] - self.pi_stacks = [PiStacking(x) for x in interactions.xpath('pi_stacks/pi_stack')] - self.pi_cations = [PiCation(x) for x in interactions.xpath('pi_cation_interactions/pi_cation_interaction')] - self.halogens = [HalogenBond(x) for x in interactions.xpath('halogen_bonds/halogen_bond')] - self.metal_complexes = [MetalComplex(x) for x in interactions.xpath('metal_complexes/metal_complex')] - self.num_contacts = len(self.hydrophobics) + len(self.hbonds) + len(self.wbridges) + len(self.sbridges) + \ - len(self.pi_stacks) + len(self.pi_cations) + len(self.halogens) + len(self.metal_complexes) + interactions = bindingsite.xpath("interactions")[0] + self.hydrophobics = [ + HydrophobicInteraction(x) + for x in interactions.xpath( + "hydrophobic_interactions/hydrophobic_interaction" + ) + ] + self.hbonds = [ + HydrogenBond(x) for x in interactions.xpath("hydrogen_bonds/hydrogen_bond") + ] + self.wbridges = [ + WaterBridge(x) for x in interactions.xpath("water_bridges/water_bridge") + ] + self.sbridges = [ + SaltBridge(x) for x in interactions.xpath("salt_bridges/salt_bridge") + ] + self.pi_stacks = [ + PiStacking(x) for x in interactions.xpath("pi_stacks/pi_stack") + ] + self.pi_cations = [ + PiCation(x) + for x in interactions.xpath("pi_cation_interactions/pi_cation_interaction") + ] + self.halogens = [ + HalogenBond(x) for x in interactions.xpath("halogen_bonds/halogen_bond") + ] + self.metal_complexes = [ + MetalComplex(x) for x in interactions.xpath("metal_complexes/metal_complex") + ] + self.num_contacts = ( + len(self.hydrophobics) + + len(self.hbonds) + + len(self.wbridges) + + len(self.sbridges) + + len(self.pi_stacks) + + len(self.pi_cations) + + len(self.halogens) + + len(self.metal_complexes) + ) self.has_interactions = self.num_contacts > 0 self.get_atom_mapping() @@ -240,25 +300,45 @@ class BSite(XMLStorage): def get_atom_mapping(self): """Parses the ligand atom mapping.""" # Atom mappings - smiles_to_pdb_mapping = self.bindingsite.xpath('mappings/smiles_to_pdb/text()') + smiles_to_pdb_mapping = self.bindingsite.xpath("mappings/smiles_to_pdb/text()") if not smiles_to_pdb_mapping: - self.mappings = {'smiles_to_pdb': None, 'pdb_to_smiles': None} + self.mappings = {"smiles_to_pdb": None, "pdb_to_smiles": None} else: - smiles_to_pdb_mapping = {int(y[0]): int(y[1]) for y in [x.split(':') - for x in smiles_to_pdb_mapping[0].split(',')]} - self.mappings = {'smiles_to_pdb': smiles_to_pdb_mapping} - self.mappings['pdb_to_smiles'] = {v: k for k, v in self.mappings['smiles_to_pdb'].items()} + smiles_to_pdb_mapping = { + int(y[0]): int(y[1]) + for y in [x.split(":") for x in smiles_to_pdb_mapping[0].split(",")] + } + self.mappings = {"smiles_to_pdb": smiles_to_pdb_mapping} + self.mappings["pdb_to_smiles"] = { + v: k for k, v in self.mappings["smiles_to_pdb"].items() + } def get_counts(self): """counts the interaction types and backbone hydrogen bonding in a binding site""" hbondsback = len([hb for hb in self.hbonds if not hb.sidechain]) - counts = {'hydrophobics': len(self.hydrophobics), 'hbonds': len(self.hbonds), - 'wbridges': len(self.wbridges), 'sbridges': len(self.sbridges), 'pistacks': len(self.pi_stacks), - 'pications': len(self.pi_cations), 'halogens': len(self.halogens), 'metal': len(self.metal_complexes), - 'hbond_back': hbondsback, 'hbond_nonback': (len(self.hbonds) - hbondsback)} - counts['total'] = counts['hydrophobics'] + counts['hbonds'] + counts['wbridges'] + \ - counts['sbridges'] + counts['pistacks'] + counts['pications'] + counts['halogens'] + counts['metal'] + counts = { + "hydrophobics": len(self.hydrophobics), + "hbonds": len(self.hbonds), + "wbridges": len(self.wbridges), + "sbridges": len(self.sbridges), + "pistacks": len(self.pi_stacks), + "pications": len(self.pi_cations), + "halogens": len(self.halogens), + "metal": len(self.metal_complexes), + "hbond_back": hbondsback, + "hbond_nonback": (len(self.hbonds) - hbondsback), + } + counts["total"] = ( + counts["hydrophobics"] + + counts["hbonds"] + + counts["wbridges"] + + counts["sbridges"] + + counts["pistacks"] + + counts["pications"] + + counts["halogens"] + + counts["metal"] + ) return counts @@ -269,17 +349,22 @@ class PlipXML(XMLStorage): self.load_data(xmlfile) # Parse general information - self.version = self.getdata(self.doc, '/report/plipversion/') - self.pdbid = self.getdata(self.doc, '/report/pdbid', force_string=True) - self.filetype = self.getdata(self.doc, '/report/filetype') - self.fixed = self.getdata(self.doc, '/report/pdbfixes/') - self.filename = self.getdata(self.doc, '/report/filename') - self.excluded = self.doc.xpath('/report/excluded_ligands/excluded_ligand/text()') + self.version = self.getdata(self.doc, "/report/plipversion/") + self.pdbid = self.getdata(self.doc, "/report/pdbid", force_string=True) + self.filetype = self.getdata(self.doc, "/report/filetype") + self.fixed = self.getdata(self.doc, "/report/pdbfixes/") + self.filename = self.getdata(self.doc, "/report/filename") + self.excluded = self.doc.xpath( + "/report/excluded_ligands/excluded_ligand/text()" + ) # Parse binding site information - self.bsites = {BSite(bs, self.pdbid).bsid: BSite(bs, self.pdbid) for bs in self.doc.xpath('//bindingsite')} + self.bsites = { + BSite(bs, self.pdbid).bsid: BSite(bs, self.pdbid) + for bs in self.doc.xpath("//bindingsite") + } self.num_bsites = len(self.bsites) def load_data(self, xmlfile): """Loads/parses an XML file and saves it as a tree if successful.""" - self.doc = etree.parse(xmlfile)
\ No newline at end of file + self.doc = etree.parse(xmlfile) diff --git a/plip/plipcmd.py b/plip/plipcmd.py index e4c0815..b590d09 100644 --- a/plip/plipcmd.py +++ b/plip/plipcmd.py @@ -25,12 +25,14 @@ from plip.exchange.webservices import fetch_pdb from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid from plip.structure.preparation import tilde_expansion, PDBComplex -description = f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" \ - "is a command-line based tool to analyze interactions in a protein-ligand complex. " \ - "If you are using PLIP in your work, please cite: " \ - "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " \ - "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" \ - f"Supported and maintained by: {config.__maintainer__}" +description = ( + f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" + "is a command-line based tool to analyze interactions in a protein-ligand complex. " + "If you are using PLIP in your work, please cite: " + "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " + "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" + f"Supported and maintained by: {config.__maintainer__}" +) def threshold_limiter(aparser, arg): @@ -40,13 +42,13 @@ def threshold_limiter(aparser, arg): return arg -def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): +def process_pdb(pdbfile, outpath, as_string=False, outputprefix="report"): """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output.""" if not as_string: - pdb_file_name = pdbfile.split('/')[-1] - startmessage = f'starting analysis of {pdb_file_name}' + pdb_file_name = pdbfile.split("/")[-1] + startmessage = f"starting analysis of {pdb_file_name}" else: - startmessage = 'starting analysis from STDIN' + startmessage = "starting analysis from STDIN" logger.info(startmessage) mol = PDBComplex() mol.output_path = outpath @@ -68,10 +70,16 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): if config.PYMOL or config.PICS: from plip.visualization.visualize import visualize_in_pymol - complexes = [VisualizerData(mol, site) for site in sorted(mol.interaction_sets) - if not len(mol.interaction_sets[site].interacting_res) == 0] + + complexes = [ + VisualizerData(mol, site) + for site in sorted(mol.interaction_sets) + if not len(mol.interaction_sets[site].interacting_res) == 0 + ] if config.MAXTHREADS > 1: - logger.info(f'generating visualizations in parallel on {config.MAXTHREADS} cores') + logger.info( + f"generating visualizations in parallel on {config.MAXTHREADS} cores" + ) parfn = parallel_fn(visualize_in_pymol) parfn(complexes, processes=config.MAXTHREADS) else: @@ -89,19 +97,22 @@ def download_structure(inputpdbid): Checks for validity of ID and handles error while downloading. Returns the path of the downloaded file.""" try: - if len(inputpdbid) != 4 or extract_pdbid(inputpdbid.lower()) == 'UnknownProtein': - logger.error(f'invalid PDB-ID (wrong format): {inputpdbid}') + if ( + len(inputpdbid) != 4 + or extract_pdbid(inputpdbid.lower()) == "UnknownProtein" + ): + logger.error(f"invalid PDB-ID (wrong format): {inputpdbid}") sys.exit(1) pdbfile, pdbid = fetch_pdb(inputpdbid.lower()) - pdbpath = tilde_expansion('%s/%s.pdb' % (config.BASEPATH.rstrip('/'), pdbid)) + pdbpath = tilde_expansion("%s/%s.pdb" % (config.BASEPATH.rstrip("/"), pdbid)) create_folder_if_not_exists(config.BASEPATH) - with open(pdbpath, 'w') as g: + with open(pdbpath, "w") as g: g.write(pdbfile) - logger.info(f'file downloaded as {pdbpath}') + logger.info(f"file downloaded as {pdbpath}") return pdbpath, pdbid except ValueError: # Invalid PDB ID, cannot fetch from RCBS server - logger.error(f'PDB-ID does not exist: {inputpdbid}') + logger.error(f"PDB-ID does not exist: {inputpdbid}") sys.exit(1) @@ -111,9 +122,9 @@ def remove_duplicates(slist): unique = list(set(slist)) difference = len(slist) - len(unique) if difference == 1: - logger.info('removed one duplicate entry from input list') + logger.info("removed one duplicate entry from input list") if difference > 1: - logger.info(f'Removed {difference} duplicate entries from input list') + logger.info(f"Removed {difference} duplicate entries from input list") return unique @@ -122,9 +133,9 @@ def run_analysis(inputstructs, inputpdbids): pdbid, pdbpath = None, None # @todo For multiprocessing, implement better stacktracing for errors # Print title and version - logger.info(f'Protein-Ligand Interaction Profiler (PLIP) {__version__}') - logger.info(f'brought to you by: {config.__maintainer__}') - logger.info(f'please cite: https://www.doi.org/10.1093/nar/gkv315') + logger.info(f"Protein-Ligand Interaction Profiler (PLIP) {__version__}") + logger.info(f"brought to you by: {config.__maintainer__}") + logger.info(f"please cite: https://www.doi.org/10.1093/nar/gkv315") output_prefix = config.OUTPUTFILENAME if inputstructs is not None: # Process PDB file(s) @@ -132,114 +143,258 @@ def run_analysis(inputstructs, inputpdbids): inputstructs = remove_duplicates(inputstructs) read_from_stdin = False for inputstruct in inputstructs: - if inputstruct == '-': + if inputstruct == "-": inputstruct = sys.stdin.read() read_from_stdin = True if config.RAWSTRING: if sys.version_info < (3,): - inputstruct = bytes(inputstruct).decode('unicode_escape') + inputstruct = bytes(inputstruct).decode("unicode_escape") else: - inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape') + inputstruct = bytes(inputstruct, "utf8").decode( + "unicode_escape" + ) else: if os.path.getsize(inputstruct) == 0: - logger.error('empty PDB file') + logger.error("empty PDB file") sys.exit(1) if num_structures > 1: - basename = inputstruct.split('.')[-2].split('/')[-1] - config.OUTPATH = '/'.join([config.BASEPATH, basename]) - output_prefix = 'report' - process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix) + basename = inputstruct.split(".")[-2].split("/")[-1] + config.OUTPATH = "/".join([config.BASEPATH, basename]) + output_prefix = "report" + process_pdb( + inputstruct, + config.OUTPATH, + as_string=read_from_stdin, + outputprefix=output_prefix, + ) else: # Try to fetch the current PDB structure(s) directly from the RCBS server num_pdbids = len(inputpdbids) inputpdbids = remove_duplicates(inputpdbids) for inputpdbid in inputpdbids: pdbpath, pdbid = download_structure(inputpdbid) if num_pdbids > 1: - config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()]) - output_prefix = 'report' + config.OUTPATH = "/".join( + [config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()] + ) + output_prefix = "report" process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix) if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None: - if config.BASEPATH in ['.', './']: - logger.info('finished analysis, find the result files in the working directory') + if config.BASEPATH in [".", "./"]: + logger.info( + "finished analysis, find the result files in the working directory" + ) else: - logger.info(f'finished analysis, find the result files in {config.BASEPATH}') + logger.info( + f"finished analysis, find the result files in {config.BASEPATH}" + ) -if __name__ == '__main__': +if __name__ == "__main__": """Parse command line arguments and start main script for analysis.""" parser = ArgumentParser(prog="PLIP", description=description) - pdbstructure = parser.add_mutually_exclusive_group(required=True) # Needs either PDB ID or file + pdbstructure = parser.add_mutually_exclusive_group( + required=True + ) # Needs either PDB ID or file # '-' as file name reads from stdin - pdbstructure.add_argument("-f", "--file", dest="input", nargs="+", help="Set input file, '-' reads from stdin") + pdbstructure.add_argument( + "-f", + "--file", + dest="input", + nargs="+", + help="Set input file, '-' reads from stdin", + ) pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+") - outputgroup = parser.add_mutually_exclusive_group(required=False) # Needs either outpath or stdout + outputgroup = parser.add_mutually_exclusive_group( + required=False + ) # Needs either outpath or stdout outputgroup.add_argument("-o", "--out", dest="outpath", default="./") - outputgroup.add_argument("-O", "--stdout", dest="stdout", action="store_true", default=False, - help="Write to stdout instead of file") - parser.add_argument("--rawstring", dest="use_raw_string", default=False, action="store_true", - help="Use Python raw strings for stdout and stdin") - parser.add_argument("-v", "--verbose", dest="verbose", default=False, help="Turn on verbose mode", - action="store_true") - parser.add_argument("-q", "--quiet", dest="quiet", default=False, help="Turn on quiet mode", action="store_true") - parser.add_argument("-s", "--silent", dest="silent", default=False, help="Turn on silent mode", action="store_true") - parser.add_argument("-p", "--pics", dest="pics", default=False, help="Additional pictures", action="store_true") - parser.add_argument("-x", "--xml", dest="xml", default=False, help="Generate report file in XML format", - action="store_true") - parser.add_argument("-t", "--txt", dest="txt", default=False, help="Generate report file in TXT (RST) format", - action="store_true") - parser.add_argument("-y", "--pymol", dest="pymol", default=False, help="Additional PyMOL session files", - action="store_true") - parser.add_argument("--maxthreads", dest="maxthreads", default=multiprocessing.cpu_count(), - help="Set maximum number of main threads (number of binding sites processed simultaneously)." - "If not set, PLIP uses all available CPUs if possible.", - type=int) - parser.add_argument("--breakcomposite", dest="breakcomposite", default=False, - help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.", - action="store_true") - parser.add_argument("--altlocation", dest="altlocation", default=False, - help="Also consider alternate locations for atoms (e.g. alternate conformations).", - action="store_true") - parser.add_argument("--nofix", dest="nofix", default=False, - help="Turns off fixing of PDB files.", - action="store_true") - parser.add_argument("--nofixfile", dest="nofixfile", default=False, - help="Turns off writing files for fixed PDB files.", - action="store_true") - parser.add_argument("--nopdbcanmap", dest="nopdbcanmap", default=False, - help="Turns off calculation of mapping between canonical and PDB atom order for ligands.", - action="store_true") - parser.add_argument("--dnareceptor", dest="dnareceptor", default=False, - help="Uses the DNA instead of the protein as a receptor for interactions.", - action="store_true") - parser.add_argument("--name", dest="outputfilename", default="report", - help="Set a filename for the report TXT and XML files. Will only work when processing single structures.") - ligandtype = parser.add_mutually_exclusive_group() # Either peptide/inter or intra mode - ligandtype.add_argument("--peptides", "--inter", dest="peptides", default=[], - help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", - nargs="+") - ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.") - parser.add_argument("--keepmod", dest="keepmod", default=False, - help="Keep modified residues as ligands", - action="store_true") - parser.add_argument("--nohydro", dest="nohydro", default=False, - help="Do not add polar hydrogens in case your structure already contains hydrogens.", - action="store_true") + outputgroup.add_argument( + "-O", + "--stdout", + dest="stdout", + action="store_true", + default=False, + help="Write to stdout instead of file", + ) + parser.add_argument( + "--rawstring", + dest="use_raw_string", + default=False, + action="store_true", + help="Use Python raw strings for stdout and stdin", + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + default=False, + help="Turn on verbose mode", + action="store_true", + ) + parser.add_argument( + "-q", + "--quiet", + dest="quiet", + default=False, + help="Turn on quiet mode", + action="store_true", + ) + parser.add_argument( + "-s", + "--silent", + dest="silent", + default=False, + help="Turn on silent mode", + action="store_true", + ) + parser.add_argument( + "-p", + "--pics", + dest="pics", + default=False, + help="Additional pictures", + action="store_true", + ) + parser.add_argument( + "-x", + "--xml", + dest="xml", + default=False, + help="Generate report file in XML format", + action="store_true", + ) + parser.add_argument( + "-t", + "--txt", + dest="txt", + default=False, + help="Generate report file in TXT (RST) format", + action="store_true", + ) + parser.add_argument( + "-y", + "--pymol", + dest="pymol", + default=False, + help="Additional PyMOL session files", + action="store_true", + ) + parser.add_argument( + "--maxthreads", + dest="maxthreads", + default=multiprocessing.cpu_count(), + help="Set maximum number of main threads (number of binding sites processed simultaneously)." + "If not set, PLIP uses all available CPUs if possible.", + type=int, + ) + parser.add_argument( + "--breakcomposite", + dest="breakcomposite", + default=False, + help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.", + action="store_true", + ) + parser.add_argument( + "--altlocation", + dest="altlocation", + default=False, + help="Also consider alternate locations for atoms (e.g. alternate conformations).", + action="store_true", + ) + parser.add_argument( + "--nofix", + dest="nofix", + default=False, + help="Turns off fixing of PDB files.", + action="store_true", + ) + parser.add_argument( + "--nofixfile", + dest="nofixfile", + default=False, + help="Turns off writing files for fixed PDB files.", + action="store_true", + ) + parser.add_argument( + "--nopdbcanmap", + dest="nopdbcanmap", + default=False, + help="Turns off calculation of mapping between canonical and PDB atom order for ligands.", + action="store_true", + ) + parser.add_argument( + "--dnareceptor", + dest="dnareceptor", + default=False, + help="Uses the DNA instead of the protein as a receptor for interactions.", + action="store_true", + ) + parser.add_argument( + "--name", + dest="outputfilename", + default="report", + help="Set a filename for the report TXT and XML files. Will only work when processing single structures.", + ) + ligandtype = ( + parser.add_mutually_exclusive_group() + ) # Either peptide/inter or intra mode + ligandtype.add_argument( + "--peptides", + "--inter", + dest="peptides", + default=[], + help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", + nargs="+", + ) + ligandtype.add_argument( + "--intra", + dest="intra", + help="Allows to define one chain to analyze intra-chain contacts.", + ) + parser.add_argument( + "--keepmod", + dest="keepmod", + default=False, + help="Keep modified residues as ligands", + action="store_true", + ) + parser.add_argument( + "--nohydro", + dest="nohydro", + default=False, + help="Do not add polar hydrogens in case your structure already contains hydrogens.", + action="store_true", + ) # Optional threshold arguments, not shown in help - thr = namedtuple('threshold', 'name type') - thresholds = [thr(name='aromatic_planarity', type='angle'), - thr(name='hydroph_dist_max', type='distance'), thr(name='hbond_dist_max', type='distance'), - thr(name='hbond_don_angle_min', type='angle'), thr(name='pistack_dist_max', type='distance'), - thr(name='pistack_ang_dev', type='other'), thr(name='pistack_offset_max', type='distance'), - thr(name='pication_dist_max', type='distance'), thr(name='saltbridge_dist_max', type='distance'), - thr(name='halogen_dist_max', type='distance'), thr(name='halogen_acc_angle', type='angle'), - thr(name='halogen_don_angle', type='angle'), thr(name='halogen_angle_dev', type='other'), - thr(name='water_bridge_mindist', type='distance'), thr(name='water_bridge_maxdist', type='distance'), - thr(name='water_bridge_omega_min', type='angle'), thr(name='water_bridge_omega_max', type='angle'), - thr(name='water_bridge_theta_min', type='angle')] + thr = namedtuple("threshold", "name type") + thresholds = [ + thr(name="aromatic_planarity", type="angle"), + thr(name="hydroph_dist_max", type="distance"), + thr(name="hbond_dist_max", type="distance"), + thr(name="hbond_don_angle_min", type="angle"), + thr(name="pistack_dist_max", type="distance"), + thr(name="pistack_ang_dev", type="other"), + thr(name="pistack_offset_max", type="distance"), + thr(name="pication_dist_max", type="distance"), + thr(name="saltbridge_dist_max", type="distance"), + thr(name="halogen_dist_max", type="distance"), + thr(name="halogen_acc_angle", type="angle"), + thr(name="halogen_don_angle", type="angle"), + thr(name="halogen_angle_dev", type="other"), + thr(name="water_bridge_mindist", type="distance"), + thr(name="water_bridge_maxdist", type="distance"), + thr(name="water_bridge_omega_min", type="angle"), + thr(name="water_bridge_omega_max", type="angle"), + thr(name="water_bridge_theta_min", type="angle"), + ] for t in thresholds: - parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val), - help=argparse.SUPPRESS) + parser.add_argument( + "--%s" % t.name, + dest=t.name, + type=lambda val: threshold_limiter(parser, val), + help=argparse.SUPPRESS, + ) arguments = parser.parse_args() @@ -264,8 +419,11 @@ if __name__ == '__main__': config.STDOUT = arguments.stdout config.RAWSTRING = arguments.use_raw_string config.OUTPATH = arguments.outpath - config.OUTPATH = tilde_expansion("".join([config.OUTPATH, '/']) - if not config.OUTPATH.endswith('/') else config.OUTPATH) + config.OUTPATH = tilde_expansion( + "".join([config.OUTPATH, "/"]) + if not config.OUTPATH.endswith("/") + else config.OUTPATH + ) config.BASEPATH = config.OUTPATH # Used for batch processing config.BREAKCOMPOSITE = arguments.breakcomposite config.ALTLOC = arguments.altlocation @@ -283,28 +441,46 @@ if __name__ == '__main__': try: import pymol except ImportError: - logger.error('PyMOL is required for the --pics and --pymol option') + logger.error("PyMOL is required for the --pics and --pymol option") sys.exit(1) # Assign values to global thresholds for t in thresholds: tvalue = getattr(arguments, t.name) if tvalue is not None: - if t.type == 'angle' and not 0 < tvalue < 180: # Check value for angle thresholds - parser.error("Threshold for angles need to have values within 0 and 180.") - if t.type == 'distance': + if ( + t.type == "angle" and not 0 < tvalue < 180 + ): # Check value for angle thresholds + parser.error( + "Threshold for angles need to have values within 0 and 180." + ) + if t.type == "distance": if tvalue > 10: # Check value for angle thresholds - parser.error("Threshold for distances must not be larger than 10 Angstrom.") - elif tvalue > config.BS_DIST + 1: # Dynamically adapt the search space for binding site residues + parser.error( + "Threshold for distances must not be larger than 10 Angstrom." + ) + elif ( + tvalue > config.BS_DIST + 1 + ): # Dynamically adapt the search space for binding site residues config.BS_DIST = tvalue + 1 setattr(config, t.name.upper(), tvalue) # Check additional conditions for interdependent thresholds if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV: - parser.error("The halogen acceptor angle has to be larger than the halogen angle deviation.") + parser.error( + "The halogen acceptor angle has to be larger than the halogen angle deviation." + ) if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV: - parser.error("The halogen donor angle has to be larger than the halogen angle deviation.") + parser.error( + "The halogen donor angle has to be larger than the halogen angle deviation." + ) if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST: - parser.error("The water bridge minimum distance has to be smaller than the water bridge maximum distance.") + parser.error( + "The water bridge minimum distance has to be smaller than the water bridge maximum distance." + ) if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX: - parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle") - expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None + parser.error( + "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle" + ) + expanded_path = ( + tilde_expansion(arguments.input) if arguments.input is not None else None + ) run_analysis(expanded_path, arguments.pdbid) # Start main script diff --git a/plip/structure/detection.py b/plip/structure/detection.py index 71fec63..6a2eeb7 100644 --- a/plip/structure/detection.py +++ b/plip/structure/detection.py @@ -11,25 +11,30 @@ from plip.basic.supplemental import whichresnumber, whichrestype, whichchain logger = logger.get_logger() + def filter_contacts(pairings): """Filter interactions by two criteria: 1. No interactions between the same residue (important for intra mode). 2. No duplicate interactions (A with B and B with A, also important for intra mode).""" if not config.INTRA: return pairings - filtered1_pairings = [p for p in pairings if (p.resnr, p.reschain) != (p.resnr_l, p.reschain_l)] + filtered1_pairings = [ + p for p in pairings if (p.resnr, p.reschain) != (p.resnr_l, p.reschain_l) + ] already_considered = [] filtered2_pairings = [] for contact in filtered1_pairings: try: - dist = 'D{}'.format(round(contact.distance, 2)) + dist = "D{}".format(round(contact.distance, 2)) except AttributeError: try: - dist = 'D{}'.format(round(contact.distance_ah, 2)) + dist = "D{}".format(round(contact.distance_ah, 2)) except AttributeError: - dist = 'D{}'.format(round(contact.distance_aw, 2)) - res1, res2 = ''.join([str(contact.resnr), contact.reschain]), ''.join( - [str(contact.resnr_l), contact.reschain_l]) + dist = "D{}".format(round(contact.distance_aw, 2)) + res1, res2 = ( + "".join([str(contact.resnr), contact.reschain]), + "".join([str(contact.resnr_l), contact.reschain_l]), + ) data = {res1, res2, dist} if data not in already_considered: filtered2_pairings.append(contact) @@ -41,12 +46,16 @@ def filter_contacts(pairings): # FUNCTIONS FOR DETECTION OF SPECIFIC INTERACTIONS ################################################## + def hydrophobic_interactions(atom_set_a, atom_set_b): """Detection of hydrophobic pliprofiler between atom_set_a (binding site) and atom_set_b (ligand). Definition: All pairs of qualified carbon atoms within a distance of HYDROPH_DIST_MAX """ - data = namedtuple('hydroph_interaction', 'bsatom bsatom_orig_idx ligatom ligatom_orig_idx ' - 'distance restype resnr reschain restype_l, resnr_l, reschain_l') + data = namedtuple( + "hydroph_interaction", + "bsatom bsatom_orig_idx ligatom ligatom_orig_idx " + "distance restype resnr reschain restype_l, resnr_l, reschain_l", + ) pairings = [] for a, b in itertools.product(atom_set_a, atom_set_b): if a.orig_idx == b.orig_idx: @@ -54,12 +63,29 @@ def hydrophobic_interactions(atom_set_a, atom_set_b): e = euclidean3d(a.atom.coords, b.atom.coords) if not config.MIN_DIST < e < config.HYDROPH_DIST_MAX: continue - restype, resnr, reschain = whichrestype(a.atom), whichresnumber(a.atom), whichchain(a.atom) - restype_l, resnr_l, reschain_l = whichrestype(b.orig_atom), whichresnumber(b.orig_atom), whichchain(b.orig_atom) - contact = data(bsatom=a.atom, bsatom_orig_idx=a.orig_idx, ligatom=b.atom, ligatom_orig_idx=b.orig_idx, - distance=e, restype=restype, resnr=resnr, - reschain=reschain, restype_l=restype_l, - resnr_l=resnr_l, reschain_l=reschain_l) + restype, resnr, reschain = ( + whichrestype(a.atom), + whichresnumber(a.atom), + whichchain(a.atom), + ) + restype_l, resnr_l, reschain_l = ( + whichrestype(b.orig_atom), + whichresnumber(b.orig_atom), + whichchain(b.orig_atom), + ) + contact = data( + bsatom=a.atom, + bsatom_orig_idx=a.orig_idx, + ligatom=b.atom, + ligatom_orig_idx=b.orig_idx, + distance=e, + restype=restype, + resnr=resnr, + reschain=reschain, + restype_l=restype_l, + resnr_l=resnr_l, + reschain_l=reschain_l, + ) pairings.append(contact) return filter_contacts(pairings) @@ -70,43 +96,79 @@ def hbonds(acceptors, donor_pairs, protisdon, typ): donor hydrogens and acceptor showing a distance within HBOND DIST MIN and HBOND DIST MAX and donor angles above HBOND_DON_ANGLE_MIN """ - data = namedtuple('hbond', 'a a_orig_idx d d_orig_idx h distance_ah distance_ad angle type protisdon resnr ' - 'restype reschain resnr_l restype_l reschain_l sidechain atype dtype') + data = namedtuple( + "hbond", + "a a_orig_idx d d_orig_idx h distance_ah distance_ad angle type protisdon resnr " + "restype reschain resnr_l restype_l reschain_l sidechain atype dtype", + ) pairings = [] for acc, don in itertools.product(acceptors, donor_pairs): - if not typ == 'strong': + if not typ == "strong": continue # Regular (strong) hydrogen bonds dist_ah = euclidean3d(acc.a.coords, don.h.coords) dist_ad = euclidean3d(acc.a.coords, don.d.coords) if not config.MIN_DIST < dist_ad < config.HBOND_DIST_MAX: continue - vec1, vec2 = vector(don.h.coords, don.d.coords), vector(don.h.coords, acc.a.coords) + vec1, vec2 = ( + vector(don.h.coords, don.d.coords), + vector(don.h.coords, acc.a.coords), + ) v = vecangle(vec1, vec2) if not v > config.HBOND_DON_ANGLE_MIN: continue protatom = don.d.OBAtom if protisdon else acc.a.OBAtom ligatom = don.d.OBAtom if not protisdon else acc.a.OBAtom - is_sidechain_hbond = protatom.GetResidue().GetAtomProperty(protatom, 8) # Check if sidechain atom + is_sidechain_hbond = protatom.GetResidue().GetAtomProperty( + protatom, 8 + ) # Check if sidechain atom resnr = whichresnumber(don.d) if protisdon else whichresnumber(acc.a) - resnr_l = whichresnumber(acc.a_orig_atom) if protisdon else whichresnumber(don.d_orig_atom) + resnr_l = ( + whichresnumber(acc.a_orig_atom) + if protisdon + else whichresnumber(don.d_orig_atom) + ) restype = whichrestype(don.d) if protisdon else whichrestype(acc.a) - restype_l = whichrestype(acc.a_orig_atom) if protisdon else whichrestype(don.d_orig_atom) + restype_l = ( + whichrestype(acc.a_orig_atom) + if protisdon + else whichrestype(don.d_orig_atom) + ) reschain = whichchain(don.d) if protisdon else whichchain(acc.a) - rechain_l = whichchain(acc.a_orig_atom) if protisdon else whichchain(don.d_orig_atom) + rechain_l = ( + whichchain(acc.a_orig_atom) if protisdon else whichchain(don.d_orig_atom) + ) # Next line prevents H-Bonds within amino acids in intermolecular interactions if config.INTRA is not None and whichresnumber(don.d) == whichresnumber(acc.a): continue # Next line prevents backbone-backbone H-Bonds - if config.INTRA is not None and protatom.GetResidue().GetAtomProperty(protatom, - 8) and ligatom.GetResidue().GetAtomProperty( - ligatom, 8): + if ( + config.INTRA is not None + and protatom.GetResidue().GetAtomProperty(protatom, 8) + and ligatom.GetResidue().GetAtomProperty(ligatom, 8) + ): continue - contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, d=don.d, d_orig_idx=don.d_orig_idx, h=don.h, - distance_ah=dist_ah, distance_ad=dist_ad, angle=v, type=typ, protisdon=protisdon, - resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, - restype_l=restype_l, reschain_l=rechain_l, sidechain=is_sidechain_hbond, - atype=acc.a.type, dtype=don.d.type) + contact = data( + a=acc.a, + a_orig_idx=acc.a_orig_idx, + d=don.d, + d_orig_idx=don.d_orig_idx, + h=don.h, + distance_ah=dist_ah, + distance_ad=dist_ad, + angle=v, + type=typ, + protisdon=protisdon, + resnr=resnr, + restype=restype, + reschain=reschain, + resnr_l=resnr_l, + restype_l=restype_l, + reschain_l=rechain_l, + sidechain=is_sidechain_hbond, + atype=acc.a.type, + dtype=don.d.type, + ) pairings.append(contact) return filter_contacts(pairings) @@ -114,14 +176,17 @@ def hbonds(acceptors, donor_pairs, protisdon, typ): def pistacking(rings_bs, rings_lig): """Return all pi-stackings between the given aromatic ring systems in receptor and ligand.""" data = namedtuple( - 'pistack', - 'proteinring ligandring distance angle offset type restype resnr reschain restype_l resnr_l reschain_l') + "pistack", + "proteinring ligandring distance angle offset type restype resnr reschain restype_l resnr_l reschain_l", + ) pairings = [] for r, l in itertools.product(rings_bs, rings_lig): # DISTANCE AND RING ANGLE CALCULATION d = euclidean3d(r.center, l.center) b = vecangle(r.normal, l.normal) - a = min(b, 180 - b if not 180 - b < 0 else b) # Smallest of two angles, depending on direction of normal + a = min( + b, 180 - b if not 180 - b < 0 else b + ) # Smallest of two angles, depending on direction of normal # RING CENTER OFFSET CALCULATION (project each ring center into the other ring) proj1 = projection(l.normal, l.center, r.center) @@ -129,24 +194,45 @@ def pistacking(rings_bs, rings_lig): offset = min(euclidean3d(proj1, l.center), euclidean3d(proj2, r.center)) # RECEPTOR DATA - resnr, restype, reschain = whichresnumber(r.atoms[0]), whichrestype(r.atoms[0]), whichchain(r.atoms[0]) - resnr_l, restype_l, reschain_l = whichresnumber(l.orig_atoms[0]), whichrestype( - l.orig_atoms[0]), whichchain(l.orig_atoms[0]) + resnr, restype, reschain = ( + whichresnumber(r.atoms[0]), + whichrestype(r.atoms[0]), + whichchain(r.atoms[0]), + ) + resnr_l, restype_l, reschain_l = ( + whichresnumber(l.orig_atoms[0]), + whichrestype(l.orig_atoms[0]), + whichchain(l.orig_atoms[0]), + ) # SELECTION BY DISTANCE, ANGLE AND OFFSET passed = False if not config.MIN_DIST < d < config.PISTACK_DIST_MAX: continue if 0 < a < config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: - ptype = 'P' + ptype = "P" passed = True - if 90 - config.PISTACK_ANG_DEV < a < 90 + config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: - ptype = 'T' + if ( + 90 - config.PISTACK_ANG_DEV < a < 90 + config.PISTACK_ANG_DEV + and offset < config.PISTACK_OFFSET_MAX + ): + ptype = "T" passed = True if passed: - contact = data(proteinring=r, ligandring=l, distance=d, angle=a, offset=offset, - type=ptype, resnr=resnr, restype=restype, reschain=reschain, - resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) + contact = data( + proteinring=r, + ligandring=l, + distance=d, + angle=a, + offset=offset, + type=ptype, + resnr=resnr, + restype=restype, + reschain=reschain, + resnr_l=resnr_l, + restype_l=restype_l, + reschain_l=reschain_l, + ) pairings.append(contact) return filter_contacts(pairings) @@ -156,7 +242,9 @@ def pication(rings, pos_charged, protcharged): For tertiary and quaternary amines, check also the angle between the ring and the nitrogen. """ data = namedtuple( - 'pication', 'ring charge distance offset type restype resnr reschain restype_l resnr_l reschain_l protcharged') + "pication", + "ring charge distance offset type restype resnr reschain restype_l resnr_l reschain_l protcharged", + ) pairings = [] if len(rings) == 0 or len(pos_charged) == 0: return pairings @@ -167,38 +255,92 @@ def pication(rings, pos_charged, protcharged): # Project the center of charge into the ring and measure distance to ring center proj = projection(ring.normal, ring.center, p.center) offset = euclidean3d(proj, ring.center) - if not config.MIN_DIST < d < config.PICATION_DIST_MAX or not offset < config.PISTACK_OFFSET_MAX: + if ( + not config.MIN_DIST < d < config.PICATION_DIST_MAX + or not offset < config.PISTACK_OFFSET_MAX + ): continue - if type(p).__name__ == 'lcharge' and p.fgroup == 'tertamine': + if type(p).__name__ == "lcharge" and p.fgroup == "tertamine": # Special case here if the ligand has a tertiary amine, check an additional angle # Otherwise, we might have have a pi-cation interaction 'through' the ligand - n_atoms = [a_neighbor for a_neighbor in OBAtomAtomIter(p.atoms[0].OBAtom)] + n_atoms = [ + a_neighbor for a_neighbor in OBAtomAtomIter(p.atoms[0].OBAtom) + ] n_atoms_coords = [(a.x(), a.y(), a.z()) for a in n_atoms] - amine_normal = np.cross(vector(n_atoms_coords[0], n_atoms_coords[1]), - vector(n_atoms_coords[2], n_atoms_coords[0])) + amine_normal = np.cross( + vector(n_atoms_coords[0], n_atoms_coords[1]), + vector(n_atoms_coords[2], n_atoms_coords[0]), + ) b = vecangle(ring.normal, amine_normal) # Smallest of two angles, depending on direction of normal a = min(b, 180 - b if not 180 - b < 0 else b) if not a > 30.0: - resnr, restype = whichresnumber(ring.atoms[0]), whichrestype(ring.atoms[0]) + resnr, restype = ( + whichresnumber(ring.atoms[0]), + whichrestype(ring.atoms[0]), + ) reschain = whichchain(ring.atoms[0]) - resnr_l, restype_l = whichresnumber(p.orig_atoms[0]), whichrestype(p.orig_atoms[0]) + resnr_l, restype_l = ( + whichresnumber(p.orig_atoms[0]), + whichrestype(p.orig_atoms[0]), + ) reschain_l = whichchain(p.orig_atoms[0]) - contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', - restype=restype, resnr=resnr, reschain=reschain, - restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, - protcharged=protcharged) + contact = data( + ring=ring, + charge=p, + distance=d, + offset=offset, + type="regular", + restype=restype, + resnr=resnr, + reschain=reschain, + restype_l=restype_l, + resnr_l=resnr_l, + reschain_l=reschain_l, + protcharged=protcharged, + ) pairings.append(contact) break - resnr = whichresnumber(p.atoms[0]) if protcharged else whichresnumber(ring.atoms[0]) - resnr_l = whichresnumber(ring.orig_atoms[0]) if protcharged else whichresnumber(p.orig_atoms[0]) - restype = whichrestype(p.atoms[0]) if protcharged else whichrestype(ring.atoms[0]) - restype_l = whichrestype(ring.orig_atoms[0]) if protcharged else whichrestype(p.orig_atoms[0]) - reschain = whichchain(p.atoms[0]) if protcharged else whichchain(ring.atoms[0]) - reschain_l = whichchain(ring.orig_atoms[0]) if protcharged else whichchain(p.orig_atoms[0]) - contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', restype=restype, - resnr=resnr, reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, - reschain_l=reschain_l, protcharged=protcharged) + resnr = ( + whichresnumber(p.atoms[0]) + if protcharged + else whichresnumber(ring.atoms[0]) + ) + resnr_l = ( + whichresnumber(ring.orig_atoms[0]) + if protcharged + else whichresnumber(p.orig_atoms[0]) + ) + restype = ( + whichrestype(p.atoms[0]) if protcharged else whichrestype(ring.atoms[0]) + ) + restype_l = ( + whichrestype(ring.orig_atoms[0]) + if protcharged + else whichrestype(p.orig_atoms[0]) + ) + reschain = ( + whichchain(p.atoms[0]) if protcharged else whichchain(ring.atoms[0]) + ) + reschain_l = ( + whichchain(ring.orig_atoms[0]) + if protcharged + else whichchain(p.orig_atoms[0]) + ) + contact = data( + ring=ring, + charge=p, + distance=d, + offset=offset, + type="regular", + restype=restype, + resnr=resnr, + reschain=reschain, + restype_l=restype_l, + resnr_l=resnr_l, + reschain_l=reschain_l, + protcharged=protcharged, + ) pairings.append(contact) return filter_contacts(pairings) @@ -206,59 +348,124 @@ def pication(rings, pos_charged, protcharged): def saltbridge(poscenter, negcenter, protispos): """Detect all salt bridges (pliprofiler between centers of positive and negative charge)""" data = namedtuple( - 'saltbridge', 'positive negative distance protispos resnr restype reschain resnr_l restype_l reschain_l') + "saltbridge", + "positive negative distance protispos resnr restype reschain resnr_l restype_l reschain_l", + ) pairings = [] for pc, nc in itertools.product(poscenter, negcenter): - if not config.MIN_DIST < euclidean3d(pc.center, nc.center) < config.SALTBRIDGE_DIST_MAX: + if ( + not config.MIN_DIST + < euclidean3d(pc.center, nc.center) + < config.SALTBRIDGE_DIST_MAX + ): continue resnr = pc.resnr if protispos else nc.resnr - resnr_l = whichresnumber(nc.orig_atoms[0]) if protispos else whichresnumber(pc.orig_atoms[0]) + resnr_l = ( + whichresnumber(nc.orig_atoms[0]) + if protispos + else whichresnumber(pc.orig_atoms[0]) + ) restype = pc.restype if protispos else nc.restype - restype_l = whichrestype(nc.orig_atoms[0]) if protispos else whichrestype(pc.orig_atoms[0]) + restype_l = ( + whichrestype(nc.orig_atoms[0]) + if protispos + else whichrestype(pc.orig_atoms[0]) + ) reschain = pc.reschain if protispos else nc.reschain - reschain_l = whichchain(nc.orig_atoms[0]) if protispos else whichchain(pc.orig_atoms[0]) - contact = data(positive=pc, negative=nc, distance=euclidean3d(pc.center, nc.center), protispos=protispos, - resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, restype_l=restype_l, - reschain_l=reschain_l) + reschain_l = ( + whichchain(nc.orig_atoms[0]) if protispos else whichchain(pc.orig_atoms[0]) + ) + contact = data( + positive=pc, + negative=nc, + distance=euclidean3d(pc.center, nc.center), + protispos=protispos, + resnr=resnr, + restype=restype, + reschain=reschain, + resnr_l=resnr_l, + restype_l=restype_l, + reschain_l=reschain_l, + ) pairings.append(contact) return filter_contacts(pairings) def halogen(acceptor, donor): """Detect all halogen bonds of the type Y-O...X-C""" - data = namedtuple('halogenbond', 'acc acc_orig_idx don don_orig_idx distance don_angle acc_angle restype ' - 'resnr reschain restype_l resnr_l reschain_l donortype acctype sidechain') + data = namedtuple( + "halogenbond", + "acc acc_orig_idx don don_orig_idx distance don_angle acc_angle restype " + "resnr reschain restype_l resnr_l reschain_l donortype acctype sidechain", + ) pairings = [] for acc, don in itertools.product(acceptor, donor): dist = euclidean3d(acc.o.coords, don.x.coords) if not config.MIN_DIST < dist < config.HALOGEN_DIST_MAX: continue - vec1, vec2 = vector(acc.o.coords, acc.y.coords), vector(acc.o.coords, don.x.coords) - vec3, vec4 = vector(don.x.coords, acc.o.coords), vector(don.x.coords, don.c.coords) + vec1, vec2 = ( + vector(acc.o.coords, acc.y.coords), + vector(acc.o.coords, don.x.coords), + ) + vec3, vec4 = ( + vector(don.x.coords, acc.o.coords), + vector(don.x.coords, don.c.coords), + ) acc_angle, don_angle = vecangle(vec1, vec2), vecangle(vec3, vec4) - is_sidechain_hal = acc.o.OBAtom.GetResidue().GetAtomProperty(acc.o.OBAtom, 8) # Check if sidechain atom - if not config.HALOGEN_ACC_ANGLE - config.HALOGEN_ANGLE_DEV < acc_angle \ - < config.HALOGEN_ACC_ANGLE + config.HALOGEN_ANGLE_DEV: + is_sidechain_hal = acc.o.OBAtom.GetResidue().GetAtomProperty( + acc.o.OBAtom, 8 + ) # Check if sidechain atom + if ( + not config.HALOGEN_ACC_ANGLE - config.HALOGEN_ANGLE_DEV + < acc_angle + < config.HALOGEN_ACC_ANGLE + config.HALOGEN_ANGLE_DEV + ): continue - if not config.HALOGEN_DON_ANGLE - config.HALOGEN_ANGLE_DEV < don_angle \ - < config.HALOGEN_DON_ANGLE + config.HALOGEN_ANGLE_DEV: + if ( + not config.HALOGEN_DON_ANGLE - config.HALOGEN_ANGLE_DEV + < don_angle + < config.HALOGEN_DON_ANGLE + config.HALOGEN_ANGLE_DEV + ): continue - restype, reschain, resnr = whichrestype(acc.o), whichchain(acc.o), whichresnumber(acc.o) - restype_l, reschain_l, resnr_l = whichrestype(don.orig_x), whichchain(don.orig_x), whichresnumber(don.orig_x) - contact = data(acc=acc, acc_orig_idx=acc.o_orig_idx, don=don, don_orig_idx=don.x_orig_idx, - distance=dist, don_angle=don_angle, acc_angle=acc_angle, - restype=restype, resnr=resnr, - reschain=reschain, restype_l=restype_l, - reschain_l=reschain_l, resnr_l=resnr_l, donortype=don.x.OBAtom.GetType(), acctype=acc.o.type, - sidechain=is_sidechain_hal) + restype, reschain, resnr = ( + whichrestype(acc.o), + whichchain(acc.o), + whichresnumber(acc.o), + ) + restype_l, reschain_l, resnr_l = ( + whichrestype(don.orig_x), + whichchain(don.orig_x), + whichresnumber(don.orig_x), + ) + contact = data( + acc=acc, + acc_orig_idx=acc.o_orig_idx, + don=don, + don_orig_idx=don.x_orig_idx, + distance=dist, + don_angle=don_angle, + acc_angle=acc_angle, + restype=restype, + resnr=resnr, + reschain=reschain, + restype_l=restype_l, + reschain_l=reschain_l, + resnr_l=resnr_l, + donortype=don.x.OBAtom.GetType(), + acctype=acc.o.type, + sidechain=is_sidechain_hal, + ) pairings.append(contact) return filter_contacts(pairings) def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): """Find water-bridged hydrogen bonds between ligand and protein. For now only considers bridged of first degree.""" - data = namedtuple('waterbridge', 'a a_orig_idx atype d d_orig_idx dtype h water water_orig_idx distance_aw ' - 'distance_dw d_angle w_angle type resnr restype reschain resnr_l restype_l reschain_l protisdon') + data = namedtuple( + "waterbridge", + "a a_orig_idx atype d d_orig_idx dtype h water water_orig_idx distance_aw " + "distance_dw d_angle w_angle type resnr restype reschain resnr_l restype_l reschain_l protisdon", + ) pairings = [] # First find all acceptor-water pairs with distance within d # and all donor-water pairs with distance within d and angle greater theta @@ -274,15 +481,25 @@ def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): prot_aw.append((acc2, w, dist)) for don1 in lig_hbd: dist = euclidean3d(don1.d.coords, w.oxy.coords) - d_angle = vecangle(vector(don1.h.coords, don1.d.coords), vector(don1.h.coords, w.oxy.coords)) - if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ - and d_angle > config.WATER_BRIDGE_THETA_MIN: + d_angle = vecangle( + vector(don1.h.coords, don1.d.coords), + vector(don1.h.coords, w.oxy.coords), + ) + if ( + config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST + and d_angle > config.WATER_BRIDGE_THETA_MIN + ): lig_dw.append((don1, w, dist, d_angle)) for don2 in bs_hbd: dist = euclidean3d(don2.d.coords, w.oxy.coords) - d_angle = vecangle(vector(don2.h.coords, don2.d.coords), vector(don2.h.coords, w.oxy.coords)) - if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ - and d_angle > config.WATER_BRIDGE_THETA_MIN: + d_angle = vecangle( + vector(don2.h.coords, don2.d.coords), + vector(don2.h.coords, w.oxy.coords), + ) + if ( + config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST + and d_angle > config.WATER_BRIDGE_THETA_MIN + ): prot_hw.append((don2, w, dist, d_angle)) for l, p in itertools.product(lig_aw, prot_hw): @@ -291,17 +508,44 @@ def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): if not wl.oxy == wd.oxy: continue # Same water molecule and angle within omega - w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) + w_angle = vecangle( + vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords) + ) if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: continue - resnr, reschain, restype = whichresnumber(don.d), whichchain(don.d), whichrestype(don.d) - resnr_l, reschain_l, restype_l = whichresnumber(acc.a_orig_atom), whichchain( - acc.a_orig_atom), whichrestype(acc.a_orig_atom) - contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, - dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, - distance_aw=distance_aw, distance_dw=distance_dw, d_angle=d_angle, w_angle=w_angle, - type='first_deg', resnr=resnr, restype=restype, - reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, protisdon=True) + resnr, reschain, restype = ( + whichresnumber(don.d), + whichchain(don.d), + whichrestype(don.d), + ) + resnr_l, reschain_l, restype_l = ( + whichresnumber(acc.a_orig_atom), + whichchain(acc.a_orig_atom), + whichrestype(acc.a_orig_atom), + ) + contact = data( + a=acc.a, + a_orig_idx=acc.a_orig_idx, + atype=acc.a.type, + d=don.d, + d_orig_idx=don.d_orig_idx, + dtype=don.d.type, + h=don.h, + water=wl.oxy, + water_orig_idx=wl.oxy_orig_idx, + distance_aw=distance_aw, + distance_dw=distance_dw, + d_angle=d_angle, + w_angle=w_angle, + type="first_deg", + resnr=resnr, + restype=restype, + reschain=reschain, + restype_l=restype_l, + resnr_l=resnr_l, + reschain_l=reschain_l, + protisdon=True, + ) pairings.append(contact) for p, l in itertools.product(prot_aw, lig_dw): acc, wl, distance_aw = p @@ -309,38 +553,71 @@ def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): if not wl.oxy == wd.oxy: continue # Same water molecule and angle within omega - w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) + w_angle = vecangle( + vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords) + ) if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: continue - resnr, reschain, restype = whichresnumber(acc.a), whichchain(acc.a), whichrestype(acc.a) - resnr_l, reschain_l, restype_l = whichresnumber(don.d_orig_atom), whichchain( - don.d_orig_atom), whichrestype(don.d_orig_atom) - contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, - dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, - distance_aw=distance_aw, distance_dw=distance_dw, - d_angle=d_angle, w_angle=w_angle, type='first_deg', resnr=resnr, - restype=restype, reschain=reschain, - restype_l=restype_l, reschain_l=reschain_l, resnr_l=resnr_l, protisdon=False) + resnr, reschain, restype = ( + whichresnumber(acc.a), + whichchain(acc.a), + whichrestype(acc.a), + ) + resnr_l, reschain_l, restype_l = ( + whichresnumber(don.d_orig_atom), + whichchain(don.d_orig_atom), + whichrestype(don.d_orig_atom), + ) + contact = data( + a=acc.a, + a_orig_idx=acc.a_orig_idx, + atype=acc.a.type, + d=don.d, + d_orig_idx=don.d_orig_idx, + dtype=don.d.type, + h=don.h, + water=wl.oxy, + water_orig_idx=wl.oxy_orig_idx, + distance_aw=distance_aw, + distance_dw=distance_dw, + d_angle=d_angle, + w_angle=w_angle, + type="first_deg", + resnr=resnr, + restype=restype, + reschain=reschain, + restype_l=restype_l, + reschain_l=reschain_l, + resnr_l=resnr_l, + protisdon=False, + ) pairings.append(contact) return filter_contacts(pairings) def metal_complexation(metals, metal_binding_lig, metal_binding_bs): """Find all metal complexes between metals and appropriate groups in both protein and ligand, as well as water""" - data = namedtuple('metal_complex', 'metal metal_orig_idx metal_type target target_orig_idx target_type ' - 'coordination_num distance resnr restype ' - 'reschain restype_l reschain_l resnr_l location rms, geometry num_partners complexnum') + data = namedtuple( + "metal_complex", + "metal metal_orig_idx metal_type target target_orig_idx target_type " + "coordination_num distance resnr restype " + "reschain restype_l reschain_l resnr_l location rms, geometry num_partners complexnum", + ) pairings_dict = {} pairings = [] # #@todo Refactor metal_to_id = {} metal_to_orig_atom = {} - for metal, target in itertools.product(metals, metal_binding_lig + metal_binding_bs): + for metal, target in itertools.product( + metals, metal_binding_lig + metal_binding_bs + ): distance = euclidean3d(metal.m.coords, target.atom.coords) if not distance < config.METAL_DIST_MAX: continue if metal.m not in pairings_dict: - pairings_dict[metal.m] = [(target, distance), ] + pairings_dict[metal.m] = [ + (target, distance), + ] metal_to_id[metal.m] = metal.m_orig_idx metal_to_orig_atom[metal.m] = metal.orig_m else: @@ -354,24 +631,32 @@ def metal_complexation(metals, metal_binding_lig, metal_binding_bs): vectors_dict = defaultdict(list) for contact_pair in contact_pairs: target, distance = contact_pair - vectors_dict[target.atom.idx].append(vector(metal.coords, target.atom.coords)) + vectors_dict[target.atom.idx].append( + vector(metal.coords, target.atom.coords) + ) # Listing of coordination numbers and their geometries - configs = {2: ['linear', ], - 3: ['trigonal.planar', 'trigonal.pyramidal'], - 4: ['tetrahedral', 'square.planar'], - 5: ['trigonal.bipyramidal', 'square.pyramidal'], - 6: ['octahedral', ]} + configs = { + 2: ["linear",], + 3: ["trigonal.planar", "trigonal.pyramidal"], + 4: ["tetrahedral", "square.planar"], + 5: ["trigonal.bipyramidal", "square.pyramidal"], + 6: ["octahedral",], + } # Angle signatures for each geometry (as seen from each target atom) - ideal_angles = {'linear': [[180.0]] * 2, - 'trigonal.planar': [[120.0, 120.0]] * 3, - 'trigonal.pyramidal': [[109.5, 109.5]] * 3, - 'tetrahedral': [[109.5, 109.5, 109.5, 109.5]] * 4, - 'square.planar': [[90.0, 90.0, 90.0, 90.0]] * 4, - 'trigonal.bipyramidal': [[120.0, 120.0, 90.0, 90.0]] * 3 + [[90.0, 90.0, 90.0, 180.0]] * 2, - 'square.pyramidal': [[90.0, 90.0, 90.0, 180.0]] * 4 + [[90.0, 90.0, 90.0, 90.0]], - 'octahedral': [[90.0, 90.0, 90.0, 90.0, 180.0]] * 6} + ideal_angles = { + "linear": [[180.0]] * 2, + "trigonal.planar": [[120.0, 120.0]] * 3, + "trigonal.pyramidal": [[109.5, 109.5]] * 3, + "tetrahedral": [[109.5, 109.5, 109.5, 109.5]] * 4, + "square.planar": [[90.0, 90.0, 90.0, 90.0]] * 4, + "trigonal.bipyramidal": [[120.0, 120.0, 90.0, 90.0]] * 3 + + [[90.0, 90.0, 90.0, 180.0]] * 2, + "square.pyramidal": [[90.0, 90.0, 90.0, 180.0]] * 4 + + [[90.0, 90.0, 90.0, 90.0]], + "octahedral": [[90.0, 90.0, 90.0, 90.0, 180.0]] * 6, + } angles_dict = {} for target in vectors_dict: @@ -380,27 +665,40 @@ def metal_complexation(metals, metal_binding_lig, metal_binding_bs): for t in vectors_dict: if not t == target: [other_vectors.append(x) for x in vectors_dict[t]] - angles = [vecangle(pair[0], pair[1]) for pair in itertools.product(cur_vector, other_vectors)] + angles = [ + vecangle(pair[0], pair[1]) + for pair in itertools.product(cur_vector, other_vectors) + ] angles_dict[target] = angles all_total = [] # Record fit information for each geometry tested - gdata = namedtuple('gdata', 'geometry rms coordination excluded diff_targets') # Geometry Data + gdata = namedtuple( + "gdata", "geometry rms coordination excluded diff_targets" + ) # Geometry Data # Can't specify geometry with only one target if num_targets == 1: - final_geom = 'NA' + final_geom = "NA" final_coo = 1 excluded = [] rms = 0.0 else: - for coo in sorted(configs, reverse=True): # Start with highest coordination number + for coo in sorted( + configs, reverse=True + ): # Start with highest coordination number geometries = configs[coo] for geometry in geometries: - signature = ideal_angles[geometry] # Set of ideal angles for geometry, from each perspective + signature = ideal_angles[ + geometry + ] # Set of ideal angles for geometry, from each perspective geometry_total = 0 - geometry_scores = [] # All scores for one geometry (from all subsignatures) + geometry_scores = ( + [] + ) # All scores for one geometry (from all subsignatures) used_up_targets = [] # Use each target just once for a subsignature not_used = [] - coo_diff = num_targets - coo # How many more observed targets are there? + coo_diff = ( + num_targets - coo + ) # How many more observed targets are there? # Find best match for each subsignature for subsignature in signature: # Ideal angles from one perspective @@ -409,7 +707,9 @@ def metal_complexation(metals, metal_binding_lig, metal_binding_bs): for k, target in enumerate(angles_dict): if target not in used_up_targets: - observed_angles = angles_dict[target] # Observed angles from perspective of one target + observed_angles = angles_dict[ + target + ] # Observed angles from perspective of one target single_target_scores = [] used_up_observed_angles = [] for i, ideal_angle in enumerate(subsignature): @@ -426,7 +726,9 @@ def metal_complexation(metals, metal_binding_lig, metal_binding_bs): used_up_observed_angles.append(best_match) single_target_scores.append(best_match_diff) # Calculate RMS for target angles - target_total = sum([x ** 2 for x in single_target_scores]) ** 0.5 # Tot. score targ/sig + target_total = ( + sum([x ** 2 for x in single_target_scores]) ** 0.5 + ) # Tot. score targ/sig if target_total < best_target_score: best_target_score = target_total best_target = target @@ -436,9 +738,20 @@ def metal_complexation(metals, metal_binding_lig, metal_binding_bs): # Total score is mean of RMS values geometry_total = np.mean(geometry_scores) # Record the targets not used for excluding them when deciding for a final geometry - [not_used.append(target) for target in angles_dict if target not in used_up_targets] - all_total.append(gdata(geometry=geometry, rms=geometry_total, coordination=coo, - excluded=not_used, diff_targets=coo_diff)) + [ + not_used.append(target) + for target in angles_dict + if target not in used_up_targets + ] + all_total.append( + gdata( + geometry=geometry, + rms=geometry_total, + coordination=coo, + excluded=not_used, + diff_targets=coo_diff, + ) + ) # Make a decision here. Starting with the geometry with lowest difference in ideal and observed partners ... # Check if the difference between the RMS to the next best solution is not larger than 0.5 @@ -449,31 +762,59 @@ def metal_complexation(metals, metal_binding_lig, metal_binding_bs): this_rms, next_rms = total.rms, next_total.rms diff_to_next = next_rms - this_rms if diff_to_next > 0.5: - final_geom, final_coo, rms, excluded = total.geometry, total.coordination, total.rms, total.excluded + final_geom, final_coo, rms, excluded = ( + total.geometry, + total.coordination, + total.rms, + total.excluded, + ) break elif next_total.rms < 3.5: - final_geom, final_coo, = next_total.geometry, next_total.coordination + final_geom, final_coo, = ( + next_total.geometry, + next_total.coordination, + ) rms, excluded = next_total.rms, next_total.excluded break elif i == len(all_total) - 2: - final_geom, final_coo, rms, excluded = "NA", "NA", float('nan'), [] + final_geom, final_coo, rms, excluded = "NA", "NA", float("nan"), [] break # Record all contact pairing, excluding those with targets superfluous for chosen geometry - only_water = set([x[0].location for x in contact_pairs]) == {'water'} + only_water = set([x[0].location for x in contact_pairs]) == {"water"} if not only_water: # No complex if just with water as targets - logger.info(f'metal ion {metal.type} complexed with {final_geom} geometry (coo. number {final_coo}/ {num_targets} observed)') + logger.info( + f"metal ion {metal.type} complexed with {final_geom} geometry (coo. number {final_coo}/ {num_targets} observed)" + ) for contact_pair in contact_pairs: target, distance = contact_pair if target.atom.idx not in excluded: metal_orig_atom = metal_to_orig_atom[metal] - restype_l, reschain_l, resnr_l = whichrestype(metal_orig_atom), whichchain( - metal_orig_atom), whichresnumber(metal_orig_atom) - contact = data(metal=metal, metal_orig_idx=metal_to_id[metal], metal_type=metal.type, - target=target, target_orig_idx=target.atom_orig_idx, target_type=target.type, - coordination_num=final_coo, distance=distance, resnr=target.resnr, - restype=target.restype, reschain=target.reschain, location=target.location, - rms=rms, geometry=final_geom, num_partners=num_targets, complexnum=cnum + 1, - resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) + restype_l, reschain_l, resnr_l = ( + whichrestype(metal_orig_atom), + whichchain(metal_orig_atom), + whichresnumber(metal_orig_atom), + ) + contact = data( + metal=metal, + metal_orig_idx=metal_to_id[metal], + metal_type=metal.type, + target=target, + target_orig_idx=target.atom_orig_idx, + target_type=target.type, + coordination_num=final_coo, + distance=distance, + resnr=target.resnr, + restype=target.restype, + reschain=target.reschain, + location=target.location, + rms=rms, + geometry=final_geom, + num_partners=num_targets, + complexnum=cnum + 1, + resnr_l=resnr_l, + restype_l=restype_l, + reschain_l=reschain_l, + ) pairings.append(contact) return filter_contacts(pairings) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py index 6b63d63..6b4f04d 100644 --- a/plip/structure/preparation.py +++ b/plip/structure/preparation.py @@ -10,12 +10,39 @@ from openbabel import pybel from plip.basic import config, logger from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name -from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar -from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize +from plip.basic.supplemental import ( + cluster_doubles, + is_lig, + normalize_vector, + vector, + ring_is_planar, +) +from plip.basic.supplemental import ( + extract_pdbid, + read_pdb, + create_folder_if_not_exists, + canonicalize, +) from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance -from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative -from plip.structure.detection import halogen, pication, water_bridges, metal_complexation -from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge +from plip.basic.supplemental import ( + whichchain, + whichrestype, + whichresnumber, + euclidean3d, + int32_to_negative, +) +from plip.structure.detection import ( + halogen, + pication, + water_bridges, + metal_complexation, +) +from plip.structure.detection import ( + hydrophobic_interactions, + pistacking, + hbonds, + saltbridge, +) logger = logger.get_logger() @@ -25,8 +52,16 @@ class PDBParser: self.as_string = as_string self.pdbpath = pdbpath self.num_fixed_lines = 0 - self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") - self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb() + self.covlinkage = namedtuple( + "covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2" + ) + ( + self.proteinmap, + self.modres, + self.covalent, + self.altconformations, + self.corrected_pdb, + ) = self.parse_pdb() def parse_pdb(self): """Extracts additional information from PDB files. @@ -38,7 +73,9 @@ class PDBParser: IV. Alternative conformations """ if self.as_string: - fil = self.pdbpath.rstrip('\n').split('\n') # Removing trailing newline character + fil = self.pdbpath.rstrip("\n").split( + "\n" + ) # Removing trailing newline character else: f = read(self.pdbpath) fil = f.readlines() @@ -57,19 +94,23 @@ class PDBParser: lastnum = 0 # Atom numbering (has to be consecutive) other_models = False for line in fil: - if not other_models: # Only consider the first model in an NRM structure + if ( + not other_models + ): # Only consider the first model in an NRM structure corrected_line, newnum = self.fix_pdbline(line, lastnum) if corrected_line is not None: - if corrected_line.startswith('MODEL'): + if corrected_line.startswith("MODEL"): try: # Get number of MODEL (1,2,3) model_num = int(corrected_line[10:14]) if model_num > 1: # MODEL 2,3,4 etc. other_models = True except ValueError: - logger.debug(f'ignoring invalid MODEL entry: {corrected_line}') + logger.debug( + f"ignoring invalid MODEL entry: {corrected_line}" + ) corrected_lines.append(corrected_line) lastnum = newnum - corrected_pdb = ''.join(corrected_lines) + corrected_pdb = "".join(corrected_lines) else: corrected_pdb = self.pdbpath corrected_lines = fil @@ -81,8 +122,8 @@ class PDBParser: if line.startswith(("ATOM", "HETATM")): # Retrieve alternate conformations atomid, location = int(line[6:11]), line[16] - location = 'A' if location == ' ' else location - if location != 'A': + location = "A" if location == " " else location + if location != "A": alt.append(atomid) if not previous_ter: @@ -107,12 +148,18 @@ class PDBParser: def fix_pdbline(self, pdbline, lastnum): """Fix a PDB line if information is missing.""" pdbqt_conversion = { - "HD": "H", "HS": "H", "NA": "N", - "NS": "N", "OA": "O", "OS": "O", "SA": "S"} + "HD": "H", + "HS": "H", + "NA": "N", + "NS": "N", + "OA": "O", + "OS": "O", + "SA": "S", + } fixed = False new_num = 0 forbidden_characters = "[^a-zA-Z0-9_]" - pdbline = pdbline.strip('\n') + pdbline = pdbline.strip("\n") # Some MD / Docking tools produce empty lines, leading to segfaults if len(pdbline.strip()) == 0: self.num_fixed_lines += 1 @@ -121,9 +168,9 @@ class PDBParser: self.num_fixed_lines += 1 return None, lastnum # TER Entries also have continuing numbering, consider them as well - if pdbline.startswith('TER'): + if pdbline.startswith("TER"): new_num = lastnum + 1 - if pdbline.startswith('ATOM'): + if pdbline.startswith("ATOM"): new_num = lastnum + 1 current_num = int(pdbline[6:11]) resnum = pdbline[22:27].strip() @@ -132,73 +179,107 @@ class PDBParser: try: int(resnum) except ValueError: - pdbline = pdbline[:22] + ' 0 ' + pdbline[27:] + pdbline = pdbline[:22] + " 0 " + pdbline[27:] fixed = True # Invalid characters in residue name if re.match(forbidden_characters, resname.strip()): - pdbline = pdbline[:17] + 'UNK ' + pdbline[21:] + pdbline = pdbline[:17] + "UNK " + pdbline[21:] fixed = True if lastnum + 1 != current_num: - pdbline = pdbline[:6] + (5 - len(str(new_num))) * ' ' + str(new_num) + ' ' + pdbline[12:] + pdbline = ( + pdbline[:6] + + (5 - len(str(new_num))) * " " + + str(new_num) + + " " + + pdbline[12:] + ) fixed = True # No chain assigned - if pdbline[21] == ' ': - pdbline = pdbline[:21] + 'A' + pdbline[22:] + if pdbline[21] == " ": + pdbline = pdbline[:21] + "A" + pdbline[22:] fixed = True - if pdbline.endswith('H'): + if pdbline.endswith("H"): self.num_fixed_lines += 1 return None, lastnum # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. for pdbqttype in pdbqt_conversion: if pdbline.strip().endswith(pdbqttype): - pdbline = pdbline.strip()[:-2] + ' ' + pdbqt_conversion[pdbqttype] + '\n' + pdbline = ( + pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + "\n" + ) self.num_fixed_lines += 1 - if pdbline.startswith('HETATM'): + if pdbline.startswith("HETATM"): new_num = lastnum + 1 try: current_num = int(pdbline[6:11]) except ValueError: current_num = None - logger.debug(f'invalid HETATM entry: {pdbline}') + logger.debug(f"invalid HETATM entry: {pdbline}") if lastnum + 1 != current_num: - pdbline = pdbline[:6] + (5 - len(str(new_num))) * ' ' + str(new_num) + ' ' + pdbline[12:] + pdbline = ( + pdbline[:6] + + (5 - len(str(new_num))) * " " + + str(new_num) + + " " + + pdbline[12:] + ) fixed = True # No chain assigned or number assigned as chain - if pdbline[21] == ' ': - pdbline = pdbline[:21] + 'Z' + pdbline[22:] + if pdbline[21] == " ": + pdbline = pdbline[:21] + "Z" + pdbline[22:] fixed = True # No residue number assigned - if pdbline[23:26] == ' ': - pdbline = pdbline[:23] + '999' + pdbline[26:] + if pdbline[23:26] == " ": + pdbline = pdbline[:23] + "999" + pdbline[26:] fixed = True # Non-standard Ligand Names ligname = pdbline[17:21].strip() if len(ligname) > 3: - pdbline = pdbline[:17] + ligname[:3] + ' ' + pdbline[21:] + pdbline = pdbline[:17] + ligname[:3] + " " + pdbline[21:] fixed = True if re.match(forbidden_characters, ligname.strip()): - pdbline = pdbline[:17] + 'LIG ' + pdbline[21:] + pdbline = pdbline[:17] + "LIG " + pdbline[21:] fixed = True if len(ligname.strip()) == 0: - pdbline = pdbline[:17] + 'LIG ' + pdbline[21:] + pdbline = pdbline[:17] + "LIG " + pdbline[21:] fixed = True - if pdbline.endswith('H'): + if pdbline.endswith("H"): self.num_fixed_lines += 1 return None, lastnum # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. for pdbqttype in pdbqt_conversion: if pdbline.strip().endswith(pdbqttype): - pdbline = pdbline.strip()[:-2] + ' ' + pdbqt_conversion[pdbqttype] + ' ' + pdbline = ( + pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + " " + ) self.num_fixed_lines += 1 self.num_fixed_lines += 1 if fixed else 0 - return pdbline + '\n', max(new_num, lastnum) + return pdbline + "\n", max(new_num, lastnum) def get_linkage(self, line): """Get the linkage information from a LINK entry PDB line.""" - conf1, id1, chain1, pos1 = line[16].strip(), line[17:20].strip(), line[21].strip(), int(line[22:26]) - conf2, id2, chain2, pos2 = line[46].strip(), line[47:50].strip(), line[51].strip(), int(line[52:56]) - return self.covlinkage(id1=id1, chain1=chain1, pos1=pos1, conf1=conf1, - id2=id2, chain2=chain2, pos2=pos2, conf2=conf2) + conf1, id1, chain1, pos1 = ( + line[16].strip(), + line[17:20].strip(), + line[21].strip(), + int(line[22:26]), + ) + conf2, id2, chain2, pos2 = ( + line[46].strip(), + line[47:50].strip(), + line[51].strip(), + int(line[52:56]), + ) + return self.covlinkage( + id1=id1, + chain1=chain1, + pos1=pos1, + conf1=conf1, + id2=id2, + chain2=chain2, + pos2=pos2, + conf2=conf2, + ) class LigandFinder: @@ -212,15 +293,20 @@ class LigandFinder: self.covalent = covalent self.mapper = mapper self.ligands = self.getligs() - self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept)))) + self.excluded = sorted( + list(self.lignames_all.difference(set(self.lignames_kept))) + ) def getpeptides(self, chain): """If peptide ligand chains are defined via the command line options, try to extract the underlying ligand formed by all residues in the given chain without water """ - all_from_chain = [o for o in pybel.ob.OBResidueIter( - self.proteincomplex.OBMol) if o.GetChain() == chain] # All residues from chain + all_from_chain = [ + o + for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) + if o.GetChain() == chain + ] # All residues from chain if len(all_from_chain) == 0: return None else: @@ -240,7 +326,9 @@ class LigandFinder: # Filter for ligands using lists ligand_residues, self.lignames_all, self.water = self.filter_for_ligands() - all_res_dict = {(a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues} + all_res_dict = { + (a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues + } self.lignames_kept = list(set([a.GetName() for a in ligand_residues])) if not config.BREAKCOMPOSITE: @@ -249,22 +337,35 @@ class LigandFinder: # Find fragment linked by covalent bonds res_kmers = self.identify_kmers(all_res_dict) else: - res_kmers = [[a, ] for a in ligand_residues] - logger.debug(f'{len(res_kmers)} ligand kmer(s) detected for closer inspection') - for kmer in res_kmers: # iterate over all ligands and extract molecules + information + res_kmers = [[a,] for a in ligand_residues] + logger.debug( + f"{len(res_kmers)} ligand kmer(s) detected for closer inspection" + ) + for ( + kmer + ) in ( + res_kmers + ): # iterate over all ligands and extract molecules + information if len(kmer) > config.MAX_COMPOSITE_LENGTH: logger.debug( - f'ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)') + f"ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)" + ) else: ligands.append(self.extract_ligand(kmer)) else: # Extract peptides from given chains - self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)] + self.water = [ + o + for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) + if o.GetResidueProperty(9) + ] if config.PEPTIDES: peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES] elif config.INTRA is not None: - peptide_ligands = [self.getpeptides(config.INTRA), ] + peptide_ligands = [ + self.getpeptides(config.INTRA), + ] ligands = [p for p in peptide_ligands if p is not None] self.covalent, self.lignames_kept, self.lignames_all = [], [], set() @@ -273,35 +374,53 @@ class LigandFinder: def extract_ligand(self, kmer): """Extract the ligand by copying atoms and bonds and assign all information necessary for later steps.""" - data = namedtuple('ligand', 'mol hetid chain position water members longname type atomorder can_to_pdb') - members = [(res.GetName(), res.GetChain(), int32_to_negative(res.GetNum())) for res in kmer] + data = namedtuple( + "ligand", + "mol hetid chain position water members longname type atomorder can_to_pdb", + ) + members = [ + (res.GetName(), res.GetChain(), int32_to_negative(res.GetNum())) + for res in kmer + ] members = sort_members_by_importance(members) rname, rchain, rnum = members[0] - logger.debug(f'finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements') + logger.debug( + f"finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements" + ) names = [x[0] for x in members] - longname = '-'.join([x[0] for x in members]) + longname = "-".join([x[0] for x in members]) if config.PEPTIDES: - ligtype = 'PEPTIDE' + ligtype = "PEPTIDE" elif config.INTRA is not None: - ligtype = 'INTRA' + ligtype = "INTRA" else: # Classify a ligand by its HETID(s) ligtype = classify_by_name(names) - logger.debug(f'ligand classified as {ligtype}') + logger.debug(f"ligand classified as {ligtype}") hetatoms = set() for obresidue in kmer: - hetatoms_res = set([(obatom.GetIdx(), obatom) for obatom in pybel.ob.OBResidueAtomIter(obresidue) - if obatom.GetAtomicNum() != 1]) + hetatoms_res = set( + [ + (obatom.GetIdx(), obatom) + for obatom in pybel.ob.OBResidueAtomIter(obresidue) + if obatom.GetAtomicNum() != 1 + ] + ) if not config.ALTLOC: # Remove alternative conformations (standard -> True) - hetatoms_res = set([atm for atm in hetatoms_res - if not self.mapper.mapid(atm[0], mtype='protein', - to='internal') in self.altconformations]) + hetatoms_res = set( + [ + atm + for atm in hetatoms_res + if not self.mapper.mapid(atm[0], mtype="protein", to="internal") + in self.altconformations + ] + ) hetatoms.update(hetatoms_res) - logger.debug(f'hetero atoms determined (n={len(hetatoms)})') + logger.debug(f"hetero atoms determined (n={len(hetatoms)})") hetatoms = dict(hetatoms) # make it a dict with idx as key and OBAtom as value lig = pybel.ob.OBMol() # new ligand mol @@ -310,15 +429,24 @@ class LigandFinder: idx = obatom.GetIdx() lig.AddAtom(obatom) # ids of all neighbours of obatom - neighbours[idx] = set([neighbour_atom.GetIdx() for neighbour_atom - in pybel.ob.OBAtomAtomIter(obatom)]) & set(hetatoms.keys()) - logger.debug(f'atom neighbours mapped') + neighbours[idx] = set( + [ + neighbour_atom.GetIdx() + for neighbour_atom in pybel.ob.OBAtomAtomIter(obatom) + ] + ) & set(hetatoms.keys()) + logger.debug(f"atom neighbours mapped") ############################################################## # map the old atom idx of OBMol to the new idx of the ligand # ############################################################## - newidx = dict(zip(hetatoms.keys(), [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)])) + newidx = dict( + zip( + hetatoms.keys(), + [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)], + ) + ) mapold = dict(zip(newidx.values(), newidx)) # copy the bonds for obatom in hetatoms: @@ -328,16 +456,16 @@ class LigandFinder: lig = pybel.Molecule(lig) # For kmers, the representative ids are chosen (first residue of kmer) - lig.data.update({'Name': rname, 'Chain': rchain, 'ResNr': rnum}) + lig.data.update({"Name": rname, "Chain": rchain, "ResNr": rnum}) # Check if a negative residue number is represented as a 32 bit integer if rnum > 10 ** 5: rnum = int32_to_negative(rnum) - lig.title = ':'.join((rname, rchain, str(rnum))) + lig.title = ":".join((rname, rchain, str(rnum))) self.mapper.ligandmaps[lig.title] = mapold - logger.debug('renumerated molecule generated') + logger.debug("renumerated molecule generated") if not config.NOPDBCANMAP: atomorder = canonicalize(lig) @@ -348,9 +476,18 @@ class LigandFinder: if atomorder is not None: can_to_pdb = {atomorder[key - 1]: mapold[key] for key in mapold} - ligand = data(mol=lig, hetid=rname, chain=rchain, position=rnum, water=self.water, - members=members, longname=longname, type=ligtype, atomorder=atomorder, - can_to_pdb=can_to_pdb) + ligand = data( + mol=lig, + hetid=rname, + chain=rchain, + position=rnum, + water=self.water, + members=members, + longname=longname, + type=ligtype, + atomorder=atomorder, + can_to_pdb=can_to_pdb, + ) return ligand def is_het_residue(self, obres): @@ -375,20 +512,35 @@ class LigandFinder: def filter_for_ligands(self): """Given an OpenBabel Molecule, get all ligands, their names, and water""" - candidates1 = [o for o in pybel.ob.OBResidueIter( - self.proteincomplex.OBMol) if not o.GetResidueProperty(9) and self.is_het_residue(o)] + candidates1 = [ + o + for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) + if not o.GetResidueProperty(9) and self.is_het_residue(o) + ] if config.DNARECEPTOR: # If DNA is the receptor, don't consider DNA as a ligand - candidates1 = [res for res in candidates1 if res.GetName() not in config.DNA + config.RNA] + candidates1 = [ + res + for res in candidates1 + if res.GetName() not in config.DNA + config.RNA + ] all_lignames = set([a.GetName() for a in candidates1]) - water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)] + water = [ + o + for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) + if o.GetResidueProperty(9) + ] # Filter out non-ligands if not config.KEEPMOD: # Keep modified residues as ligands - candidates2 = [a for a in candidates1 if is_lig(a.GetName()) and a.GetName() not in self.modresidues] + candidates2 = [ + a + for a in candidates1 + if is_lig(a.GetName()) and a.GetName() not in self.modresidues + ] else: candidates2 = [a for a in candidates1 if is_lig(a.GetName())] - logger.debug(f'{len(candidates2)} ligand(s) after first filtering step') + logger.debug(f"{len(candidates2)} ligand(s) after first filtering step") ############################################ # Filtering by counting and artifacts list # @@ -397,7 +549,10 @@ class LigandFinder: unique_ligs = set(a.GetName() for a in candidates2) for ulig in unique_ligs: # Discard if appearing 15 times or more and is possible artifact - if ulig in config.biolip_list and [a.GetName() for a in candidates2].count(ulig) >= 15: + if ( + ulig in config.biolip_list + and [a.GetName() for a in candidates2].count(ulig) >= 15 + ): artifacts.append(ulig) selected_ligands = [a for a in candidates2 if a.GetName() not in artifacts] @@ -408,12 +563,19 @@ class LigandFinder: """Using the covalent linkage information, find out which fragments/subunits form a ligand.""" # Remove all those not considered by ligands and pairings including alternate conformations - ligdoubles = [[(link.id1, link.chain1, link.pos1), - (link.id2, link.chain2, link.pos2)] for link in - [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept - and c.conf1 in ['A', ''] and c.conf2 in ['A', ''] - and (c.id1, c.chain1, c.pos1) in residues - and (c.id2, c.chain2, c.pos2) in residues]] + ligdoubles = [ + [(link.id1, link.chain1, link.pos1), (link.id2, link.chain2, link.pos2)] + for link in [ + c + for c in self.covalent + if c.id1 in self.lignames_kept + and c.id2 in self.lignames_kept + and c.conf1 in ["A", ""] + and c.conf2 in ["A", ""] + and (c.id1, c.chain1, c.pos1) in residues + and (c.id2, c.chain2, c.pos2) in residues + ] + ] kmers = cluster_doubles(ligdoubles) if not kmers: # No ligand kmers, just normal independent ligands return [[residues[res]] for res in residues] @@ -429,7 +591,9 @@ class LigandFinder: in_kmer.append((res.GetName(), res.GetChain(), res.GetNum())) for res in residues: if res not in in_kmer: - newres = [residues[res], ] + newres = [ + residues[res], + ] res_kmers.append(newres) return res_kmers @@ -438,26 +602,32 @@ class Mapper: """Provides functions for mapping atom IDs in the correct way""" def __init__(self): - self.proteinmap = None # Map internal atom IDs of protein residues to original PDB Atom IDs - self.ligandmaps = {} # Map IDs of new ligand molecules to internal IDs (or PDB IDs?) + self.proteinmap = ( + None # Map internal atom IDs of protein residues to original PDB Atom IDs + ) + self.ligandmaps = ( + {} + ) # Map IDs of new ligand molecules to internal IDs (or PDB IDs?) self.original_structure = None - def mapid(self, idx, mtype, bsid=None, to='original'): # Mapping to original IDs is standard for ligands - if mtype == 'reversed': # Needed to map internal ID back to original protein ID + def mapid( + self, idx, mtype, bsid=None, to="original" + ): # Mapping to original IDs is standard for ligands + if mtype == "reversed": # Needed to map internal ID back to original protein ID return self.reversed_proteinmap[idx] - if mtype == 'protein': + if mtype == "protein": return self.proteinmap[idx] - elif mtype == 'ligand': - if to == 'internal': + elif mtype == "ligand": + if to == "internal": return self.ligandmaps[bsid][idx] - elif to == 'original': + elif to == "original": return self.proteinmap[self.ligandmaps[bsid][idx]] def id_to_atom(self, idx): """Returns the atom for a given original ligand ID. To do this, the ID is mapped to the protein first and then the atom returned. """ - mapped_idx = self.mapid(idx, 'reversed') + mapped_idx = self.mapid(idx, "reversed") return pybel.Atom(self.original_structure.GetAtom(mapped_idx)) @@ -476,10 +646,15 @@ class Mol: def hydrophobic_atoms(self, all_atoms): """Select all carbon atoms which have only carbons and/or hydrogens as direct neighbors.""" atom_set = [] - data = namedtuple('hydrophobic', 'atom orig_atom orig_idx') - atm = [a for a in all_atoms if a.atomicnum == 6 and set([natom.GetAtomicNum() for natom - in pybel.ob.OBAtomAtomIter(a.OBAtom)]).issubset( - {1, 6})] + data = namedtuple("hydrophobic", "atom orig_atom orig_idx") + atm = [ + a + for a in all_atoms + if a.atomicnum == 6 + and set( + [natom.GetAtomicNum() for natom in pybel.ob.OBAtomAtomIter(a.OBAtom)] + ).issubset({1, 6}) + ] for atom in atm: orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid) orig_atom = self.Mapper.id_to_atom(orig_idx) @@ -489,45 +664,88 @@ class Mol: def find_hba(self, all_atoms): """Find all possible hydrogen bond acceptors""" - data = namedtuple('hbondacceptor', 'a a_orig_atom a_orig_idx type') + data = namedtuple("hbondacceptor", "a a_orig_atom a_orig_idx type") a_set = [] for atom in filter(lambda at: at.OBAtom.IsHbondAcceptor(), all_atoms): - if atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf: # Exclude halogen atoms - a_orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid) + if ( + atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf + ): # Exclude halogen atoms + a_orig_idx = self.Mapper.mapid( + atom.idx, mtype=self.mtype, bsid=self.bsid + ) a_orig_atom = self.Mapper.id_to_atom(a_orig_idx) - a_set.append(data(a=atom, a_orig_atom=a_orig_atom, a_orig_idx=a_orig_idx, type='regular')) + a_set.append( + data( + a=atom, + a_orig_atom=a_orig_atom, + a_orig_idx=a_orig_idx, + type="regular", + ) + ) a_set = sorted(a_set, key=lambda x: x.a_orig_idx) return a_set def find_hbd(self, all_atoms, hydroph_atoms): """Find all possible strong and weak hydrogen bonds donors (all hydrophobic C-H pairings)""" donor_pairs = [] - data = namedtuple('hbonddonor', 'd d_orig_atom d_orig_idx h type') - for donor in [a for a in all_atoms if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf]: + data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type") + for donor in [ + a + for a in all_atoms + if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf + ]: in_ring = False if not in_ring: - for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(donor.OBAtom) if a.IsHbondDonorH()]: - d_orig_idx = self.Mapper.mapid(donor.idx, mtype=self.mtype, bsid=self.bsid) + for adj_atom in [ + a + for a in pybel.ob.OBAtomAtomIter(donor.OBAtom) + if a.IsHbondDonorH() + ]: + d_orig_idx = self.Mapper.mapid( + donor.idx, mtype=self.mtype, bsid=self.bsid + ) d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) - donor_pairs.append(data(d=donor, d_orig_atom=d_orig_atom, d_orig_idx=d_orig_idx, - h=pybel.Atom(adj_atom), type='regular')) + donor_pairs.append( + data( + d=donor, + d_orig_atom=d_orig_atom, + d_orig_idx=d_orig_idx, + h=pybel.Atom(adj_atom), + type="regular", + ) + ) for carbon in hydroph_atoms: - for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom) if a.GetAtomicNum() == 1]: - d_orig_idx = self.Mapper.mapid(carbon.atom.idx, mtype=self.mtype, bsid=self.bsid) + for adj_atom in [ + a + for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom) + if a.GetAtomicNum() == 1 + ]: + d_orig_idx = self.Mapper.mapid( + carbon.atom.idx, mtype=self.mtype, bsid=self.bsid + ) d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) - donor_pairs.append(data(d=carbon, d_orig_atom=d_orig_atom, - d_orig_idx=d_orig_idx, h=pybel.Atom(adj_atom), type='weak')) + donor_pairs.append( + data( + d=carbon, + d_orig_atom=d_orig_atom, + d_orig_idx=d_orig_idx, + h=pybel.Atom(adj_atom), + type="weak", + ) + ) donor_pairs = sorted(donor_pairs, key=lambda x: (x.d_orig_idx, x.h.idx)) return donor_pairs def find_rings(self, mol, all_atoms): """Find rings and return only aromatic. Rings have to be sufficiently planar OR be detected by OpenBabel as aromatic.""" - data = namedtuple('aromatic_ring', 'atoms orig_atoms atoms_orig_idx normal obj center type') + data = namedtuple( + "aromatic_ring", "atoms orig_atoms atoms_orig_idx normal obj center type" + ) rings = [] - aromatic_amino = ['TYR', 'TRP', 'HIS', 'PHE'] + aromatic_amino = ["TYR", "TRP", "HIS", "PHE"] ring_candidates = mol.OBMol.GetSSSR() - logger.debug(f'number of aromatic ring candidates: {len(ring_candidates)}') + logger.debug(f"number of aromatic ring candidates: {len(ring_candidates)}") # Check here first for ligand rings not being detected as aromatic by Babel and check for planarity for ring in ring_candidates: r_atoms = [a for a in all_atoms if ring.IsMember(a.OBAtom)] @@ -535,28 +753,42 @@ class Mol: if 4 < len(r_atoms) <= 6: res = list(set([whichrestype(a) for a in r_atoms])) # re-sort ring atoms for only ligands, because HETATM numbering is not canonical in OpenBabel - if res[0] == 'UNL': - ligand_orig_idx = [self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms] + if res[0] == "UNL": + ligand_orig_idx = [ + self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms + ] sort_order = np.argsort(np.array(ligand_orig_idx)) r_atoms = [r_atoms[i] for i in sort_order] - if ring.IsAromatic() or res[0] in aromatic_amino or ring_is_planar(ring, r_atoms): + if ( + ring.IsAromatic() + or res[0] in aromatic_amino + or ring_is_planar(ring, r_atoms) + ): # Causes segfault with OpenBabel 2.3.2, so deactivated # typ = ring.GetType() if not ring.GetType() == '' else 'unknown' # Alternative typing - ring_type = '%s-membered' % len(r_atoms) - ring_atms = [r_atoms[a].coords for a in [0, 2, 4]] # Probe atoms for normals, assuming planarity + ring_type = "%s-membered" % len(r_atoms) + ring_atms = [ + r_atoms[a].coords for a in [0, 2, 4] + ] # Probe atoms for normals, assuming planarity ringv1 = vector(ring_atms[0], ring_atms[1]) ringv2 = vector(ring_atms[2], ring_atms[0]) - atoms_orig_idx = [self.Mapper.mapid(r_atom.idx, mtype=self.mtype, - bsid=self.bsid) for r_atom in r_atoms] + atoms_orig_idx = [ + self.Mapper.mapid(r_atom.idx, mtype=self.mtype, bsid=self.bsid) + for r_atom in r_atoms + ] orig_atoms = [self.Mapper.id_to_atom(idx) for idx in atoms_orig_idx] - rings.append(data(atoms=r_atoms, - orig_atoms=orig_atoms, - atoms_orig_idx=atoms_orig_idx, - normal=normalize_vector(np.cross(ringv1, ringv2)), - obj=ring, - center=centroid([ra.coords for ra in r_atoms]), - type=ring_type)) + rings.append( + data( + atoms=r_atoms, + orig_atoms=orig_atoms, + atoms_orig_idx=atoms_orig_idx, + normal=normalize_vector(np.cross(ringv1, ringv2)), + obj=ring, + center=centroid([ra.coords for ra in r_atoms]), + type=ring_type, + ) + ) return rings def get_hydrophobic_atoms(self): @@ -566,16 +798,24 @@ class Mol: return self.hbond_acc_atoms def get_hbd(self): - return [don_pair for don_pair in self.hbond_don_atom_pairs if don_pair.type == 'regular'] + return [ + don_pair + for don_pair in self.hbond_don_atom_pairs + if don_pair.type == "regular" + ] def get_weak_hbd(self): - return [don_pair for don_pair in self.hbond_don_atom_pairs if don_pair.type == 'weak'] + return [ + don_pair + for don_pair in self.hbond_don_atom_pairs + if don_pair.type == "weak" + ] def get_pos_charged(self): - return [charge for charge in self.charged if charge.type == 'positive'] + return [charge for charge in self.charged if charge.type == "positive"] def get_neg_charged(self): - return [charge for charge in self.charged if charge.type == 'negative'] + return [charge for charge in self.charged if charge.type == "negative"] class PLInteraction: @@ -592,65 +832,135 @@ class PLInteraction: self.altconf = protcomplex.altconf # #@todo Refactor code to combine different directionality - self.saltbridge_lneg = saltbridge(self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True) - self.saltbridge_pneg = saltbridge(self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False) - - self.all_hbonds_ldon = hbonds(self.bindingsite.get_hba(), - self.ligand.get_hbd(), False, 'strong') - self.all_hbonds_pdon = hbonds(self.ligand.get_hba(), - self.bindingsite.get_hbd(), True, 'strong') - - self.hbonds_ldon = self.refine_hbonds_ldon(self.all_hbonds_ldon, self.saltbridge_lneg, - self.saltbridge_pneg) - self.hbonds_pdon = self.refine_hbonds_pdon(self.all_hbonds_pdon, self.saltbridge_lneg, - self.saltbridge_pneg) + self.saltbridge_lneg = saltbridge( + self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True + ) + self.saltbridge_pneg = saltbridge( + self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False + ) + + self.all_hbonds_ldon = hbonds( + self.bindingsite.get_hba(), self.ligand.get_hbd(), False, "strong" + ) + self.all_hbonds_pdon = hbonds( + self.ligand.get_hba(), self.bindingsite.get_hbd(), True, "strong" + ) + + self.hbonds_ldon = self.refine_hbonds_ldon( + self.all_hbonds_ldon, self.saltbridge_lneg, self.saltbridge_pneg + ) + self.hbonds_pdon = self.refine_hbonds_pdon( + self.all_hbonds_pdon, self.saltbridge_lneg, self.saltbridge_pneg + ) self.pistacking = pistacking(self.bindingsite.rings, self.ligand.rings) - self.all_pi_cation_laro = pication(self.ligand.rings, self.bindingsite.get_pos_charged(), True) - self.pication_paro = pication(self.bindingsite.rings, self.ligand.get_pos_charged(), False) - - self.pication_laro = self.refine_pi_cation_laro(self.all_pi_cation_laro, self.pistacking) - - self.all_hydrophobic_contacts = hydrophobic_interactions(self.bindingsite.get_hydrophobic_atoms(), - self.ligand.get_hydrophobic_atoms()) - self.hydrophobic_contacts = self.refine_hydrophobic(self.all_hydrophobic_contacts, self.pistacking) - self.halogen_bonds = halogen(self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don) - self.water_bridges = water_bridges(self.bindingsite.get_hba(), self.ligand.get_hba(), - self.bindingsite.get_hbd(), self.ligand.get_hbd(), - self.ligand.water) - - self.water_bridges = self.refine_water_bridges(self.water_bridges, self.hbonds_ldon, self.hbonds_pdon) - - self.metal_complexes = metal_complexation(self.ligand.metals, self.ligand.metal_binding, - self.bindingsite.metal_binding) + self.all_pi_cation_laro = pication( + self.ligand.rings, self.bindingsite.get_pos_charged(), True + ) + self.pication_paro = pication( + self.bindingsite.rings, self.ligand.get_pos_charged(), False + ) + + self.pication_laro = self.refine_pi_cation_laro( + self.all_pi_cation_laro, self.pistacking + ) + + self.all_hydrophobic_contacts = hydrophobic_interactions( + self.bindingsite.get_hydrophobic_atoms(), + self.ligand.get_hydrophobic_atoms(), + ) + self.hydrophobic_contacts = self.refine_hydrophobic( + self.all_hydrophobic_contacts, self.pistacking + ) + self.halogen_bonds = halogen( + self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don + ) + self.water_bridges = water_bridges( + self.bindingsite.get_hba(), + self.ligand.get_hba(), + self.bindingsite.get_hbd(), + self.ligand.get_hbd(), + self.ligand.water, + ) + + self.water_bridges = self.refine_water_bridges( + self.water_bridges, self.hbonds_ldon, self.hbonds_pdon + ) + + self.metal_complexes = metal_complexation( + self.ligand.metals, + self.ligand.metal_binding, + self.bindingsite.metal_binding, + ) self.all_itypes = self.saltbridge_lneg + self.saltbridge_pneg + self.hbonds_pdon - self.all_itypes = self.all_itypes + self.hbonds_ldon + self.pistacking + self.pication_laro + self.pication_paro - self.all_itypes = self.all_itypes + self.hydrophobic_contacts + self.halogen_bonds + self.water_bridges + self.all_itypes = ( + self.all_itypes + + self.hbonds_ldon + + self.pistacking + + self.pication_laro + + self.pication_paro + ) + self.all_itypes = ( + self.all_itypes + + self.hydrophobic_contacts + + self.halogen_bonds + + self.water_bridges + ) self.all_itypes = self.all_itypes + self.metal_complexes self.no_interactions = all(len(i) == 0 for i in self.all_itypes) - self.unpaired_hba, self.unpaired_hbd, self.unpaired_hal = self.find_unpaired_ligand() - self.unpaired_hba_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid) - for atom in self.unpaired_hba] - self.unpaired_hbd_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid) - for atom in self.unpaired_hbd] - self.unpaired_hal_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid) - for atom in self.unpaired_hal] - self.num_unpaired_hba, self.num_unpaired_hbd = len(self.unpaired_hba), len(self.unpaired_hbd) + ( + self.unpaired_hba, + self.unpaired_hbd, + self.unpaired_hal, + ) = self.find_unpaired_ligand() + self.unpaired_hba_orig_idx = [ + self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) + for atom in self.unpaired_hba + ] + self.unpaired_hbd_orig_idx = [ + self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) + for atom in self.unpaired_hbd + ] + self.unpaired_hal_orig_idx = [ + self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) + for atom in self.unpaired_hal + ] + self.num_unpaired_hba, self.num_unpaired_hbd = ( + len(self.unpaired_hba), + len(self.unpaired_hbd), + ) self.num_unpaired_hal = len(self.unpaired_hal) # Exclude empty chains (coming from ligand as a target, from metal complexes) - self.interacting_chains = sorted(list(set([i.reschain for i in self.all_itypes - if i.reschain not in [' ', None]]))) + self.interacting_chains = sorted( + list( + set( + [ + i.reschain + for i in self.all_itypes + if i.reschain not in [" ", None] + ] + ) + ) + ) # Get all interacting residues, excluding ligand and water molecules - self.interacting_res = list(set([''.join([str(i.resnr), str(i.reschain)]) for i in self.all_itypes - if i.restype not in ['LIG', 'HOH']])) + self.interacting_res = list( + set( + [ + "".join([str(i.resnr), str(i.reschain)]) + for i in self.all_itypes + if i.restype not in ["LIG", "HOH"] + ] + ) + ) if len(self.interacting_res) != 0: logger.info( - f'ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}') + f"ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}" + ) interactions_list = [] num_saltbridges = len(self.saltbridge_lneg + self.saltbridge_pneg) num_hbonds = len(self.hbonds_ldon + self.hbonds_pdon) @@ -659,34 +969,49 @@ class PLInteraction: num_halogen = len(self.halogen_bonds) num_waterbridges = len(self.water_bridges) if num_saltbridges != 0: - interactions_list.append('%i salt bridge(s)' % num_saltbridges) + interactions_list.append("%i salt bridge(s)" % num_saltbridges) if num_hbonds != 0: - interactions_list.append('%i hydrogen bond(s)' % num_hbonds) + interactions_list.append("%i hydrogen bond(s)" % num_hbonds) if num_pication != 0: - interactions_list.append('%i pi-cation interaction(s)' % num_pication) + interactions_list.append("%i pi-cation interaction(s)" % num_pication) if num_pistack != 0: - interactions_list.append('%i pi-stacking(s)' % num_pistack) + interactions_list.append("%i pi-stacking(s)" % num_pistack) if num_halogen != 0: - interactions_list.append('%i halogen bond(s)' % num_halogen) + interactions_list.append("%i halogen bond(s)" % num_halogen) if num_waterbridges != 0: - interactions_list.append('%i water bridge(s)' % num_waterbridges) + interactions_list.append("%i water bridge(s)" % num_waterbridges) if not len(interactions_list) == 0: - logger.info(f'complex uses {interactions_list}') + logger.info(f"complex uses {interactions_list}") else: - logger.info('no interactions for this ligand') + logger.info("no interactions for this ligand") def find_unpaired_ligand(self): """Identify unpaired functional in groups in ligands, involving H-Bond donors, acceptors, halogen bond donors. """ unpaired_hba, unpaired_hbd, unpaired_hal = [], [], [] # Unpaired hydrogen bond acceptors/donors in ligand (not used for hydrogen bonds/water, salt bridges/mcomplex) - involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [hbond.d.idx for hbond in self.hbonds_ldon] - [[involved_atoms.append(atom.idx) for atom in sb.negative.atoms] for sb in self.saltbridge_lneg] - [[involved_atoms.append(atom.idx) for atom in sb.positive.atoms] for sb in self.saltbridge_pneg] + involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [ + hbond.d.idx for hbond in self.hbonds_ldon + ] + [ + [involved_atoms.append(atom.idx) for atom in sb.negative.atoms] + for sb in self.saltbridge_lneg + ] + [ + [involved_atoms.append(atom.idx) for atom in sb.positive.atoms] + for sb in self.saltbridge_pneg + ] [involved_atoms.append(wb.a.idx) for wb in self.water_bridges if wb.protisdon] - [involved_atoms.append(wb.d.idx) for wb in self.water_bridges if not wb.protisdon] - [involved_atoms.append(mcomplex.target.atom.idx) for mcomplex in self.metal_complexes - if mcomplex.location == 'ligand'] + [ + involved_atoms.append(wb.d.idx) + for wb in self.water_bridges + if not wb.protisdon + ] + [ + involved_atoms.append(mcomplex.target.atom.idx) + for mcomplex in self.metal_complexes + if mcomplex.location == "ligand" + ] for atom in [hba.a for hba in self.ligand.get_hba()]: if atom.idx not in involved_atoms: unpaired_hba.append(atom) @@ -708,7 +1033,10 @@ class PLInteraction: # 1. Rings interacting via stacking can't have additional hydrophobic contacts between each other. for pistack, h in itertools.product(pistacks, all_h): h1, h2 = h.bsatom.idx, h.ligatom.idx - brs, lrs = [p1.idx for p1 in pistack.proteinring.atoms], [p2.idx for p2 in pistack.ligandring.atoms] + brs, lrs = ( + [p1.idx for p1 in pistack.proteinring.atoms], + [p2.idx for p2 in pistack.ligandring.atoms], + ) if h1 in brs and h2 in lrs: sel[(h1, h2)] = "EXCLUDE" hydroph = [h for h in all_h if not (h.bsatom.idx, h.ligatom.idx) in sel] @@ -727,7 +1055,9 @@ class PLInteraction: # 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist for h in hydroph: if h.bsatom.idx not in bsclust: - bsclust[h.bsatom.idx] = [h, ] + bsclust[h.bsatom.idx] = [ + h, + ] else: bsclust[h.bsatom.idx].append(h) @@ -753,10 +1083,12 @@ class PLInteraction: tuples = list(set(tuples)) tuples = sorted(tuples, key=itemgetter(1)) - clusters = cluster_doubles(tuples) # Cluster connected atoms (i.e. find hydrophobic patches) + clusters = cluster_doubles( + tuples + ) # Cluster connected atoms (i.e. find hydrophobic patches) for cluster in clusters: - min_dist = float('inf') + min_dist = float("inf") min_h = None for atm_idx in cluster: h = idx_to_h[atm_idx] @@ -766,7 +1098,9 @@ class PLInteraction: hydroph_final.append(min_h) before, reduced = len(all_h), len(hydroph_final) if not before == 0 and not before == reduced: - logger.info(f'reduced number of hydrophobic contacts from {before} to {reduced}') + logger.info( + f"reduced number of hydrophobic contacts from {before} to {reduced}" + ) return hydroph_final def refine_hbonds_ldon(self, all_hbonds, salt_lneg, salt_pneg): @@ -775,11 +1109,17 @@ class PLInteraction: for hbond in all_hbonds: i_set[hbond] = False for salt in salt_pneg: - protidx, ligidx = [at.idx for at in salt.negative.atoms], [at.idx for at in salt.positive.atoms] + protidx, ligidx = ( + [at.idx for at in salt.negative.atoms], + [at.idx for at in salt.positive.atoms], + ) if hbond.d.idx in ligidx and hbond.a.idx in protidx: i_set[hbond] = True for salt in salt_lneg: - protidx, ligidx = [at.idx for at in salt.positive.atoms], [at.idx for at in salt.negative.atoms] + protidx, ligidx = ( + [at.idx for at in salt.positive.atoms], + [at.idx for at in salt.negative.atoms], + ) if hbond.d.idx in ligidx and hbond.a.idx in protidx: i_set[hbond] = True @@ -802,11 +1142,17 @@ class PLInteraction: for hbond in all_hbonds: i_set[hbond] = False for salt in salt_lneg: - protidx, ligidx = [at.idx for at in salt.positive.atoms], [at.idx for at in salt.negative.atoms] + protidx, ligidx = ( + [at.idx for at in salt.positive.atoms], + [at.idx for at in salt.negative.atoms], + ) if hbond.a.idx in ligidx and hbond.d.idx in protidx: i_set[hbond] = True for salt in salt_pneg: - protidx, ligidx = [at.idx for at in salt.negative.atoms], [at.idx for at in salt.positive.atoms] + protidx, ligidx = ( + [at.idx for at in salt.negative.atoms], + [at.idx for at in salt.positive.atoms], + ) if hbond.a.idx in ligidx and hbond.d.idx in protidx: i_set[hbond] = True @@ -830,7 +1176,10 @@ class PLInteraction: for picat in all_picat: exclude = False for stack in stacks: - if whichrestype(stack.proteinring.atoms[0]) == 'HIS' and picat.ring.obj == stack.ligandring.obj: + if ( + whichrestype(stack.proteinring.atoms[0]) == "HIS" + and picat.ring.obj == stack.ligandring.obj + ): exclude = True if not exclude: i_set.append(picat) @@ -849,18 +1198,27 @@ class PLInteraction: if (wbridge.water.idx, wbridge.a.idx) not in wb_dict: wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge else: - if abs(omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle) < abs(omega - wbridge.w_angle): + if abs( + omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle + ) < abs(omega - wbridge.w_angle): wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge for wb_tuple in wb_dict: water, acceptor = wb_tuple if water not in wb_dict2: - wb_dict2[water] = [(abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]), ] + wb_dict2[water] = [ + (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]), + ] elif len(wb_dict2[water]) == 1: - wb_dict2[water].append((abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple])) + wb_dict2[water].append( + (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]) + ) wb_dict2[water] = sorted(wb_dict2[water], key=lambda x: x[0]) else: if wb_dict2[water][1][0] < abs(omega - wb_dict[wb_tuple].w_angle): - wb_dict2[water] = [wb_dict2[water][0], (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple])] + wb_dict2[water] = [ + wb_dict2[water][0], + (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple]), + ] filtered_wb = [] for fwbridges in wb_dict2.values(): @@ -871,12 +1229,19 @@ class PLInteraction: class BindingSite(Mol): def __init__(self, atoms, protcomplex, cclass, altconf, min_dist, mapper): """Find all relevant parts which could take part in interactions""" - Mol.__init__(self, altconf, mapper, mtype='protein', bsid=None) + Mol.__init__(self, altconf, mapper, mtype="protein", bsid=None) self.complex = cclass self.full_mol = protcomplex self.all_atoms = atoms self.min_dist = min_dist # Minimum distance of bs res to ligand - self.bs_res = list(set([''.join([str(whichresnumber(a)), whichchain(a)]) for a in self.all_atoms])) # e.g. 47A + self.bs_res = list( + set( + [ + "".join([str(whichresnumber(a)), whichchain(a)]) + for a in self.all_atoms + ] + ) + ) # e.g. 47A self.rings = self.find_rings(self.full_mol, self.all_atoms) self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) self.hbond_acc_atoms = self.find_hba(self.all_atoms) @@ -887,118 +1252,228 @@ class BindingSite(Mol): def find_hal(self, atoms): """Look for halogen bond acceptors (Y-{O|P|N|S}, with Y=C,P,S)""" - data = namedtuple('hal_acceptor', 'o o_orig_idx y y_orig_idx') + data = namedtuple("hal_acceptor", "o o_orig_idx y y_orig_idx") a_set = [] # All oxygens, nitrogen, sulfurs with neighboring carbon, phosphor, nitrogen or sulfur for a in [at for at in atoms if at.atomicnum in [8, 7, 16]]: - n_atoms = [na for na in pybel.ob.OBAtomAtomIter(a.OBAtom) if na.GetAtomicNum() in [6, 7, 15, 16]] + n_atoms = [ + na + for na in pybel.ob.OBAtomAtomIter(a.OBAtom) + if na.GetAtomicNum() in [6, 7, 15, 16] + ] if len(n_atoms) == 1: # Proximal atom o_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) - y_orig_idx = self.Mapper.mapid(n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid) - a_set.append(data(o=a, o_orig_idx=o_orig_idx, y=pybel.Atom(n_atoms[0]), y_orig_idx=y_orig_idx)) + y_orig_idx = self.Mapper.mapid( + n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid + ) + a_set.append( + data( + o=a, + o_orig_idx=o_orig_idx, + y=pybel.Atom(n_atoms[0]), + y_orig_idx=y_orig_idx, + ) + ) return a_set def find_charged(self, mol): """Looks for positive charges in arginine, histidine or lysine, for negative in aspartic and glutamic acid.""" - data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') + data = namedtuple( + "pcharge", "atoms atoms_orig_idx type center restype resnr reschain" + ) a_set = [] # Iterate through all residue, exclude those in chains defined as peptides - for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]: + for res in [ + r + for r in pybel.ob.OBResidueIter(mol.OBMol) + if not r.GetChain() in config.PEPTIDES + ]: if config.INTRA is not None: if res.GetChain() != config.INTRA: continue a_contributing = [] a_contributing_orig_idx = [] - if res.GetName() in ('ARG', 'HIS', 'LYS'): # Arginine, Histidine or Lysine have charged sidechains + if res.GetName() in ( + "ARG", + "HIS", + "LYS", + ): # Arginine, Histidine or Lysine have charged sidechains for a in pybel.ob.OBResidueAtomIter(res): - if a.GetType().startswith('N') and res.GetAtomProperty(a, 8) \ - and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + if ( + a.GetType().startswith("N") + and res.GetAtomProperty(a, 8) + and not self.Mapper.mapid(a.GetIdx(), mtype="protein") + in self.altconf + ): a_contributing.append(pybel.Atom(a)) - a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein')) + a_contributing_orig_idx.append( + self.Mapper.mapid(a.GetIdx(), mtype="protein") + ) if not len(a_contributing) == 0: - a_set.append(data(atoms=a_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type='positive', - center=centroid([ac.coords for ac in a_contributing]), - restype=res.GetName(), - resnr=res.GetNum(), - reschain=res.GetChain())) - if res.GetName() in ('GLU', 'ASP'): # Aspartic or Glutamic Acid + a_set.append( + data( + atoms=a_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="positive", + center=centroid([ac.coords for ac in a_contributing]), + restype=res.GetName(), + resnr=res.GetNum(), + reschain=res.GetChain(), + ) + ) + if res.GetName() in ("GLU", "ASP"): # Aspartic or Glutamic Acid for a in pybel.ob.OBResidueAtomIter(res): - if a.GetType().startswith('O') and res.GetAtomProperty(a, 8) \ - and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + if ( + a.GetType().startswith("O") + and res.GetAtomProperty(a, 8) + and not self.Mapper.mapid(a.GetIdx(), mtype="protein") + in self.altconf + ): a_contributing.append(pybel.Atom(a)) - a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein')) + a_contributing_orig_idx.append( + self.Mapper.mapid(a.GetIdx(), mtype="protein") + ) if not len(a_contributing) == 0: - a_set.append(data(atoms=a_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type='negative', - center=centroid([ac.coords for ac in a_contributing]), - restype=res.GetName(), - resnr=res.GetNum(), - reschain=res.GetChain())) + a_set.append( + data( + atoms=a_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="negative", + center=centroid([ac.coords for ac in a_contributing]), + restype=res.GetName(), + resnr=res.GetNum(), + reschain=res.GetChain(), + ) + ) return a_set def find_metal_binding(self, mol): """Looks for atoms that could possibly be involved in chelating a metal ion. This can be any main chain oxygen atom or oxygen, nitrogen and sulfur from specific amino acids""" - data = namedtuple('metal_binding', 'atom atom_orig_idx type restype resnr reschain location') + data = namedtuple( + "metal_binding", "atom atom_orig_idx type restype resnr reschain location" + ) a_set = [] for res in pybel.ob.OBResidueIter(mol.OBMol): - restype, reschain, resnr = res.GetName().upper(), res.GetChain(), res.GetNum() - if restype in ['ASP', 'GLU', 'SER', 'THR', 'TYR']: # Look for oxygens here + restype, reschain, resnr = ( + res.GetName().upper(), + res.GetChain(), + res.GetNum(), + ) + if restype in ["ASP", "GLU", "SER", "THR", "TYR"]: # Look for oxygens here for a in pybel.ob.OBResidueAtomIter(res): - if a.GetType().startswith('O') and res.GetAtomProperty(a, 8) \ - and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: - atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='O', restype=restype, - resnr=resnr, reschain=reschain, - location='protein.sidechain')) - if restype == 'HIS': # Look for nitrogen here + if ( + a.GetType().startswith("O") + and res.GetAtomProperty(a, 8) + and not self.Mapper.mapid(a.GetIdx(), mtype="protein") + in self.altconf + ): + atom_orig_idx = self.Mapper.mapid( + a.GetIdx(), mtype=self.mtype, bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(a), + atom_orig_idx=atom_orig_idx, + type="O", + restype=restype, + resnr=resnr, + reschain=reschain, + location="protein.sidechain", + ) + ) + if restype == "HIS": # Look for nitrogen here for a in pybel.ob.OBResidueAtomIter(res): - if a.GetType().startswith('N') and res.GetAtomProperty(a, 8) \ - and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: - atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='N', restype=restype, - resnr=resnr, reschain=reschain, - location='protein.sidechain')) - if restype == 'CYS': # Look for sulfur here + if ( + a.GetType().startswith("N") + and res.GetAtomProperty(a, 8) + and not self.Mapper.mapid(a.GetIdx(), mtype="protein") + in self.altconf + ): + atom_orig_idx = self.Mapper.mapid( + a.GetIdx(), mtype=self.mtype, bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(a), + atom_orig_idx=atom_orig_idx, + type="N", + restype=restype, + resnr=resnr, + reschain=reschain, + location="protein.sidechain", + ) + ) + if restype == "CYS": # Look for sulfur here for a in pybel.ob.OBResidueAtomIter(res): - if a.GetType().startswith('S') and res.GetAtomProperty(a, 8) \ - and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: - atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='S', restype=restype, - resnr=resnr, reschain=reschain, - location='protein.sidechain')) + if ( + a.GetType().startswith("S") + and res.GetAtomProperty(a, 8) + and not self.Mapper.mapid(a.GetIdx(), mtype="protein") + in self.altconf + ): + atom_orig_idx = self.Mapper.mapid( + a.GetIdx(), mtype=self.mtype, bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(a), + atom_orig_idx=atom_orig_idx, + type="S", + restype=restype, + resnr=resnr, + reschain=reschain, + location="protein.sidechain", + ) + ) for a in pybel.ob.OBResidueAtomIter(res): # All main chain oxygens - if a.GetType().startswith('O') and res.GetAtomProperty(a, 2) \ - and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf and restype != 'HOH': - atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='O', restype=res.GetName(), - resnr=res.GetNum(), reschain=res.GetChain(), - location='protein.mainchain')) + if ( + a.GetType().startswith("O") + and res.GetAtomProperty(a, 2) + and not self.Mapper.mapid(a.GetIdx(), mtype="protein") + in self.altconf + and restype != "HOH" + ): + atom_orig_idx = self.Mapper.mapid( + a.GetIdx(), mtype=self.mtype, bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(a), + atom_orig_idx=atom_orig_idx, + type="O", + restype=res.GetName(), + resnr=res.GetNum(), + reschain=res.GetChain(), + location="protein.mainchain", + ) + ) return a_set class Ligand(Mol): def __init__(self, cclass, ligand): altconf = cclass.altconf - self.hetid, self.chain, self.position = ligand.hetid, ligand.chain, ligand.position - self.bsid = ':'.join([self.hetid, self.chain, str(self.position)]) - Mol.__init__(self, altconf, cclass.Mapper, mtype='ligand', bsid=self.bsid) + self.hetid, self.chain, self.position = ( + ligand.hetid, + ligand.chain, + ligand.position, + ) + self.bsid = ":".join([self.hetid, self.chain, str(self.position)]) + Mol.__init__(self, altconf, cclass.Mapper, mtype="ligand", bsid=self.bsid) self.members = ligand.members self.longname = ligand.longname self.type = ligand.type self.complex = cclass self.molecule = ligand.mol # Pybel Molecule - self.smiles = self.molecule.write(format='can') # SMILES String - self.inchikey = self.molecule.write(format='inchikey') + self.smiles = self.molecule.write(format="can") # SMILES String + self.inchikey = self.molecule.write(format="inchikey") self.can_to_pdb = ligand.can_to_pdb if not len(self.smiles) == 0: self.smiles = self.smiles.split()[0] else: - logger.warning(f'could not write SMILES for ligand {ligand}') - self.smiles = '' + logger.warning(f"could not write SMILES for ligand {ligand}") + self.smiles = "" self.heavy_atoms = self.molecule.OBMol.NumHvyAtoms() # Heavy atoms count self.all_atoms = self.molecule.atoms self.atmdict = {l.idx: l for l in self.all_atoms} @@ -1007,9 +1482,9 @@ class Ligand(Mol): self.hbond_acc_atoms = self.find_hba(self.all_atoms) self.num_rings = len(self.rings) if self.num_rings != 0: - logger.info(f'contains {self.num_rings} aromatic ring(s)') + logger.info(f"contains {self.num_rings} aromatic ring(s)") descvalues = self.molecule.calcdesc() - self.molweight, self.logp = float(descvalues['MW']), float(descvalues['logP']) + self.molweight, self.logp = float(descvalues["MW"]), float(descvalues["logP"]) self.num_rot_bonds = int(self.molecule.OBMol.NumRotors()) self.atomorder = ligand.atomorder @@ -1017,48 +1492,73 @@ class Ligand(Mol): # Special Case for hydrogen bond acceptor identification # ########################################################## - self.inverse_mapping = {v: k for k, v in self.Mapper.ligandmaps[self.bsid].items()} + self.inverse_mapping = { + v: k for k, v in self.Mapper.ligandmaps[self.bsid].items() + } self.pdb_to_idx_mapping = {v: k for k, v in self.Mapper.proteinmap.items()} self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) ###### donor_pairs = [] - data = namedtuple('hbonddonor', 'd d_orig_atom d_orig_idx h type') + data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type") for donor in self.all_atoms: - pdbidx = self.Mapper.mapid(donor.idx, mtype='ligand', bsid=self.bsid, to='original') + pdbidx = self.Mapper.mapid( + donor.idx, mtype="ligand", bsid=self.bsid, to="original" + ) d = cclass.atoms[self.pdb_to_idx_mapping[pdbidx]] if d.OBAtom.IsHbondDonor(): - for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH()]: + for adj_atom in [ + a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH() + ]: d_orig_atom = self.Mapper.id_to_atom(pdbidx) - donor_pairs.append(data(d=donor, d_orig_atom=d_orig_atom, d_orig_idx=pdbidx, - h=pybel.Atom(adj_atom), type='regular')) + donor_pairs.append( + data( + d=donor, + d_orig_atom=d_orig_atom, + d_orig_idx=pdbidx, + h=pybel.Atom(adj_atom), + type="regular", + ) + ) self.hbond_don_atom_pairs = donor_pairs ####### self.charged = self.find_charged(self.all_atoms) self.centroid = centroid([a.coords for a in self.all_atoms]) - self.max_dist_to_center = max((euclidean3d(self.centroid, a.coords) for a in self.all_atoms)) + self.max_dist_to_center = max( + (euclidean3d(self.centroid, a.coords) for a in self.all_atoms) + ) self.water = [] - data = namedtuple('water', 'oxy oxy_orig_idx') + data = namedtuple("water", "oxy oxy_orig_idx") for hoh in ligand.water: oxy = None for at in pybel.ob.OBResidueAtomIter(hoh): if at.GetAtomicNum() == 8 and at.GetIdx() not in self.altconf: oxy = pybel.Atom(at) # There are some cases where there is no oxygen in a water residue, ignore those - if not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)]) == {1} and oxy is not None: - if euclidean3d(self.centroid, oxy.coords) < self.max_dist_to_center + config.BS_DIST: - oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype='protein') + if ( + not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)]) + == {1} + and oxy is not None + ): + if ( + euclidean3d(self.centroid, oxy.coords) + < self.max_dist_to_center + config.BS_DIST + ): + oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype="protein") self.water.append(data(oxy=oxy, oxy_orig_idx=oxy_orig_idx)) self.halogenbond_don = self.find_hal(self.all_atoms) self.metal_binding = self.find_metal_binding(self.all_atoms, self.water) self.metals = [] - data = namedtuple('metal', 'm orig_m m_orig_idx') + data = namedtuple("metal", "m orig_m m_orig_idx") for a in [a for a in self.all_atoms if a.type.upper() in config.METAL_IONS]: m_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) orig_m = self.Mapper.id_to_atom(m_orig_idx) self.metals.append(data(m=a, m_orig_idx=m_orig_idx, orig_m=orig_m)) - self.num_hba, self.num_hbd = len(self.hbond_acc_atoms), len(self.hbond_don_atom_pairs) + self.num_hba, self.num_hbd = ( + len(self.hbond_acc_atoms), + len(self.hbond_don_atom_pairs), + ) self.num_hal = len(self.halogenbond_don) def get_canonical_num(self, atomnum): @@ -1067,41 +1567,69 @@ class Ligand(Mol): def is_functional_group(self, atom, group): """Given a pybel atom, look up if it belongs to a function group""" - n_atoms = [a_neighbor.GetAtomicNum() for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom)] + n_atoms = [ + a_neighbor.GetAtomicNum() + for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom) + ] - if group in ['quartamine', 'tertamine'] and atom.atomicnum == 7: # Nitrogen + if group in ["quartamine", "tertamine"] and atom.atomicnum == 7: # Nitrogen # It's a nitrogen, so could be a protonated amine or quaternary ammonium - if '1' not in n_atoms and len(n_atoms) == 4: - return True if group == 'quartamine' else False # It's a quat. ammonium (N with 4 residues != H) + if "1" not in n_atoms and len(n_atoms) == 4: + return ( + True if group == "quartamine" else False + ) # It's a quat. ammonium (N with 4 residues != H) elif atom.OBAtom.GetHyb() == 3 and len(n_atoms) >= 3: - return True if group == 'tertamine' else False # It's sp3-hybridized, so could pick up an hydrogen + return ( + True if group == "tertamine" else False + ) # It's sp3-hybridized, so could pick up an hydrogen else: return False - if group in ['sulfonium', 'sulfonicacid', 'sulfate'] and atom.atomicnum == 16: # Sulfur - if '1' not in n_atoms and len(n_atoms) == 3: # It's a sulfonium (S with 3 residues != H) - return True if group == 'sulfonium' else False + if ( + group in ["sulfonium", "sulfonicacid", "sulfate"] and atom.atomicnum == 16 + ): # Sulfur + if ( + "1" not in n_atoms and len(n_atoms) == 3 + ): # It's a sulfonium (S with 3 residues != H) + return True if group == "sulfonium" else False elif n_atoms.count(8) == 3: # It's a sulfonate or sulfonic acid - return True if group == 'sulfonicacid' else False + return True if group == "sulfonicacid" else False elif n_atoms.count(8) == 4: # It's a sulfate - return True if group == 'sulfate' else False + return True if group == "sulfate" else False - if group == 'phosphate' and atom.atomicnum == 15: # Phosphor + if group == "phosphate" and atom.atomicnum == 15: # Phosphor if set(n_atoms) == {8}: # It's a phosphate return True - if group in ['carboxylate', 'guanidine'] and atom.atomicnum == 6: # It's a carbon atom - if n_atoms.count(8) == 2 and n_atoms.count(6) == 1: # It's a carboxylate group - return True if group == 'carboxylate' else False + if ( + group in ["carboxylate", "guanidine"] and atom.atomicnum == 6 + ): # It's a carbon atom + if ( + n_atoms.count(8) == 2 and n_atoms.count(6) == 1 + ): # It's a carboxylate group + return True if group == "carboxylate" else False elif n_atoms.count(7) == 3 and len(n_atoms) == 3: # It's a guanidine group nitro_partners = [] for nitro in pybel.ob.OBAtomAtomIter(atom.OBAtom): - nitro_partners.append(len([b_neighbor for b_neighbor in pybel.ob.OBAtomAtomIter(nitro)])) - if min(nitro_partners) == 1: # One nitrogen is only connected to the carbon, can pick up a H - return True if group == 'guanidine' else False - - if group == 'halocarbon' and atom.atomicnum in [9, 17, 35, 53]: # Halogen atoms - n_atoms = [na for na in pybel.ob.OBAtomAtomIter(atom.OBAtom) if na.GetAtomicNum() == 6] + nitro_partners.append( + len( + [ + b_neighbor + for b_neighbor in pybel.ob.OBAtomAtomIter(nitro) + ] + ) + ) + if ( + min(nitro_partners) == 1 + ): # One nitrogen is only connected to the carbon, can pick up a H + return True if group == "guanidine" else False + + if group == "halocarbon" and atom.atomicnum in [9, 17, 35, 53]: # Halogen atoms + n_atoms = [ + na + for na in pybel.ob.OBAtomAtomIter(atom.OBAtom) + if na.GetAtomicNum() == 6 + ] if len(n_atoms) == 1: # Halocarbon return True else: @@ -1109,18 +1637,32 @@ class Ligand(Mol): def find_hal(self, atoms): """Look for halogen bond donors (X-C, with X=F, Cl, Br, I)""" - data = namedtuple('hal_donor', 'x orig_x x_orig_idx c c_orig_idx') + data = namedtuple("hal_donor", "x orig_x x_orig_idx c c_orig_idx") a_set = [] for a in atoms: - if self.is_functional_group(a, 'halocarbon'): - n_atoms = [na for na in pybel.ob.OBAtomAtomIter(a.OBAtom) if na.GetAtomicNum() == 6] + if self.is_functional_group(a, "halocarbon"): + n_atoms = [ + na + for na in pybel.ob.OBAtomAtomIter(a.OBAtom) + if na.GetAtomicNum() == 6 + ] x_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) orig_x = self.Mapper.id_to_atom(x_orig_idx) - c_orig_idx = [self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid) for na in n_atoms] - a_set.append(data(x=a, orig_x=orig_x, x_orig_idx=x_orig_idx, - c=pybel.Atom(n_atoms[0]), c_orig_idx=c_orig_idx)) + c_orig_idx = [ + self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid) + for na in n_atoms + ] + a_set.append( + data( + x=a, + orig_x=orig_x, + x_orig_idx=x_orig_idx, + c=pybel.Atom(n_atoms[0]), + c_orig_idx=c_orig_idx, + ) + ) if len(a_set) != 0: - logger.info(f'ligand contains {len(a_set)} halogen atom(s)') + logger.info(f"ligand contains {len(a_set)} halogen atom(s)") return a_set def find_charged(self, all_atoms): @@ -1129,76 +1671,189 @@ class Ligand(Mol): as mentioned in 'Cation-pi interactions in ligand recognition and catalysis' (Zacharias et al., 2002)). Identify negatively charged groups in the ligand. """ - data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup') + data = namedtuple( + "lcharge", "atoms orig_atoms atoms_orig_idx type center fgroup" + ) a_set = [] for a in all_atoms: a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) a_orig = self.Mapper.id_to_atom(a_orig_idx) - if self.is_functional_group(a, 'quartamine'): - a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive', - center=list(a.coords), fgroup='quartamine')) - elif self.is_functional_group(a, 'tertamine'): - a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive', - center=list(a.coords), - fgroup='tertamine')) - if self.is_functional_group(a, 'sulfonium'): - a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive', - center=list(a.coords), - fgroup='sulfonium')) - if self.is_functional_group(a, 'phosphate'): - a_contributing = [a, ] - a_contributing_orig_idx = [a_orig_idx, ] - [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)] - [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)) - for neighbor in a_contributing] - orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + if self.is_functional_group(a, "quartamine"): + a_set.append( + data( + atoms=[a,], + orig_atoms=[a_orig,], + atoms_orig_idx=[a_orig_idx,], + type="positive", + center=list(a.coords), + fgroup="quartamine", + ) + ) + elif self.is_functional_group(a, "tertamine"): a_set.append( - data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, - type='negative', - center=a.coords, fgroup='phosphate')) - if self.is_functional_group(a, 'sulfonicacid'): - a_contributing = [a, ] - a_contributing_orig_idx = [a_orig_idx, ] - [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) if - neighbor.GetAtomicNum() == 8] - [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)) - for neighbor in a_contributing] - orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + data( + atoms=[a,], + orig_atoms=[a_orig,], + atoms_orig_idx=[a_orig_idx,], + type="positive", + center=list(a.coords), + fgroup="tertamine", + ) + ) + if self.is_functional_group(a, "sulfonium"): a_set.append( - data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, - type='negative', - center=a.coords, fgroup='sulfonicacid')) - elif self.is_functional_group(a, 'sulfate'): - a_contributing = [a, ] - a_contributing_orig_idx = [a_orig_idx, ] - [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)) - for neighbor in a_contributing] - [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)] - orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + data( + atoms=[a,], + orig_atoms=[a_orig,], + atoms_orig_idx=[a_orig_idx,], + type="positive", + center=list(a.coords), + fgroup="sulfonium", + ) + ) + if self.is_functional_group(a, "phosphate"): + a_contributing = [ + a, + ] + a_contributing_orig_idx = [ + a_orig_idx, + ] + [ + a_contributing.append(pybel.Atom(neighbor)) + for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + ] + [ + a_contributing_orig_idx.append( + self.Mapper.mapid( + neighbor.idx, mtype=self.mtype, bsid=self.bsid + ) + ) + for neighbor in a_contributing + ] + orig_contributing = [ + self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx + ] a_set.append( - data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, - type='negative', - center=a.coords, fgroup='sulfate')) - if self.is_functional_group(a, 'carboxylate'): - a_contributing = [pybel.Atom(neighbor) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - if neighbor.GetAtomicNum() == 8] - a_contributing_orig_idx = [self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) - for neighbor in a_contributing] - orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + data( + atoms=a_contributing, + orig_atoms=orig_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="negative", + center=a.coords, + fgroup="phosphate", + ) + ) + if self.is_functional_group(a, "sulfonicacid"): + a_contributing = [ + a, + ] + a_contributing_orig_idx = [ + a_orig_idx, + ] + [ + a_contributing.append(pybel.Atom(neighbor)) + for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + if neighbor.GetAtomicNum() == 8 + ] + [ + a_contributing_orig_idx.append( + self.Mapper.mapid( + neighbor.idx, mtype=self.mtype, bsid=self.bsid + ) + ) + for neighbor in a_contributing + ] + orig_contributing = [ + self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx + ] a_set.append( - data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, - type='negative', - center=centroid([a.coords for a in a_contributing]), fgroup='carboxylate')) - elif self.is_functional_group(a, 'guanidine'): - a_contributing = [pybel.Atom(neighbor) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - if neighbor.GetAtomicNum() == 7] - a_contributing_orig_idx = [self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) - for neighbor in a_contributing] - orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + data( + atoms=a_contributing, + orig_atoms=orig_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="negative", + center=a.coords, + fgroup="sulfonicacid", + ) + ) + elif self.is_functional_group(a, "sulfate"): + a_contributing = [ + a, + ] + a_contributing_orig_idx = [ + a_orig_idx, + ] + [ + a_contributing_orig_idx.append( + self.Mapper.mapid( + neighbor.idx, mtype=self.mtype, bsid=self.bsid + ) + ) + for neighbor in a_contributing + ] + [ + a_contributing.append(pybel.Atom(neighbor)) + for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + ] + orig_contributing = [ + self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx + ] a_set.append( - data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, - type='positive', - center=a.coords, fgroup='guanidine')) + data( + atoms=a_contributing, + orig_atoms=orig_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="negative", + center=a.coords, + fgroup="sulfate", + ) + ) + if self.is_functional_group(a, "carboxylate"): + a_contributing = [ + pybel.Atom(neighbor) + for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + if neighbor.GetAtomicNum() == 8 + ] + a_contributing_orig_idx = [ + self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) + for neighbor in a_contributing + ] + orig_contributing = [ + self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx + ] + a_set.append( + data( + atoms=a_contributing, + orig_atoms=orig_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="negative", + center=centroid([a.coords for a in a_contributing]), + fgroup="carboxylate", + ) + ) + elif self.is_functional_group(a, "guanidine"): + a_contributing = [ + pybel.Atom(neighbor) + for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + if neighbor.GetAtomicNum() == 7 + ] + a_contributing_orig_idx = [ + self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) + for neighbor in a_contributing + ] + orig_contributing = [ + self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx + ] + a_set.append( + data( + atoms=a_contributing, + orig_atoms=orig_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type="positive", + center=a.coords, + fgroup="guanidine", + ) + ) return a_set def find_metal_binding(self, lig_atoms, water_oxygens): @@ -1207,67 +1862,173 @@ class Ligand(Mol): nitrogen from imidazole; sulfur from thiolate. """ a_set = [] - data = namedtuple('metal_binding', 'atom orig_atom atom_orig_idx type fgroup restype resnr reschain location') + data = namedtuple( + "metal_binding", + "atom orig_atom atom_orig_idx type fgroup restype resnr reschain location", + ) for oxygen in water_oxygens: - a_set.append(data(atom=oxygen.oxy, atom_orig_idx=oxygen.oxy_orig_idx, type='O', fgroup='water', - restype=whichrestype(oxygen.oxy), resnr=whichresnumber(oxygen.oxy), - reschain=whichchain(oxygen.oxy), location='water', - orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx))) + a_set.append( + data( + atom=oxygen.oxy, + atom_orig_idx=oxygen.oxy_orig_idx, + type="O", + fgroup="water", + restype=whichrestype(oxygen.oxy), + resnr=whichresnumber(oxygen.oxy), + reschain=whichchain(oxygen.oxy), + location="water", + orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx), + ) + ) # #@todo Refactor code for a in lig_atoms: - a_orig_idx = self.Mapper.mapid(a.idx, mtype='ligand', bsid=self.bsid) + a_orig_idx = self.Mapper.mapid(a.idx, mtype="ligand", bsid=self.bsid) n_atoms = pybel.ob.OBAtomAtomIter(a.OBAtom) # Neighboring atoms # All atomic numbers of neighboring atoms - n_atoms_atomicnum = [n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom)] + n_atoms_atomicnum = [ + n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom) + ] if a.atomicnum == 8: # Oxygen - if n_atoms_atomicnum.count('1') == 1 and len(n_atoms_atomicnum) == 2: # Oxygen in alcohol (R-[O]-H) - a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='O', fgroup='alcohol', - restype=self.hetid, resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) - if True in [n.IsAromatic() for n in n_atoms] and not a.OBAtom.IsAromatic(): # Phenolate oxygen - a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='O', fgroup='phenolate', - restype=self.hetid, resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if ( + n_atoms_atomicnum.count("1") == 1 and len(n_atoms_atomicnum) == 2 + ): # Oxygen in alcohol (R-[O]-H) + a_set.append( + data( + atom=a, + atom_orig_idx=a_orig_idx, + type="O", + fgroup="alcohol", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) + if ( + True in [n.IsAromatic() for n in n_atoms] + and not a.OBAtom.IsAromatic() + ): # Phenolate oxygen + a_set.append( + data( + atom=a, + atom_orig_idx=a_orig_idx, + type="O", + fgroup="phenolate", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) if a.atomicnum == 6: # It's a carbon atom - if n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1: # It's a carboxylate group + if ( + n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1 + ): # It's a carboxylate group for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: - neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O', - fgroup='carboxylate', - restype=self.hetid, - resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + neighbor_orig_idx = self.Mapper.mapid( + neighbor.GetIdx(), mtype="ligand", bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(neighbor), + atom_orig_idx=neighbor_orig_idx, + type="O", + fgroup="carboxylate", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) if a.atomicnum == 15: # It's a phosphor atom if n_atoms_atomicnum.count(8) >= 3: # It's a phosphoryl for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: - neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O', - fgroup='phosphoryl', - restype=self.hetid, - resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) - if n_atoms_atomicnum.count(8) == 2: # It's another phosphor-containing group #@todo (correct name?) + neighbor_orig_idx = self.Mapper.mapid( + neighbor.GetIdx(), mtype="ligand", bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(neighbor), + atom_orig_idx=neighbor_orig_idx, + type="O", + fgroup="phosphoryl", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) + if ( + n_atoms_atomicnum.count(8) == 2 + ): # It's another phosphor-containing group #@todo (correct name?) for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: - neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid) - a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O', - fgroup='phosphor.other', restype=self.hetid, - resnr=self.position, - reschain=self.chain, location='ligand', - orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + neighbor_orig_idx = self.Mapper.mapid( + neighbor.GetIdx(), mtype="ligand", bsid=self.bsid + ) + a_set.append( + data( + atom=pybel.Atom(neighbor), + atom_orig_idx=neighbor_orig_idx, + type="O", + fgroup="phosphor.other", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) if a.atomicnum == 7: # It's a nitrogen atom if n_atoms_atomicnum.count(6) == 2: # It's imidazole/pyrrole or similar - a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='N', fgroup='imidazole/pyrrole', - restype=self.hetid, resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + a_set.append( + data( + atom=a, + atom_orig_idx=a_orig_idx, + type="N", + fgroup="imidazole/pyrrole", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) if a.atomicnum == 16: # It's a sulfur atom - if True in [n.IsAromatic() for n in n_atoms] and not a.OBAtom.IsAromatic(): # Thiolate - a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='S', fgroup='thiolate', - restype=self.hetid, resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if ( + True in [n.IsAromatic() for n in n_atoms] + and not a.OBAtom.IsAromatic() + ): # Thiolate + a_set.append( + data( + atom=a, + atom_orig_idx=a_orig_idx, + type="S", + fgroup="thiolate", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) if set(n_atoms_atomicnum) == {26}: # Sulfur in Iron sulfur cluster - a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='S', fgroup='iron-sulfur.cluster', - restype=self.hetid, resnr=self.position, reschain=self.chain, - location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + a_set.append( + data( + atom=a, + atom_orig_idx=a_orig_idx, + type="S", + fgroup="iron-sulfur.cluster", + restype=self.hetid, + resnr=self.position, + reschain=self.chain, + location="ligand", + orig_atom=self.Mapper.id_to_atom(a_orig_idx), + ) + ) return a_set @@ -1279,43 +2040,54 @@ class PDBComplex: """ def __init__(self): - self.interaction_sets = {} # Dictionary with site identifiers as keys and object as value + self.interaction_sets = ( + {} + ) # Dictionary with site identifiers as keys and object as value self.protcomplex = None self.filetype = None self.atoms = {} # Dictionary of Pybel atoms, accessible by their idx self.sourcefiles = {} self.information = {} - self.corrected_pdb = '' + self.corrected_pdb = "" self._output_path = tempfile.gettempdir() self.pymol_name = None self.modres = set() self.resis = [] self.altconf = [] # Atom idx of atoms with alternate conformations - self.covalent = [] # Covalent linkages between ligands and protein residues/other ligands + self.covalent = ( + [] + ) # Covalent linkages between ligands and protein residues/other ligands self.excluded = [] # Excluded ligands self.Mapper = Mapper() self.ligands = [] def __str__(self): - formatted_lig_names = [":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands] + formatted_lig_names = [ + ":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands + ] return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join( - [lig for lig in formatted_lig_names]) + [lig for lig in formatted_lig_names] + ) def load_pdb(self, pdbpath, as_string=False): """Loads a pdb file with protein AND ligand(s), separates and prepares them. If specified 'as_string', the input is a PDB string instead of a path.""" if as_string: - self.sourcefiles['pdbcomplex.original'] = None - self.sourcefiles['pdbcomplex'] = None - self.sourcefiles['pdbstring'] = pdbpath + self.sourcefiles["pdbcomplex.original"] = None + self.sourcefiles["pdbcomplex"] = None + self.sourcefiles["pdbstring"] = pdbpath else: - self.sourcefiles['pdbcomplex.original'] = pdbpath - self.sourcefiles['pdbcomplex'] = pdbpath - self.information['pdbfixes'] = False - pdbparser = PDBParser(pdbpath, as_string=as_string) # Parse PDB file to find errors and get additional data + self.sourcefiles["pdbcomplex.original"] = pdbpath + self.sourcefiles["pdbcomplex"] = pdbpath + self.information["pdbfixes"] = False + pdbparser = PDBParser( + pdbpath, as_string=as_string + ) # Parse PDB file to find errors and get additional data # #@todo Refactor and rename here self.Mapper.proteinmap = pdbparser.proteinmap - self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()} + self.Mapper.reversed_proteinmap = { + v: k for k, v in self.Mapper.proteinmap.items() + } self.modres = pdbparser.modres self.covalent = pdbparser.covalent self.altconf = pdbparser.altconformations @@ -1323,78 +2095,103 @@ class PDBComplex: if not config.PLUGIN_MODE: if pdbparser.num_fixed_lines > 0: - logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file') + logger.info( + f"{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file" + ) # Save modified PDB file if not as_string: - basename = os.path.basename(pdbpath).split('.')[0] + basename = os.path.basename(pdbpath).split(".")[0] else: basename = "from_stdin" - pdbpath_fixed = tmpfile(prefix='plipfixed.' + basename + '_', direc=self.output_path) + pdbpath_fixed = tmpfile( + prefix="plipfixed." + basename + "_", direc=self.output_path + ) create_folder_if_not_exists(self.output_path) - self.sourcefiles['pdbcomplex'] = pdbpath_fixed - self.corrected_pdb = re.sub(r'[^\x00-\x7F]+', ' ', self.corrected_pdb) # Strip non-unicode chars - if not config.NOFIXFILE: # Only write to file if this option is not activated - with open(pdbpath_fixed, 'w') as f: + self.sourcefiles["pdbcomplex"] = pdbpath_fixed + self.corrected_pdb = re.sub( + r"[^\x00-\x7F]+", " ", self.corrected_pdb + ) # Strip non-unicode chars + if ( + not config.NOFIXFILE + ): # Only write to file if this option is not activated + with open(pdbpath_fixed, "w") as f: f.write(self.corrected_pdb) - self.information['pdbfixes'] = True + self.information["pdbfixes"] = True if not as_string: - self.sourcefiles['filename'] = os.path.basename(self.sourcefiles['pdbcomplex']) + self.sourcefiles["filename"] = os.path.basename( + self.sourcefiles["pdbcomplex"] + ) self.protcomplex, self.filetype = read_pdb(self.corrected_pdb, as_string=True) # Update the model in the Mapper class instance self.Mapper.original_structure = self.protcomplex.OBMol - logger.info('PDB structure successfully read') + logger.info("PDB structure successfully read") # Determine (temporary) PyMOL Name from Filename - self.pymol_name = pdbpath.split('/')[-1].split('.')[0] + '-Protein' + self.pymol_name = pdbpath.split("/")[-1].split(".")[0] + "-Protein" # Replace characters causing problems in PyMOL - self.pymol_name = self.pymol_name.replace(' ', '').replace('(', '').replace(')', '').replace('-', '_') + self.pymol_name = ( + self.pymol_name.replace(" ", "") + .replace("(", "") + .replace(")", "") + .replace("-", "_") + ) # But if possible, name it after PDBID in Header - if 'HEADER' in self.protcomplex.data: # If the PDB file has a proper header - potential_name = self.protcomplex.data['HEADER'][56:60].lower() - if extract_pdbid(potential_name) != 'UnknownProtein': + if "HEADER" in self.protcomplex.data: # If the PDB file has a proper header + potential_name = self.protcomplex.data["HEADER"][56:60].lower() + if extract_pdbid(potential_name) != "UnknownProtein": self.pymol_name = potential_name - logger.debug(f'PyMOL name set as: {self.pymol_name}') + logger.debug(f"PyMOL name set as: {self.pymol_name}") # Extract and prepare ligands - ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper) + ligandfinder = LigandFinder( + self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper + ) self.ligands = ligandfinder.ligands self.excluded = ligandfinder.excluded # decide whether to add polar hydrogens if not config.NOHYDRO: if not as_string: - basename = os.path.basename(pdbpath).split('.')[0] + basename = os.path.basename(pdbpath).split(".")[0] else: basename = "from_stdin" self.protcomplex.OBMol.AddPolarHydrogens() - output_path = os.path.join(self._output_path, f'{basename}_protonated.pdb') - self.protcomplex.write('pdb', output_path, overwrite=True) - logger.info(f'protonated structure written to {output_path}') + output_path = os.path.join(self._output_path, f"{basename}_protonated.pdb") + self.protcomplex.write("pdb", output_path, overwrite=True) + logger.info(f"protonated structure written to {output_path}") else: - logger.warning('no polar hydrogens will be assigned (make sure your structure contains hydrogens)') + logger.warning( + "no polar hydrogens will be assigned (make sure your structure contains hydrogens)" + ) for atm in self.protcomplex: self.atoms[atm.idx] = atm if len(self.excluded) != 0: - logger.info(f'excluded molecules as ligands: {self.excluded}') + logger.info(f"excluded molecules as ligands: {self.excluded}") if config.DNARECEPTOR: - self.resis = [obres for obres in pybel.ob.OBResidueIter( - self.protcomplex.OBMol) if obres.GetName() in config.DNA + config.RNA] + self.resis = [ + obres + for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol) + if obres.GetName() in config.DNA + config.RNA + ] else: - self.resis = [obres for obres in pybel.ob.OBResidueIter( - self.protcomplex.OBMol) if obres.GetResidueProperty(0)] + self.resis = [ + obres + for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol) + if obres.GetResidueProperty(0) + ] num_ligs = len(self.ligands) if num_ligs == 1: - logger.info('analyzing one ligand') + logger.info("analyzing one ligand") elif num_ligs > 1: - logger.info(f'analyzing {num_ligs} ligands') + logger.info(f"analyzing {num_ligs} ligands") else: - logger.info(f'structure contains no ligands') + logger.info(f"structure contains no ligands") def analyze(self): """Triggers analysis of all complexes in structure""" @@ -1406,42 +2203,66 @@ class PDBComplex: single_sites = [] for member in ligand.members: - single_sites.append(':'.join([str(x) for x in member])) - site = ' + '.join(single_sites) - site = site if not len(site) > 20 else site[:20] + '...' - longname = ligand.longname if not len(ligand.longname) > 20 else ligand.longname[:20] + '...' - ligtype = 'unspecified type' if ligand.type == 'UNSPECIFIED' else ligand.type - ligtext = f'{longname} [{ligtype}] -- {site}' - logger.info(f'processing ligand {ligtext}') - if ligtype == 'PEPTIDE': - logger.info(f'chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode') - if ligtype == 'INTRA': - logger.info(f'chain {ligand.chain} will be processed in [INTRA-CHAIN] mode') - any_in_biolip = len(set([x[0] for x in ligand.members]).intersection(config.biolip_list)) != 0 - - if ligtype not in ['POLYMER', 'DNA', 'ION', 'DNA+ION', 'RNA+ION', 'SMALLMOLECULE+ION'] and any_in_biolip: - logger.info('may be biologically irrelevant') + single_sites.append(":".join([str(x) for x in member])) + site = " + ".join(single_sites) + site = site if not len(site) > 20 else site[:20] + "..." + longname = ( + ligand.longname + if not len(ligand.longname) > 20 + else ligand.longname[:20] + "..." + ) + ligtype = "unspecified type" if ligand.type == "UNSPECIFIED" else ligand.type + ligtext = f"{longname} [{ligtype}] -- {site}" + logger.info(f"processing ligand {ligtext}") + if ligtype == "PEPTIDE": + logger.info( + f"chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode" + ) + if ligtype == "INTRA": + logger.info(f"chain {ligand.chain} will be processed in [INTRA-CHAIN] mode") + any_in_biolip = ( + len(set([x[0] for x in ligand.members]).intersection(config.biolip_list)) + != 0 + ) + + if ( + ligtype + not in ["POLYMER", "DNA", "ION", "DNA+ION", "RNA+ION", "SMALLMOLECULE+ION"] + and any_in_biolip + ): + logger.info("may be biologically irrelevant") lig_obj = Ligand(self, ligand) cutoff = lig_obj.max_dist_to_center + config.BS_DIST bs_res = self.extract_bs(cutoff, lig_obj.centroid, self.resis) # Get a list of all atoms belonging to the binding site, search by idx - bs_atoms = [self.atoms[idx] for idx in [i for i in self.atoms.keys() - if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res] - if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf] - if ligand.type == 'PEPTIDE': + bs_atoms = [ + self.atoms[idx] + for idx in [ + i + for i in self.atoms.keys() + if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res + ] + if idx in self.Mapper.proteinmap + and self.Mapper.mapid(idx, mtype="protein") not in self.altconf + ] + if ligand.type == "PEPTIDE": # If peptide, don't consider the peptide chain as part of the protein binding site - bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain] - if ligand.type == 'INTRA': + bs_atoms = [ + a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain + ] + if ligand.type == "INTRA": # Interactions within the chain - bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain] + bs_atoms = [ + a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain + ] bs_atoms_refined = [] # Create hash with BSRES -> (MINDIST_TO_LIG, AA_TYPE) # and refine binding site atom selection with exact threshold min_dist = {} for r in bs_atoms: - bs_res_id = ''.join([str(whichresnumber(r)), whichchain(r)]) + bs_res_id = "".join([str(whichresnumber(r)), whichchain(r)]) for l in ligand.mol.atoms: distance = euclidean3d(r.coords, l.coords) if bs_res_id not in min_dist: @@ -1451,25 +2272,40 @@ class PDBComplex: if distance <= config.BS_DIST and r not in bs_atoms_refined: bs_atoms_refined.append(r) num_bs_atoms = len(bs_atoms_refined) - logger.info(f'binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})') - - bs_obj = BindingSite(bs_atoms_refined, self.protcomplex, self, self.altconf, min_dist, self.Mapper) + logger.info( + f"binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})" + ) + + bs_obj = BindingSite( + bs_atoms_refined, + self.protcomplex, + self, + self.altconf, + min_dist, + self.Mapper, + ) pli_obj = PLInteraction(lig_obj, bs_obj, self) self.interaction_sets[ligand.mol.title] = pli_obj def extract_bs(self, cutoff, ligcentroid, resis): """Return list of ids from residues belonging to the binding site""" - return [obres.GetIdx() for obres in resis if self.res_belongs_to_bs(obres, cutoff, ligcentroid)] + return [ + obres.GetIdx() + for obres in resis + if self.res_belongs_to_bs(obres, cutoff, ligcentroid) + ] def res_belongs_to_bs(self, res, cutoff, ligcentroid): """Check for each residue if its centroid is within a certain distance to the ligand centroid. Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)""" - rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) + rescentroid = centroid( + [(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)] + ) # Check geometry near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False # Check chain membership restricted_chain = True if res.GetChain() in config.PEPTIDES else False - return (near_enough and not restricted_chain) + return near_enough and not restricted_chain def get_atom(self, idx): return self.atoms[idx] diff --git a/plip/test/test_basic_functions.py b/plip/test/test_basic_functions.py index 7134f55..ac6a012 100644 --- a/plip/test/test_basic_functions.py +++ b/plip/test/test_basic_functions.py @@ -21,13 +21,17 @@ class TestLigandSupport(unittest.TestCase): def test_dna_rna(self): """Test if DNA and RNA is correctly processed as ligands""" tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1tf6.pdb') + tmpmol.load_pdb("./pdb/1tf6.pdb") # DNA ligand four times consisting of 31 parts (composite) - self.assertEqual([len(ligand.members) for ligand in tmpmol.ligands].count(31), 4) - for ligset in [set((x[0] for x in ligand.members)) for ligand in tmpmol.ligands]: + self.assertEqual( + [len(ligand.members) for ligand in tmpmol.ligands].count(31), 4 + ) + for ligset in [ + set((x[0] for x in ligand.members)) for ligand in tmpmol.ligands + ]: if len(ligset) == 4: # DNA only contains four bases - self.assertEqual(ligset, {'DG', 'DC', 'DA', 'DT'}) + self.assertEqual(ligset, {"DG", "DC", "DA", "DT"}) class TestMapping(unittest.TestCase): @@ -36,34 +40,34 @@ class TestMapping(unittest.TestCase): def test_ids(self): """Test if the atom IDs are correctly mapped from internal to original PDB.""" tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1vsn.pdb') - bsid = 'NFT:A:283' + tmpmol.load_pdb("./pdb/1vsn.pdb") + bsid = "NFT:A:283" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] for contact in s.hydrophobic_contacts: - if contact.restype == 'ALA' and contact.resnr == 133: + if contact.restype == "ALA" and contact.resnr == 133: self.assertEqual(contact.ligatom_orig_idx, 1636) self.assertEqual(contact.bsatom_orig_idx, 994) - if contact.restype == 'ASP' and contact.resnr == 61: + if contact.restype == "ASP" and contact.resnr == 61: self.assertEqual(contact.ligatom_orig_idx, 1639) self.assertEqual(contact.bsatom_orig_idx, 448) for contact in s.hbonds_ldon + s.hbonds_pdon: - if contact.restype == 'GLN' and contact.resnr == 19: + if contact.restype == "GLN" and contact.resnr == 19: self.assertEqual(contact.a_orig_idx, 1649) self.assertEqual(contact.d_orig_idx, 153) - if contact.restype == 'CYS' and contact.resnr == 25: + if contact.restype == "CYS" and contact.resnr == 25: self.assertEqual(contact.a_orig_idx, 1649) self.assertEqual(contact.d_orig_idx, 183) - if contact.restype == 'ASN' and contact.resnr == 158: + if contact.restype == "ASN" and contact.resnr == 158: self.assertEqual(contact.d_orig_idx, 1629) self.assertEqual(contact.a_orig_idx, 1199) for contact in s.halogen_bonds: - if contact.restype == 'TYR' and contact.resnr == 67: + if contact.restype == "TYR" and contact.resnr == 67: self.assertEqual(contact.don.x_orig_idx, 1627) self.assertEqual(contact.acc.o_orig_idx, 485) - if contact.restype == 'LEU' and contact.resnr == 157: + if contact.restype == "LEU" and contact.resnr == 157: self.assertEqual(contact.don.x_orig_idx, 1628) self.assertEqual(contact.acc.o_orig_idx, 1191) @@ -72,7 +76,7 @@ class GeometryTest(unittest.TestCase): """Tests for geometrical calculations in PLIP""" def vector_magnitude(self, v): - return numpy.sqrt(sum(x**2 for x in v)) + return numpy.sqrt(sum(x ** 2 for x in v)) # noinspection PyUnusedLocal def setUp(self): @@ -132,4 +136,7 @@ class GeometryTest(unittest.TestCase): def test_cluster_doubles(self): """Tests for mathematics.cluster_doubles""" # Are the results correct? - self.assertEqual(set(cluster_doubles([(1, 3), (4, 1), (5, 6), (7, 5)])), {(1, 3, 4), (5, 6, 7)}) + self.assertEqual( + set(cluster_doubles([(1, 3), (4, 1), (5, 6), (7, 5)])), + {(1, 3, 4), (5, 6, 7)}, + ) diff --git a/plip/test/test_command_line.py b/plip/test/test_command_line.py index 5d8e8e5..3e27943 100644 --- a/plip/test/test_command_line.py +++ b/plip/test/test_command_line.py @@ -20,29 +20,39 @@ class CommandLineTest(unittest.TestCase): def test_empty_input_file(self): """Input file is empty.""" - exitcode = subprocess.call(f'{sys.executable} ../plipcmd.py -f ./special/empty.pdb -o {self.tmp_dir.name}', - shell=True) + exitcode = subprocess.call( + f"{sys.executable} ../plipcmd.py -f ./special/empty.pdb -o {self.tmp_dir.name}", + shell=True, + ) self.assertEqual(exitcode, 1) def test_invalid_pdb_id(self): """A PDB ID with no valid PDB record is provided.""" - exitcode = subprocess.call(f'{sys.executable} ../plipcmd.py -i xx1x -o {self.tmp_dir.name}', shell=True) + exitcode = subprocess.call( + f"{sys.executable} ../plipcmd.py -i xx1x -o {self.tmp_dir.name}", shell=True + ) self.assertEqual(exitcode, 1) def test_invalid_input_file(self): """A file is provided which is not a PDB file.""" - exitcode = subprocess.call(f'{sys.executable} ../plipcmd.py -f ./special/non-pdb.pdb -o {self.tmp_dir.name}', - shell=True) + exitcode = subprocess.call( + f"{sys.executable} ../plipcmd.py -f ./special/non-pdb.pdb -o {self.tmp_dir.name}", + shell=True, + ) self.assertEqual(exitcode, 1) def test_pdb_format_not_available(self): """A valid PDB ID is provided, but there is no entry in PDB format from wwPDB""" - exitcode = subprocess.call(f'{sys.executable} ../plipcmd.py -i 4v59 -o {self.tmp_dir.name}', shell=True) + exitcode = subprocess.call( + f"{sys.executable} ../plipcmd.py -i 4v59 -o {self.tmp_dir.name}", shell=True + ) self.assertEqual(exitcode, 1) def test_valid_pdb(self): """A PDB ID with no valid PDB record is provided.""" - exitcode = subprocess.call(f'{sys.executable} ../plipcmd.py -x -f ./pdb/1eve.pdb -o {self.tmp_dir.name}', - shell=True) + exitcode = subprocess.call( + f"{sys.executable} ../plipcmd.py -x -f ./pdb/1eve.pdb -o {self.tmp_dir.name}", + shell=True, + ) self.assertEqual(len(os.listdir(self.tmp_dir.name)), 2) self.assertEqual(exitcode, 0) diff --git a/plip/test/test_hydrogen_bonds.py b/plip/test/test_hydrogen_bonds.py index 9d68532..f673c6e 100644 --- a/plip/test/test_hydrogen_bonds.py +++ b/plip/test/test_hydrogen_bonds.py @@ -8,31 +8,35 @@ def characterize_complex(pdb_file: str, binding_site_id: str) -> PLInteraction: pdb_complex = PDBComplex() pdb_complex.load_pdb(pdb_file) for ligand in pdb_complex.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == binding_site_id: + if ( + ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) + == binding_site_id + ): pdb_complex.characterize_complex(ligand) return pdb_complex.interaction_sets[binding_site_id] class HydrogenBondTestCase(unittest.TestCase): - def test_4dst_nondeterministic_protonation(self): config.NOHYDRO = False for i in range(0, 10): - interactions = characterize_complex('./pdb/4dst.pdb', 'GCP:A:202') + interactions = characterize_complex("./pdb/4dst.pdb", "GCP:A:202") all_hbonds = interactions.hbonds_ldon + interactions.hbonds_pdon self.assertTrue(len(all_hbonds) == 16 or len(all_hbonds) == 17) def test_4dst_deterministic_protonation(self): config.NOHYDRO = True for i in range(0, 10): - interactions = characterize_complex('./pdb/4dst_protonated.pdb', 'GCP:A:202') + interactions = characterize_complex( + "./pdb/4dst_protonated.pdb", "GCP:A:202" + ) all_hbonds = interactions.hbonds_ldon + interactions.hbonds_pdon self.assertTrue(len(all_hbonds) == 16) def test_no_protonation(self): config.NOHYDRO = True - interactions1 = characterize_complex('./pdb/1x0n_state_1.pdb', 'DTF:A:174') + interactions1 = characterize_complex("./pdb/1x0n_state_1.pdb", "DTF:A:174") self.assertEqual(len(interactions1.hbonds_ldon), 0) config.NOHYDRO = False - interactions2 = characterize_complex('./pdb/1x0n_state_1.pdb', 'DTF:A:174') + interactions2 = characterize_complex("./pdb/1x0n_state_1.pdb", "DTF:A:174") self.assertEqual(len(interactions2.hbonds_ldon), 1) diff --git a/plip/test/test_literature_validated.py b/plip/test/test_literature_validated.py index f3f7cbc..d49dba5 100644 --- a/plip/test/test_literature_validated.py +++ b/plip/test/test_literature_validated.py @@ -21,10 +21,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Chakrabarti et al. Geometry of nonbonded interactions involving planar groups in proteins. (2007) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1eve.pdb') - bsid = 'E20:A:2001' + tmpmol.load_pdb("./pdb/1eve.pdb") + bsid = "E20:A:2001" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Aromatic stacking with Trp84 and Trp279 @@ -39,10 +39,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Chakrabarti et al. Geometry of nonbonded interactions involving planar groups in proteins. (2007) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1h2t.pdb') - bsid = 'GDP:Z:1151' + tmpmol.load_pdb("./pdb/1h2t.pdb") + bsid = "GDP:Z:1151" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Sandwiched pi-stacking involving Tyr20 and Tyr43 @@ -60,10 +60,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Betzi et al. Discovery of a potential allosteric ligand binding site in CDK2 (2012) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3pxf.pdb') - bsids = ['2AN:A:305', '2AN:A:304'] + tmpmol.load_pdb("./pdb/3pxf.pdb") + bsids = ["2AN:A:305", "2AN:A:304"] for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) in bsids: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) in bsids: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsids[0]] # 2AN:A:305 @@ -91,10 +91,10 @@ class LiteratureValidatedTest(unittest.TestCase): from Sinorhizobium meliloti in the liganded and unliganded-closed states. (2008) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2reg.pdb') - bsid = 'CHT:A:1' + tmpmol.load_pdb("./pdb/2reg.pdb") + bsid = "CHT:A:1" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Cation-pi interactions with Trp43, Trp90, Trp205, and Tyr119 @@ -109,10 +109,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Bird et al. Crystal structures of Varicella Zoster Virus Thyrimidine Kinase. (2003) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1osn.pdb') - bsid = 'BVP:A:500' + tmpmol.load_pdb("./pdb/1osn.pdb") + bsid = "BVP:A:500" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Sandwiched pi-stacking involving Phe93 and Phe139 @@ -128,10 +128,10 @@ class LiteratureValidatedTest(unittest.TestCase): has implications for antiviral therapy (2008) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2w0s.pdb') - bsid = 'BVP:B:1207' # Complex of BVDU with Magnesium Cofactor + tmpmol.load_pdb("./pdb/2w0s.pdb") + bsid = "BVP:B:1207" # Complex of BVDU with Magnesium Cofactor for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonding of Tyr101 and Arg72 @@ -152,10 +152,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Li et al. Identification of a potent and selective non-basic cathepsin K inhibitor. (2006) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1vsn.pdb') - bsid = 'NFT:A:283' + tmpmol.load_pdb("./pdb/1vsn.pdb") + bsid = "NFT:A:283" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonding to Gly66 @@ -167,10 +167,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: De Moliner et al. Alternative binding modes of an inhibitor to two different kinases. (2003) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1p5e.pdb') - bsid = 'TBS:A:301' + tmpmol.load_pdb("./pdb/1p5e.pdb") + bsid = "TBS:A:301" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Halogen Bonding of Ile10 and Leu83 @@ -183,10 +183,10 @@ class LiteratureValidatedTest(unittest.TestCase): acetylcholinesterase.. (1993) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1acj.pdb') - bsid = 'THA:A:999' + tmpmol.load_pdb("./pdb/1acj.pdb") + bsid = "THA:A:999" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # pi-stacking interaction with Phe330 and Trp84 @@ -199,10 +199,10 @@ class LiteratureValidatedTest(unittest.TestCase): glutamicum CgmR in Complex with Inducers and with an Operator. (2010) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2zoz.pdb') - bsid = 'ET:B:184' + tmpmol.load_pdb("./pdb/2zoz.pdb") + bsid = "ET:B:184" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # pi-stacking interaction with Trp63 and Phe147 @@ -219,10 +219,10 @@ class LiteratureValidatedTest(unittest.TestCase): RNA editing ligase 1. (2004) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1xdn.pdb') - bsid = 'ATP:A:501' + tmpmol.load_pdb("./pdb/1xdn.pdb") + bsid = "ATP:A:501" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Arg111, Ile61 (backbone), Asn92, Val88, Lys87 and Glu86# @@ -241,10 +241,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Peisach et al. Interaction of a Peptidomimetic Aminimide Inhibitor with Elastase. (1995) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1bma.pdb') - bsid = '0QH:A:256' + tmpmol.load_pdb("./pdb/1bma.pdb") + bsid = "0QH:A:256" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to val224 and Gln200 @@ -265,14 +265,16 @@ class LiteratureValidatedTest(unittest.TestCase): and Their Prodrugs As Antimalarial Agents (2004) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4rao.pdb') - bsid = '3L7:B:301' + tmpmol.load_pdb("./pdb/4rao.pdb") + bsid = "3L7:B:301" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Val187, Lys165, Thr141, Lys140, Gly139, Thr138, Asp137 - hbonds = {hbond.resnr for hbond in s.hbonds_pdon} # res nr 100, 68, 69 and 199 in alternative conformation, + hbonds = { + hbond.resnr for hbond in s.hbonds_pdon + } # res nr 100, 68, 69 and 199 in alternative conformation, self.assertTrue({137, 138, 139, 140, 141, 165, 187}.issubset(hbonds)) # Water bridges to Asp137, Thr141, Met142, Arg199 and Gly139 # res nr 199 and 142 in alternative conformation @@ -286,10 +288,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Bhattacharya et al. Structural basis of HIV-1 capsid recognition by PF74 and CPSF6(2014) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4qnb.pdb') - bsid = '1B0:A:301' + tmpmol.load_pdb("./pdb/4qnb.pdb") + bsid = "1B0:A:301" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Asn57 and Lys70 @@ -304,10 +306,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Zaware et al. Structural basis of HIV-1 capsid recognition by PF74 and CPSF6(2014) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4kya.pdb') - bsid = '1UG:E:702' + tmpmol.load_pdb("./pdb/4kya.pdb") + bsid = "1UG:E:702" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Ala609 @@ -329,10 +331,10 @@ class LiteratureValidatedTest(unittest.TestCase): implications for ligand binding and specificity(2002) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1n7g.pdb') - bsid = 'NDP:A:701' + tmpmol.load_pdb("./pdb/1n7g.pdb") + bsid = "NDP:A:701" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Thr37, Gly38, Gln39, Asp40, Arg60, Leu92, Asp91, Ser63, Leu92, Ala115, Ser117, @@ -340,7 +342,9 @@ class LiteratureValidatedTest(unittest.TestCase): # Publication give the Prediction for Asp91 as hydrogen bond, when this contains two acceptor atoms. hbonds = {hbond.resnr for hbond in s.hbonds_pdon} # #@todo Hbond to 128 not detected - self.assertTrue({37, 38, 39, 40, 92, 63, 92, 115, 117, 185, 189, 215, 220}.issubset(hbonds)) + self.assertTrue( + {37, 38, 39, 40, 92, 63, 92, 115, 117, 185, 189, 215, 220}.issubset(hbonds) + ) # Water bridges to Gly35, Thr37, Gly38, Asp40, Arg60, Arg61, Ser63, Asn66, Ser117, Tyr128, Lys189, Arg220 waterbridges = {wb.resnr for wb in s.water_bridges} # Hydrogen bonds to 35, 37, 38, 40, 63, 117, 128, 189, 220 not detected due to prioritization @@ -358,10 +362,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Tsuhako et al. The design, synthesis, and biological evaluation of PIM kinase inhibitors.(2012) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4alw.pdb') - bsid = 'HY7:A:1308' + tmpmol.load_pdb("./pdb/4alw.pdb") + bsid = "HY7:A:1308" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Asp186 @@ -377,10 +381,10 @@ class LiteratureValidatedTest(unittest.TestCase): for TMAO.(2013) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3o1h.pdb') - bsid = 'TMO:B:1' + tmpmol.load_pdb("./pdb/3o1h.pdb") + bsid = "TMO:B:1" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Trp45 @@ -396,10 +400,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Shikha et al. Mechanism of mismatch recognition revealed by human MutSβ bound to unpaired DNA loops.(2012) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3thy.pdb') - bsid = 'ADP:A:935' + tmpmol.load_pdb("./pdb/3thy.pdb") + bsid = "ADP:A:935" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Saltbridge to His295 and Lys675 @@ -414,10 +418,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Ash et al. The structure of an N11A mutant of the G-protein domain of FeoB.(2011) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3tah.pdb') - bsid = 'BGO:A:300' + tmpmol.load_pdb("./pdb/3tah.pdb") + bsid = "BGO:A:300" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Ala11, Lys14, Thr15, Ser16, Asp113, Met114, Ala143 and Asp113 @@ -434,10 +438,10 @@ class LiteratureValidatedTest(unittest.TestCase): kinase CK2 inhibitors in clinical trials for the treatment of cancer (2011). """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3r0t.pdb') - bsid = 'FU9:A:338' + tmpmol.load_pdb("./pdb/3r0t.pdb") + bsid = "FU9:A:338" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Val116 @@ -462,10 +466,10 @@ class LiteratureValidatedTest(unittest.TestCase): Redox Potentials of DesulfoVibrio Vulgaris Flavodoxin.(2002) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1aku.pdb') - bsid = 'FMN:A:150' + tmpmol.load_pdb("./pdb/1aku.pdb") + bsid = "FMN:A:150" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Thr59 @@ -488,10 +492,10 @@ class LiteratureValidatedTest(unittest.TestCase): 673, a potent inhibitor derived from dihydropyridophthalazinone.(2014) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4pjt.pdb') - bsid = '2YQ:D:1104' + tmpmol.load_pdb("./pdb/4pjt.pdb") + bsid = "2YQ:D:1104" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Gly863 @@ -506,15 +510,15 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Presnell et al. Oxyanion-Mediated Inhibition of Serine Proteases.(1998) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1bju.pdb') - bsid = 'GP6:A:910' + tmpmol.load_pdb("./pdb/1bju.pdb") + bsid = "GP6:A:910" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] - #@todo Publication show hydrogen bond interactions for Gly219 + # @todo Publication show hydrogen bond interactions for Gly219 # Hydrogen bonds to Ser190, Ser195, Gly219 and Asp189 - hbonds = {hbond.resnr for hbond in s.hbonds_pdon+s.hbonds_ldon} + hbonds = {hbond.resnr for hbond in s.hbonds_pdon + s.hbonds_ldon} self.assertTrue({189, 190, 195}.issubset(hbonds)) # Water bridges to Ser190 and Val227 # Water bridge to 190 not detected due to prioritization @@ -532,10 +536,10 @@ class LiteratureValidatedTest(unittest.TestCase): Reference: Wilcken et al. Halogen-Enriched Fragment Libraries as Leads for Drug Rescue of Mutant p53.(2012) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4agl.pdb') - bsid = 'P84:A:400' + tmpmol.load_pdb("./pdb/4agl.pdb") + bsid = "P84:A:400" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Water bridges to Val147 @@ -554,10 +558,10 @@ class LiteratureValidatedTest(unittest.TestCase): Pathway.(2007) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2efj.pdb') - bsid = '37T:A:502' + tmpmol.load_pdb("./pdb/2efj.pdb") + bsid = "37T:A:502" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bond to Ser237 @@ -573,10 +577,10 @@ class LiteratureValidatedTest(unittest.TestCase): inhibitor.(2006) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2iuz.pdb') - bsid = 'D1H:A:1440' + tmpmol.load_pdb("./pdb/2iuz.pdb") + bsid = "D1H:A:1440" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Trp137 @@ -597,10 +601,10 @@ class LiteratureValidatedTest(unittest.TestCase): phosphodiesterase type 5 (PDE5) inhibitors.(2011) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3shy.pdb') - bsid = '5FO:A:1' + tmpmol.load_pdb("./pdb/3shy.pdb") + bsid = "5FO:A:1" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Gln817 @@ -622,10 +626,10 @@ class LiteratureValidatedTest(unittest.TestCase): substrate recognition site constructed by rearrangement of hydrogen bond network..(1998) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1ay8.pdb') - bsid = 'PLP:A:413' + tmpmol.load_pdb("./pdb/1ay8.pdb") + bsid = "PLP:A:413" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds to Gly108, Thr109, Asn194 and Ser257 @@ -644,10 +648,10 @@ class LiteratureValidatedTest(unittest.TestCase): evolutionary path selected by the Lewis epitope..(2014) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/4rdl.pdb') - bsid = 'FUC:A:601' + tmpmol.load_pdb("./pdb/4rdl.pdb") + bsid = "FUC:A:601" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Instead of FUC-A-604 (sugar representative) # Water bridges to Asn395 @@ -670,18 +674,22 @@ class LiteratureValidatedTest(unittest.TestCase): with CGP 53820, a novel pseudosymmetric inhibitor (1995) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1hii.pdb') - bsid = 'C20:B:101' + tmpmol.load_pdb("./pdb/1hii.pdb") + bsid = "C20:B:101" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Water bridges - waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges} - self.assertTrue({'50A', '50B'}.issubset(waterbridges)) # Bridging Ile-B50 and Ile-A50 with ligand + waterbridges = {str(wb.resnr) + wb.reschain for wb in s.water_bridges} + self.assertTrue( + {"50A", "50B"}.issubset(waterbridges) + ) # Bridging Ile-B50 and Ile-A50 with ligand # Hydrogen bonds - hbonds = {str(hbond.resnr)+hbond.reschain for hbond in s.hbonds_pdon+s.hbonds_ldon} - self.assertTrue({'27A', '27B', '29A', '48A', '48B'}.issubset(hbonds)) + hbonds = { + str(hbond.resnr) + hbond.reschain for hbond in s.hbonds_pdon + s.hbonds_ldon + } + self.assertTrue({"27A", "27B", "29A", "48A", "48B"}.issubset(hbonds)) # #@todo Publication mentions additional possible hydrogen bond with Asp28B # Hydrogen bonds with Asp-A25 are reported as a salt bridge as both partners have (potential) charges @@ -691,22 +699,28 @@ class LiteratureValidatedTest(unittest.TestCase): Diol Inhibitors of HIV-1 Protease (1994) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1hvi.pdb') - bsid = 'A77:A:800' + tmpmol.load_pdb("./pdb/1hvi.pdb") + bsid = "A77:A:800" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Water bridges - waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges} + waterbridges = {str(wb.resnr) + wb.reschain for wb in s.water_bridges} # #@todo Water bridge with 50B not detected - self.assertTrue({'50A'}.issubset(waterbridges)) # Bridging Ile-B50 and Ile-A50 with ligand + self.assertTrue( + {"50A"}.issubset(waterbridges) + ) # Bridging Ile-B50 and Ile-A50 with ligand # pi-cation Interactions picat = {pication.resnr for pication in s.pication_laro} - self.assertEqual({8}, picat) # Described as weakly polar contact/stacking in paper + self.assertEqual( + {8}, picat + ) # Described as weakly polar contact/stacking in paper # Hydrogen bonds - hbonds = {str(hbond.resnr)+hbond.reschain for hbond in s.hbonds_pdon+s.hbonds_ldon} - self.assertTrue({'25B', '27A', '27B', '48A', '48B'}.issubset(hbonds)) + hbonds = { + str(hbond.resnr) + hbond.reschain for hbond in s.hbonds_pdon + s.hbonds_ldon + } + self.assertTrue({"25B", "27A", "27B", "48A", "48B"}.issubset(hbonds)) # #@todo Paper describes additional hydrogen bond with Asp25A def test_3o7g(self): @@ -715,16 +729,18 @@ class LiteratureValidatedTest(unittest.TestCase): melanoma (2010) """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/3og7.pdb') - bsid = '032:A:1' + tmpmol.load_pdb("./pdb/3og7.pdb") + bsid = "032:A:1" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrogen bonds - hbonds = {str(hbond.resnr)+hbond.reschain for hbond in s.hbonds_pdon+s.hbonds_ldon} + hbonds = { + str(hbond.resnr) + hbond.reschain for hbond in s.hbonds_pdon + s.hbonds_ldon + } # Additional hydrogen bond to residue 530A reported - self.assertTrue({'594A'}.issubset(hbonds)) + self.assertTrue({"594A"}.issubset(hbonds)) def test_1hpx(self): """ @@ -735,20 +751,22 @@ class LiteratureValidatedTest(unittest.TestCase): For residues in the B chain, the offset is -100 (e.g. Ile 50B in the PDB structure is Ile 150 in the paper). """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1hpx.pdb') - bsid = 'KNI:B:900' + tmpmol.load_pdb("./pdb/1hpx.pdb") + bsid = "KNI:B:900" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Hydrophobic contacts to Val82, Ile84, Ile150 as part of flap (S1, S1' sites) - hydroph = {str(hyd.resnr)+hyd.reschain for hyd in s.all_hydrophobic_contacts} - self.assertTrue({'82A', '84A', '50B'}.issubset(hydroph)) + hydroph = {str(hyd.resnr) + hyd.reschain for hyd in s.all_hydrophobic_contacts} + self.assertTrue({"82A", "84A", "50B"}.issubset(hydroph)) # Hydrogen bonds - hbonds = {str(hbond.resnr)+hbond.reschain for hbond in s.hbonds_ldon+s.hbonds_pdon} + hbonds = { + str(hbond.resnr) + hbond.reschain for hbond in s.hbonds_ldon + s.hbonds_pdon + } # Additional hbond to 25B not detected (low angle?) - self.assertTrue({'29B', '48B', '27B', '25A'}.issubset(hbonds)) + self.assertTrue({"29B", "48B", "27B", "25A"}.issubset(hbonds)) # Water bridges - waterbridges = {str(wb.resnr)+wb.reschain for wb in s.water_bridges} + waterbridges = {str(wb.resnr) + wb.reschain for wb in s.water_bridges} # Waterbridge with Gly27 is detected instead of Ala28/Asp29 - self.assertTrue({'50A', '50B', '29A'}.issubset(waterbridges)) + self.assertTrue({"50A", "50B", "29A"}.issubset(waterbridges)) diff --git a/plip/test/test_metal_coordination.py b/plip/test/test_metal_coordination.py index 424b8df..e054735 100644 --- a/plip/test/test_metal_coordination.py +++ b/plip/test/test_metal_coordination.py @@ -22,19 +22,19 @@ class MetalCoordinationTest(unittest.TestCase): """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1rmd.pdb') - bsid = 'ZN:A:119' + tmpmol.load_pdb("./pdb/1rmd.pdb") + bsid = "ZN:A:119" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Coordination by three cysteines and one histidine of the protein metalres = [mres.restype for mres in s.metal_complexes] - self.assertEqual(metalres.count('CYS'), 3) - self.assertEqual(metalres.count('HIS'), 1) + self.assertEqual(metalres.count("CYS"), 3) + self.assertEqual(metalres.count("HIS"), 1) # Zn atom with tetrahedral geometry (coordination number 4) self.assertEqual(s.metal_complexes[0].coordination_num, 4) - self.assertEqual(s.metal_complexes[0].geometry, 'tetrahedral') + self.assertEqual(s.metal_complexes[0].geometry, "tetrahedral") def test_1rla(self): """Rat liver arginase, a binuclear manganese metalloenzyme (1rmd) @@ -42,20 +42,20 @@ class MetalCoordinationTest(unittest.TestCase): """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1rla.pdb') - bsid = 'MN:A:500' + tmpmol.load_pdb("./pdb/1rla.pdb") + bsid = "MN:A:500" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Coordination by one histidine, three aspartic acid residues, and one water molecule metalres = [mres.restype for mres in s.metal_complexes] - self.assertEqual(metalres.count('HIS'), 1) - self.assertEqual(metalres.count('ASP'), 3) - self.assertEqual(metalres.count('HOH'), 1) + self.assertEqual(metalres.count("HIS"), 1) + self.assertEqual(metalres.count("ASP"), 3) + self.assertEqual(metalres.count("HOH"), 1) # Mn atom with square pyramidal geometry (coordination number 5) self.assertEqual(s.metal_complexes[0].coordination_num, 5) - self.assertEqual(s.metal_complexes[0].geometry, 'square.pyramidal') + self.assertEqual(s.metal_complexes[0].geometry, "square.pyramidal") def test_1het(self): """Liver alcohol deshydrogenase (1het) @@ -63,18 +63,18 @@ class MetalCoordinationTest(unittest.TestCase): """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1het.pdb') - bsid = 'ZN:A:401' + tmpmol.load_pdb("./pdb/1het.pdb") + bsid = "ZN:A:401" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Coordination by four cysteines metalres = [mres.restype + str(mres.resnr) for mres in s.metal_complexes] - self.assertEqual(set(metalres), {'CYS97', 'CYS100', 'CYS103', 'CYS111'}) + self.assertEqual(set(metalres), {"CYS97", "CYS100", "CYS103", "CYS111"}) # Zn atom with tetrahedral geometry (coordination number 4) self.assertEqual(s.metal_complexes[0].coordination_num, 4) - self.assertEqual(s.metal_complexes[0].geometry, 'tetrahedral') + self.assertEqual(s.metal_complexes[0].geometry, "tetrahedral") def test_1vfy(self): """Phosphatidylinositol-3-phosphate binding FYVE domain of VPS27P protein (1vfy) @@ -82,18 +82,18 @@ class MetalCoordinationTest(unittest.TestCase): """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/1vfy.pdb') - bsid = 'ZN:A:300' + tmpmol.load_pdb("./pdb/1vfy.pdb") + bsid = "ZN:A:300" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Coordination by four cysteines metalres = [mres.restype for mres in s.metal_complexes] - self.assertEqual(set(metalres), {'CYS'}) + self.assertEqual(set(metalres), {"CYS"}) # Zn atom with tetrahedral geometry (coordination number 4) self.assertEqual(s.metal_complexes[0].coordination_num, 4) - self.assertEqual(s.metal_complexes[0].geometry, 'tetrahedral') + self.assertEqual(s.metal_complexes[0].geometry, "tetrahedral") def test_2pvb(self): """Pike parvalbumin binding calcium (2pvb) @@ -101,15 +101,15 @@ class MetalCoordinationTest(unittest.TestCase): """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2pvb.pdb') - bsid = 'CA:A:110' + tmpmol.load_pdb("./pdb/2pvb.pdb") + bsid = "CA:A:110" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Ca atom with square pyramidal geometry (coordination number 5) self.assertEqual(s.metal_complexes[0].coordination_num, 5) - self.assertEqual(s.metal_complexes[0].geometry, 'square.pyramidal') + self.assertEqual(s.metal_complexes[0].geometry, "square.pyramidal") def test_2q8q(self): """Crystal Structure of S. aureus IsdE complexed with heme (2q8q) @@ -117,16 +117,16 @@ class MetalCoordinationTest(unittest.TestCase): """ tmpmol = PDBComplex() - tmpmol.load_pdb('./pdb/2q8q.pdb') - bsid = 'HEM:A:300' + tmpmol.load_pdb("./pdb/2q8q.pdb") + bsid = "HEM:A:300" for ligand in tmpmol.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: + if ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) == bsid: tmpmol.characterize_complex(ligand) s = tmpmol.interaction_sets[bsid] # Coordination by four nitrogens of heme itself and one additional histidine from the protein metalres = [mres.restype for mres in s.metal_complexes] - self.assertEqual(metalres.count('HEM'), 4) - self.assertEqual(metalres.count('HIS'), 1) + self.assertEqual(metalres.count("HEM"), 4) + self.assertEqual(metalres.count("HIS"), 1) # Fe atom with square pyramidal geometry (coordination number 5) self.assertEqual(s.metal_complexes[0].coordination_num, 5) - self.assertEqual(s.metal_complexes[0].geometry, 'square.pyramidal') + self.assertEqual(s.metal_complexes[0].geometry, "square.pyramidal") diff --git a/plip/test/test_pi_stacking.py b/plip/test/test_pi_stacking.py index bb560a3..16409ef 100644 --- a/plip/test/test_pi_stacking.py +++ b/plip/test/test_pi_stacking.py @@ -8,18 +8,22 @@ def characterize_complex(pdb_file: str, binding_site_id: str) -> PLInteraction: pdb_complex = PDBComplex() pdb_complex.load_pdb(pdb_file) for ligand in pdb_complex.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == binding_site_id: + if ( + ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) + == binding_site_id + ): pdb_complex.characterize_complex(ligand) return pdb_complex.interaction_sets[binding_site_id] class RingDetectionTest(unittest.TestCase): - def test_consistent_ring_detection(self): config.NOHYDRO = True angles = set() for i in range(0, 10): - interactions = characterize_complex('./pdb/4dst_protonated.pdb', 'GCP:A:202') + interactions = characterize_complex( + "./pdb/4dst_protonated.pdb", "GCP:A:202" + ) angles.add(interactions.pistacking[0].angle) self.assertTrue(len(angles) == 1) config.NOHYDRO = False diff --git a/plip/test/test_remote_services.py b/plip/test/test_remote_services.py index d52bffc..9cd27b5 100644 --- a/plip/test/test_remote_services.py +++ b/plip/test/test_remote_services.py @@ -14,16 +14,16 @@ class TestPDB(unittest.TestCase): def test_pdb_entry_status(self): # 1a0v is an obsolete entry and is replaced by 1y46 - status, current_pdbid = check_pdb_status('1a0v') - self.assertEqual(status, 'OBSOLETE') - self.assertEqual(current_pdbid, '1y46') + status, current_pdbid = check_pdb_status("1a0v") + self.assertEqual(status, "OBSOLETE") + self.assertEqual(current_pdbid, "1y46") # 1vsn is an current entry - status, current_pdbid = check_pdb_status('1vsn') - self.assertEqual(status, 'CURRENT') - self.assertEqual(current_pdbid, '1vsn') + status, current_pdbid = check_pdb_status("1vsn") + self.assertEqual(status, "CURRENT") + self.assertEqual(current_pdbid, "1vsn") # xxxx is not an PDB entry - status, current_pdbid = check_pdb_status('xxxx') - self.assertEqual(status, 'UNKNOWN') - self.assertEqual(current_pdbid, 'xxxx') + status, current_pdbid = check_pdb_status("xxxx") + self.assertEqual(status, "UNKNOWN") + self.assertEqual(current_pdbid, "xxxx") diff --git a/plip/test/test_water_bridges.py b/plip/test/test_water_bridges.py index 24bf128..d820b59 100644 --- a/plip/test/test_water_bridges.py +++ b/plip/test/test_water_bridges.py @@ -7,14 +7,16 @@ def characterize_complex(pdb_file: str, binding_site_id: str) -> PLInteraction: pdb_complex = PDBComplex() pdb_complex.load_pdb(pdb_file) for ligand in pdb_complex.ligands: - if ':'.join([ligand.hetid, ligand.chain, str(ligand.position)]) == binding_site_id: + if ( + ":".join([ligand.hetid, ligand.chain, str(ligand.position)]) + == binding_site_id + ): pdb_complex.characterize_complex(ligand) return pdb_complex.interaction_sets[binding_site_id] class WaterBridgeTest(unittest.TestCase): - def test_3ems(self): - interactions = characterize_complex('./pdb/3ems.pdb', 'ARG:A:131') + interactions = characterize_complex("./pdb/3ems.pdb", "ARG:A:131") water_bridges = interactions.water_bridges self.assertEqual(len(water_bridges), 4) diff --git a/plip/test/test_xml_parser.py b/plip/test/test_xml_parser.py index 1f45daa..da99e50 100644 --- a/plip/test/test_xml_parser.py +++ b/plip/test/test_xml_parser.py @@ -13,27 +13,27 @@ class XMLParserTest(unittest.TestCase): """Checks if the XML parser is working correctly""" def setUp(self): - self.px = PlipXML('./xml/1vsn.report.xml') - self.bsite = self.px.bsites['NFT:A:283'] - self.smiles = 'CC(C)CC(NC(c1ccc(cc1)c1ccc(cc1)S(N)(=O)=O)C(F)(F)F)C(=O)NCC=N' + self.px = PlipXML("./xml/1vsn.report.xml") + self.bsite = self.px.bsites["NFT:A:283"] + self.smiles = "CC(C)CC(NC(c1ccc(cc1)c1ccc(cc1)S(N)(=O)=O)C(F)(F)F)C(=O)NCC=N" def test_general_information(self): """Test if general information is correctly parsed.""" - self.assertEqual(self.px.version, '1.4.2') - self.assertEqual(self.px.pdbid, '1VSN') + self.assertEqual(self.px.version, "1.4.2") + self.assertEqual(self.px.pdbid, "1VSN") self.assertFalse(self.px.fixed) - self.assertEqual(self.px.filename, '1vsn.pdb') + self.assertEqual(self.px.filename, "1vsn.pdb") self.assertEqual(self.px.excluded, []) def test_bsite_information(self): """Test if the binding site information is correctly parsed.""" - self.assertEqual(self.bsite.pdbid, '1VSN') - self.assertEqual(self.bsite.uniqueid, '1VSN:NFT:A:283') - self.assertEqual(self.bsite.hetid, 'NFT') - self.assertEqual(self.bsite.longname, 'NFT') - self.assertEqual(self.bsite.ligtype, 'SMALLMOLECULE') + self.assertEqual(self.bsite.pdbid, "1VSN") + self.assertEqual(self.bsite.uniqueid, "1VSN:NFT:A:283") + self.assertEqual(self.bsite.hetid, "NFT") + self.assertEqual(self.bsite.longname, "NFT") + self.assertEqual(self.bsite.ligtype, "SMALLMOLECULE") self.assertEqual(self.bsite.smiles, self.smiles) - self.assertEqual(self.bsite.members, ['NFT:A:283']) + self.assertEqual(self.bsite.members, ["NFT:A:283"]) self.assertFalse(self.bsite.composite) # ligand properties @@ -50,7 +50,7 @@ class XMLParserTest(unittest.TestCase): self.assertAlmostEqual(self.bsite.logp, 6, 0) # Atom mappings (non-exhaustive test) - lmap = self.bsite.mappings['pdb_to_smiles'] + lmap = self.bsite.mappings["pdb_to_smiles"] self.assertEqual(lmap[1625], 24) self.assertEqual(lmap[1649], 33) self.assertEqual(lmap[1617], 14) @@ -59,7 +59,7 @@ class XMLParserTest(unittest.TestCase): self.assertEqual(len(self.bsite.bs_res), 35) # Interacting chains - self.assertEqual(self.bsite.interacting_chains, ['A']) + self.assertEqual(self.bsite.interacting_chains, ["A"]) # Has Interactions? self.assertTrue(self.bsite.has_interactions, True) @@ -72,8 +72,8 @@ class XMLParserTest(unittest.TestCase): hydrophobic1 = self.bsite.hydrophobics[0] self.assertEqual(hydrophobic1.dist, 3.67) self.assertEqual(hydrophobic1.resnr, 61) - self.assertEqual(hydrophobic1.restype, 'ASP') - self.assertEqual(hydrophobic1.reschain, 'A') + self.assertEqual(hydrophobic1.restype, "ASP") + self.assertEqual(hydrophobic1.reschain, "A") self.assertEqual(hydrophobic1.ligcarbonidx, 1639) self.assertEqual(hydrophobic1.protcarbonidx, 448) self.assertEqual(hydrophobic1.ligcoo, (-7.395, 24.225, 6.614)) @@ -83,17 +83,17 @@ class XMLParserTest(unittest.TestCase): self.assertEqual(len(self.bsite.hbonds), 6) hbond1 = self.bsite.hbonds[0] self.assertEqual(hbond1.resnr, 19) - self.assertEqual(hbond1.restype, 'GLN') - self.assertEqual(hbond1.reschain, 'A') + self.assertEqual(hbond1.restype, "GLN") + self.assertEqual(hbond1.reschain, "A") self.assertTrue(hbond1.sidechain) self.assertEqual(hbond1.dist_h_a, 2.16) self.assertEqual(hbond1.dist_d_a, 3.11) self.assertEqual(hbond1.don_angle, 160.05) self.assertTrue(hbond1.protisdon) self.assertEqual(hbond1.donoridx, 153) - self.assertEqual(hbond1.donortype, 'Nam') + self.assertEqual(hbond1.donortype, "Nam") self.assertEqual(hbond1.acceptoridx, 1649) - self.assertEqual(hbond1.acceptortype, 'N2') + self.assertEqual(hbond1.acceptortype, "N2") self.assertEqual(hbond1.ligcoo, (2.820, 18.145, 6.806)) self.assertEqual(hbond1.protcoo, (3.976, 15.409, 7.712)) @@ -101,17 +101,17 @@ class XMLParserTest(unittest.TestCase): self.assertEqual(len(self.bsite.wbridges), 1) wbridge1 = self.bsite.wbridges[0] self.assertEqual(wbridge1.resnr, 159) - self.assertEqual(wbridge1.restype, 'HIS') - self.assertEqual(wbridge1.reschain, 'A') + self.assertEqual(wbridge1.restype, "HIS") + self.assertEqual(wbridge1.reschain, "A") self.assertEqual(wbridge1.dist_a_w, 3.67) self.assertEqual(wbridge1.dist_d_w, 3.13) self.assertEqual(wbridge1.don_angle, 126.73) self.assertEqual(wbridge1.water_angle, 116.36) self.assertTrue(wbridge1.protisdon) self.assertEqual(wbridge1.donor_idx, 1210) - self.assertEqual(wbridge1.donortype, 'Nar') + self.assertEqual(wbridge1.donortype, "Nar") self.assertEqual(wbridge1.acceptor_idx, 1649) - self.assertEqual(wbridge1.acceptortype, 'N2') + self.assertEqual(wbridge1.acceptortype, "N2") self.assertEqual(wbridge1.ligcoo, (2.820, 18.145, 6.806)) self.assertEqual(wbridge1.protcoo, (6.401, 19.307, 4.971)) self.assertEqual(wbridge1.watercoo, (3.860, 18.563, 3.309)) @@ -129,16 +129,16 @@ class XMLParserTest(unittest.TestCase): self.assertEqual(len(self.bsite.halogens), 2) hal1 = self.bsite.halogens[0] self.assertEqual(hal1.resnr, 67) - self.assertEqual(hal1.restype, 'TYR') - self.assertEqual(hal1.reschain, 'A') + self.assertEqual(hal1.restype, "TYR") + self.assertEqual(hal1.reschain, "A") self.assertTrue(hal1.sidechain) self.assertEqual(hal1.dist, 3.37) self.assertEqual(hal1.don_angle, 156.70) self.assertEqual(hal1.acc_angle, 100.53) self.assertEqual(hal1.don_idx, 1627) - self.assertEqual(hal1.donortype, 'F') + self.assertEqual(hal1.donortype, "F") self.assertEqual(hal1.acc_idx, 485) - self.assertEqual(hal1.acceptortype, 'O3') + self.assertEqual(hal1.acceptortype, "O3") self.assertEqual(hal1.ligcoo, (-1.862, 29.303, 4.507)) self.assertEqual(hal1.protcoo, (-1.005, 26.276, 3.287)) diff --git a/plip/visualization/chimera.py b/plip/visualization/chimera.py index a37fffb..436d0e3 100644 --- a/plip/visualization/chimera.py +++ b/plip/visualization/chimera.py @@ -5,8 +5,8 @@ class ChimeraVisualizer: self.chimera = chimera_module self.tid = tid self.uid = plcomplex.uid - self.plipname = 'PLIP-%i' % self.tid - self.hetid, self.chain, self.pos = self.uid.split(':') + self.plipname = "PLIP-%i" % self.tid + self.hetid, self.chain, self.pos = self.uid.split(":") self.pos = int(self.pos) self.colorbyname = self.chimera.colorTable.getColorByName self.rc = self.chimera.runCommand @@ -32,7 +32,10 @@ class ChimeraVisualizer: self.update_model_dict() self.rc("background solid white") self.rc("setattr g display 0") # Hide all pseudobonds - self.rc("~display #%i & :/isHet & ~:%s" % (self.model_dict[self.plipname], self.hetid)) + self.rc( + "~display #%i & :/isHet & ~:%s" + % (self.model_dict[self.plipname], self.hetid) + ) def update_model_dict(self): """Updates the model dictionary""" @@ -51,39 +54,47 @@ class ChimeraVisualizer: def show_hydrophobic(self): """Visualizes hydrophobic contacts.""" - grp = self.getPseudoBondGroup("Hydrophobic Interactions-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "Hydrophobic Interactions-%i" % self.tid, associateWith=[self.model] + ) grp.lineType = self.chimera.Dash grp.lineWidth = 3 - grp.color = self.colorbyname('gray') + grp.color = self.colorbyname("gray") for i in self.plcomplex.hydrophobic_contacts.pairs_ids: self.bs_res_ids.append(i[0]) def show_hbonds(self): """Visualizes hydrogen bonds.""" - grp = self.getPseudoBondGroup("Hydrogen Bonds-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "Hydrogen Bonds-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 for i in self.plcomplex.hbonds.ldon_id: b = grp.newPseudoBond(self.atoms[i[0]], self.atoms[i[1]]) - b.color = self.colorbyname('blue') + b.color = self.colorbyname("blue") self.bs_res_ids.append(i[0]) for i in self.plcomplex.hbonds.pdon_id: b = grp.newPseudoBond(self.atoms[i[0]], self.atoms[i[1]]) - b.color = self.colorbyname('blue') + b.color = self.colorbyname("blue") self.bs_res_ids.append(i[1]) def show_halogen(self): """Visualizes halogen bonds.""" - grp = self.getPseudoBondGroup("HalogenBonds-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "HalogenBonds-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 for i in self.plcomplex.halogen_bonds: b = grp.newPseudoBond(self.atoms[i[0]], self.atoms[i[1]]) - b.color = self.colorbyname('turquoise') + b.color = self.colorbyname("turquoise") self.bs_res_ids.append(i.acc_id) def show_stacking(self): """Visualizes pi-stacking interactions.""" - grp = self.getPseudoBondGroup("pi-Stacking-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "pi-Stacking-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 grp.lineType = self.chimera.Dash for i, stack in enumerate(self.plcomplex.pistacking): @@ -101,13 +112,15 @@ class ChimeraVisualizer: r.addAtom(centroid_lig) b = grp.newPseudoBond(centroid_lig, centroid_prot) - b.color = self.colorbyname('forest green') + b.color = self.colorbyname("forest green") self.bs_res_ids += stack.proteinring_atoms def show_cationpi(self): """Visualizes cation-pi interactions""" - grp = self.getPseudoBondGroup("Cation-Pi-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "Cation-Pi-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 grp.lineType = self.chimera.Dash for i, cat in enumerate(self.plcomplex.pication): @@ -125,7 +138,7 @@ class ChimeraVisualizer: r.addAtom(centroid) b = grp.newPseudoBond(centroid, chargecenter) - b.color = self.colorbyname('orange') + b.color = self.colorbyname("orange") if cat.protcharged: self.bs_res_ids += cat.charge_atoms @@ -135,7 +148,9 @@ class ChimeraVisualizer: def show_sbridges(self): """Visualizes salt bridges.""" # Salt Bridges - grp = self.getPseudoBondGroup("Salt Bridges-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "Salt Bridges-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 grp.lineType = self.chimera.Dash for i, sbridge in enumerate(self.plcomplex.saltbridges): @@ -153,7 +168,7 @@ class ChimeraVisualizer: r.addAtom(chargecenter2) b = grp.newPseudoBond(chargecenter1, chargecenter2) - b.color = self.colorbyname('yellow') + b.color = self.colorbyname("yellow") if sbridge.protispos: self.bs_res_ids += sbridge.positive_atoms @@ -162,14 +177,20 @@ class ChimeraVisualizer: def show_wbridges(self): """Visualizes water bridges""" - grp = self.getPseudoBondGroup("Water Bridges-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "Water Bridges-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 for i, wbridge in enumerate(self.plcomplex.waterbridges): - c = grp.newPseudoBond(self.atoms[wbridge.water_id], self.atoms[wbridge.acc_id]) - c.color = self.colorbyname('cornflower blue') + c = grp.newPseudoBond( + self.atoms[wbridge.water_id], self.atoms[wbridge.acc_id] + ) + c.color = self.colorbyname("cornflower blue") self.water_ids.append(wbridge.water_id) - b = grp.newPseudoBond(self.atoms[wbridge.don_id], self.atoms[wbridge.water_id]) - b.color = self.colorbyname('cornflower blue') + b = grp.newPseudoBond( + self.atoms[wbridge.don_id], self.atoms[wbridge.water_id] + ) + b.color = self.colorbyname("cornflower blue") self.water_ids.append(wbridge.water_id) if wbridge.protisdon: self.bs_res_ids.append(wbridge.don_id) @@ -178,16 +199,20 @@ class ChimeraVisualizer: def show_metal(self): """Visualizes metal coordination.""" - grp = self.getPseudoBondGroup("Metal Coordination-%i" % self.tid, associateWith=[self.model]) + grp = self.getPseudoBondGroup( + "Metal Coordination-%i" % self.tid, associateWith=[self.model] + ) grp.lineWidth = 3 for i, metal in enumerate(self.plcomplex.metal_complexes): - c = grp.newPseudoBond(self.atoms[metal.metal_id], self.atoms[metal.target_id]) - c.color = self.colorbyname('magenta') + c = grp.newPseudoBond( + self.atoms[metal.metal_id], self.atoms[metal.target_id] + ) + c.color = self.colorbyname("magenta") - if metal.location == 'water': + if metal.location == "water": self.water_ids.append(metal.target_id) - if metal.location.startswith('protein'): + if metal.location.startswith("protein"): self.bs_res_ids.append(metal.target_id) def cleanup(self): @@ -197,13 +222,16 @@ class ChimeraVisualizer: # Hide all non-interacting water molecules water_selection = [] for wid in self.water_ids: - water_selection.append('serialNumber=%i' % wid) + water_selection.append("serialNumber=%i" % wid) self.rc("~display :HOH") self.rc("display :@/%s" % " or ".join(water_selection)) # Show all interacting binding site residues self.rc("~display #%i & ~:/isHet" % self.model_dict[self.plipname]) - self.rc("display :%s" % ",".join([str(self.atoms[bsid].residue.id) for bsid in self.bs_res_ids])) + self.rc( + "display :%s" + % ",".join([str(self.atoms[bsid].residue.id) for bsid in self.bs_res_ids]) + ) self.rc("color lightblue :HOH") def zoom_to_ligand(self): diff --git a/plip/visualization/pymol.py b/plip/visualization/pymol.py index b97912a..2e1b0e3 100644 --- a/plip/visualization/pymol.py +++ b/plip/visualization/pymol.py @@ -7,7 +7,6 @@ from pymol import cmd class PyMOLVisualizer: - def __init__(self, plcomplex): if plcomplex is not None: self.plcomplex = plcomplex @@ -20,57 +19,86 @@ class PyMOLVisualizer: def set_initial_representations(self): """General settings for PyMOL""" self.standard_settings() - cmd.set('dash_gap', 0) # Show not dashes, but lines for the pliprofiler - cmd.set('ray_shadow', 0) # Turn on ray shadows for clearer ray-traced images - cmd.set('cartoon_color', 'mylightblue') + cmd.set("dash_gap", 0) # Show not dashes, but lines for the pliprofiler + cmd.set("ray_shadow", 0) # Turn on ray shadows for clearer ray-traced images + cmd.set("cartoon_color", "mylightblue") # Set clipping planes for full view - cmd.clip('far', -1000) - cmd.clip('near', 1000) + cmd.clip("far", -1000) + cmd.clip("near", 1000) def make_initial_selections(self): """Make empty selections for structures and interactions""" - for group in ['Hydrophobic-P', 'Hydrophobic-L', 'HBondDonor-P', - 'HBondDonor-L', 'HBondAccept-P', 'HBondAccept-L', - 'HalogenAccept', 'HalogenDonor', 'Water', 'MetalIons', 'StackRings-P', - 'PosCharge-P', 'PosCharge-L', 'NegCharge-P', 'NegCharge-L', - 'PiCatRing-P', 'StackRings-L', 'PiCatRing-L', 'Metal-M', 'Metal-P', - 'Metal-W', 'Metal-L', 'Unpaired-HBA', 'Unpaired-HBD', 'Unpaired-HAL', - 'Unpaired-RINGS']: - cmd.select(group, 'None') + for group in [ + "Hydrophobic-P", + "Hydrophobic-L", + "HBondDonor-P", + "HBondDonor-L", + "HBondAccept-P", + "HBondAccept-L", + "HalogenAccept", + "HalogenDonor", + "Water", + "MetalIons", + "StackRings-P", + "PosCharge-P", + "PosCharge-L", + "NegCharge-P", + "NegCharge-L", + "PiCatRing-P", + "StackRings-L", + "PiCatRing-L", + "Metal-M", + "Metal-P", + "Metal-W", + "Metal-L", + "Unpaired-HBA", + "Unpaired-HBD", + "Unpaired-HAL", + "Unpaired-RINGS", + ]: + cmd.select(group, "None") def standard_settings(self): """Sets up standard settings for a nice visualization.""" - cmd.set('bg_rgb', [1.0, 1.0, 1.0]) # White background - cmd.set('depth_cue', 0) # Turn off depth cueing (no fog) - cmd.set('cartoon_side_chain_helper', 1) # Improve combined visualization of sticks and cartoon - cmd.set('cartoon_fancy_helices', 1) # Nicer visualization of helices (using tapered ends) - cmd.set('transparency_mode', 1) # Turn on multilayer transparency - cmd.set('dash_radius', 0.05) + cmd.set("bg_rgb", [1.0, 1.0, 1.0]) # White background + cmd.set("depth_cue", 0) # Turn off depth cueing (no fog) + cmd.set( + "cartoon_side_chain_helper", 1 + ) # Improve combined visualization of sticks and cartoon + cmd.set( + "cartoon_fancy_helices", 1 + ) # Nicer visualization of helices (using tapered ends) + cmd.set("transparency_mode", 1) # Turn on multilayer transparency + cmd.set("dash_radius", 0.05) self.set_custom_colorset() def set_custom_colorset(self): """Defines a colorset with matching colors. Provided by Joachim.""" - cmd.set_color('myorange', '[253, 174, 97]') - cmd.set_color('mygreen', '[171, 221, 164]') - cmd.set_color('myred', '[215, 25, 28]') - cmd.set_color('myblue', '[43, 131, 186]') - cmd.set_color('mylightblue', '[158, 202, 225]') - cmd.set_color('mylightgreen', '[229, 245, 224]') - - def select_by_ids(self, selname, idlist, selection_exists=False, chunksize=20, restrict=None): + cmd.set_color("myorange", "[253, 174, 97]") + cmd.set_color("mygreen", "[171, 221, 164]") + cmd.set_color("myred", "[215, 25, 28]") + cmd.set_color("myblue", "[43, 131, 186]") + cmd.set_color("mylightblue", "[158, 202, 225]") + cmd.set_color("mylightgreen", "[229, 245, 224]") + + def select_by_ids( + self, selname, idlist, selection_exists=False, chunksize=20, restrict=None + ): """Selection with a large number of ids concatenated into a selection list can cause buffer overflow in PyMOL. This function takes a selection name and and list of IDs (list of integers) as input and makes a careful step-by-step selection (packages of 20 by default)""" idlist = list(set(idlist)) # Remove duplicates if not selection_exists: - cmd.select(selname, 'None') # Empty selection first - idchunks = [idlist[i:i + chunksize] for i in range(0, len(idlist), chunksize)] + cmd.select(selname, "None") # Empty selection first + idchunks = [idlist[i : i + chunksize] for i in range(0, len(idlist), chunksize)] for idchunk in idchunks: - cmd.select(selname, '%s or (id %s)' % (selname, '+'.join(map(str, idchunk)))) + cmd.select( + selname, "%s or (id %s)" % (selname, "+".join(map(str, idchunk))) + ) if restrict is not None: - cmd.select(selname, '%s and %s' % (selname, restrict)) + cmd.select(selname, "%s and %s" % (selname, restrict)) def object_exists(self, object_name): """Checks if an object exists in the open PyMOL session.""" @@ -80,40 +108,44 @@ class PyMOLVisualizer: """Visualizes hydrophobic contacts.""" hydroph = self.plcomplex.hydrophobic_contacts if not len(hydroph.bs_ids) == 0: - self.select_by_ids('Hydrophobic-P', hydroph.bs_ids, restrict=self.protname) - self.select_by_ids('Hydrophobic-L', hydroph.lig_ids, restrict=self.ligname) + self.select_by_ids("Hydrophobic-P", hydroph.bs_ids, restrict=self.protname) + self.select_by_ids("Hydrophobic-L", hydroph.lig_ids, restrict=self.ligname) for i in hydroph.pairs_ids: - cmd.select('tmp_bs', 'id %i & %s' % (i[0], self.protname)) - cmd.select('tmp_lig', 'id %i & %s' % (i[1], self.ligname)) - cmd.distance('Hydrophobic', 'tmp_bs', 'tmp_lig') - if self.object_exists('Hydrophobic'): - cmd.set('dash_gap', 0.5, 'Hydrophobic') - cmd.set('dash_color', 'grey50', 'Hydrophobic') + cmd.select("tmp_bs", "id %i & %s" % (i[0], self.protname)) + cmd.select("tmp_lig", "id %i & %s" % (i[1], self.ligname)) + cmd.distance("Hydrophobic", "tmp_bs", "tmp_lig") + if self.object_exists("Hydrophobic"): + cmd.set("dash_gap", 0.5, "Hydrophobic") + cmd.set("dash_color", "grey50", "Hydrophobic") else: - cmd.select('Hydrophobic-P', 'None') + cmd.select("Hydrophobic-P", "None") def show_hbonds(self): """Visualizes hydrogen bonds.""" hbonds = self.plcomplex.hbonds - for group in [['HBondDonor-P', hbonds.prot_don_id], - ['HBondAccept-P', hbonds.prot_acc_id]]: + for group in [ + ["HBondDonor-P", hbonds.prot_don_id], + ["HBondAccept-P", hbonds.prot_acc_id], + ]: if not len(group[1]) == 0: self.select_by_ids(group[0], group[1], restrict=self.protname) - for group in [['HBondDonor-L', hbonds.lig_don_id], - ['HBondAccept-L', hbonds.lig_acc_id]]: + for group in [ + ["HBondDonor-L", hbonds.lig_don_id], + ["HBondAccept-L", hbonds.lig_acc_id], + ]: if not len(group[1]) == 0: self.select_by_ids(group[0], group[1], restrict=self.ligname) for i in hbonds.ldon_id: - cmd.select('tmp_bs', 'id %i & %s' % (i[0], self.protname)) - cmd.select('tmp_lig', 'id %i & %s' % (i[1], self.ligname)) - cmd.distance('HBonds', 'tmp_bs', 'tmp_lig') + cmd.select("tmp_bs", "id %i & %s" % (i[0], self.protname)) + cmd.select("tmp_lig", "id %i & %s" % (i[1], self.ligname)) + cmd.distance("HBonds", "tmp_bs", "tmp_lig") for i in hbonds.pdon_id: - cmd.select('tmp_bs', 'id %i & %s' % (i[1], self.protname)) - cmd.select('tmp_lig', 'id %i & %s' % (i[0], self.ligname)) - cmd.distance('HBonds', 'tmp_bs', 'tmp_lig') - if self.object_exists('HBonds'): - cmd.set('dash_color', 'blue', 'HBonds') + cmd.select("tmp_bs", "id %i & %s" % (i[1], self.protname)) + cmd.select("tmp_lig", "id %i & %s" % (i[0], self.ligname)) + cmd.distance("HBonds", "tmp_bs", "tmp_lig") + if self.object_exists("HBonds"): + cmd.set("dash_color", "blue", "HBonds") def show_halogen(self): """Visualize halogen bonds.""" @@ -122,155 +154,218 @@ class PyMOLVisualizer: for h in halogen: all_don_x.append(h.don_id) all_acc_o.append(h.acc_id) - cmd.select('tmp_bs', 'id %i & %s' % (h.acc_id, self.protname)) - cmd.select('tmp_lig', 'id %i & %s' % (h.don_id, self.ligname)) + cmd.select("tmp_bs", "id %i & %s" % (h.acc_id, self.protname)) + cmd.select("tmp_lig", "id %i & %s" % (h.don_id, self.ligname)) - cmd.distance('HalogenBonds', 'tmp_bs', 'tmp_lig') + cmd.distance("HalogenBonds", "tmp_bs", "tmp_lig") if not len(all_acc_o) == 0: - self.select_by_ids('HalogenAccept', all_acc_o, restrict=self.protname) - self.select_by_ids('HalogenDonor', all_don_x, restrict=self.ligname) - if self.object_exists('HalogenBonds'): - cmd.set('dash_color', 'greencyan', 'HalogenBonds') + self.select_by_ids("HalogenAccept", all_acc_o, restrict=self.protname) + self.select_by_ids("HalogenDonor", all_don_x, restrict=self.ligname) + if self.object_exists("HalogenBonds"): + cmd.set("dash_color", "greencyan", "HalogenBonds") def show_stacking(self): """Visualize pi-stacking interactions.""" stacks = self.plcomplex.pistacking for i, stack in enumerate(stacks): - pires_ids = '+'.join(map(str, stack.proteinring_atoms)) - pilig_ids = '+'.join(map(str, stack.ligandring_atoms)) - cmd.select('StackRings-P', 'StackRings-P or (id %s & %s)' % (pires_ids, self.protname)) - cmd.select('StackRings-L', 'StackRings-L or (id %s & %s)' % (pilig_ids, self.ligname)) - cmd.select('StackRings-P', 'byres StackRings-P') - cmd.show('sticks', 'StackRings-P') - - cmd.pseudoatom('ps-pistack-1-%i' % i, pos=stack.proteinring_center) - cmd.pseudoatom('ps-pistack-2-%i' % i, pos=stack.ligandring_center) - cmd.pseudoatom('Centroids-P', pos=stack.proteinring_center) - cmd.pseudoatom('Centroids-L', pos=stack.ligandring_center) - - if stack.type == 'P': - cmd.distance('PiStackingP', 'ps-pistack-1-%i' % i, 'ps-pistack-2-%i' % i) - if stack.type == 'T': - cmd.distance('PiStackingT', 'ps-pistack-1-%i' % i, 'ps-pistack-2-%i' % i) - if self.object_exists('PiStackingP'): - cmd.set('dash_color', 'green', 'PiStackingP') - cmd.set('dash_gap', 0.3, 'PiStackingP') - cmd.set('dash_length', 0.6, 'PiStackingP') - if self.object_exists('PiStackingT'): - cmd.set('dash_color', 'smudge', 'PiStackingT') - cmd.set('dash_gap', 0.3, 'PiStackingT') - cmd.set('dash_length', 0.6, 'PiStackingT') + pires_ids = "+".join(map(str, stack.proteinring_atoms)) + pilig_ids = "+".join(map(str, stack.ligandring_atoms)) + cmd.select( + "StackRings-P", + "StackRings-P or (id %s & %s)" % (pires_ids, self.protname), + ) + cmd.select( + "StackRings-L", + "StackRings-L or (id %s & %s)" % (pilig_ids, self.ligname), + ) + cmd.select("StackRings-P", "byres StackRings-P") + cmd.show("sticks", "StackRings-P") + + cmd.pseudoatom("ps-pistack-1-%i" % i, pos=stack.proteinring_center) + cmd.pseudoatom("ps-pistack-2-%i" % i, pos=stack.ligandring_center) + cmd.pseudoatom("Centroids-P", pos=stack.proteinring_center) + cmd.pseudoatom("Centroids-L", pos=stack.ligandring_center) + + if stack.type == "P": + cmd.distance( + "PiStackingP", "ps-pistack-1-%i" % i, "ps-pistack-2-%i" % i + ) + if stack.type == "T": + cmd.distance( + "PiStackingT", "ps-pistack-1-%i" % i, "ps-pistack-2-%i" % i + ) + if self.object_exists("PiStackingP"): + cmd.set("dash_color", "green", "PiStackingP") + cmd.set("dash_gap", 0.3, "PiStackingP") + cmd.set("dash_length", 0.6, "PiStackingP") + if self.object_exists("PiStackingT"): + cmd.set("dash_color", "smudge", "PiStackingT") + cmd.set("dash_gap", 0.3, "PiStackingT") + cmd.set("dash_length", 0.6, "PiStackingT") def show_cationpi(self): """Visualize cation-pi interactions.""" for i, p in enumerate(self.plcomplex.pication): - cmd.pseudoatom('ps-picat-1-%i' % i, pos=p.ring_center) - cmd.pseudoatom('ps-picat-2-%i' % i, pos=p.charge_center) + cmd.pseudoatom("ps-picat-1-%i" % i, pos=p.ring_center) + cmd.pseudoatom("ps-picat-2-%i" % i, pos=p.charge_center) if p.protcharged: - cmd.pseudoatom('Chargecenter-P', pos=p.charge_center) - cmd.pseudoatom('Centroids-L', pos=p.ring_center) - pilig_ids = '+'.join(map(str, p.ring_atoms)) - cmd.select('PiCatRing-L', 'PiCatRing-L or (id %s & %s)' % (pilig_ids, self.ligname)) + cmd.pseudoatom("Chargecenter-P", pos=p.charge_center) + cmd.pseudoatom("Centroids-L", pos=p.ring_center) + pilig_ids = "+".join(map(str, p.ring_atoms)) + cmd.select( + "PiCatRing-L", + "PiCatRing-L or (id %s & %s)" % (pilig_ids, self.ligname), + ) for a in p.charge_atoms: - cmd.select('PosCharge-P', 'PosCharge-P or (id %i & %s)' % (a, self.protname)) + cmd.select( + "PosCharge-P", + "PosCharge-P or (id %i & %s)" % (a, self.protname), + ) else: - cmd.pseudoatom('Chargecenter-L', pos=p.charge_center) - cmd.pseudoatom('Centroids-P', pos=p.ring_center) - pires_ids = '+'.join(map(str, p.ring_atoms)) - cmd.select('PiCatRing-P', 'PiCatRing-P or (id %s & %s)' % (pires_ids, self.protname)) + cmd.pseudoatom("Chargecenter-L", pos=p.charge_center) + cmd.pseudoatom("Centroids-P", pos=p.ring_center) + pires_ids = "+".join(map(str, p.ring_atoms)) + cmd.select( + "PiCatRing-P", + "PiCatRing-P or (id %s & %s)" % (pires_ids, self.protname), + ) for a in p.charge_atoms: - cmd.select('PosCharge-L', 'PosCharge-L or (id %i & %s)' % (a, self.ligname)) - cmd.distance('PiCation', 'ps-picat-1-%i' % i, 'ps-picat-2-%i' % i) - if self.object_exists('PiCation'): - cmd.set('dash_color', 'orange', 'PiCation') - cmd.set('dash_gap', 0.3, 'PiCation') - cmd.set('dash_length', 0.6, 'PiCation') + cmd.select( + "PosCharge-L", "PosCharge-L or (id %i & %s)" % (a, self.ligname) + ) + cmd.distance("PiCation", "ps-picat-1-%i" % i, "ps-picat-2-%i" % i) + if self.object_exists("PiCation"): + cmd.set("dash_color", "orange", "PiCation") + cmd.set("dash_gap", 0.3, "PiCation") + cmd.set("dash_length", 0.6, "PiCation") def show_sbridges(self): """Visualize salt bridges.""" for i, saltb in enumerate(self.plcomplex.saltbridges): if saltb.protispos: for patom in saltb.positive_atoms: - cmd.select('PosCharge-P', 'PosCharge-P or (id %i & %s)' % (patom, self.protname)) + cmd.select( + "PosCharge-P", + "PosCharge-P or (id %i & %s)" % (patom, self.protname), + ) for latom in saltb.negative_atoms: - cmd.select('NegCharge-L', 'NegCharge-L or (id %i & %s)' % (latom, self.ligname)) - for sbgroup in [['ps-sbl-1-%i' % i, 'Chargecenter-P', saltb.positive_center], - ['ps-sbl-2-%i' % i, 'Chargecenter-L', saltb.negative_center]]: + cmd.select( + "NegCharge-L", + "NegCharge-L or (id %i & %s)" % (latom, self.ligname), + ) + for sbgroup in [ + ["ps-sbl-1-%i" % i, "Chargecenter-P", saltb.positive_center], + ["ps-sbl-2-%i" % i, "Chargecenter-L", saltb.negative_center], + ]: cmd.pseudoatom(sbgroup[0], pos=sbgroup[2]) cmd.pseudoatom(sbgroup[1], pos=sbgroup[2]) - cmd.distance('Saltbridges', 'ps-sbl-1-%i' % i, 'ps-sbl-2-%i' % i) + cmd.distance("Saltbridges", "ps-sbl-1-%i" % i, "ps-sbl-2-%i" % i) else: for patom in saltb.negative_atoms: - cmd.select('NegCharge-P', 'NegCharge-P or (id %i & %s)' % (patom, self.protname)) + cmd.select( + "NegCharge-P", + "NegCharge-P or (id %i & %s)" % (patom, self.protname), + ) for latom in saltb.positive_atoms: - cmd.select('PosCharge-L', 'PosCharge-L or (id %i & %s)' % (latom, self.ligname)) - for sbgroup in [['ps-sbp-1-%i' % i, 'Chargecenter-P', saltb.negative_center], - ['ps-sbp-2-%i' % i, 'Chargecenter-L', saltb.positive_center]]: + cmd.select( + "PosCharge-L", + "PosCharge-L or (id %i & %s)" % (latom, self.ligname), + ) + for sbgroup in [ + ["ps-sbp-1-%i" % i, "Chargecenter-P", saltb.negative_center], + ["ps-sbp-2-%i" % i, "Chargecenter-L", saltb.positive_center], + ]: cmd.pseudoatom(sbgroup[0], pos=sbgroup[2]) cmd.pseudoatom(sbgroup[1], pos=sbgroup[2]) - cmd.distance('Saltbridges', 'ps-sbp-1-%i' % i, 'ps-sbp-2-%i' % i) + cmd.distance("Saltbridges", "ps-sbp-1-%i" % i, "ps-sbp-2-%i" % i) - if self.object_exists('Saltbridges'): - cmd.set('dash_color', 'yellow', 'Saltbridges') - cmd.set('dash_gap', 0.5, 'Saltbridges') + if self.object_exists("Saltbridges"): + cmd.set("dash_color", "yellow", "Saltbridges") + cmd.set("dash_gap", 0.5, "Saltbridges") def show_wbridges(self): """Visualize water bridges.""" for bridge in self.plcomplex.waterbridges: if bridge.protisdon: - cmd.select('HBondDonor-P', 'HBondDonor-P or (id %i & %s)' % (bridge.don_id, self.protname)) - cmd.select('HBondAccept-L', 'HBondAccept-L or (id %i & %s)' % (bridge.acc_id, self.ligname)) - cmd.select('tmp_don', 'id %i & %s' % (bridge.don_id, self.protname)) - cmd.select('tmp_acc', 'id %i & %s' % (bridge.acc_id, self.ligname)) + cmd.select( + "HBondDonor-P", + "HBondDonor-P or (id %i & %s)" % (bridge.don_id, self.protname), + ) + cmd.select( + "HBondAccept-L", + "HBondAccept-L or (id %i & %s)" % (bridge.acc_id, self.ligname), + ) + cmd.select("tmp_don", "id %i & %s" % (bridge.don_id, self.protname)) + cmd.select("tmp_acc", "id %i & %s" % (bridge.acc_id, self.ligname)) else: - cmd.select('HBondDonor-L', 'HBondDonor-L or (id %i & %s)' % (bridge.don_id, self.ligname)) - cmd.select('HBondAccept-P', 'HBondAccept-P or (id %i & %s)' % (bridge.acc_id, self.protname)) - cmd.select('tmp_don', 'id %i & %s' % (bridge.don_id, self.ligname)) - cmd.select('tmp_acc', 'id %i & %s' % (bridge.acc_id, self.protname)) - cmd.select('Water', 'Water or (id %i & resn HOH)' % bridge.water_id) - cmd.select('tmp_water', 'id %i & resn HOH' % bridge.water_id) - cmd.distance('WaterBridges', 'tmp_acc', 'tmp_water') - cmd.distance('WaterBridges', 'tmp_don', 'tmp_water') - if self.object_exists('WaterBridges'): - cmd.set('dash_color', 'lightblue', 'WaterBridges') - cmd.delete('tmp_water or tmp_acc or tmp_don') - cmd.color('lightblue', 'Water') - cmd.show('spheres', 'Water') + cmd.select( + "HBondDonor-L", + "HBondDonor-L or (id %i & %s)" % (bridge.don_id, self.ligname), + ) + cmd.select( + "HBondAccept-P", + "HBondAccept-P or (id %i & %s)" % (bridge.acc_id, self.protname), + ) + cmd.select("tmp_don", "id %i & %s" % (bridge.don_id, self.ligname)) + cmd.select("tmp_acc", "id %i & %s" % (bridge.acc_id, self.protname)) + cmd.select("Water", "Water or (id %i & resn HOH)" % bridge.water_id) + cmd.select("tmp_water", "id %i & resn HOH" % bridge.water_id) + cmd.distance("WaterBridges", "tmp_acc", "tmp_water") + cmd.distance("WaterBridges", "tmp_don", "tmp_water") + if self.object_exists("WaterBridges"): + cmd.set("dash_color", "lightblue", "WaterBridges") + cmd.delete("tmp_water or tmp_acc or tmp_don") + cmd.color("lightblue", "Water") + cmd.show("spheres", "Water") def show_metal(self): """Visualize metal coordination.""" metal_complexes = self.plcomplex.metal_complexes if not len(metal_complexes) == 0: - self.select_by_ids('Metal-M', self.metal_ids) + self.select_by_ids("Metal-M", self.metal_ids) for metal_complex in metal_complexes: - cmd.select('tmp_m', 'id %i' % metal_complex.metal_id) - cmd.select('tmp_t', 'id %i' % metal_complex.target_id) - if metal_complex.location == 'water': - cmd.select('Metal-W', 'Metal-W or id %s' % metal_complex.target_id) - if metal_complex.location.startswith('protein'): - cmd.select('tmp_t', 'tmp_t & %s' % self.protname) - cmd.select('Metal-P', 'Metal-P or (id %s & %s)' % (metal_complex.target_id, self.protname)) - if metal_complex.location == 'ligand': - cmd.select('tmp_t', 'tmp_t & %s' % self.ligname) - cmd.select('Metal-L', 'Metal-L or (id %s & %s)' % (metal_complex.target_id, self.ligname)) - cmd.distance('MetalComplexes', 'tmp_m', 'tmp_t') - cmd.delete('tmp_m or tmp_t') - if self.object_exists('MetalComplexes'): - cmd.set('dash_color', 'violetpurple', 'MetalComplexes') - cmd.set('dash_gap', 0.5, 'MetalComplexes') + cmd.select("tmp_m", "id %i" % metal_complex.metal_id) + cmd.select("tmp_t", "id %i" % metal_complex.target_id) + if metal_complex.location == "water": + cmd.select("Metal-W", "Metal-W or id %s" % metal_complex.target_id) + if metal_complex.location.startswith("protein"): + cmd.select("tmp_t", "tmp_t & %s" % self.protname) + cmd.select( + "Metal-P", + "Metal-P or (id %s & %s)" + % (metal_complex.target_id, self.protname), + ) + if metal_complex.location == "ligand": + cmd.select("tmp_t", "tmp_t & %s" % self.ligname) + cmd.select( + "Metal-L", + "Metal-L or (id %s & %s)" + % (metal_complex.target_id, self.ligname), + ) + cmd.distance("MetalComplexes", "tmp_m", "tmp_t") + cmd.delete("tmp_m or tmp_t") + if self.object_exists("MetalComplexes"): + cmd.set("dash_color", "violetpurple", "MetalComplexes") + cmd.set("dash_gap", 0.5, "MetalComplexes") # Show water molecules for metal complexes - cmd.show('spheres', 'Metal-W') - cmd.color('lightblue', 'Metal-W') + cmd.show("spheres", "Metal-W") + cmd.color("lightblue", "Metal-W") def selections_cleanup(self): """Cleans up non-used selections""" if not len(self.plcomplex.unpaired_hba_idx) == 0: - self.select_by_ids('Unpaired-HBA', self.plcomplex.unpaired_hba_idx, selection_exists=True) + self.select_by_ids( + "Unpaired-HBA", self.plcomplex.unpaired_hba_idx, selection_exists=True + ) if not len(self.plcomplex.unpaired_hbd_idx) == 0: - self.select_by_ids('Unpaired-HBD', self.plcomplex.unpaired_hbd_idx, selection_exists=True) + self.select_by_ids( + "Unpaired-HBD", self.plcomplex.unpaired_hbd_idx, selection_exists=True + ) if not len(self.plcomplex.unpaired_hal_idx) == 0: - self.select_by_ids('Unpaired-HAL', self.plcomplex.unpaired_hal_idx, selection_exists=True) + self.select_by_ids( + "Unpaired-HAL", self.plcomplex.unpaired_hal_idx, selection_exists=True + ) selections = cmd.get_names("selections") for selection in selections: @@ -281,38 +376,52 @@ class PyMOLVisualizer: if empty: cmd.delete(selection) cmd.deselect() - cmd.delete('tmp*') - cmd.delete('ps-*') + cmd.delete("tmp*") + cmd.delete("ps-*") def selections_group(self): """Group all selections""" - cmd.group('Structures', '%s %s %sCartoon' % (self.protname, self.ligname, self.protname)) - cmd.group('Interactions', 'Hydrophobic HBonds HalogenBonds WaterBridges PiCation PiStackingP PiStackingT ' - 'Saltbridges MetalComplexes') - cmd.group('Atoms', '') - cmd.group('Atoms.Protein', 'Hydrophobic-P HBondAccept-P HBondDonor-P HalogenAccept Centroids-P PiCatRing-P ' - 'StackRings-P PosCharge-P NegCharge-P AllBSRes Chargecenter-P Metal-P') - cmd.group('Atoms.Ligand', 'Hydrophobic-L HBondAccept-L HBondDonor-L HalogenDonor Centroids-L NegCharge-L ' - 'PosCharge-L NegCharge-L ChargeCenter-L StackRings-L PiCatRing-L Metal-L Metal-M ' - 'Unpaired-HBA Unpaired-HBD Unpaired-HAL Unpaired-RINGS') - cmd.group('Atoms.Other', 'Water Metal-W') - cmd.order('*', 'y') + cmd.group( + "Structures", + "%s %s %sCartoon" % (self.protname, self.ligname, self.protname), + ) + cmd.group( + "Interactions", + "Hydrophobic HBonds HalogenBonds WaterBridges PiCation PiStackingP PiStackingT " + "Saltbridges MetalComplexes", + ) + cmd.group("Atoms", "") + cmd.group( + "Atoms.Protein", + "Hydrophobic-P HBondAccept-P HBondDonor-P HalogenAccept Centroids-P PiCatRing-P " + "StackRings-P PosCharge-P NegCharge-P AllBSRes Chargecenter-P Metal-P", + ) + cmd.group( + "Atoms.Ligand", + "Hydrophobic-L HBondAccept-L HBondDonor-L HalogenDonor Centroids-L NegCharge-L " + "PosCharge-L NegCharge-L ChargeCenter-L StackRings-L PiCatRing-L Metal-L Metal-M " + "Unpaired-HBA Unpaired-HBD Unpaired-HAL Unpaired-RINGS", + ) + cmd.group("Atoms.Other", "Water Metal-W") + cmd.order("*", "y") def additional_cleanup(self): """Cleanup of various representations""" cmd.remove('not alt ""+A') # Remove alternate conformations - cmd.hide('labels', 'Interactions') # Hide labels of lines - cmd.disable('%sCartoon' % self.protname) - cmd.hide('everything', 'hydrogens') + cmd.hide("labels", "Interactions") # Hide labels of lines + cmd.disable("%sCartoon" % self.protname) + cmd.hide("everything", "hydrogens") def zoom_to_ligand(self): """Zoom in too ligand and its interactions.""" cmd.center(self.ligname) cmd.orient(self.ligname) - cmd.turn('x', 110) # If the ligand is aligned with the longest axis, aromatic rings are hidden - if 'AllBSRes' in cmd.get_names("selections"): - cmd.zoom('%s or AllBSRes' % self.ligname, 3) + cmd.turn( + "x", 110 + ) # If the ligand is aligned with the longest axis, aromatic rings are hidden + if "AllBSRes" in cmd.get_names("selections"): + cmd.zoom("%s or AllBSRes" % self.ligname, 3) else: if self.object_exists(self.ligname): cmd.zoom(self.ligname, 3) @@ -320,8 +429,10 @@ class PyMOLVisualizer: def save_session(self, outfolder, override=None): """Saves a PyMOL session file.""" - filename = '%s_%s' % (self.protname.upper(), "_".join( - [self.hetid, self.plcomplex.chain, self.plcomplex.position])) + filename = "%s_%s" % ( + self.protname.upper(), + "_".join([self.hetid, self.plcomplex.chain, self.plcomplex.position]), + ) if override is not None: filename = override cmd.save("/".join([outfolder, "%s.pse" % filename])) @@ -331,15 +442,15 @@ class PyMOLVisualizer: Use this function in case neither cmd.ray() or cmd.png() work. """ sys.stdout = sys.__stdout__ - cmd.feedback('disable', 'movie', 'everything') + cmd.feedback("disable", "movie", "everything") cmd.viewport(width, height) - cmd.zoom('visible', 1.5) # Adapt the zoom to the viewport - cmd.set('ray_trace_frames', 1) # Frames are raytraced before saving an image. + cmd.zoom("visible", 1.5) # Adapt the zoom to the viewport + cmd.set("ray_trace_frames", 1) # Frames are raytraced before saving an image. cmd.mpng(filepath, 1, 1) # Use batch png mode with 1 frame only cmd.mplay() # cmd.mpng needs the animation to 'run' cmd.refresh() - originalfile = "".join([filepath, '0001.png']) - newfile = "".join([filepath, '.png']) + originalfile = "".join([filepath, "0001.png"]) + newfile = "".join([filepath, ".png"]) ################################################# # Wait for file for max. 1 second and rename it # @@ -349,29 +460,56 @@ class PyMOLVisualizer: while not os.path.isfile(originalfile) and attempts <= 10: sleep(0.1) attempts += 1 - if os.name == 'nt': # In Windows, make sure there is no file of the same name, cannot be overwritten as in Unix + if ( + os.name == "nt" + ): # In Windows, make sure there is no file of the same name, cannot be overwritten as in Unix if os.path.isfile(newfile): os.remove(newfile) os.rename(originalfile, newfile) # Remove frame number in filename # Check if imagemagick is available and crop + resize the images - if subprocess.call("type convert", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0: + if ( + subprocess.call( + "type convert", + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + == 0 + ): attempts, ecode = 0, 1 # Check if file is truncated and wait if that's the case while ecode != 0 and attempts <= 10: - ecode = subprocess.call(['convert', newfile, '/dev/null'], stdout=open('/dev/null', 'w'), - stderr=subprocess.STDOUT) + ecode = subprocess.call( + ["convert", newfile, "/dev/null"], + stdout=open("/dev/null", "w"), + stderr=subprocess.STDOUT, + ) sleep(0.1) attempts += 1 - trim = 'convert -trim ' + newfile + ' -bordercolor White -border 20x20 ' + newfile + ';' # Trim the image + trim = ( + "convert -trim " + + newfile + + " -bordercolor White -border 20x20 " + + newfile + + ";" + ) # Trim the image os.system(trim) - getwidth = 'w=`convert ' + newfile + ' -ping -format "%w" info:`;' # Get the width of the new image - getheight = 'h=`convert ' + newfile + ' -ping -format "%h" info:`;' # Get the hight of the new image + getwidth = ( + "w=`convert " + newfile + ' -ping -format "%w" info:`;' + ) # Get the width of the new image + getheight = ( + "h=`convert " + newfile + ' -ping -format "%h" info:`;' + ) # Get the hight of the new image newres = 'if [ "$w" -gt "$h" ]; then newr="${w%.*}x$w"; else newr="${h%.*}x$h"; fi;' # Set quadratic ratio - quadratic = 'convert ' + newfile + ' -gravity center -extent "$newr" ' + newfile # Fill with whitespace + quadratic = ( + "convert " + newfile + ' -gravity center -extent "$newr" ' + newfile + ) # Fill with whitespace os.system(getwidth + getheight + newres + quadratic) else: - sys.stderr.write('Imagemagick not available. Images will not be resized or cropped.') + sys.stderr.write( + "Imagemagick not available. Images will not be resized or cropped." + ) def save_picture(self, outfolder, filename): """Saves a picture""" @@ -380,26 +518,26 @@ class PyMOLVisualizer: def set_fancy_ray(self): """Give the molecule a flat, modern look.""" - cmd.set('light_count', 6) - cmd.set('spec_count', 1.5) - cmd.set('shininess', 4) - cmd.set('specular', 0.3) - cmd.set('reflect', 1.6) - cmd.set('ambient', 0) - cmd.set('direct', 0) - cmd.set('ray_shadow', 0) # Gives the molecules a flat, modern look - cmd.set('ambient_occlusion_mode', 1) - cmd.set('ray_opaque_background', 0) # Transparent background + cmd.set("light_count", 6) + cmd.set("spec_count", 1.5) + cmd.set("shininess", 4) + cmd.set("specular", 0.3) + cmd.set("reflect", 1.6) + cmd.set("ambient", 0) + cmd.set("direct", 0) + cmd.set("ray_shadow", 0) # Gives the molecules a flat, modern look + cmd.set("ambient_occlusion_mode", 1) + cmd.set("ray_opaque_background", 0) # Transparent background def adapt_for_peptides(self): """Adapt visualization for peptide ligands and interchain contacts""" - cmd.hide('sticks', self.ligname) - cmd.set('cartoon_color', 'lightorange', self.ligname) - cmd.show('cartoon', self.ligname) - cmd.show('sticks', "byres *-L") + cmd.hide("sticks", self.ligname) + cmd.set("cartoon_color", "lightorange", self.ligname) + cmd.show("cartoon", self.ligname) + cmd.show("sticks", "byres *-L") cmd.util.cnc(self.ligname) - cmd.remove('%sCartoon and chain %s' % (self.protname, self.plcomplex.chain)) - cmd.set('cartoon_side_chain_helper', 0) + cmd.remove("%sCartoon and chain %s" % (self.protname, self.plcomplex.chain)) + cmd.set("cartoon_side_chain_helper", 0) def adapt_for_intra(self): """Adapt visualization for intra-protein interactions""" @@ -408,43 +546,55 @@ class PyMOLVisualizer: """Refinements for the visualization""" # Show sticks for all residues interacing with the ligand - cmd.select('AllBSRes', 'byres (Hydrophobic-P or HBondDonor-P or HBondAccept-P or PosCharge-P or NegCharge-P or ' - 'StackRings-P or PiCatRing-P or HalogenAcc or Metal-P)') - cmd.show('sticks', 'AllBSRes') + cmd.select( + "AllBSRes", + "byres (Hydrophobic-P or HBondDonor-P or HBondAccept-P or PosCharge-P or NegCharge-P or " + "StackRings-P or PiCatRing-P or HalogenAcc or Metal-P)", + ) + cmd.show("sticks", "AllBSRes") # Show spheres for the ring centroids - cmd.hide('everything', 'centroids*') - cmd.show('nb_spheres', 'centroids*') + cmd.hide("everything", "centroids*") + cmd.show("nb_spheres", "centroids*") # Show spheres for centers of charge - if self.object_exists('Chargecenter-P') or self.object_exists('Chargecenter-L'): - cmd.hide('nonbonded', 'chargecenter*') - cmd.show('spheres', 'chargecenter*') - cmd.set('sphere_scale', 0.4, 'chargecenter*') - cmd.color('yellow', 'chargecenter*') + if self.object_exists("Chargecenter-P") or self.object_exists("Chargecenter-L"): + cmd.hide("nonbonded", "chargecenter*") + cmd.show("spheres", "chargecenter*") + cmd.set("sphere_scale", 0.4, "chargecenter*") + cmd.color("yellow", "chargecenter*") - cmd.set('valence', 1) # Show bond valency (e.g. double bonds) + cmd.set("valence", 1) # Show bond valency (e.g. double bonds) # Optional cartoon representation of the protein - cmd.copy('%sCartoon' % self.protname, self.protname) - cmd.show('cartoon', '%sCartoon' % self.protname) - cmd.show('sticks', '%sCartoon' % self.protname) - cmd.set('stick_transparency', 1, '%sCartoon' % self.protname) + cmd.copy("%sCartoon" % self.protname, self.protname) + cmd.show("cartoon", "%sCartoon" % self.protname) + cmd.show("sticks", "%sCartoon" % self.protname) + cmd.set("stick_transparency", 1, "%sCartoon" % self.protname) # Resize water molecules. Sometimes they are not heteroatoms HOH, but part of the protein - cmd.set('sphere_scale', 0.2, 'resn HOH or Water') # Needs to be done here because of the copy made - cmd.set('sphere_transparency', 0.4, '!(resn HOH or Water)') - - if 'Centroids*' in cmd.get_names("selections"): - cmd.color('grey80', 'Centroids*') - cmd.hide('spheres', '%sCartoon' % self.protname) - cmd.hide('cartoon', '%sCartoon and resn DA+DG+DC+DU+DT+A+G+C+U+T' % self.protname) # Hide DNA/RNA Cartoon - if self.ligname == 'SF4': # Special case for iron-sulfur clusters, can't be visualized with sticks - cmd.show('spheres', '%s' % self.ligname) - - cmd.hide('everything', 'resn HOH &!Water') # Hide all non-interacting water molecules - cmd.hide('sticks', '%s and !%s and !AllBSRes' % - (self.protname, self.ligname)) # Hide all non-interacting residues - - if self.ligandtype in ['PEPTIDE', 'INTRA']: + cmd.set( + "sphere_scale", 0.2, "resn HOH or Water" + ) # Needs to be done here because of the copy made + cmd.set("sphere_transparency", 0.4, "!(resn HOH or Water)") + + if "Centroids*" in cmd.get_names("selections"): + cmd.color("grey80", "Centroids*") + cmd.hide("spheres", "%sCartoon" % self.protname) + cmd.hide( + "cartoon", "%sCartoon and resn DA+DG+DC+DU+DT+A+G+C+U+T" % self.protname + ) # Hide DNA/RNA Cartoon + if ( + self.ligname == "SF4" + ): # Special case for iron-sulfur clusters, can't be visualized with sticks + cmd.show("spheres", "%s" % self.ligname) + + cmd.hide( + "everything", "resn HOH &!Water" + ) # Hide all non-interacting water molecules + cmd.hide( + "sticks", "%s and !%s and !AllBSRes" % (self.protname, self.ligname) + ) # Hide all non-interacting residues + + if self.ligandtype in ["PEPTIDE", "INTRA"]: self.adapt_for_peptides() - if self.ligandtype == 'INTRA': + if self.ligandtype == "INTRA": self.adapt_for_intra() diff --git a/plip/visualization/visualize.py b/plip/visualization/visualize.py index 4cd7a14..a551798 100644 --- a/plip/visualization/visualize.py +++ b/plip/visualization/visualize.py @@ -20,54 +20,63 @@ def visualize_in_pymol(plcomplex): lig_members = plcomplex.lig_members chain = plcomplex.chain if config.PEPTIDES: - vis.ligname = 'PeptideChain%s' % plcomplex.chain + vis.ligname = "PeptideChain%s" % plcomplex.chain if config.INTRA is not None: - vis.ligname = 'Intra%s' % plcomplex.chain + vis.ligname = "Intra%s" % plcomplex.chain ligname = vis.ligname hetid = plcomplex.hetid metal_ids = plcomplex.metal_ids - metal_ids_str = '+'.join([str(i) for i in metal_ids]) + metal_ids_str = "+".join([str(i) for i in metal_ids]) ######################## # Basic visualizations # ######################## - start_pymol(run=True, options='-pcq', quiet=not config.VERBOSE and not config.SILENT) + start_pymol( + run=True, options="-pcq", quiet=not config.VERBOSE and not config.SILENT + ) vis.set_initial_representations() cmd.load(plcomplex.sourcefile) - current_name = cmd.get_object_list(selection='(all)')[0] - logger.debug(f'setting current_name to {current_name} and pdbid to {pdbid}') + current_name = cmd.get_object_list(selection="(all)")[0] + logger.debug(f"setting current_name to {current_name} and pdbid to {pdbid}") cmd.set_name(current_name, pdbid) - cmd.hide('everything', 'all') + cmd.hide("everything", "all") if config.PEPTIDES: - cmd.select(ligname, 'chain %s and not resn HOH' % plcomplex.chain) + cmd.select(ligname, "chain %s and not resn HOH" % plcomplex.chain) else: - cmd.select(ligname, 'resn %s and chain %s and resi %s*' % (hetid, chain, plcomplex.position)) - logger.debug(f'selecting ligand for PDBID {pdbid} and ligand name {ligname}') - logger.debug(f'resn {hetid} and chain {chain} and resi {plcomplex.position}') + cmd.select( + ligname, + "resn %s and chain %s and resi %s*" % (hetid, chain, plcomplex.position), + ) + logger.debug(f"selecting ligand for PDBID {pdbid} and ligand name {ligname}") + logger.debug(f"resn {hetid} and chain {chain} and resi {plcomplex.position}") # Visualize and color metal ions if there are any if not len(metal_ids) == 0: vis.select_by_ids(ligname, metal_ids, selection_exists=True) - cmd.show('spheres', 'id %s and %s' % (metal_ids_str, pdbid)) + cmd.show("spheres", "id %s and %s" % (metal_ids_str, pdbid)) # Additionally, select all members of composite ligands if len(lig_members) > 1: for member in lig_members: resid, chain, resnr = member[0], member[1], str(member[2]) - cmd.select(ligname, '%s or (resn %s and chain %s and resi %s)' % (ligname, resid, chain, resnr)) - - cmd.show('sticks', ligname) - cmd.color('myblue') - cmd.color('myorange', ligname) - cmd.util.cnc('all') + cmd.select( + ligname, + "%s or (resn %s and chain %s and resi %s)" + % (ligname, resid, chain, resnr), + ) + + cmd.show("sticks", ligname) + cmd.color("myblue") + cmd.color("myorange", ligname) + cmd.util.cnc("all") if not len(metal_ids) == 0: - cmd.color('hotpink', 'id %s' % metal_ids_str) - cmd.hide('sticks', 'id %s' % metal_ids_str) - cmd.set('sphere_scale', 0.3, ligname) + cmd.color("hotpink", "id %s" % metal_ids_str) + cmd.hide("sticks", "id %s" % metal_ids_str) + cmd.set("sphere_scale", 0.3, ligname) cmd.deselect() vis.make_initial_selections() @@ -91,9 +100,9 @@ def visualize_in_pymol(plcomplex): vis.additional_cleanup() if config.DNARECEPTOR: # Rename Cartoon selection to Line selection and change repr. - cmd.set_name('%sCartoon' % plcomplex.pdbid, '%sLines' % plcomplex.pdbid) - cmd.hide('cartoon', '%sLines' % plcomplex.pdbid) - cmd.show('lines', '%sLines' % plcomplex.pdbid) + cmd.set_name("%sCartoon" % plcomplex.pdbid, "%sLines" % plcomplex.pdbid) + cmd.hide("cartoon", "%sLines" % plcomplex.pdbid) + cmd.show("lines", "%sLines" % plcomplex.pdbid) if config.PEPTIDES: filename = "%s_PeptideChain%s" % (pdbid.upper(), plcomplex.chain) @@ -104,7 +113,10 @@ def visualize_in_pymol(plcomplex): if config.PYMOL: vis.save_session(config.OUTPATH, override=filename) else: - filename = '%s_%s' % (pdbid.upper(), "_".join([hetid, plcomplex.chain, plcomplex.position])) + filename = "%s_%s" % ( + pdbid.upper(), + "_".join([hetid, plcomplex.chain, plcomplex.position]), + ) if config.PYMOL: vis.save_session(config.OUTPATH) if config.PICS: diff --git a/scripts/get-best.py b/scripts/get-best.py index a10c2d2..c0185e4 100644 --- a/scripts/get-best.py +++ b/scripts/get-best.py @@ -11,13 +11,14 @@ version = "1.0" desc_text = "PyMol Quick Visualtion " + version parser = argparse.ArgumentParser(description=desc_text) -parser.add_argument("-p","--protein",help="Path to protein file") -parser.add_argument("-l","--ligand",help="Path to ligand_out file") +parser.add_argument("-p", "--protein", help="Path to protein file") +parser.add_argument("-l", "--ligand", help="Path to ligand_out file") args = parser.parse_args() + def li(s): - #log.info(s) + # log.info(s) None @@ -35,10 +36,10 @@ ligand = args.ligand session = pymol2.PyMOL() session.start() cmd = session.cmd -cmd.load(protein,'pro') -cmd.load(ligand,'lig') -cmd.split_states('lig') +cmd.load(protein, "pro") +cmd.load(ligand, "lig") +cmd.split_states("lig") -#fname = re.sub(r'^.*?/', '', protein.replace(".pdbqt","")) + "-" + re.sub(r'^.*?/', '', ligand.replace(".pdbqt","")) + ".pdb" +# fname = re.sub(r'^.*?/', '', protein.replace(".pdbqt","")) + "-" + re.sub(r'^.*?/', '', ligand.replace(".pdbqt","")) + ".pdb" -cmd.save("best.pdb","pro lig_0001") +cmd.save("best.pdb", "pro lig_0001") diff --git a/scripts/get_dock_score.py b/scripts/get_dock_score.py index c8b87d5..1cc0412 100644 --- a/scripts/get_dock_score.py +++ b/scripts/get_dock_score.py @@ -3,8 +3,8 @@ import argparse parser = argparse.ArgumentParser(description="Get Docking Score") -parser.add_argument("-p","--protein",help="Path to protein file") -parser.add_argument("-l","--ligand",help="Path to ligand_out file") +parser.add_argument("-p", "--protein", help="Path to protein file") +parser.add_argument("-l", "--ligand", help="Path to ligand_out file") args = parser.parse_args() @@ -21,7 +21,13 @@ ligand = args.ligand from os.path import basename -print("# " + str(basename(protein)).replace(".pdbqt","") + "-" + str(basename(ligand)).replace("_out.pdbqt",""), end="\n\n") +print( + "# " + + str(basename(protein)).replace(".pdbqt", "") + + "-" + + str(basename(ligand)).replace("_out.pdbqt", ""), + end="\n\n", +) from tabulate import tabulate @@ -31,7 +37,7 @@ results = [] i = 1 for line in lines: ta = [] - if line.find('REMARK VINA') == 0 and line.split()[3] != "": + if line.find("REMARK VINA") == 0 and line.split()[3] != "": l = line.split() ta.append(i) ta.append(l[3]) @@ -41,6 +47,6 @@ for line in lines: if ta != []: results.append(ta) -print("## Docking Scores",end="\n\n") -print(tabulate(results,headers=["No.","Affinity","rmsd l.b","rmsd u.b"])) -print("",end="\n\n") +print("## Docking Scores", end="\n\n") +print(tabulate(results, headers=["No.", "Affinity", "rmsd l.b", "rmsd u.b"])) +print("", end="\n\n") diff --git a/scripts/makeReport.py b/scripts/makeReport.py index 79d9c70..fc53381 100644 --- a/scripts/makeReport.py +++ b/scripts/makeReport.py @@ -1,8 +1,8 @@ #!/usr/bin/python3 -import argparse +import argparse parser = argparse.ArgumentParser(description="Make Report Helper Script") -parser.add_argument("-i","--input",help="Path to report folder") +parser.add_argument("-i", "--input", help="Path to report folder") args = parser.parse_args() @@ -11,147 +11,160 @@ if args.input == None: exit(1) path = args.input -#path = '/Users/navanchauhan/Desktop/nCOV-19/scripts/pymol/test/' +# path = '/Users/navanchauhan/Desktop/nCOV-19/scripts/pymol/test/' import untangle from tabulate import tabulate -#import sys -#report = path + "report.md" -#sys.stdout = open(report, 'w') +# import sys +# report = path + "report.md" +# sys.stdout = open(report, 'w') from os import listdir from os.path import isfile, join + onlyfiles = [f for f in listdir(path) if isfile(join(path, f))] image = "" for x in onlyfiles: - if '.png' in x and 'UNL' in x: + if ".png" in x and "UNL" in x: image = x import os -fname = os.path.join(path,'report.xml') + +fname = os.path.join(path, "report.xml") doc = untangle.parse(fname) -hi, hb, wb, sb, ps, pc, hab, mc = 0,0,0,0,0,0,0,0 +hi, hb, wb, sb, ps, pc, hab, mc = 0, 0, 0, 0, 0, 0, 0, 0 indexForUNL = 0 for x in doc.report.bindingsite: - if x.identifiers.longname.cdata == 'UNL': + if x.identifiers.longname.cdata == "UNL": break else: indexForUNL += 1 name = doc.report.pdbid.cdata -#print(("# " + (name.replace("_"," ")).replace("PROTEIN","")), end="\n\n") +# print(("# " + (name.replace("_"," ")).replace("PROTEIN","")), end="\n\n") -print("## Visualisation", end="\n\n") -print(f'![]({image})', end="\n\n") +print("## Visualisation", end="\n\n") +print(f"![]({image})", end="\n\n") print("## Interactions", end="\n\n") fallback = 0 try: - if doc.report.bindingsite[indexForUNL].interactions.hydrophobic_interactions.cdata == '': + if ( + doc.report.bindingsite[indexForUNL].interactions.hydrophobic_interactions.cdata + == "" + ): print("No Hydrophobic Interactions Found", end="\n\n") else: print("**Hydrophobic Interactions Found**", end="\n\n") hi = 1 except AttributeError: - fallback=1 + fallback = 1 -if fallback==0: - if doc.report.bindingsite[indexForUNL].interactions.hydrophobic_interactions.cdata == '': +if fallback == 0: + if ( + doc.report.bindingsite[indexForUNL].interactions.hydrophobic_interactions.cdata + == "" + ): print("No Hydrophobic Interactions Found", end="\n\n") else: print("**Hydrophobic Interactions Found**", end="\n\n") hi = 1 - if doc.report.bindingsite[indexForUNL].interactions.hydrogen_bonds.cdata == '': + if doc.report.bindingsite[indexForUNL].interactions.hydrogen_bonds.cdata == "": print("No Hydrogen Bonds Found", end="\n\n") else: print("**Hydrogen Bonds Found**", end="\n\n") hb = 1 - if doc.report.bindingsite[indexForUNL].interactions.water_bridges.cdata == '': + if doc.report.bindingsite[indexForUNL].interactions.water_bridges.cdata == "": print("No Water Bridges Found", end="\n\n") else: print("**Water Bridges Found**", end="\n\n") wb = 1 - if doc.report.bindingsite[indexForUNL].interactions.salt_bridges.cdata == '': + if doc.report.bindingsite[indexForUNL].interactions.salt_bridges.cdata == "": print("No Salt Bridges Found", end="\n\n") else: print("**Salt Bridges Found**", end="\n\n") sb = 1 - if doc.report.bindingsite[indexForUNL].interactions.pi_stacks.cdata == '': + if doc.report.bindingsite[indexForUNL].interactions.pi_stacks.cdata == "": print("No Pi Stacks Found", end="\n\n") else: print("**Pi Stacks Found**", end="\n\n") ps = 1 - if doc.report.bindingsite[indexForUNL].interactions.pi_cation_interactions.cdata == '': + if ( + doc.report.bindingsite[indexForUNL].interactions.pi_cation_interactions.cdata + == "" + ): print("No Pi Cation Interactions Found", end="\n\n") else: print("**Pi Cation Interactions Found**", end="\n\n") pc = 1 - if doc.report.bindingsite[indexForUNL].interactions.halogen_bonds.cdata == '': + if doc.report.bindingsite[indexForUNL].interactions.halogen_bonds.cdata == "": print("No Halogen Bonds Found", end="\n\n") else: print("** Halogen Bonds Found**", end="\n\n") hab = 1 - if doc.report.bindingsite[indexForUNL].interactions.metal_complexes.cdata == '': - print("No Metal Complexes Found", end="\n\n") + if doc.report.bindingsite[indexForUNL].interactions.metal_complexes.cdata == "": + print("No Metal Complexes Found", end="\n\n") else: print("**Metal Complexes Found**", end="\n\n") mc = 1 if fallback == 1: - if doc.report.bindingsite.interactions.hydrophobic_interactions.cdata == '': + if doc.report.bindingsite.interactions.hydrophobic_interactions.cdata == "": print("No Hydrophobic Interactions Found", end="\n\n") else: print("**Hydrophobic Interactions Found**", end="\n\n") hi = 1 - if doc.report.bindingsite.interactions.hydrogen_bonds.cdata == '': + if doc.report.bindingsite.interactions.hydrogen_bonds.cdata == "": print("No Hydrogen Bonds Found", end="\n\n") else: print("**Hydrogen Bonds Found**", end="\n\n") hb = 1 - if doc.report.bindingsite.interactions.water_bridges.cdata == '': + if doc.report.bindingsite.interactions.water_bridges.cdata == "": print("No Water Bridges Found", end="\n\n") else: print("**Water Bridges Found**", end="\n\n") wb = 1 - if doc.report.bindingsite.interactions.salt_bridges.cdata == '': + if doc.report.bindingsite.interactions.salt_bridges.cdata == "": print("No Salt Bridges Found", end="\n\n") else: print("**Salt Bridges Found**", end="\n\n") sb = 1 - if doc.report.bindingsite.interactions.pi_stacks.cdata == '': + if doc.report.bindingsite.interactions.pi_stacks.cdata == "": print("No Pi Stacks Found", end="\n\n") else: print("**Pi Stacks Found**", end="\n\n") ps = 1 - if doc.report.bindingsite.interactions.pi_cation_interactions.cdata == '': + if doc.report.bindingsite.interactions.pi_cation_interactions.cdata == "": print("No Pi Cation Interactions Found", end="\n\n") else: print("**Pi Cation Interactions Found**", end="\n\n") pc = 1 - if doc.report.bindingsite.interactions.halogen_bonds.cdata == '': + if doc.report.bindingsite.interactions.halogen_bonds.cdata == "": print("No Halogen Bonds Found", end="\n\n") else: print("** Halogen Bonds Found**", end="\n\n") hab = 1 - if doc.report.bindingsite.interactions.metal_complexes.cdata == '': - print("No Metal Complexes Found", end="\n\n") + if doc.report.bindingsite.interactions.metal_complexes.cdata == "": + print("No Metal Complexes Found", end="\n\n") else: print("**Metal Complexes Found**", end="\n\n") mc = 1 if fallback == 0: if hi == 1: - print("## Hydrophobic Interactions",end="\n\n") + print("## Hydrophobic Interactions", end="\n\n") tableBody = [] - tableHeaders = ['No.','Res.','AA','Dist','Ligand Atom','Proton Atom'] + tableHeaders = ["No.", "Res.", "AA", "Dist", "Ligand Atom", "Proton Atom"] i = 1 - for x in doc.report.bindingsite[indexForUNL].interactions.hydrophobic_interactions.hydrophobic_interaction: + for x in doc.report.bindingsite[ + indexForUNL + ].interactions.hydrophobic_interactions.hydrophobic_interaction: l = [] l.append(i) l.append(x.resnr.cdata) @@ -164,11 +177,24 @@ if fallback == 0: print(tabulate(tableBody, headers=tableHeaders), end="\n\n") if hb == 1: - print("## Hydrogen Bonds",end="\n\n") + print("## Hydrogen Bonds", end="\n\n") tableBody = [] - tableHeaders = ['No.','Res.','AA','Dist H-A','Dist D-A','Don Angle','Protisdon?','Sidechain?','D. Atom','A. Atom'] + tableHeaders = [ + "No.", + "Res.", + "AA", + "Dist H-A", + "Dist D-A", + "Don Angle", + "Protisdon?", + "Sidechain?", + "D. Atom", + "A. Atom", + ] i = 1 - for x in doc.report.bindingsite[indexForUNL].interactions.hydrogen_bonds.hydrogen_bond: + for x in doc.report.bindingsite[ + indexForUNL + ].interactions.hydrogen_bonds.hydrogen_bond: l = [] l.append(i) l.append(x.resnr.cdata) @@ -182,15 +208,25 @@ if fallback == 0: l.append((x.acceptoridx.cdata + "[" + x.acceptortype.cdata + "]")) i += 1 tableBody.append(l) - #print(i, x.resnr.cdata, x.restype.cdata, x.dist_h_a.cdata, x.dist_d_a.cdata, x.don_angle.cdata, x.protisdon.cdata, x.sidechain.cdata, (x.donoridx.cdata + "[" + x.donortype.cdata + "]"), (x.acceptoridx.cdata + "[" + x.acceptortype.cdata + "]")) + # print(i, x.resnr.cdata, x.restype.cdata, x.dist_h_a.cdata, x.dist_d_a.cdata, x.don_angle.cdata, x.protisdon.cdata, x.sidechain.cdata, (x.donoridx.cdata + "[" + x.donortype.cdata + "]"), (x.acceptoridx.cdata + "[" + x.acceptortype.cdata + "]")) print(tabulate(tableBody, headers=tableHeaders), end="\n\n") if sb == 1: - print("## Salt Bridges",end="\n\n") + print("## Salt Bridges", end="\n\n") tableBody = [] - tableHeaders = ['Index','Residue','AA','Distance','Protein positive?','Ligand Group','Ligand Atoms'] + tableHeaders = [ + "Index", + "Residue", + "AA", + "Distance", + "Protein positive?", + "Ligand Group", + "Ligand Atoms", + ] i = 1 - for x in doc.report.bindingsite[indexForUNL].interactions.salt_bridges.salt_bridge: + for x in doc.report.bindingsite[ + indexForUNL + ].interactions.salt_bridges.salt_bridge: l = [] l.append(i) l.append(x.resnr.cdata) @@ -206,12 +242,14 @@ if fallback == 0: tableBody.append(l) print(tabulate(tableBody, headers=tableHeaders), end="\n\n") - if pc==1: - print("## Pi Cation Interactions",end="\n\n") + if pc == 1: + print("## Pi Cation Interactions", end="\n\n") tableBody = [] - tableHeaders = ['Index','Residue','AA','Distance','Prot charged?','Atoms'] + tableHeaders = ["Index", "Residue", "AA", "Distance", "Prot charged?", "Atoms"] i = 1 - for x in doc.report.bindingsite[indexForUNL].interactions.pi_cation_interactions.pi_cation_interaction: + for x in doc.report.bindingsite[ + indexForUNL + ].interactions.pi_cation_interactions.pi_cation_interaction: l = [] l.append(i) l.append(x.resnr.cdata) @@ -228,11 +266,15 @@ if fallback == 0: print(tabulate(tableBody, headers=tableHeaders), end="\n\n") elif fallback == 1: if hi == 1: - print("## Hydrophobic Interactions",end="\n\n") + print("## Hydrophobic Interactions", end="\n\n") tableBody = [] - tableHeaders = ['No.','Res.','AA','Dist','Ligand Atom','Proton Atom'] + tableHeaders = ["No.", "Res.", "AA", "Dist", "Ligand Atom", "Proton Atom"] i = 1 - for x in doc.report.bindingsite.interactions.hydrophobic_interactions.hydrophobic_interaction: + for ( + x + ) in ( + doc.report.bindingsite.interactions.hydrophobic_interactions.hydrophobic_interaction + ): l = [] l.append(i) l.append(x.resnr.cdata) @@ -245,9 +287,20 @@ elif fallback == 1: print(tabulate(tableBody, headers=tableHeaders), end="\n\n") if hb == 1: - print("## Hydrogen Bonds",end="\n\n") + print("## Hydrogen Bonds", end="\n\n") tableBody = [] - tableHeaders = ['No.','Res.','AA','Dist H-A','Dist D-A','Don Angle','Protisdon?','Sidechain?','D. Atom','A. Atom'] + tableHeaders = [ + "No.", + "Res.", + "AA", + "Dist H-A", + "Dist D-A", + "Don Angle", + "Protisdon?", + "Sidechain?", + "D. Atom", + "A. Atom", + ] i = 1 for x in doc.report.bindingsite.interactions.hydrogen_bonds.hydrogen_bond: l = [] @@ -263,13 +316,21 @@ elif fallback == 1: l.append((x.acceptoridx.cdata + "[" + x.acceptortype.cdata + "]")) i += 1 tableBody.append(l) - #print(i, x.resnr.cdata, x.restype.cdata, x.dist_h_a.cdata, x.dist_d_a.cdata, x.don_angle.cdata, x.protisdon.cdata, x.sidechain.cdata, (x.donoridx.cdata + "[" + x.donortype.cdata + "]"), (x.acceptoridx.cdata + "[" + x.acceptortype.cdata + "]")) + # print(i, x.resnr.cdata, x.restype.cdata, x.dist_h_a.cdata, x.dist_d_a.cdata, x.don_angle.cdata, x.protisdon.cdata, x.sidechain.cdata, (x.donoridx.cdata + "[" + x.donortype.cdata + "]"), (x.acceptoridx.cdata + "[" + x.acceptortype.cdata + "]")) print(tabulate(tableBody, headers=tableHeaders), end="\n\n") if sb == 1: - print("## Salt Bridges",end="\n\n") + print("## Salt Bridges", end="\n\n") tableBody = [] - tableHeaders = ['Index','Residue','AA','Distance','Protein positive?','Ligand Group','Ligand Atoms'] + tableHeaders = [ + "Index", + "Residue", + "AA", + "Distance", + "Protein positive?", + "Ligand Group", + "Ligand Atoms", + ] i = 1 for x in doc.report.bindingsite.interactions.salt_bridges.salt_bridge: l = [] @@ -287,12 +348,16 @@ elif fallback == 1: tableBody.append(l) print(tabulate(tableBody, headers=tableHeaders), end="\n\n") - if pc==1: - print("## Pi Cation Interactions",end="\n\n") + if pc == 1: + print("## Pi Cation Interactions", end="\n\n") tableBody = [] - tableHeaders = ['Index','Residue','AA','Distance','Prot charged?','Atoms'] + tableHeaders = ["Index", "Residue", "AA", "Distance", "Prot charged?", "Atoms"] i = 1 - for x in doc.report.bindingsite.interactions.pi_cation_interactions.pi_cation_interaction: + for ( + x + ) in ( + doc.report.bindingsite.interactions.pi_cation_interactions.pi_cation_interaction + ): l = [] l.append(i) l.append(x.resnr.cdata) @@ -310,7 +375,7 @@ elif fallback == 1: print("## Figures", end="\n\n") -print(f'![](output-back.png)', end="\n\n") -print(f'![](output-front.png)', end="\n\n") -print(f'![](closeup-back.png)', end="\n\n") -print(f'![](closeup-front.png)', end="\n\n") +print(f"![](output-back.png)", end="\n\n") +print(f"![](output-front.png)", end="\n\n") +print(f"![](closeup-back.png)", end="\n\n") +print(f"![](closeup-front.png)", end="\n\n") diff --git a/scripts/quick-ligand-protein.py b/scripts/quick-ligand-protein.py index faff403..e211844 100644 --- a/scripts/quick-ligand-protein.py +++ b/scripts/quick-ligand-protein.py @@ -1,12 +1,14 @@ #!/usr/bin/python3 import argparse -#import logzero -#import logging -#from logzero import logger as log + +# import logzero +# import logging +# from logzero import logger as log import pymol2 import time import os + print(os.getcwd()) ################# @@ -17,7 +19,7 @@ startTime = time.time() version = "1.0" desc_text = "PyMol Quick Visualtion " + version ligandColor = "red" -#logzero.loglevel(logging.INFO) +# logzero.loglevel(logging.INFO) height = 1000 width = 800 dpi = 300 @@ -28,9 +30,9 @@ m1 = "target" m2 = "ligand" parser = argparse.ArgumentParser(description=desc_text) -parser.add_argument("-p","--protein",help="Path to protein file") -parser.add_argument("-l","--ligand",help="Path to ligand_out file") -parser.add_argument("-c","--color",help="Color for ligand in visualisation") +parser.add_argument("-p", "--protein", help="Path to protein file") +parser.add_argument("-l", "--ligand", help="Path to ligand_out file") +parser.add_argument("-c", "--color", help="Color for ligand in visualisation") args = parser.parse_args() @@ -47,38 +49,55 @@ protein = args.protein print("Protein: ", protein) ligand = args.ligand + def loadMol(filename, name): print("Loading " + filename + " as " + name) - cmd.load(filename,name) + cmd.load(filename, name) + + def changeColor(name, colorName): print("Changed " + name + "'s color to " + colorName) - cmd.color(colorName,name) + cmd.color(colorName, name) + + def orientEtZoom(): cmd.orient() cmd.zoom() + + def showSurface(name): - cmd.show("surface",name) + cmd.show("surface", name) + + def surfaceTransparency(amount): - print("Changed surface transparency to " + str(amount*100) + "%") - cmd.set("transparency",amount) -def generatePNG(filename,height=height,width=width,dpi=dpi,ray=ray): + print("Changed surface transparency to " + str(amount * 100) + "%") + cmd.set("transparency", amount) + + +def generatePNG(filename, height=height, width=width, dpi=dpi, ray=ray): print("Generating " + filename + ".png") - cmd.png(filename,height,width,dpi=dpi,ray=ray) + cmd.png(filename, height, width, dpi=dpi, ray=ray) + + def flipHorizontal(): - cmd.rotate("y",180) + cmd.rotate("y", 180) + + def zoomTo(name): cmd.zoom(name) + + def generatePictures(): - generatePNG('output-front') + generatePNG("output-front") flipHorizontal() - generatePNG('output-back') + generatePNG("output-back") zoomTo(m2) - generatePNG('closeup-back') + generatePNG("closeup-back") orientEtZoom() flipHorizontal() zoomTo(m2) - generatePNG('closeup-front') - + generatePNG("closeup-front") + print("Initialising PyMol") session = pymol2.PyMOL() @@ -86,10 +105,10 @@ print("Starting PyMol Session") session.start() cmd = session.cmd -loadMol(protein,m1) -loadMol(ligand,m2) -changeColor(m1,"grey60") -changeColor(m2,ligandColor) +loadMol(protein, m1) +loadMol(ligand, m2) +changeColor(m1, "grey60") +changeColor(m2, ligandColor) orientEtZoom() showSurface(m1) surfaceTransparency(0.6) @@ -97,4 +116,4 @@ surfaceTransparency(0.6) generatePictures() endTime = time.time() -print("Finished Execution in " + str(round((endTime - startTime),2)) + " seconds.") +print("Finished Execution in " + str(round((endTime - startTime), 2)) + " seconds.") |