#! /usr/bin/env python3 """ Protein-Ligand Interaction Profiler - Analyze and visualize protein-ligand interactions in PDB files. plipcmd.py - Main script for PLIP command line execution. """ # system imports import argparse import logging import multiprocessing import os import sys from argparse import ArgumentParser from collections import namedtuple from plip.basic import config, logger logger = logger.get_logger() from plip.basic.config import __version__ from plip.basic.parallel import parallel_fn from plip.basic.remote import VisualizerData from plip.exchange.report import StructureReport from plip.exchange.webservices import fetch_pdb from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid from plip.structure.preparation import tilde_expansion, PDBComplex description = ( f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" "is a command-line based tool to analyze interactions in a protein-ligand complex. " "If you are using PLIP in your work, please cite: " "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" f"Supported and maintained by: {config.__maintainer__}" ) def threshold_limiter(aparser, arg): arg = float(arg) if arg <= 0: aparser.error("All thresholds have to be values larger than zero.") return arg def process_pdb(pdbfile, outpath, as_string=False, outputprefix="report"): """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output.""" if not as_string: pdb_file_name = pdbfile.split("/")[-1] startmessage = f"starting analysis of {pdb_file_name}" else: startmessage = "starting analysis from STDIN" logger.info(startmessage) mol = PDBComplex() mol.output_path = outpath mol.load_pdb(pdbfile, as_string=as_string) # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid) for ligand in mol.ligands: mol.characterize_complex(ligand) create_folder_if_not_exists(outpath) # Generate the report files streport = StructureReport(mol, outputprefix=outputprefix) config.MAXTHREADS = min(config.MAXTHREADS, len(mol.interaction_sets)) ###################################### # PyMOL Visualization (parallelized) # ###################################### if config.PYMOL or config.PICS: from plip.visualization.visualize import visualize_in_pymol complexes = [ VisualizerData(mol, site) for site in sorted(mol.interaction_sets) if not len(mol.interaction_sets[site].interacting_res) == 0 ] if config.MAXTHREADS > 1: logger.info( f"generating visualizations in parallel on {config.MAXTHREADS} cores" ) parfn = parallel_fn(visualize_in_pymol) parfn(complexes, processes=config.MAXTHREADS) else: [visualize_in_pymol(plcomplex) for plcomplex in complexes] if config.XML: # Generate report in xml format streport.write_xml(as_string=config.STDOUT) if config.TXT: # Generate report in txt (rst) format streport.write_txt(as_string=config.STDOUT) def download_structure(inputpdbid): """Given a PDB ID, downloads the corresponding PDB structure. Checks for validity of ID and handles error while downloading. Returns the path of the downloaded file.""" try: if ( len(inputpdbid) != 4 or extract_pdbid(inputpdbid.lower()) == "UnknownProtein" ): logger.error(f"invalid PDB-ID (wrong format): {inputpdbid}") sys.exit(1) pdbfile, pdbid = fetch_pdb(inputpdbid.lower()) pdbpath = tilde_expansion("%s/%s.pdb" % (config.BASEPATH.rstrip("/"), pdbid)) create_folder_if_not_exists(config.BASEPATH) with open(pdbpath, "w") as g: g.write(pdbfile) logger.info(f"file downloaded as {pdbpath}") return pdbpath, pdbid except ValueError: # Invalid PDB ID, cannot fetch from RCBS server logger.error(f"PDB-ID does not exist: {inputpdbid}") sys.exit(1) def remove_duplicates(slist): """Checks input lists for duplicates and returns a list with unique entries""" unique = list(set(slist)) difference = len(slist) - len(unique) if difference == 1: logger.info("removed one duplicate entry from input list") if difference > 1: logger.info(f"Removed {difference} duplicate entries from input list") return unique def run_analysis(inputstructs, inputpdbids): """Main function. Calls functions for processing, report generation and visualization.""" pdbid, pdbpath = None, None # @todo For multiprocessing, implement better stacktracing for errors # Print title and version logger.info(f"Protein-Ligand Interaction Profiler (PLIP) {__version__}") logger.info(f"brought to you by: {config.__maintainer__}") logger.info(f"please cite: https://www.doi.org/10.1093/nar/gkv315") output_prefix = config.OUTPUTFILENAME if inputstructs is not None: # Process PDB file(s) num_structures = len(inputstructs) inputstructs = remove_duplicates(inputstructs) read_from_stdin = False for inputstruct in inputstructs: if inputstruct == "-": inputstruct = sys.stdin.read() read_from_stdin = True if config.RAWSTRING: if sys.version_info < (3,): inputstruct = bytes(inputstruct).decode("unicode_escape") else: inputstruct = bytes(inputstruct, "utf8").decode( "unicode_escape" ) else: if os.path.getsize(inputstruct) == 0: logger.error("empty PDB file") sys.exit(1) if num_structures > 1: basename = inputstruct.split(".")[-2].split("/")[-1] config.OUTPATH = "/".join([config.BASEPATH, basename]) output_prefix = "report" process_pdb( inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix, ) else: # Try to fetch the current PDB structure(s) directly from the RCBS server num_pdbids = len(inputpdbids) inputpdbids = remove_duplicates(inputpdbids) for inputpdbid in inputpdbids: pdbpath, pdbid = download_structure(inputpdbid) if num_pdbids > 1: config.OUTPATH = "/".join( [config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()] ) output_prefix = "report" process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix) if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None: if config.BASEPATH in [".", "./"]: logger.info( "finished analysis, find the result files in the working directory" ) else: logger.info( f"finished analysis, find the result files in {config.BASEPATH}" ) if __name__ == "__main__": """Parse command line arguments and start main script for analysis.""" parser = ArgumentParser(prog="PLIP", description=description) pdbstructure = parser.add_mutually_exclusive_group( required=True ) # Needs either PDB ID or file # '-' as file name reads from stdin pdbstructure.add_argument( "-f", "--file", dest="input", nargs="+", help="Set input file, '-' reads from stdin", ) pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+") outputgroup = parser.add_mutually_exclusive_group( required=False ) # Needs either outpath or stdout outputgroup.add_argument("-o", "--out", dest="outpath", default="./") outputgroup.add_argument( "-O", "--stdout", dest="stdout", action="store_true", default=False, help="Write to stdout instead of file", ) parser.add_argument( "--rawstring", dest="use_raw_string", default=False, action="store_true", help="Use Python raw strings for stdout and stdin", ) parser.add_argument( "-v", "--verbose", dest="verbose", default=False, help="Turn on verbose mode", action="store_true", ) parser.add_argument( "-q", "--quiet", dest="quiet", default=False, help="Turn on quiet mode", action="store_true", ) parser.add_argument( "-s", "--silent", dest="silent", default=False, help="Turn on silent mode", action="store_true", ) parser.add_argument( "-p", "--pics", dest="pics", default=False, help="Additional pictures", action="store_true", ) parser.add_argument( "-x", "--xml", dest="xml", default=False, help="Generate report file in XML format", action="store_true", ) parser.add_argument( "-t", "--txt", dest="txt", default=False, help="Generate report file in TXT (RST) format", action="store_true", ) parser.add_argument( "-y", "--pymol", dest="pymol", default=False, help="Additional PyMOL session files", action="store_true", ) parser.add_argument( "--maxthreads", dest="maxthreads", default=multiprocessing.cpu_count(), help="Set maximum number of main threads (number of binding sites processed simultaneously)." "If not set, PLIP uses all available CPUs if possible.", type=int, ) parser.add_argument( "--breakcomposite", dest="breakcomposite", default=False, help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.", action="store_true", ) parser.add_argument( "--altlocation", dest="altlocation", default=False, help="Also consider alternate locations for atoms (e.g. alternate conformations).", action="store_true", ) parser.add_argument( "--nofix", dest="nofix", default=False, help="Turns off fixing of PDB files.", action="store_true", ) parser.add_argument( "--nofixfile", dest="nofixfile", default=False, help="Turns off writing files for fixed PDB files.", action="store_true", ) parser.add_argument( "--nopdbcanmap", dest="nopdbcanmap", default=False, help="Turns off calculation of mapping between canonical and PDB atom order for ligands.", action="store_true", ) parser.add_argument( "--dnareceptor", dest="dnareceptor", default=False, help="Uses the DNA instead of the protein as a receptor for interactions.", action="store_true", ) parser.add_argument( "--name", dest="outputfilename", default="report", help="Set a filename for the report TXT and XML files. Will only work when processing single structures.", ) ligandtype = ( parser.add_mutually_exclusive_group() ) # Either peptide/inter or intra mode ligandtype.add_argument( "--peptides", "--inter", dest="peptides", default=[], help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", nargs="+", ) ligandtype.add_argument( "--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.", ) parser.add_argument( "--keepmod", dest="keepmod", default=False, help="Keep modified residues as ligands", action="store_true", ) parser.add_argument( "--nohydro", dest="nohydro", default=False, help="Do not add polar hydrogens in case your structure already contains hydrogens.", action="store_true", ) # Optional threshold arguments, not shown in help thr = namedtuple("threshold", "name type") thresholds = [ thr(name="aromatic_planarity", type="angle"), thr(name="hydroph_dist_max", type="distance"), thr(name="hbond_dist_max", type="distance"), thr(name="hbond_don_angle_min", type="angle"), thr(name="pistack_dist_max", type="distance"), thr(name="pistack_ang_dev", type="other"), thr(name="pistack_offset_max", type="distance"), thr(name="pication_dist_max", type="distance"), thr(name="saltbridge_dist_max", type="distance"), thr(name="halogen_dist_max", type="distance"), thr(name="halogen_acc_angle", type="angle"), thr(name="halogen_don_angle", type="angle"), thr(name="halogen_angle_dev", type="other"), thr(name="water_bridge_mindist", type="distance"), thr(name="water_bridge_maxdist", type="distance"), thr(name="water_bridge_omega_min", type="angle"), thr(name="water_bridge_omega_max", type="angle"), thr(name="water_bridge_theta_min", type="angle"), ] for t in thresholds: parser.add_argument( "--%s" % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val), help=argparse.SUPPRESS, ) arguments = parser.parse_args() # configure log levels config.VERBOSE = True if arguments.verbose else False config.QUIET = True if arguments.quiet else False config.SILENT = True if arguments.silent else False if config.VERBOSE: logger.setLevel(logging.DEBUG) elif config.QUIET: logger.setLevel(logging.WARN) elif config.SILENT: logger.setLevel(logging.CRITICAL) else: logger.setLevel(config.DEFAULT_LOG_LEVEL) config.MAXTHREADS = arguments.maxthreads config.XML = arguments.xml config.TXT = arguments.txt config.PICS = arguments.pics config.PYMOL = arguments.pymol config.STDOUT = arguments.stdout config.RAWSTRING = arguments.use_raw_string config.OUTPATH = arguments.outpath config.OUTPATH = tilde_expansion( "".join([config.OUTPATH, "/"]) if not config.OUTPATH.endswith("/") else config.OUTPATH ) config.BASEPATH = config.OUTPATH # Used for batch processing config.BREAKCOMPOSITE = arguments.breakcomposite config.ALTLOC = arguments.altlocation config.PEPTIDES = arguments.peptides config.INTRA = arguments.intra config.NOFIX = arguments.nofix config.NOFIXFILE = arguments.nofixfile config.NOPDBCANMAP = arguments.nopdbcanmap config.KEEPMOD = arguments.keepmod config.DNARECEPTOR = arguments.dnareceptor config.OUTPUTFILENAME = arguments.outputfilename config.NOHYDRO = arguments.nohydro # Make sure we have pymol with --pics and --pymol if config.PICS or config.PYMOL: try: import pymol except ImportError: logger.error("PyMOL is required for the --pics and --pymol option") sys.exit(1) # Assign values to global thresholds for t in thresholds: tvalue = getattr(arguments, t.name) if tvalue is not None: if ( t.type == "angle" and not 0 < tvalue < 180 ): # Check value for angle thresholds parser.error( "Threshold for angles need to have values within 0 and 180." ) if t.type == "distance": if tvalue > 10: # Check value for angle thresholds parser.error( "Threshold for distances must not be larger than 10 Angstrom." ) elif ( tvalue > config.BS_DIST + 1 ): # Dynamically adapt the search space for binding site residues config.BS_DIST = tvalue + 1 setattr(config, t.name.upper(), tvalue) # Check additional conditions for interdependent thresholds if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV: parser.error( "The halogen acceptor angle has to be larger than the halogen angle deviation." ) if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV: parser.error( "The halogen donor angle has to be larger than the halogen angle deviation." ) if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST: parser.error( "The water bridge minimum distance has to be smaller than the water bridge maximum distance." ) if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX: parser.error( "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle" ) expanded_path = ( tilde_expansion(arguments.input) if arguments.input is not None else None ) run_analysis(expanded_path, arguments.pdbid) # Start main script