diff options
Diffstat (limited to 'plip/plipcmd.py')
-rw-r--r-- | plip/plipcmd.py | 487 |
1 files changed, 0 insertions, 487 deletions
diff --git a/plip/plipcmd.py b/plip/plipcmd.py deleted file mode 100644 index 6b4a884..0000000 --- a/plip/plipcmd.py +++ /dev/null @@ -1,487 +0,0 @@ -#! /usr/bin/env python -""" -Protein-Ligand Interaction Profiler - Analyze and visualize protein-ligand interactions in PDB files. -plipcmd.py - Main script for PLIP command line execution. -""" - -# system imports -import argparse -import logging -import multiprocessing -import os -import sys -from argparse import ArgumentParser -from collections import namedtuple - -from plip.basic import config, logger - -logger = logger.get_logger() - -from plip.basic.config import __version__ -from plip.basic.parallel import parallel_fn -from plip.basic.remote import VisualizerData -from plip.exchange.report import StructureReport -from plip.exchange.webservices import fetch_pdb -from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid -from plip.structure.preparation import tilde_expansion, PDBComplex - -description = ( - f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" - "is a command-line based tool to analyze interactions in a protein-ligand complex. " - "If you are using PLIP in your work, please cite: " - "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " - "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" - f"Supported and maintained by: {config.__maintainer__}" -) - - -def threshold_limiter(aparser, arg): - arg = float(arg) - if arg <= 0: - aparser.error("All thresholds have to be values larger than zero.") - return arg - - -def process_pdb(pdbfile, outpath, as_string=False, outputprefix="report"): - """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output.""" - if not as_string: - pdb_file_name = pdbfile.split("/")[-1] - startmessage = f"starting analysis of {pdb_file_name}" - else: - startmessage = "starting analysis from STDIN" - logger.info(startmessage) - mol = PDBComplex() - mol.output_path = outpath - mol.load_pdb(pdbfile, as_string=as_string) - # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid) - for ligand in mol.ligands: - mol.characterize_complex(ligand) - - create_folder_if_not_exists(outpath) - - # Generate the report files - streport = StructureReport(mol, outputprefix=outputprefix) - - config.MAXTHREADS = min(config.MAXTHREADS, len(mol.interaction_sets)) - - ###################################### - # PyMOL Visualization (parallelized) # - ###################################### - - if config.PYMOL or config.PICS: - from plip.visualization.visualize import visualize_in_pymol - - complexes = [ - VisualizerData(mol, site) - for site in sorted(mol.interaction_sets) - if not len(mol.interaction_sets[site].interacting_res) == 0 - ] - if config.MAXTHREADS > 1: - logger.info( - f"generating visualizations in parallel on {config.MAXTHREADS} cores" - ) - parfn = parallel_fn(visualize_in_pymol) - parfn(complexes, processes=config.MAXTHREADS) - else: - [visualize_in_pymol(plcomplex) for plcomplex in complexes] - - if config.XML: # Generate report in xml format - streport.write_xml(as_string=config.STDOUT) - - if config.TXT: # Generate report in txt (rst) format - streport.write_txt(as_string=config.STDOUT) - - -def download_structure(inputpdbid): - """Given a PDB ID, downloads the corresponding PDB structure. - Checks for validity of ID and handles error while downloading. - Returns the path of the downloaded file.""" - try: - if ( - len(inputpdbid) != 4 - or extract_pdbid(inputpdbid.lower()) == "UnknownProtein" - ): - logger.error(f"invalid PDB-ID (wrong format): {inputpdbid}") - sys.exit(1) - pdbfile, pdbid = fetch_pdb(inputpdbid.lower()) - pdbpath = tilde_expansion("%s/%s.pdb" % (config.BASEPATH.rstrip("/"), pdbid)) - create_folder_if_not_exists(config.BASEPATH) - with open(pdbpath, "w") as g: - g.write(pdbfile) - logger.info(f"file downloaded as {pdbpath}") - return pdbpath, pdbid - - except ValueError: # Invalid PDB ID, cannot fetch from RCBS server - logger.error(f"PDB-ID does not exist: {inputpdbid}") - sys.exit(1) - - -def remove_duplicates(slist): - """Checks input lists for duplicates and returns - a list with unique entries""" - unique = list(set(slist)) - difference = len(slist) - len(unique) - if difference == 1: - logger.info("removed one duplicate entry from input list") - if difference > 1: - logger.info(f"Removed {difference} duplicate entries from input list") - return unique - - -def run_analysis(inputstructs, inputpdbids): - """Main function. Calls functions for processing, report generation and visualization.""" - pdbid, pdbpath = None, None - # @todo For multiprocessing, implement better stacktracing for errors - # Print title and version - logger.info(f"Protein-Ligand Interaction Profiler (PLIP) {__version__}") - logger.info(f"brought to you by: {config.__maintainer__}") - logger.info(f"please cite: https://www.doi.org/10.1093/nar/gkv315") - output_prefix = config.OUTPUTFILENAME - - if inputstructs is not None: # Process PDB file(s) - num_structures = len(inputstructs) - inputstructs = remove_duplicates(inputstructs) - read_from_stdin = False - for inputstruct in inputstructs: - if inputstruct == "-": - inputstruct = sys.stdin.read() - read_from_stdin = True - if config.RAWSTRING: - if sys.version_info < (3,): - inputstruct = bytes(inputstruct).decode("unicode_escape") - else: - inputstruct = bytes(inputstruct, "utf8").decode( - "unicode_escape" - ) - else: - if os.path.getsize(inputstruct) == 0: - logger.error("empty PDB file") - sys.exit(1) - if num_structures > 1: - basename = inputstruct.split(".")[-2].split("/")[-1] - config.OUTPATH = "/".join([config.BASEPATH, basename]) - output_prefix = "report" - process_pdb( - inputstruct, - config.OUTPATH, - as_string=read_from_stdin, - outputprefix=output_prefix, - ) - else: # Try to fetch the current PDB structure(s) directly from the RCBS server - num_pdbids = len(inputpdbids) - inputpdbids = remove_duplicates(inputpdbids) - for inputpdbid in inputpdbids: - pdbpath, pdbid = download_structure(inputpdbid) - if num_pdbids > 1: - config.OUTPATH = "/".join( - [config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()] - ) - output_prefix = "report" - process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix) - - if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None: - if config.BASEPATH in [".", "./"]: - logger.info( - "finished analysis, find the result files in the working directory" - ) - else: - logger.info( - f"finished analysis, find the result files in {config.BASEPATH}" - ) - - -def main(): - """Parse command line arguments and start main script for analysis.""" - parser = ArgumentParser(prog="PLIP", description=description) - pdbstructure = parser.add_mutually_exclusive_group( - required=True - ) # Needs either PDB ID or file - # '-' as file name reads from stdin - pdbstructure.add_argument( - "-f", - "--file", - dest="input", - nargs="+", - help="Set input file, '-' reads from stdin", - ) - pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+") - outputgroup = parser.add_mutually_exclusive_group( - required=False - ) # Needs either outpath or stdout - outputgroup.add_argument("-o", "--out", dest="outpath", default="./") - outputgroup.add_argument( - "-O", - "--stdout", - dest="stdout", - action="store_true", - default=False, - help="Write to stdout instead of file", - ) - parser.add_argument( - "--rawstring", - dest="use_raw_string", - default=False, - action="store_true", - help="Use Python raw strings for stdin", - ) - parser.add_argument( - "-v", - "--verbose", - dest="verbose", - default=False, - help="Turn on verbose mode", - action="store_true", - ) - parser.add_argument( - "-q", - "--quiet", - dest="quiet", - default=False, - help="Turn on quiet mode", - action="store_true", - ) - parser.add_argument( - "-s", - "--silent", - dest="silent", - default=False, - help="Turn on silent mode", - action="store_true", - ) - parser.add_argument( - "-p", - "--pics", - dest="pics", - default=False, - help="Additional pictures", - action="store_true", - ) - parser.add_argument( - "-x", - "--xml", - dest="xml", - default=False, - help="Generate report file in XML format", - action="store_true", - ) - parser.add_argument( - "-t", - "--txt", - dest="txt", - default=False, - help="Generate report file in TXT (RST) format", - action="store_true", - ) - parser.add_argument( - "-y", - "--pymol", - dest="pymol", - default=False, - help="Additional PyMOL session files", - action="store_true", - ) - parser.add_argument( - "--maxthreads", - dest="maxthreads", - default=multiprocessing.cpu_count(), - help="Set maximum number of main threads (number of binding sites processed simultaneously)." - "If not set, PLIP uses all available CPUs if possible.", - type=int, - ) - parser.add_argument( - "--breakcomposite", - dest="breakcomposite", - default=False, - help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.", - action="store_true", - ) - parser.add_argument( - "--altlocation", - dest="altlocation", - default=False, - help="Also consider alternate locations for atoms (e.g. alternate conformations).", - action="store_true", - ) - parser.add_argument( - "--nofix", - dest="nofix", - default=False, - help="Turns off fixing of PDB files.", - action="store_true", - ) - parser.add_argument( - "--nofixfile", - dest="nofixfile", - default=False, - help="Turns off writing files for fixed PDB files.", - action="store_true", - ) - parser.add_argument( - "--nopdbcanmap", - dest="nopdbcanmap", - default=False, - help="Turns off calculation of mapping between canonical and PDB atom order for ligands.", - action="store_true", - ) - parser.add_argument( - "--dnareceptor", - dest="dnareceptor", - default=False, - help="Uses the DNA instead of the protein as a receptor for interactions.", - action="store_true", - ) - parser.add_argument( - "--name", - dest="outputfilename", - default="report", - help="Set a filename for the report TXT and XML files. Will only work when processing single structures.", - ) - ligandtype = ( - parser.add_mutually_exclusive_group() - ) # Either peptide/inter or intra mode - ligandtype.add_argument( - "--peptides", - "--inter", - dest="peptides", - default=[], - help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", - nargs="+", - ) - ligandtype.add_argument( - "--intra", - dest="intra", - help="Allows to define one chain to analyze intra-chain contacts.", - ) - parser.add_argument( - "--keepmod", - dest="keepmod", - default=False, - help="Keep modified residues as ligands", - action="store_true", - ) - parser.add_argument( - "--nohydro", - dest="nohydro", - default=False, - help="Do not add polar hydrogens in case your structure already contains hydrogens.", - action="store_true", - ) - # Optional threshold arguments, not shown in help - thr = namedtuple("threshold", "name type") - thresholds = [ - thr(name="aromatic_planarity", type="angle"), - thr(name="hydroph_dist_max", type="distance"), - thr(name="hbond_dist_max", type="distance"), - thr(name="hbond_don_angle_min", type="angle"), - thr(name="pistack_dist_max", type="distance"), - thr(name="pistack_ang_dev", type="other"), - thr(name="pistack_offset_max", type="distance"), - thr(name="pication_dist_max", type="distance"), - thr(name="saltbridge_dist_max", type="distance"), - thr(name="halogen_dist_max", type="distance"), - thr(name="halogen_acc_angle", type="angle"), - thr(name="halogen_don_angle", type="angle"), - thr(name="halogen_angle_dev", type="other"), - thr(name="water_bridge_mindist", type="distance"), - thr(name="water_bridge_maxdist", type="distance"), - thr(name="water_bridge_omega_min", type="angle"), - thr(name="water_bridge_omega_max", type="angle"), - thr(name="water_bridge_theta_min", type="angle"), - ] - for t in thresholds: - parser.add_argument( - "--%s" % t.name, - dest=t.name, - type=lambda val: threshold_limiter(parser, val), - help=argparse.SUPPRESS, - ) - arguments = parser.parse_args() - # configure log levels - config.VERBOSE = True if arguments.verbose else False - config.QUIET = True if arguments.quiet else False - config.SILENT = True if arguments.silent else False - if config.VERBOSE: - logger.setLevel(logging.DEBUG) - elif config.QUIET: - logger.setLevel(logging.WARN) - elif config.SILENT: - logger.setLevel(logging.CRITICAL) - else: - logger.setLevel(config.DEFAULT_LOG_LEVEL) - config.MAXTHREADS = arguments.maxthreads - config.XML = arguments.xml - config.TXT = arguments.txt - config.PICS = arguments.pics - config.PYMOL = arguments.pymol - config.STDOUT = arguments.stdout - config.RAWSTRING = arguments.use_raw_string - config.OUTPATH = arguments.outpath - config.OUTPATH = tilde_expansion( - "".join([config.OUTPATH, "/"]) - if not config.OUTPATH.endswith("/") - else config.OUTPATH - ) - config.BASEPATH = config.OUTPATH # Used for batch processing - config.BREAKCOMPOSITE = arguments.breakcomposite - config.ALTLOC = arguments.altlocation - config.PEPTIDES = arguments.peptides - config.INTRA = arguments.intra - config.NOFIX = arguments.nofix - config.NOFIXFILE = arguments.nofixfile - config.NOPDBCANMAP = arguments.nopdbcanmap - config.KEEPMOD = arguments.keepmod - config.DNARECEPTOR = arguments.dnareceptor - config.OUTPUTFILENAME = arguments.outputfilename - config.NOHYDRO = arguments.nohydro - # Make sure we have pymol with --pics and --pymol - if config.PICS or config.PYMOL: - try: - import pymol - except ImportError: - logger.error("PyMOL is required for the --pics and --pymol option") - sys.exit(1) - # Assign values to global thresholds - for t in thresholds: - tvalue = getattr(arguments, t.name) - if tvalue is not None: - if ( - t.type == "angle" and not 0 < tvalue < 180 - ): # Check value for angle thresholds - parser.error( - "Threshold for angles need to have values within 0 and 180." - ) - if t.type == "distance": - if tvalue > 10: # Check value for angle thresholds - parser.error( - "Threshold for distances must not be larger than 10 Angstrom." - ) - elif ( - tvalue > config.BS_DIST + 1 - ): # Dynamically adapt the search space for binding site residues - config.BS_DIST = tvalue + 1 - setattr(config, t.name.upper(), tvalue) - # Check additional conditions for interdependent thresholds - if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV: - parser.error( - "The halogen acceptor angle has to be larger than the halogen angle deviation." - ) - if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV: - parser.error( - "The halogen donor angle has to be larger than the halogen angle deviation." - ) - if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST: - parser.error( - "The water bridge minimum distance has to be smaller than the water bridge maximum distance." - ) - if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX: - parser.error( - "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle" - ) - expanded_path = ( - tilde_expansion(arguments.input) if arguments.input is not None else None - ) - run_analysis(expanded_path, arguments.pdbid) # Start main script - - -if __name__ == "__main__": - main() |