From 4be08f7bdd77991e9e453c1cda863c3f20c338d5 Mon Sep 17 00:00:00 2001
From: Navan Chauhan <navanchauhan@gmail.com>
Date: Thu, 2 Jul 2020 20:48:33 +0530
Subject: initial commit

---
 plip/plipcmd.py | 310 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 310 insertions(+)
 create mode 100644 plip/plipcmd.py

(limited to 'plip/plipcmd.py')

diff --git a/plip/plipcmd.py b/plip/plipcmd.py
new file mode 100644
index 0000000..c941f42
--- /dev/null
+++ b/plip/plipcmd.py
@@ -0,0 +1,310 @@
+#! /usr/bin/env python
+"""
+Protein-Ligand Interaction Profiler - Analyze and visualize protein-ligand interactions in PDB files.
+plipcmd.py - Main script for PLIP command line execution.
+"""
+
+# system imports
+import argparse
+import logging
+import multiprocessing
+import os
+import sys
+from argparse import ArgumentParser
+from collections import namedtuple
+
+from plip.basic import config, logger
+
+logger = logger.get_logger()
+
+from plip.basic.config import __version__
+from plip.basic.parallel import parallel_fn
+from plip.basic.remote import VisualizerData
+from plip.exchange.report import StructureReport
+from plip.exchange.webservices import fetch_pdb
+from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid
+from plip.structure.preparation import tilde_expansion, PDBComplex
+
+description = f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" \
+              "is a command-line based tool to analyze interactions in a protein-ligand complex. " \
+              "If you are using PLIP in your work, please cite: " \
+              "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " \
+              "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" \
+              f"Supported and maintained by: {config.__maintainer__}"
+
+
+def threshold_limiter(aparser, arg):
+    arg = float(arg)
+    if arg <= 0:
+        aparser.error("All thresholds have to be values larger than zero.")
+    return arg
+
+
+def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'):
+    """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
+    if not as_string:
+        pdb_file_name = pdbfile.split('/')[-1]
+        startmessage = f'starting analysis of {pdb_file_name}'
+    else:
+        startmessage = 'starting analysis from STDIN'
+    logger.info(startmessage)
+    mol = PDBComplex()
+    mol.output_path = outpath
+    mol.load_pdb(pdbfile, as_string=as_string)
+    # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid)
+    for ligand in mol.ligands:
+        mol.characterize_complex(ligand)
+
+    create_folder_if_not_exists(outpath)
+
+    # Generate the report files
+    streport = StructureReport(mol, outputprefix=outputprefix)
+
+    config.MAXTHREADS = min(config.MAXTHREADS, len(mol.interaction_sets))
+
+    ######################################
+    # PyMOL Visualization (parallelized) #
+    ######################################
+
+    if config.PYMOL or config.PICS:
+        from plip.visualization.visualize import visualize_in_pymol
+        complexes = [VisualizerData(mol, site) for site in sorted(mol.interaction_sets)
+                     if not len(mol.interaction_sets[site].interacting_res) == 0]
+        if config.MAXTHREADS > 1:
+            logger.info(f'generating visualizations in parallel on {config.MAXTHREADS} cores')
+            parfn = parallel_fn(visualize_in_pymol)
+            parfn(complexes, processes=config.MAXTHREADS)
+        else:
+            [visualize_in_pymol(plcomplex) for plcomplex in complexes]
+
+    if config.XML:  # Generate report in xml format
+        streport.write_xml(as_string=config.STDOUT)
+
+    if config.TXT:  # Generate report in txt (rst) format
+        streport.write_txt(as_string=config.STDOUT)
+
+
+def download_structure(inputpdbid):
+    """Given a PDB ID, downloads the corresponding PDB structure.
+    Checks for validity of ID and handles error while downloading.
+    Returns the path of the downloaded file."""
+    try:
+        if len(inputpdbid) != 4 or extract_pdbid(inputpdbid.lower()) == 'UnknownProtein':
+            logger.error(f'invalid PDB-ID (wrong format): {inputpdbid}')
+            sys.exit(1)
+        pdbfile, pdbid = fetch_pdb(inputpdbid.lower())
+        pdbpath = tilde_expansion('%s/%s.pdb' % (config.BASEPATH.rstrip('/'), pdbid))
+        create_folder_if_not_exists(config.BASEPATH)
+        with open(pdbpath, 'w') as g:
+            g.write(pdbfile)
+        logger.info(f'file downloaded as {pdbpath}')
+        return pdbpath, pdbid
+
+    except ValueError:  # Invalid PDB ID, cannot fetch from RCBS server
+        logger.error(f'PDB-ID does not exist: {inputpdbid}')
+        sys.exit(1)
+
+
+def remove_duplicates(slist):
+    """Checks input lists for duplicates and returns
+    a list with unique entries"""
+    unique = list(set(slist))
+    difference = len(slist) - len(unique)
+    if difference == 1:
+        logger.info('removed one duplicate entry from input list')
+    if difference > 1:
+        logger.info(f'Removed {difference} duplicate entries from input list')
+    return unique
+
+
+def run_analysis(inputstructs, inputpdbids):
+    """Main function. Calls functions for processing, report generation and visualization."""
+    pdbid, pdbpath = None, None
+    # @todo For multiprocessing, implement better stacktracing for errors
+    # Print title and version
+    logger.info(f'Protein-Ligand Interaction Profiler (PLIP) {__version__}')
+    logger.info(f'brought to you by: {config.__maintainer__}')
+    logger.info(f'please cite: https://www.doi.org/10.1093/nar/gkv315')
+    output_prefix = config.OUTPUTFILENAME
+
+    if inputstructs is not None:  # Process PDB file(s)
+        num_structures = len(inputstructs)
+        inputstructs = remove_duplicates(inputstructs)
+        read_from_stdin = False
+        for inputstruct in inputstructs:
+            if inputstruct == '-':
+                inputstruct = sys.stdin.read()
+                read_from_stdin = True
+                if config.RAWSTRING:
+                    if sys.version_info < (3,):
+                        inputstruct = bytes(inputstruct).decode('unicode_escape')
+                    else:
+                        inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape')
+            else:
+                if os.path.getsize(inputstruct) == 0:
+                    logger.error('empty PDB file')
+                    sys.exit(1)
+                if num_structures > 1:
+                    basename = inputstruct.split('.')[-2].split('/')[-1]
+                    config.OUTPATH = '/'.join([config.BASEPATH, basename])
+                    output_prefix = 'report'
+            process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix)
+    else:  # Try to fetch the current PDB structure(s) directly from the RCBS server
+        num_pdbids = len(inputpdbids)
+        inputpdbids = remove_duplicates(inputpdbids)
+        for inputpdbid in inputpdbids:
+            pdbpath, pdbid = download_structure(inputpdbid)
+            if num_pdbids > 1:
+                config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()])
+                output_prefix = 'report'
+            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)
+
+    if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None:
+        if config.BASEPATH in ['.', './']:
+            logger.info('finished analysis, find the result files in the working directory')
+        else:
+            logger.info(f'finished analysis, find the result files in {config.BASEPATH}')
+
+
+if __name__ == '__main__':
+    """Parse command line arguments and start main script for analysis."""
+    parser = ArgumentParser(prog="PLIP", description=description)
+    pdbstructure = parser.add_mutually_exclusive_group(required=True)  # Needs either PDB ID or file
+    # '-' as file name reads from stdin
+    pdbstructure.add_argument("-f", "--file", dest="input", nargs="+", help="Set input file, '-' reads from stdin")
+    pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+")
+    outputgroup = parser.add_mutually_exclusive_group(required=False)  # Needs either outpath or stdout
+    outputgroup.add_argument("-o", "--out", dest="outpath", default="./")
+    outputgroup.add_argument("-O", "--stdout", dest="stdout", action="store_true", default=False,
+                             help="Write to stdout instead of file")
+    parser.add_argument("--rawstring", dest="use_raw_string", default=False, action="store_true",
+                        help="Use Python raw strings for stdout and stdin")
+    parser.add_argument("-v", "--verbose", dest="verbose", default=False, help="Turn on verbose mode",
+                        action="store_true")
+    parser.add_argument("-q", "--quiet", dest="quiet", default=False, help="Turn on quiet mode", action="store_true")
+    parser.add_argument("-s", "--silent", dest="silent", default=False, help="Turn on silent mode", action="store_true")
+    parser.add_argument("-p", "--pics", dest="pics", default=False, help="Additional pictures", action="store_true")
+    parser.add_argument("-x", "--xml", dest="xml", default=False, help="Generate report file in XML format",
+                        action="store_true")
+    parser.add_argument("-t", "--txt", dest="txt", default=False, help="Generate report file in TXT (RST) format",
+                        action="store_true")
+    parser.add_argument("-y", "--pymol", dest="pymol", default=False, help="Additional PyMOL session files",
+                        action="store_true")
+    parser.add_argument("--maxthreads", dest="maxthreads", default=multiprocessing.cpu_count(),
+                        help="Set maximum number of main threads (number of binding sites processed simultaneously)."
+                             "If not set, PLIP uses all available CPUs if possible.",
+                        type=int)
+    parser.add_argument("--breakcomposite", dest="breakcomposite", default=False,
+                        help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.",
+                        action="store_true")
+    parser.add_argument("--altlocation", dest="altlocation", default=False,
+                        help="Also consider alternate locations for atoms (e.g. alternate conformations).",
+                        action="store_true")
+    parser.add_argument("--nofix", dest="nofix", default=False,
+                        help="Turns off fixing of PDB files.",
+                        action="store_true")
+    parser.add_argument("--nofixfile", dest="nofixfile", default=False,
+                        help="Turns off writing files for fixed PDB files.",
+                        action="store_true")
+    parser.add_argument("--nopdbcanmap", dest="nopdbcanmap", default=False,
+                        help="Turns off calculation of mapping between canonical and PDB atom order for ligands.",
+                        action="store_true")
+    parser.add_argument("--dnareceptor", dest="dnareceptor", default=False,
+                        help="Uses the DNA instead of the protein as a receptor for interactions.",
+                        action="store_true")
+    parser.add_argument("--name", dest="outputfilename", default="report",
+                        help="Set a filename for the report TXT and XML files. Will only work when processing single structures.")
+    ligandtype = parser.add_mutually_exclusive_group()  # Either peptide/inter or intra mode
+    ligandtype.add_argument("--peptides", "--inter", dest="peptides", default=[],
+                            help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
+                            nargs="+")
+    ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.")
+    parser.add_argument("--keepmod", dest="keepmod", default=False,
+                        help="Keep modified residues as ligands",
+                        action="store_true")
+    parser.add_argument("--nohydro", dest="nohydro", default=False,
+                        help="Do not add polar hydrogens in case your structure already contains hydrogens.",
+                        action="store_true")
+    # Optional threshold arguments, not shown in help
+    thr = namedtuple('threshold', 'name type')
+    thresholds = [thr(name='aromatic_planarity', type='angle'),
+                  thr(name='hydroph_dist_max', type='distance'), thr(name='hbond_dist_max', type='distance'),
+                  thr(name='hbond_don_angle_min', type='angle'), thr(name='pistack_dist_max', type='distance'),
+                  thr(name='pistack_ang_dev', type='other'), thr(name='pistack_offset_max', type='distance'),
+                  thr(name='pication_dist_max', type='distance'), thr(name='saltbridge_dist_max', type='distance'),
+                  thr(name='halogen_dist_max', type='distance'), thr(name='halogen_acc_angle', type='angle'),
+                  thr(name='halogen_don_angle', type='angle'), thr(name='halogen_angle_dev', type='other'),
+                  thr(name='water_bridge_mindist', type='distance'), thr(name='water_bridge_maxdist', type='distance'),
+                  thr(name='water_bridge_omega_min', type='angle'), thr(name='water_bridge_omega_max', type='angle'),
+                  thr(name='water_bridge_theta_min', type='angle')]
+    for t in thresholds:
+        parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val),
+                            help=argparse.SUPPRESS)
+
+    arguments = parser.parse_args()
+
+    # configure log levels
+    config.VERBOSE = True if arguments.verbose else False
+    config.QUIET = True if arguments.quiet else False
+    config.SILENT = True if arguments.silent else False
+    if config.VERBOSE:
+        logger.setLevel(logging.DEBUG)
+    elif config.QUIET:
+        logger.setLevel(logging.WARN)
+    elif config.SILENT:
+        logger.setLevel(logging.CRITICAL)
+    else:
+        logger.setLevel(config.DEFAULT_LOG_LEVEL)
+
+    config.MAXTHREADS = arguments.maxthreads
+    config.XML = arguments.xml
+    config.TXT = arguments.txt
+    config.PICS = arguments.pics
+    config.PYMOL = arguments.pymol
+    config.STDOUT = arguments.stdout
+    config.RAWSTRING = arguments.use_raw_string
+    config.OUTPATH = arguments.outpath
+    config.OUTPATH = tilde_expansion("".join([config.OUTPATH, '/'])
+                                     if not config.OUTPATH.endswith('/') else config.OUTPATH)
+    config.BASEPATH = config.OUTPATH  # Used for batch processing
+    config.BREAKCOMPOSITE = arguments.breakcomposite
+    config.ALTLOC = arguments.altlocation
+    config.PEPTIDES = arguments.peptides
+    config.INTRA = arguments.intra
+    config.NOFIX = arguments.nofix
+    config.NOFIXFILE = arguments.nofixfile
+    config.NOPDBCANMAP = arguments.nopdbcanmap
+    config.KEEPMOD = arguments.keepmod
+    config.DNARECEPTOR = arguments.dnareceptor
+    config.OUTPUTFILENAME = arguments.outputfilename
+    config.NOHYDRO = arguments.nohydro
+    # Make sure we have pymol with --pics and --pymol
+    if config.PICS or config.PYMOL:
+        try:
+            import pymol
+        except ImportError:
+            logger.error('PyMOL is required for the --pics and --pymol option')
+            sys.exit(1)
+    # Assign values to global thresholds
+    for t in thresholds:
+        tvalue = getattr(arguments, t.name)
+        if tvalue is not None:
+            if t.type == 'angle' and not 0 < tvalue < 180:  # Check value for angle thresholds
+                parser.error("Threshold for angles need to have values within 0 and 180.")
+            if t.type == 'distance':
+                if tvalue > 10:  # Check value for angle thresholds
+                    parser.error("Threshold for distances must not be larger than 10 Angstrom.")
+                elif tvalue > config.BS_DIST + 1:  # Dynamically adapt the search space for binding site residues
+                    config.BS_DIST = tvalue + 1
+            setattr(config, t.name.upper(), tvalue)
+    # Check additional conditions for interdependent thresholds
+    if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV:
+        parser.error("The halogen acceptor angle has to be larger than the halogen angle deviation.")
+    if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV:
+        parser.error("The halogen donor angle has to be larger than the halogen angle deviation.")
+    if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST:
+        parser.error("The water bridge minimum distance has to be smaller than the water bridge maximum distance.")
+    if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
+        parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle")
+    expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None
+    run_analysis(expanded_path, arguments.pdbid)  # Start main script
-- 
cgit v1.2.3