diff options
Diffstat (limited to 'plip/plipcmd.py')
-rw-r--r-- | plip/plipcmd.py | 410 |
1 files changed, 293 insertions, 117 deletions
diff --git a/plip/plipcmd.py b/plip/plipcmd.py index f7dffab..6b4a884 100644 --- a/plip/plipcmd.py +++ b/plip/plipcmd.py @@ -25,12 +25,14 @@ from plip.exchange.webservices import fetch_pdb from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid from plip.structure.preparation import tilde_expansion, PDBComplex -description = f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" \ - "is a command-line based tool to analyze interactions in a protein-ligand complex. " \ - "If you are using PLIP in your work, please cite: " \ - "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " \ - "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" \ - f"Supported and maintained by: {config.__maintainer__}" +description = ( + f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}" + "is a command-line based tool to analyze interactions in a protein-ligand complex. " + "If you are using PLIP in your work, please cite: " + "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. " + "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315" + f"Supported and maintained by: {config.__maintainer__}" +) def threshold_limiter(aparser, arg): @@ -40,13 +42,13 @@ def threshold_limiter(aparser, arg): return arg -def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): +def process_pdb(pdbfile, outpath, as_string=False, outputprefix="report"): """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output.""" if not as_string: - pdb_file_name = pdbfile.split('/')[-1] - startmessage = f'starting analysis of {pdb_file_name}' + pdb_file_name = pdbfile.split("/")[-1] + startmessage = f"starting analysis of {pdb_file_name}" else: - startmessage = 'starting analysis from STDIN' + startmessage = "starting analysis from STDIN" logger.info(startmessage) mol = PDBComplex() mol.output_path = outpath @@ -68,10 +70,16 @@ def process_pdb(pdbfile, outpath, as_string=False, outputprefix='report'): if config.PYMOL or config.PICS: from plip.visualization.visualize import visualize_in_pymol - complexes = [VisualizerData(mol, site) for site in sorted(mol.interaction_sets) - if not len(mol.interaction_sets[site].interacting_res) == 0] + + complexes = [ + VisualizerData(mol, site) + for site in sorted(mol.interaction_sets) + if not len(mol.interaction_sets[site].interacting_res) == 0 + ] if config.MAXTHREADS > 1: - logger.info(f'generating visualizations in parallel on {config.MAXTHREADS} cores') + logger.info( + f"generating visualizations in parallel on {config.MAXTHREADS} cores" + ) parfn = parallel_fn(visualize_in_pymol) parfn(complexes, processes=config.MAXTHREADS) else: @@ -89,19 +97,22 @@ def download_structure(inputpdbid): Checks for validity of ID and handles error while downloading. Returns the path of the downloaded file.""" try: - if len(inputpdbid) != 4 or extract_pdbid(inputpdbid.lower()) == 'UnknownProtein': - logger.error(f'invalid PDB-ID (wrong format): {inputpdbid}') + if ( + len(inputpdbid) != 4 + or extract_pdbid(inputpdbid.lower()) == "UnknownProtein" + ): + logger.error(f"invalid PDB-ID (wrong format): {inputpdbid}") sys.exit(1) pdbfile, pdbid = fetch_pdb(inputpdbid.lower()) - pdbpath = tilde_expansion('%s/%s.pdb' % (config.BASEPATH.rstrip('/'), pdbid)) + pdbpath = tilde_expansion("%s/%s.pdb" % (config.BASEPATH.rstrip("/"), pdbid)) create_folder_if_not_exists(config.BASEPATH) - with open(pdbpath, 'w') as g: + with open(pdbpath, "w") as g: g.write(pdbfile) - logger.info(f'file downloaded as {pdbpath}') + logger.info(f"file downloaded as {pdbpath}") return pdbpath, pdbid except ValueError: # Invalid PDB ID, cannot fetch from RCBS server - logger.error(f'PDB-ID does not exist: {inputpdbid}') + logger.error(f"PDB-ID does not exist: {inputpdbid}") sys.exit(1) @@ -111,9 +122,9 @@ def remove_duplicates(slist): unique = list(set(slist)) difference = len(slist) - len(unique) if difference == 1: - logger.info('removed one duplicate entry from input list') + logger.info("removed one duplicate entry from input list") if difference > 1: - logger.info(f'Removed {difference} duplicate entries from input list') + logger.info(f"Removed {difference} duplicate entries from input list") return unique @@ -122,9 +133,9 @@ def run_analysis(inputstructs, inputpdbids): pdbid, pdbpath = None, None # @todo For multiprocessing, implement better stacktracing for errors # Print title and version - logger.info(f'Protein-Ligand Interaction Profiler (PLIP) {__version__}') - logger.info(f'brought to you by: {config.__maintainer__}') - logger.info(f'please cite: https://www.doi.org/10.1093/nar/gkv315') + logger.info(f"Protein-Ligand Interaction Profiler (PLIP) {__version__}") + logger.info(f"brought to you by: {config.__maintainer__}") + logger.info(f"please cite: https://www.doi.org/10.1093/nar/gkv315") output_prefix = config.OUTPUTFILENAME if inputstructs is not None: # Process PDB file(s) @@ -132,114 +143,258 @@ def run_analysis(inputstructs, inputpdbids): inputstructs = remove_duplicates(inputstructs) read_from_stdin = False for inputstruct in inputstructs: - if inputstruct == '-': + if inputstruct == "-": inputstruct = sys.stdin.read() read_from_stdin = True if config.RAWSTRING: if sys.version_info < (3,): - inputstruct = bytes(inputstruct).decode('unicode_escape') + inputstruct = bytes(inputstruct).decode("unicode_escape") else: - inputstruct = bytes(inputstruct, 'utf8').decode('unicode_escape') + inputstruct = bytes(inputstruct, "utf8").decode( + "unicode_escape" + ) else: if os.path.getsize(inputstruct) == 0: - logger.error('empty PDB file') + logger.error("empty PDB file") sys.exit(1) if num_structures > 1: - basename = inputstruct.split('.')[-2].split('/')[-1] - config.OUTPATH = '/'.join([config.BASEPATH, basename]) - output_prefix = 'report' - process_pdb(inputstruct, config.OUTPATH, as_string=read_from_stdin, outputprefix=output_prefix) + basename = inputstruct.split(".")[-2].split("/")[-1] + config.OUTPATH = "/".join([config.BASEPATH, basename]) + output_prefix = "report" + process_pdb( + inputstruct, + config.OUTPATH, + as_string=read_from_stdin, + outputprefix=output_prefix, + ) else: # Try to fetch the current PDB structure(s) directly from the RCBS server num_pdbids = len(inputpdbids) inputpdbids = remove_duplicates(inputpdbids) for inputpdbid in inputpdbids: pdbpath, pdbid = download_structure(inputpdbid) if num_pdbids > 1: - config.OUTPATH = '/'.join([config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()]) - output_prefix = 'report' + config.OUTPATH = "/".join( + [config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()] + ) + output_prefix = "report" process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix) if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None: - if config.BASEPATH in ['.', './']: - logger.info('finished analysis, find the result files in the working directory') + if config.BASEPATH in [".", "./"]: + logger.info( + "finished analysis, find the result files in the working directory" + ) else: - logger.info(f'finished analysis, find the result files in {config.BASEPATH}') + logger.info( + f"finished analysis, find the result files in {config.BASEPATH}" + ) def main(): """Parse command line arguments and start main script for analysis.""" parser = ArgumentParser(prog="PLIP", description=description) - pdbstructure = parser.add_mutually_exclusive_group(required=True) # Needs either PDB ID or file + pdbstructure = parser.add_mutually_exclusive_group( + required=True + ) # Needs either PDB ID or file # '-' as file name reads from stdin - pdbstructure.add_argument("-f", "--file", dest="input", nargs="+", help="Set input file, '-' reads from stdin") + pdbstructure.add_argument( + "-f", + "--file", + dest="input", + nargs="+", + help="Set input file, '-' reads from stdin", + ) pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+") - outputgroup = parser.add_mutually_exclusive_group(required=False) # Needs either outpath or stdout + outputgroup = parser.add_mutually_exclusive_group( + required=False + ) # Needs either outpath or stdout outputgroup.add_argument("-o", "--out", dest="outpath", default="./") - outputgroup.add_argument("-O", "--stdout", dest="stdout", action="store_true", default=False, - help="Write to stdout instead of file") - parser.add_argument("--rawstring", dest="use_raw_string", default=False, action="store_true", - help="Use Python raw strings for stdin") - parser.add_argument("-v", "--verbose", dest="verbose", default=False, help="Turn on verbose mode", - action="store_true") - parser.add_argument("-q", "--quiet", dest="quiet", default=False, help="Turn on quiet mode", action="store_true") - parser.add_argument("-s", "--silent", dest="silent", default=False, help="Turn on silent mode", action="store_true") - parser.add_argument("-p", "--pics", dest="pics", default=False, help="Additional pictures", action="store_true") - parser.add_argument("-x", "--xml", dest="xml", default=False, help="Generate report file in XML format", - action="store_true") - parser.add_argument("-t", "--txt", dest="txt", default=False, help="Generate report file in TXT (RST) format", - action="store_true") - parser.add_argument("-y", "--pymol", dest="pymol", default=False, help="Additional PyMOL session files", - action="store_true") - parser.add_argument("--maxthreads", dest="maxthreads", default=multiprocessing.cpu_count(), - help="Set maximum number of main threads (number of binding sites processed simultaneously)." - "If not set, PLIP uses all available CPUs if possible.", - type=int) - parser.add_argument("--breakcomposite", dest="breakcomposite", default=False, - help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.", - action="store_true") - parser.add_argument("--altlocation", dest="altlocation", default=False, - help="Also consider alternate locations for atoms (e.g. alternate conformations).", - action="store_true") - parser.add_argument("--nofix", dest="nofix", default=False, - help="Turns off fixing of PDB files.", - action="store_true") - parser.add_argument("--nofixfile", dest="nofixfile", default=False, - help="Turns off writing files for fixed PDB files.", - action="store_true") - parser.add_argument("--nopdbcanmap", dest="nopdbcanmap", default=False, - help="Turns off calculation of mapping between canonical and PDB atom order for ligands.", - action="store_true") - parser.add_argument("--dnareceptor", dest="dnareceptor", default=False, - help="Uses the DNA instead of the protein as a receptor for interactions.", - action="store_true") - parser.add_argument("--name", dest="outputfilename", default="report", - help="Set a filename for the report TXT and XML files. Will only work when processing single structures.") - ligandtype = parser.add_mutually_exclusive_group() # Either peptide/inter or intra mode - ligandtype.add_argument("--peptides", "--inter", dest="peptides", default=[], - help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", - nargs="+") - ligandtype.add_argument("--intra", dest="intra", help="Allows to define one chain to analyze intra-chain contacts.") - parser.add_argument("--keepmod", dest="keepmod", default=False, - help="Keep modified residues as ligands", - action="store_true") - parser.add_argument("--nohydro", dest="nohydro", default=False, - help="Do not add polar hydrogens in case your structure already contains hydrogens.", - action="store_true") + outputgroup.add_argument( + "-O", + "--stdout", + dest="stdout", + action="store_true", + default=False, + help="Write to stdout instead of file", + ) + parser.add_argument( + "--rawstring", + dest="use_raw_string", + default=False, + action="store_true", + help="Use Python raw strings for stdin", + ) + parser.add_argument( + "-v", + "--verbose", + dest="verbose", + default=False, + help="Turn on verbose mode", + action="store_true", + ) + parser.add_argument( + "-q", + "--quiet", + dest="quiet", + default=False, + help="Turn on quiet mode", + action="store_true", + ) + parser.add_argument( + "-s", + "--silent", + dest="silent", + default=False, + help="Turn on silent mode", + action="store_true", + ) + parser.add_argument( + "-p", + "--pics", + dest="pics", + default=False, + help="Additional pictures", + action="store_true", + ) + parser.add_argument( + "-x", + "--xml", + dest="xml", + default=False, + help="Generate report file in XML format", + action="store_true", + ) + parser.add_argument( + "-t", + "--txt", + dest="txt", + default=False, + help="Generate report file in TXT (RST) format", + action="store_true", + ) + parser.add_argument( + "-y", + "--pymol", + dest="pymol", + default=False, + help="Additional PyMOL session files", + action="store_true", + ) + parser.add_argument( + "--maxthreads", + dest="maxthreads", + default=multiprocessing.cpu_count(), + help="Set maximum number of main threads (number of binding sites processed simultaneously)." + "If not set, PLIP uses all available CPUs if possible.", + type=int, + ) + parser.add_argument( + "--breakcomposite", + dest="breakcomposite", + default=False, + help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.", + action="store_true", + ) + parser.add_argument( + "--altlocation", + dest="altlocation", + default=False, + help="Also consider alternate locations for atoms (e.g. alternate conformations).", + action="store_true", + ) + parser.add_argument( + "--nofix", + dest="nofix", + default=False, + help="Turns off fixing of PDB files.", + action="store_true", + ) + parser.add_argument( + "--nofixfile", + dest="nofixfile", + default=False, + help="Turns off writing files for fixed PDB files.", + action="store_true", + ) + parser.add_argument( + "--nopdbcanmap", + dest="nopdbcanmap", + default=False, + help="Turns off calculation of mapping between canonical and PDB atom order for ligands.", + action="store_true", + ) + parser.add_argument( + "--dnareceptor", + dest="dnareceptor", + default=False, + help="Uses the DNA instead of the protein as a receptor for interactions.", + action="store_true", + ) + parser.add_argument( + "--name", + dest="outputfilename", + default="report", + help="Set a filename for the report TXT and XML files. Will only work when processing single structures.", + ) + ligandtype = ( + parser.add_mutually_exclusive_group() + ) # Either peptide/inter or intra mode + ligandtype.add_argument( + "--peptides", + "--inter", + dest="peptides", + default=[], + help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts", + nargs="+", + ) + ligandtype.add_argument( + "--intra", + dest="intra", + help="Allows to define one chain to analyze intra-chain contacts.", + ) + parser.add_argument( + "--keepmod", + dest="keepmod", + default=False, + help="Keep modified residues as ligands", + action="store_true", + ) + parser.add_argument( + "--nohydro", + dest="nohydro", + default=False, + help="Do not add polar hydrogens in case your structure already contains hydrogens.", + action="store_true", + ) # Optional threshold arguments, not shown in help - thr = namedtuple('threshold', 'name type') - thresholds = [thr(name='aromatic_planarity', type='angle'), - thr(name='hydroph_dist_max', type='distance'), thr(name='hbond_dist_max', type='distance'), - thr(name='hbond_don_angle_min', type='angle'), thr(name='pistack_dist_max', type='distance'), - thr(name='pistack_ang_dev', type='other'), thr(name='pistack_offset_max', type='distance'), - thr(name='pication_dist_max', type='distance'), thr(name='saltbridge_dist_max', type='distance'), - thr(name='halogen_dist_max', type='distance'), thr(name='halogen_acc_angle', type='angle'), - thr(name='halogen_don_angle', type='angle'), thr(name='halogen_angle_dev', type='other'), - thr(name='water_bridge_mindist', type='distance'), thr(name='water_bridge_maxdist', type='distance'), - thr(name='water_bridge_omega_min', type='angle'), thr(name='water_bridge_omega_max', type='angle'), - thr(name='water_bridge_theta_min', type='angle')] + thr = namedtuple("threshold", "name type") + thresholds = [ + thr(name="aromatic_planarity", type="angle"), + thr(name="hydroph_dist_max", type="distance"), + thr(name="hbond_dist_max", type="distance"), + thr(name="hbond_don_angle_min", type="angle"), + thr(name="pistack_dist_max", type="distance"), + thr(name="pistack_ang_dev", type="other"), + thr(name="pistack_offset_max", type="distance"), + thr(name="pication_dist_max", type="distance"), + thr(name="saltbridge_dist_max", type="distance"), + thr(name="halogen_dist_max", type="distance"), + thr(name="halogen_acc_angle", type="angle"), + thr(name="halogen_don_angle", type="angle"), + thr(name="halogen_angle_dev", type="other"), + thr(name="water_bridge_mindist", type="distance"), + thr(name="water_bridge_maxdist", type="distance"), + thr(name="water_bridge_omega_min", type="angle"), + thr(name="water_bridge_omega_max", type="angle"), + thr(name="water_bridge_theta_min", type="angle"), + ] for t in thresholds: - parser.add_argument('--%s' % t.name, dest=t.name, type=lambda val: threshold_limiter(parser, val), - help=argparse.SUPPRESS) + parser.add_argument( + "--%s" % t.name, + dest=t.name, + type=lambda val: threshold_limiter(parser, val), + help=argparse.SUPPRESS, + ) arguments = parser.parse_args() # configure log levels config.VERBOSE = True if arguments.verbose else False @@ -261,8 +416,11 @@ def main(): config.STDOUT = arguments.stdout config.RAWSTRING = arguments.use_raw_string config.OUTPATH = arguments.outpath - config.OUTPATH = tilde_expansion("".join([config.OUTPATH, '/']) - if not config.OUTPATH.endswith('/') else config.OUTPATH) + config.OUTPATH = tilde_expansion( + "".join([config.OUTPATH, "/"]) + if not config.OUTPATH.endswith("/") + else config.OUTPATH + ) config.BASEPATH = config.OUTPATH # Used for batch processing config.BREAKCOMPOSITE = arguments.breakcomposite config.ALTLOC = arguments.altlocation @@ -280,32 +438,50 @@ def main(): try: import pymol except ImportError: - logger.error('PyMOL is required for the --pics and --pymol option') + logger.error("PyMOL is required for the --pics and --pymol option") sys.exit(1) # Assign values to global thresholds for t in thresholds: tvalue = getattr(arguments, t.name) if tvalue is not None: - if t.type == 'angle' and not 0 < tvalue < 180: # Check value for angle thresholds - parser.error("Threshold for angles need to have values within 0 and 180.") - if t.type == 'distance': + if ( + t.type == "angle" and not 0 < tvalue < 180 + ): # Check value for angle thresholds + parser.error( + "Threshold for angles need to have values within 0 and 180." + ) + if t.type == "distance": if tvalue > 10: # Check value for angle thresholds - parser.error("Threshold for distances must not be larger than 10 Angstrom.") - elif tvalue > config.BS_DIST + 1: # Dynamically adapt the search space for binding site residues + parser.error( + "Threshold for distances must not be larger than 10 Angstrom." + ) + elif ( + tvalue > config.BS_DIST + 1 + ): # Dynamically adapt the search space for binding site residues config.BS_DIST = tvalue + 1 setattr(config, t.name.upper(), tvalue) # Check additional conditions for interdependent thresholds if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV: - parser.error("The halogen acceptor angle has to be larger than the halogen angle deviation.") + parser.error( + "The halogen acceptor angle has to be larger than the halogen angle deviation." + ) if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV: - parser.error("The halogen donor angle has to be larger than the halogen angle deviation.") + parser.error( + "The halogen donor angle has to be larger than the halogen angle deviation." + ) if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST: - parser.error("The water bridge minimum distance has to be smaller than the water bridge maximum distance.") + parser.error( + "The water bridge minimum distance has to be smaller than the water bridge maximum distance." + ) if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX: - parser.error("The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle") - expanded_path = tilde_expansion(arguments.input) if arguments.input is not None else None + parser.error( + "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle" + ) + expanded_path = ( + tilde_expansion(arguments.input) if arguments.input is not None else None + ) run_analysis(expanded_path, arguments.pdbid) # Start main script -if __name__ == '__main__': +if __name__ == "__main__": main() |