plip/plipcmd.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486

#! /usr/bin/env python3
"""
Protein-Ligand Interaction Profiler - Analyze and visualize protein-ligand interactions in PDB files.
plipcmd.py - Main script for PLIP command line execution.
"""

# system imports
import argparse
import logging
import multiprocessing
import os
import sys
from argparse import ArgumentParser
from collections import namedtuple

from plip.basic import config, logger

logger = logger.get_logger()

from plip.basic.config import __version__
from plip.basic.parallel import parallel_fn
from plip.basic.remote import VisualizerData
from plip.exchange.report import StructureReport
from plip.exchange.webservices import fetch_pdb
from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid
from plip.structure.preparation import tilde_expansion, PDBComplex

description = (
    f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}"
    "is a command-line based tool to analyze interactions in a protein-ligand complex. "
    "If you are using PLIP in your work, please cite: "
    "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. "
    "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315"
    f"Supported and maintained by: {config.__maintainer__}"
)


def threshold_limiter(aparser, arg):
    arg = float(arg)
    if arg <= 0:
        aparser.error("All thresholds have to be values larger than zero.")
    return arg


def process_pdb(pdbfile, outpath, as_string=False, outputprefix="report"):
    """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
    if not as_string:
        pdb_file_name = pdbfile.split("/")[-1]
        startmessage = f"starting analysis of {pdb_file_name}"
    else:
        startmessage = "starting analysis from STDIN"
    logger.info(startmessage)
    mol = PDBComplex()
    mol.output_path = outpath
    mol.load_pdb(pdbfile, as_string=as_string)
    # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid)
    for ligand in mol.ligands:
        mol.characterize_complex(ligand)

    create_folder_if_not_exists(outpath)

    # Generate the report files
    streport = StructureReport(mol, outputprefix=outputprefix)

    config.MAXTHREADS = min(config.MAXTHREADS, len(mol.interaction_sets))

    ######################################
    # PyMOL Visualization (parallelized) #
    ######################################

    if config.PYMOL or config.PICS:
        from plip.visualization.visualize import visualize_in_pymol

        complexes = [
            VisualizerData(mol, site)
            for site in sorted(mol.interaction_sets)
            if not len(mol.interaction_sets[site].interacting_res) == 0
        ]
        if config.MAXTHREADS > 1:
            logger.info(
                f"generating visualizations in parallel on {config.MAXTHREADS} cores"
            )
            parfn = parallel_fn(visualize_in_pymol)
            parfn(complexes, processes=config.MAXTHREADS)
        else:
            [visualize_in_pymol(plcomplex) for plcomplex in complexes]

    if config.XML:  # Generate report in xml format
        streport.write_xml(as_string=config.STDOUT)

    if config.TXT:  # Generate report in txt (rst) format
        streport.write_txt(as_string=config.STDOUT)


def download_structure(inputpdbid):
    """Given a PDB ID, downloads the corresponding PDB structure.
    Checks for validity of ID and handles error while downloading.
    Returns the path of the downloaded file."""
    try:
        if (
            len(inputpdbid) != 4
            or extract_pdbid(inputpdbid.lower()) == "UnknownProtein"
        ):
            logger.error(f"invalid PDB-ID (wrong format): {inputpdbid}")
            sys.exit(1)
        pdbfile, pdbid = fetch_pdb(inputpdbid.lower())
        pdbpath = tilde_expansion("%s/%s.pdb" % (config.BASEPATH.rstrip("/"), pdbid))
        create_folder_if_not_exists(config.BASEPATH)
        with open(pdbpath, "w") as g:
            g.write(pdbfile)
        logger.info(f"file downloaded as {pdbpath}")
        return pdbpath, pdbid

    except ValueError:  # Invalid PDB ID, cannot fetch from RCBS server
        logger.error(f"PDB-ID does not exist: {inputpdbid}")
        sys.exit(1)


def remove_duplicates(slist):
    """Checks input lists for duplicates and returns
    a list with unique entries"""
    unique = list(set(slist))
    difference = len(slist) - len(unique)
    if difference == 1:
        logger.info("removed one duplicate entry from input list")
    if difference > 1:
        logger.info(f"Removed {difference} duplicate entries from input list")
    return unique


def run_analysis(inputstructs, inputpdbids):
    """Main function. Calls functions for processing, report generation and visualization."""
    pdbid, pdbpath = None, None
    # @todo For multiprocessing, implement better stacktracing for errors
    # Print title and version
    logger.info(f"Protein-Ligand Interaction Profiler (PLIP) {__version__}")
    logger.info(f"brought to you by: {config.__maintainer__}")
    logger.info(f"please cite: https://www.doi.org/10.1093/nar/gkv315")
    output_prefix = config.OUTPUTFILENAME

    if inputstructs is not None:  # Process PDB file(s)
        num_structures = len(inputstructs)
        inputstructs = remove_duplicates(inputstructs)
        read_from_stdin = False
        for inputstruct in inputstructs:
            if inputstruct == "-":
                inputstruct = sys.stdin.read()
                read_from_stdin = True
                if config.RAWSTRING:
                    if sys.version_info < (3,):
                        inputstruct = bytes(inputstruct).decode("unicode_escape")
                    else:
                        inputstruct = bytes(inputstruct, "utf8").decode(
                            "unicode_escape"
                        )
            else:
                if os.path.getsize(inputstruct) == 0:
                    logger.error("empty PDB file")
                    sys.exit(1)
                if num_structures > 1:
                    basename = inputstruct.split(".")[-2].split("/")[-1]
                    config.OUTPATH = "/".join([config.BASEPATH, basename])
                    output_prefix = "report"
            process_pdb(
                inputstruct,
                config.OUTPATH,
                as_string=read_from_stdin,
                outputprefix=output_prefix,
            )
    else:  # Try to fetch the current PDB structure(s) directly from the RCBS server
        num_pdbids = len(inputpdbids)
        inputpdbids = remove_duplicates(inputpdbids)
        for inputpdbid in inputpdbids:
            pdbpath, pdbid = download_structure(inputpdbid)
            if num_pdbids > 1:
                config.OUTPATH = "/".join(
                    [config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()]
                )
                output_prefix = "report"
            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)

    if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None:
        if config.BASEPATH in [".", "./"]:
            logger.info(
                "finished analysis, find the result files in the working directory"
            )
        else:
            logger.info(
                f"finished analysis, find the result files in {config.BASEPATH}"
            )


if __name__ == "__main__":
    """Parse command line arguments and start main script for analysis."""
    parser = ArgumentParser(prog="PLIP", description=description)
    pdbstructure = parser.add_mutually_exclusive_group(
        required=True
    )  # Needs either PDB ID or file
    # '-' as file name reads from stdin
    pdbstructure.add_argument(
        "-f",
        "--file",
        dest="input",
        nargs="+",
        help="Set input file, '-' reads from stdin",
    )
    pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+")
    outputgroup = parser.add_mutually_exclusive_group(
        required=False
    )  # Needs either outpath or stdout
    outputgroup.add_argument("-o", "--out", dest="outpath", default="./")
    outputgroup.add_argument(
        "-O",
        "--stdout",
        dest="stdout",
        action="store_true",
        default=False,
        help="Write to stdout instead of file",
    )
    parser.add_argument(
        "--rawstring",
        dest="use_raw_string",
        default=False,
        action="store_true",
        help="Use Python raw strings for stdout and stdin",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        dest="verbose",
        default=False,
        help="Turn on verbose mode",
        action="store_true",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        dest="quiet",
        default=False,
        help="Turn on quiet mode",
        action="store_true",
    )
    parser.add_argument(
        "-s",
        "--silent",
        dest="silent",
        default=False,
        help="Turn on silent mode",
        action="store_true",
    )
    parser.add_argument(
        "-p",
        "--pics",
        dest="pics",
        default=False,
        help="Additional pictures",
        action="store_true",
    )
    parser.add_argument(
        "-x",
        "--xml",
        dest="xml",
        default=False,
        help="Generate report file in XML format",
        action="store_true",
    )
    parser.add_argument(
        "-t",
        "--txt",
        dest="txt",
        default=False,
        help="Generate report file in TXT (RST) format",
        action="store_true",
    )
    parser.add_argument(
        "-y",
        "--pymol",
        dest="pymol",
        default=False,
        help="Additional PyMOL session files",
        action="store_true",
    )
    parser.add_argument(
        "--maxthreads",
        dest="maxthreads",
        default=multiprocessing.cpu_count(),
        help="Set maximum number of main threads (number of binding sites processed simultaneously)."
        "If not set, PLIP uses all available CPUs if possible.",
        type=int,
    )
    parser.add_argument(
        "--breakcomposite",
        dest="breakcomposite",
        default=False,
        help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.",
        action="store_true",
    )
    parser.add_argument(
        "--altlocation",
        dest="altlocation",
        default=False,
        help="Also consider alternate locations for atoms (e.g. alternate conformations).",
        action="store_true",
    )
    parser.add_argument(
        "--nofix",
        dest="nofix",
        default=False,
        help="Turns off fixing of PDB files.",
        action="store_true",
    )
    parser.add_argument(
        "--nofixfile",
        dest="nofixfile",
        default=False,
        help="Turns off writing files for fixed PDB files.",
        action="store_true",
    )
    parser.add_argument(
        "--nopdbcanmap",
        dest="nopdbcanmap",
        default=False,
        help="Turns off calculation of mapping between canonical and PDB atom order for ligands.",
        action="store_true",
    )
    parser.add_argument(
        "--dnareceptor",
        dest="dnareceptor",
        default=False,
        help="Uses the DNA instead of the protein as a receptor for interactions.",
        action="store_true",
    )
    parser.add_argument(
        "--name",
        dest="outputfilename",
        default="report",
        help="Set a filename for the report TXT and XML files. Will only work when processing single structures.",
    )
    ligandtype = (
        parser.add_mutually_exclusive_group()
    )  # Either peptide/inter or intra mode
    ligandtype.add_argument(
        "--peptides",
        "--inter",
        dest="peptides",
        default=[],
        help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
        nargs="+",
    )
    ligandtype.add_argument(
        "--intra",
        dest="intra",
        help="Allows to define one chain to analyze intra-chain contacts.",
    )
    parser.add_argument(
        "--keepmod",
        dest="keepmod",
        default=False,
        help="Keep modified residues as ligands",
        action="store_true",
    )
    parser.add_argument(
        "--nohydro",
        dest="nohydro",
        default=False,
        help="Do not add polar hydrogens in case your structure already contains hydrogens.",
        action="store_true",
    )
    # Optional threshold arguments, not shown in help
    thr = namedtuple("threshold", "name type")
    thresholds = [
        thr(name="aromatic_planarity", type="angle"),
        thr(name="hydroph_dist_max", type="distance"),
        thr(name="hbond_dist_max", type="distance"),
        thr(name="hbond_don_angle_min", type="angle"),
        thr(name="pistack_dist_max", type="distance"),
        thr(name="pistack_ang_dev", type="other"),
        thr(name="pistack_offset_max", type="distance"),
        thr(name="pication_dist_max", type="distance"),
        thr(name="saltbridge_dist_max", type="distance"),
        thr(name="halogen_dist_max", type="distance"),
        thr(name="halogen_acc_angle", type="angle"),
        thr(name="halogen_don_angle", type="angle"),
        thr(name="halogen_angle_dev", type="other"),
        thr(name="water_bridge_mindist", type="distance"),
        thr(name="water_bridge_maxdist", type="distance"),
        thr(name="water_bridge_omega_min", type="angle"),
        thr(name="water_bridge_omega_max", type="angle"),
        thr(name="water_bridge_theta_min", type="angle"),
    ]
    for t in thresholds:
        parser.add_argument(
            "--%s" % t.name,
            dest=t.name,
            type=lambda val: threshold_limiter(parser, val),
            help=argparse.SUPPRESS,
        )

    arguments = parser.parse_args()

    # configure log levels
    config.VERBOSE = True if arguments.verbose else False
    config.QUIET = True if arguments.quiet else False
    config.SILENT = True if arguments.silent else False
    if config.VERBOSE:
        logger.setLevel(logging.DEBUG)
    elif config.QUIET:
        logger.setLevel(logging.WARN)
    elif config.SILENT:
        logger.setLevel(logging.CRITICAL)
    else:
        logger.setLevel(config.DEFAULT_LOG_LEVEL)

    config.MAXTHREADS = arguments.maxthreads
    config.XML = arguments.xml
    config.TXT = arguments.txt
    config.PICS = arguments.pics
    config.PYMOL = arguments.pymol
    config.STDOUT = arguments.stdout
    config.RAWSTRING = arguments.use_raw_string
    config.OUTPATH = arguments.outpath
    config.OUTPATH = tilde_expansion(
        "".join([config.OUTPATH, "/"])
        if not config.OUTPATH.endswith("/")
        else config.OUTPATH
    )
    config.BASEPATH = config.OUTPATH  # Used for batch processing
    config.BREAKCOMPOSITE = arguments.breakcomposite
    config.ALTLOC = arguments.altlocation
    config.PEPTIDES = arguments.peptides
    config.INTRA = arguments.intra
    config.NOFIX = arguments.nofix
    config.NOFIXFILE = arguments.nofixfile
    config.NOPDBCANMAP = arguments.nopdbcanmap
    config.KEEPMOD = arguments.keepmod
    config.DNARECEPTOR = arguments.dnareceptor
    config.OUTPUTFILENAME = arguments.outputfilename
    config.NOHYDRO = arguments.nohydro
    # Make sure we have pymol with --pics and --pymol
    if config.PICS or config.PYMOL:
        try:
            import pymol
        except ImportError:
            logger.error("PyMOL is required for the --pics and --pymol option")
            sys.exit(1)
    # Assign values to global thresholds
    for t in thresholds:
        tvalue = getattr(arguments, t.name)
        if tvalue is not None:
            if (
                t.type == "angle" and not 0 < tvalue < 180
            ):  # Check value for angle thresholds
                parser.error(
                    "Threshold for angles need to have values within 0 and 180."
                )
            if t.type == "distance":
                if tvalue > 10:  # Check value for angle thresholds
                    parser.error(
                        "Threshold for distances must not be larger than 10 Angstrom."
                    )
                elif (
                    tvalue > config.BS_DIST + 1
                ):  # Dynamically adapt the search space for binding site residues
                    config.BS_DIST = tvalue + 1
            setattr(config, t.name.upper(), tvalue)
    # Check additional conditions for interdependent thresholds
    if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error(
            "The halogen acceptor angle has to be larger than the halogen angle deviation."
        )
    if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error(
            "The halogen donor angle has to be larger than the halogen angle deviation."
        )
    if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST:
        parser.error(
            "The water bridge minimum distance has to be smaller than the water bridge maximum distance."
        )
    if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
        parser.error(
            "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle"
        )
    expanded_path = (
        tilde_expansion(arguments.input) if arguments.input is not None else None
    )
    run_analysis(expanded_path, arguments.pdbid)  # Start main script