#! /usr/bin/env python3
"""
Protein-Ligand Interaction Profiler - Analyze and visualize protein-ligand interactions in PDB files.
plipcmd.py - Main script for PLIP command line execution.
"""

# system imports
import argparse
import logging
import multiprocessing
import os
import sys
from argparse import ArgumentParser
from collections import namedtuple

from plip.basic import config, logger

logger = logger.get_logger()

from plip.basic.config import __version__
from plip.basic.parallel import parallel_fn
from plip.basic.remote import VisualizerData
from plip.exchange.report import StructureReport
from plip.exchange.webservices import fetch_pdb
from plip.structure.preparation import create_folder_if_not_exists, extract_pdbid
from plip.structure.preparation import tilde_expansion, PDBComplex

description = (
    f"The Protein-Ligand Interaction Profiler (PLIP) {__version__}"
    "is a command-line based tool to analyze interactions in a protein-ligand complex. "
    "If you are using PLIP in your work, please cite: "
    "Salentin,S. et al. PLIP: fully automated protein-ligand interaction profiler. "
    "Nucl. Acids Res. (1 July 2015) 43 (W1): W443-W447. doi:10.1093/nar/gkv315"
    f"Supported and maintained by: {config.__maintainer__}"
)


def threshold_limiter(aparser, arg):
    arg = float(arg)
    if arg <= 0:
        aparser.error("All thresholds have to be values larger than zero.")
    return arg


def process_pdb(pdbfile, outpath, as_string=False, outputprefix="report"):
    """Analysis of a single PDB file. Can generate textual reports XML, PyMOL session files and images as output."""
    if not as_string:
        pdb_file_name = pdbfile.split("/")[-1]
        startmessage = f"starting analysis of {pdb_file_name}"
    else:
        startmessage = "starting analysis from STDIN"
    logger.info(startmessage)
    mol = PDBComplex()
    mol.output_path = outpath
    mol.load_pdb(pdbfile, as_string=as_string)
    # @todo Offers possibility for filter function from command line (by ligand chain, position, hetid)
    for ligand in mol.ligands:
        mol.characterize_complex(ligand)

    create_folder_if_not_exists(outpath)

    # Generate the report files
    streport = StructureReport(mol, outputprefix=outputprefix)

    config.MAXTHREADS = min(config.MAXTHREADS, len(mol.interaction_sets))

    ######################################
    # PyMOL Visualization (parallelized) #
    ######################################

    if config.PYMOL or config.PICS:
        from plip.visualization.visualize import visualize_in_pymol

        complexes = [
            VisualizerData(mol, site)
            for site in sorted(mol.interaction_sets)
            if not len(mol.interaction_sets[site].interacting_res) == 0
        ]
        if config.MAXTHREADS > 1:
            logger.info(
                f"generating visualizations in parallel on {config.MAXTHREADS} cores"
            )
            parfn = parallel_fn(visualize_in_pymol)
            parfn(complexes, processes=config.MAXTHREADS)
        else:
            [visualize_in_pymol(plcomplex) for plcomplex in complexes]

    if config.XML:  # Generate report in xml format
        streport.write_xml(as_string=config.STDOUT)

    if config.TXT:  # Generate report in txt (rst) format
        streport.write_txt(as_string=config.STDOUT)


def download_structure(inputpdbid):
    """Given a PDB ID, downloads the corresponding PDB structure.
    Checks for validity of ID and handles error while downloading.
    Returns the path of the downloaded file."""
    try:
        if (
            len(inputpdbid) != 4
            or extract_pdbid(inputpdbid.lower()) == "UnknownProtein"
        ):
            logger.error(f"invalid PDB-ID (wrong format): {inputpdbid}")
            sys.exit(1)
        pdbfile, pdbid = fetch_pdb(inputpdbid.lower())
        pdbpath = tilde_expansion("%s/%s.pdb" % (config.BASEPATH.rstrip("/"), pdbid))
        create_folder_if_not_exists(config.BASEPATH)
        with open(pdbpath, "w") as g:
            g.write(pdbfile)
        logger.info(f"file downloaded as {pdbpath}")
        return pdbpath, pdbid

    except ValueError:  # Invalid PDB ID, cannot fetch from RCBS server
        logger.error(f"PDB-ID does not exist: {inputpdbid}")
        sys.exit(1)


def remove_duplicates(slist):
    """Checks input lists for duplicates and returns
    a list with unique entries"""
    unique = list(set(slist))
    difference = len(slist) - len(unique)
    if difference == 1:
        logger.info("removed one duplicate entry from input list")
    if difference > 1:
        logger.info(f"Removed {difference} duplicate entries from input list")
    return unique


def run_analysis(inputstructs, inputpdbids):
    """Main function. Calls functions for processing, report generation and visualization."""
    pdbid, pdbpath = None, None
    # @todo For multiprocessing, implement better stacktracing for errors
    # Print title and version
    logger.info(f"Protein-Ligand Interaction Profiler (PLIP) {__version__}")
    logger.info(f"brought to you by: {config.__maintainer__}")
    logger.info(f"please cite: https://www.doi.org/10.1093/nar/gkv315")
    output_prefix = config.OUTPUTFILENAME

    if inputstructs is not None:  # Process PDB file(s)
        num_structures = len(inputstructs)
        inputstructs = remove_duplicates(inputstructs)
        read_from_stdin = False
        for inputstruct in inputstructs:
            if inputstruct == "-":
                inputstruct = sys.stdin.read()
                read_from_stdin = True
                if config.RAWSTRING:
                    if sys.version_info < (3,):
                        inputstruct = bytes(inputstruct).decode("unicode_escape")
                    else:
                        inputstruct = bytes(inputstruct, "utf8").decode(
                            "unicode_escape"
                        )
            else:
                if os.path.getsize(inputstruct) == 0:
                    logger.error("empty PDB file")
                    sys.exit(1)
                if num_structures > 1:
                    basename = inputstruct.split(".")[-2].split("/")[-1]
                    config.OUTPATH = "/".join([config.BASEPATH, basename])
                    output_prefix = "report"
            process_pdb(
                inputstruct,
                config.OUTPATH,
                as_string=read_from_stdin,
                outputprefix=output_prefix,
            )
    else:  # Try to fetch the current PDB structure(s) directly from the RCBS server
        num_pdbids = len(inputpdbids)
        inputpdbids = remove_duplicates(inputpdbids)
        for inputpdbid in inputpdbids:
            pdbpath, pdbid = download_structure(inputpdbid)
            if num_pdbids > 1:
                config.OUTPATH = "/".join(
                    [config.BASEPATH, pdbid[1:3].upper(), pdbid.upper()]
                )
                output_prefix = "report"
            process_pdb(pdbpath, config.OUTPATH, outputprefix=output_prefix)

    if (pdbid is not None or inputstructs is not None) and config.BASEPATH is not None:
        if config.BASEPATH in [".", "./"]:
            logger.info(
                "finished analysis, find the result files in the working directory"
            )
        else:
            logger.info(
                f"finished analysis, find the result files in {config.BASEPATH}"
            )


if __name__ == "__main__":
    """Parse command line arguments and start main script for analysis."""
    parser = ArgumentParser(prog="PLIP", description=description)
    pdbstructure = parser.add_mutually_exclusive_group(
        required=True
    )  # Needs either PDB ID or file
    # '-' as file name reads from stdin
    pdbstructure.add_argument(
        "-f",
        "--file",
        dest="input",
        nargs="+",
        help="Set input file, '-' reads from stdin",
    )
    pdbstructure.add_argument("-i", "--input", dest="pdbid", nargs="+")
    outputgroup = parser.add_mutually_exclusive_group(
        required=False
    )  # Needs either outpath or stdout
    outputgroup.add_argument("-o", "--out", dest="outpath", default="./")
    outputgroup.add_argument(
        "-O",
        "--stdout",
        dest="stdout",
        action="store_true",
        default=False,
        help="Write to stdout instead of file",
    )
    parser.add_argument(
        "--rawstring",
        dest="use_raw_string",
        default=False,
        action="store_true",
        help="Use Python raw strings for stdout and stdin",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        dest="verbose",
        default=False,
        help="Turn on verbose mode",
        action="store_true",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        dest="quiet",
        default=False,
        help="Turn on quiet mode",
        action="store_true",
    )
    parser.add_argument(
        "-s",
        "--silent",
        dest="silent",
        default=False,
        help="Turn on silent mode",
        action="store_true",
    )
    parser.add_argument(
        "-p",
        "--pics",
        dest="pics",
        default=False,
        help="Additional pictures",
        action="store_true",
    )
    parser.add_argument(
        "-x",
        "--xml",
        dest="xml",
        default=False,
        help="Generate report file in XML format",
        action="store_true",
    )
    parser.add_argument(
        "-t",
        "--txt",
        dest="txt",
        default=False,
        help="Generate report file in TXT (RST) format",
        action="store_true",
    )
    parser.add_argument(
        "-y",
        "--pymol",
        dest="pymol",
        default=False,
        help="Additional PyMOL session files",
        action="store_true",
    )
    parser.add_argument(
        "--maxthreads",
        dest="maxthreads",
        default=multiprocessing.cpu_count(),
        help="Set maximum number of main threads (number of binding sites processed simultaneously)."
        "If not set, PLIP uses all available CPUs if possible.",
        type=int,
    )
    parser.add_argument(
        "--breakcomposite",
        dest="breakcomposite",
        default=False,
        help="Don't combine ligand fragments with covalent bonds but treat them as single ligands for the analysis.",
        action="store_true",
    )
    parser.add_argument(
        "--altlocation",
        dest="altlocation",
        default=False,
        help="Also consider alternate locations for atoms (e.g. alternate conformations).",
        action="store_true",
    )
    parser.add_argument(
        "--nofix",
        dest="nofix",
        default=False,
        help="Turns off fixing of PDB files.",
        action="store_true",
    )
    parser.add_argument(
        "--nofixfile",
        dest="nofixfile",
        default=False,
        help="Turns off writing files for fixed PDB files.",
        action="store_true",
    )
    parser.add_argument(
        "--nopdbcanmap",
        dest="nopdbcanmap",
        default=False,
        help="Turns off calculation of mapping between canonical and PDB atom order for ligands.",
        action="store_true",
    )
    parser.add_argument(
        "--dnareceptor",
        dest="dnareceptor",
        default=False,
        help="Uses the DNA instead of the protein as a receptor for interactions.",
        action="store_true",
    )
    parser.add_argument(
        "--name",
        dest="outputfilename",
        default="report",
        help="Set a filename for the report TXT and XML files. Will only work when processing single structures.",
    )
    ligandtype = (
        parser.add_mutually_exclusive_group()
    )  # Either peptide/inter or intra mode
    ligandtype.add_argument(
        "--peptides",
        "--inter",
        dest="peptides",
        default=[],
        help="Allows to define one or multiple chains as peptide ligands or to detect inter-chain contacts",
        nargs="+",
    )
    ligandtype.add_argument(
        "--intra",
        dest="intra",
        help="Allows to define one chain to analyze intra-chain contacts.",
    )
    parser.add_argument(
        "--keepmod",
        dest="keepmod",
        default=False,
        help="Keep modified residues as ligands",
        action="store_true",
    )
    parser.add_argument(
        "--nohydro",
        dest="nohydro",
        default=False,
        help="Do not add polar hydrogens in case your structure already contains hydrogens.",
        action="store_true",
    )
    # Optional threshold arguments, not shown in help
    thr = namedtuple("threshold", "name type")
    thresholds = [
        thr(name="aromatic_planarity", type="angle"),
        thr(name="hydroph_dist_max", type="distance"),
        thr(name="hbond_dist_max", type="distance"),
        thr(name="hbond_don_angle_min", type="angle"),
        thr(name="pistack_dist_max", type="distance"),
        thr(name="pistack_ang_dev", type="other"),
        thr(name="pistack_offset_max", type="distance"),
        thr(name="pication_dist_max", type="distance"),
        thr(name="saltbridge_dist_max", type="distance"),
        thr(name="halogen_dist_max", type="distance"),
        thr(name="halogen_acc_angle", type="angle"),
        thr(name="halogen_don_angle", type="angle"),
        thr(name="halogen_angle_dev", type="other"),
        thr(name="water_bridge_mindist", type="distance"),
        thr(name="water_bridge_maxdist", type="distance"),
        thr(name="water_bridge_omega_min", type="angle"),
        thr(name="water_bridge_omega_max", type="angle"),
        thr(name="water_bridge_theta_min", type="angle"),
    ]
    for t in thresholds:
        parser.add_argument(
            "--%s" % t.name,
            dest=t.name,
            type=lambda val: threshold_limiter(parser, val),
            help=argparse.SUPPRESS,
        )

    arguments = parser.parse_args()

    # configure log levels
    config.VERBOSE = True if arguments.verbose else False
    config.QUIET = True if arguments.quiet else False
    config.SILENT = True if arguments.silent else False
    if config.VERBOSE:
        logger.setLevel(logging.DEBUG)
    elif config.QUIET:
        logger.setLevel(logging.WARN)
    elif config.SILENT:
        logger.setLevel(logging.CRITICAL)
    else:
        logger.setLevel(config.DEFAULT_LOG_LEVEL)

    config.MAXTHREADS = arguments.maxthreads
    config.XML = arguments.xml
    config.TXT = arguments.txt
    config.PICS = arguments.pics
    config.PYMOL = arguments.pymol
    config.STDOUT = arguments.stdout
    config.RAWSTRING = arguments.use_raw_string
    config.OUTPATH = arguments.outpath
    config.OUTPATH = tilde_expansion(
        "".join([config.OUTPATH, "/"])
        if not config.OUTPATH.endswith("/")
        else config.OUTPATH
    )
    config.BASEPATH = config.OUTPATH  # Used for batch processing
    config.BREAKCOMPOSITE = arguments.breakcomposite
    config.ALTLOC = arguments.altlocation
    config.PEPTIDES = arguments.peptides
    config.INTRA = arguments.intra
    config.NOFIX = arguments.nofix
    config.NOFIXFILE = arguments.nofixfile
    config.NOPDBCANMAP = arguments.nopdbcanmap
    config.KEEPMOD = arguments.keepmod
    config.DNARECEPTOR = arguments.dnareceptor
    config.OUTPUTFILENAME = arguments.outputfilename
    config.NOHYDRO = arguments.nohydro
    # Make sure we have pymol with --pics and --pymol
    if config.PICS or config.PYMOL:
        try:
            import pymol
        except ImportError:
            logger.error("PyMOL is required for the --pics and --pymol option")
            sys.exit(1)
    # Assign values to global thresholds
    for t in thresholds:
        tvalue = getattr(arguments, t.name)
        if tvalue is not None:
            if (
                t.type == "angle" and not 0 < tvalue < 180
            ):  # Check value for angle thresholds
                parser.error(
                    "Threshold for angles need to have values within 0 and 180."
                )
            if t.type == "distance":
                if tvalue > 10:  # Check value for angle thresholds
                    parser.error(
                        "Threshold for distances must not be larger than 10 Angstrom."
                    )
                elif (
                    tvalue > config.BS_DIST + 1
                ):  # Dynamically adapt the search space for binding site residues
                    config.BS_DIST = tvalue + 1
            setattr(config, t.name.upper(), tvalue)
    # Check additional conditions for interdependent thresholds
    if not config.HALOGEN_ACC_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error(
            "The halogen acceptor angle has to be larger than the halogen angle deviation."
        )
    if not config.HALOGEN_DON_ANGLE > config.HALOGEN_ANGLE_DEV:
        parser.error(
            "The halogen donor angle has to be larger than the halogen angle deviation."
        )
    if not config.WATER_BRIDGE_MINDIST < config.WATER_BRIDGE_MAXDIST:
        parser.error(
            "The water bridge minimum distance has to be smaller than the water bridge maximum distance."
        )
    if not config.WATER_BRIDGE_OMEGA_MIN < config.WATER_BRIDGE_OMEGA_MAX:
        parser.error(
            "The water bridge omega minimum angle has to be smaller than the water bridge omega maximum angle"
        )
    expanded_path = (
        tilde_expansion(arguments.input) if arguments.input is not None else None
    )
    run_analysis(expanded_path, arguments.pdbid)  # Start main script