import gzip
import itertools
import os
import platform
import re
import subprocess
import sys
import tempfile
import zipfile
from collections import namedtuple

import numpy as np
from openbabel import pybel
from openbabel.pybel import Atom

from plip.basic import config, logger

logger = logger.get_logger()

# Windows and MacOS
if os.name != 'nt' and platform.system() != 'Darwin':  # Resource module not available for Windows
    import resource

# Settings
np.seterr(all='ignore')  # No runtime warnings


def tmpfile(prefix, direc):
    """Returns the path to a newly created temporary file."""
    return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc)


def is_lig(hetid):
    """Checks if a PDB compound can be excluded as a small molecule ligand"""
    h = hetid.upper()
    return not (h == 'HOH' or h in config.UNSUPPORTED)


def extract_pdbid(string):
    """Use regular expressions to get a PDB ID from a string"""
    p = re.compile("[0-9][0-9a-z]{3}")
    m = p.search(string.lower())
    try:
        return m.group()
    except AttributeError:
        return "UnknownProtein"


def whichrestype(atom):
    """Returns the residue name of an Pybel or OpenBabel atom."""
    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom
    return atom.GetResidue().GetName() if atom.GetResidue() is not None else None


def whichresnumber(atom):
    """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file)."""
    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom
    return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None


def whichchain(atom):
    """Returns the residue number of an PyBel or OpenBabel atom."""
    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom
    return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None


#########################
# Mathematical operations
#########################


def euclidean3d(v1, v2):
    """Faster implementation of euclidean distance for the 3D case."""
    if not len(v1) == 3 and len(v2) == 3:
        return None
    return np.sqrt((v1[0] - v2[0]) ** 2 + (v1[1] - v2[1]) ** 2 + (v1[2] - v2[2]) ** 2)


def vector(p1, p2):
    """Vector from p1 to p2.
    :param p1: coordinates of point p1
    :param p2: coordinates of point p2
    :returns : numpy array with vector coordinates
    """
    return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))])


def vecangle(v1, v2, deg=True):
    """Calculate the angle between two vectors
    :param v1: coordinates of vector v1
    :param v2: coordinates of vector v2
    :param deg: whether to return degrees or radians
    :returns : angle in degree or rad
    """
    if np.array_equal(v1, v2):
        return 0.0
    dm = np.dot(v1, v2)
    cm = np.linalg.norm(v1) * np.linalg.norm(v2)
    angle = np.arccos(dm / cm)  # Round here to prevent floating point errors
    return np.degrees([angle, ])[0] if deg else angle


def normalize_vector(v):
    """Take a vector and return the normalized vector
    :param v: a vector v
    :returns : normalized vector v
    """
    norm = np.linalg.norm(v)
    return v / norm if not norm == 0 else v


def centroid(coo):
    """Calculates the centroid from a 3D point cloud and returns the coordinates
    :param coo: Array of coordinate arrays
    :returns : centroid coordinates as list
    """
    return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo]))))


def projection(pnormal1, ppoint, tpoint):
    """Calculates the centroid from a 3D point cloud and returns the coordinates
    :param pnormal1: normal of plane
    :param ppoint: coordinates of point in the plane
    :param tpoint: coordinates of point to be projected
    :returns : coordinates of point orthogonally projected on the plane
    """
    # Choose the plane normal pointing to the point to be projected
    pnormal2 = [coo * (-1) for coo in pnormal1]
    d1 = euclidean3d(tpoint, pnormal1 + ppoint)
    d2 = euclidean3d(tpoint, pnormal2 + ppoint)
    pnormal = pnormal1 if d1 < d2 else pnormal2
    # Calculate the projection of tpoint to the plane
    sn = -np.dot(pnormal, vector(ppoint, tpoint))
    sd = np.dot(pnormal, pnormal)
    sb = sn / sd
    return [c1 + c2 for c1, c2 in zip(tpoint, [sb * pn for pn in pnormal])]


def cluster_doubles(double_list):
    """Given a list of doubles, they are clustered if they share one element
    :param double_list: list of doubles
    :returns : list of clusters (tuples)
    """
    location = {}  # hashtable of which cluster each element is in
    clusters = []
    # Go through each double
    for t in double_list:
        a, b = t[0], t[1]
        # If they both are already in different clusters, merge the clusters
        if a in location and b in location:
            if location[a] != location[b]:
                if location[a] < location[b]:
                    clusters[location[a]] = clusters[location[a]].union(clusters[location[b]])  # Merge clusters
                    clusters = clusters[:location[b]] + clusters[location[b] + 1:]
                else:
                    clusters[location[b]] = clusters[location[b]].union(clusters[location[a]])  # Merge clusters
                    clusters = clusters[:location[a]] + clusters[location[a] + 1:]
                # Rebuild index of locations for each element as they have changed now
                location = {}
                for i, cluster in enumerate(clusters):
                    for c in cluster:
                        location[c] = i
        else:
            # If a is already in a cluster, add b to that cluster
            if a in location:
                clusters[location[a]].add(b)
                location[b] = location[a]
            # If b is already in a cluster, add a to that cluster
            if b in location:
                clusters[location[b]].add(a)
                location[a] = location[b]
            # If neither a nor b is in any cluster, create a new one with a and b
            if not (b in location and a in location):
                clusters.append(set(t))
                location[a] = len(clusters) - 1
                location[b] = len(clusters) - 1
    return map(tuple, clusters)


#################
# File operations
#################

def tilde_expansion(folder_path):
    """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>."""
    return os.path.expanduser(folder_path) if '~' in folder_path else folder_path


def folder_exists(folder_path):
    """Checks if a folder exists"""
    return os.path.exists(folder_path)


def create_folder_if_not_exists(folder_path):
    """Creates a folder if it does not exists."""
    folder_path = tilde_expansion(folder_path)
    folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path
    direc = os.path.dirname(folder_path)
    if not folder_exists(direc):
        os.makedirs(direc)


def cmd_exists(c):
    return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0


################
# PyMOL-specific
################


def initialize_pymol(options):
    """Initializes PyMOL"""
    import pymol
    # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround)
    pymol.finish_launching(args=['pymol', options, '-K'])
    pymol.cmd.reinitialize()


def start_pymol(quiet=False, options='-p', run=False):
    """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output.
    Command line options can be passed as the second argument."""
    import pymol
    pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:]
    if run:
        initialize_pymol(options)
    if quiet:
        pymol.cmd.feedback('disable', 'all', 'everything')


def nucleotide_linkage(residues):
    """Support for DNA/RNA ligands by finding missing covalent linkages to stitch DNA/RNA together."""

    nuc_covalent = []
    #######################################
    # Basic support for RNA/DNA as ligand #
    #######################################
    nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU']
    dna_rna = {}  # Dictionary of DNA/RNA residues by chain
    covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
    # Create missing covlinkage entries for DNA/RNA
    for ligand in residues:
        resname, chain, pos = ligand
        if resname in nucleotides:
            if chain not in dna_rna:
                dna_rna[chain] = [(resname, pos), ]
            else:
                dna_rna[chain].append((resname, pos))
    for chain in dna_rna:
        nuc_list = dna_rna[chain]
        for i, nucleotide in enumerate(nuc_list):
            if not i == len(nuc_list) - 1:
                name, pos = nucleotide
                nextnucleotide = nuc_list[i + 1]
                nextname, nextpos = nextnucleotide
                newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='',
                                     id2=nextname, chain2=chain, pos2=nextpos, conf2='')
                nuc_covalent.append(newlink)

    return nuc_covalent


def ring_is_planar(ring, r_atoms):
    """Given a set of ring atoms, check if the ring is sufficiently planar
    to be considered aromatic"""
    normals = []
    for a in r_atoms:
        adj = pybel.ob.OBAtomAtomIter(a.OBAtom)
        # Check for neighboring atoms in the ring
        n_coords = [pybel.Atom(neigh).coords for neigh in adj if ring.IsMember(neigh)]
        vec1, vec2 = vector(a.coords, n_coords[0]), vector(a.coords, n_coords[1])
        normals.append(np.cross(vec1, vec2))
    # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less
    for n1, n2 in itertools.product(normals, repeat=2):
        arom_angle = vecangle(n1, n2)
        if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]):
            return False
    return True


def classify_by_name(names):
    """Classify a (composite) ligand by the HETID(s)"""
    if len(names) > 3:  # Polymer
        if len(set(config.RNA).intersection(set(names))) != 0:
            ligtype = 'RNA'
        elif len(set(config.DNA).intersection(set(names))) != 0:
            ligtype = 'DNA'
        else:
            ligtype = "POLYMER"
    else:
        ligtype = 'SMALLMOLECULE'

    for name in names:
        if name in config.METAL_IONS:
            if len(names) == 1:
                ligtype = 'ION'
            else:
                if "ION" not in ligtype:
                    ligtype += '+ION'
    return ligtype


def sort_members_by_importance(members):
    """Sort the members of a composite ligand according to two criteria:
    1. Split up in main and ion group. Ion groups are located behind the main group.
    2. Within each group, sort by chain and position."""
    main = [x for x in members if x[0] not in config.METAL_IONS]
    ion = [x for x in members if x[0] in config.METAL_IONS]
    sorted_main = sorted(main, key=lambda x: (x[1], x[2]))
    sorted_main = sorted(main, key=lambda x: (x[1], x[2]))
    sorted_ion = sorted(ion, key=lambda x: (x[1], x[2]))
    return sorted_main + sorted_ion


def get_isomorphisms(reference, lig):
    """Get all isomorphisms of the ligand."""
    query = pybel.ob.CompileMoleculeQuery(reference.OBMol)
    mappr = pybel.ob.OBIsomorphismMapper.GetInstance(query)
    if all:
        isomorphs = pybel.ob.vvpairUIntUInt()
        mappr.MapAll(lig.OBMol, isomorphs)
    else:
        isomorphs = pybel.ob.vpairUIntUInt()
        mappr.MapFirst(lig.OBMol, isomorphs)
        isomorphs = [isomorphs]
    logger.debug(f'number of isomorphisms: {len(isomorphs)}')
    # @todo Check which isomorphism to take
    return isomorphs


def canonicalize(lig, preserve_bond_order=False):
    """Get the canonical atom order for the ligand."""
    atomorder = None
    # Get canonical atom order

    lig = pybel.ob.OBMol(lig.OBMol)
    if not preserve_bond_order:
        for bond in pybel.ob.OBMolBondIter(lig):
            if bond.GetBondOrder() != 1:
                bond.SetBondOrder(1)
    lig.DeleteData(pybel.ob.StereoData)
    lig = pybel.Molecule(lig)
    testcan = lig.write(format='can')
    try:
        pybel.readstring('can', testcan)
        reference = pybel.readstring('can', testcan)
    except IOError:
        testcan, reference = '', ''
    if testcan != '':
        reference.removeh()
        isomorphs = get_isomorphisms(reference, lig)  # isomorphs now holds all isomorphisms within the molecule
        if not len(isomorphs) == 0:
            smi_dict = {}
            smi_to_can = isomorphs[0]
            for x in smi_to_can:
                smi_dict[int(x[1]) + 1] = int(x[0]) + 1
            atomorder = [smi_dict[x + 1] for x in range(len(lig.atoms))]
        else:
            atomorder = None
    return atomorder


def int32_to_negative(int32):
    """Checks if a suspicious number (e.g. ligand position) is in fact a negative number represented as a
    32 bit integer and returns the actual number.
    """
    dct = {}
    if int32 == 4294967295:  # Special case in some structures (note, this is just a workaround)
        return -1
    for i in range(-1000, -1):
        dct[np.uint32(i)] = i
    if int32 in dct:
        return dct[int32]
    else:
        return int32


def read_pdb(pdbfname, as_string=False):
    """Reads a given PDB file and returns a Pybel Molecule."""
    pybel.ob.obErrorLog.StopLogging()  # Suppress all OpenBabel warnings
    if os.name != 'nt':  # Resource module not available for Windows
        maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1]
        resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize))
    sys.setrecursionlimit(10 ** 5)  # increase Python recursion limit
    return readmol(pdbfname, as_string=as_string)


def read(fil):
    """Returns a file handler and detects gzipped files."""
    if os.path.splitext(fil)[-1] == '.gz':
        return gzip.open(fil, 'rb')
    elif os.path.splitext(fil)[-1] == '.zip':
        zf = zipfile.ZipFile(fil, 'r')
        return zf.open(zf.infolist()[0].filename)
    else:
        return open(fil, 'r')


def readmol(path, as_string=False):
    """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type.
    In contrast to the standard Pybel implementation, the file is closed properly."""
    supported_formats = ['pdb']
    # Fix for Windows-generated files: Remove carriage return characters
    if "\r" in path and as_string:
        path = path.replace('\r', '')

    for sformat in supported_formats:
        obc = pybel.ob.OBConversion()
        obc.SetInFormat(sformat)
        logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel')

        # Read molecules with single bond information
        if as_string:
            try:
                mymol = pybel.readstring(sformat, path)
            except IOError:
                logger.error('no valid file format provided')
                sys.exit(1)
        else:
            read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None})
            try:
                mymol = next(read_file)
            except StopIteration:
                logger.error('file contains no valid molecules')
                sys.exit(1)

        logger.debug('molecule successfully read')

        # Assign multiple bonds
        mymol.OBMol.PerceiveBondOrders()
        return mymol, sformat

    logger.error('no valid file format provided')
    sys.exit(1)