1 files changed, 0 insertions, 434 deletions
diff --git a/plip/basic/supplemental.py b/plip/basic/supplemental.py
deleted file mode 100644
index 6b3ef7b..0000000
--- a/plip/basic/supplemental.py
+++ /dev/null
@@ -1,434 +0,0 @@
-import gzip
-import itertools
-import os
-import platform
-import re
-import subprocess
-import sys
-import tempfile
-import zipfile
-from collections import namedtuple
-
-import numpy as np
-from openbabel import pybel
-from openbabel.pybel import Atom
-
-from plip.basic import config, logger
-
-logger = logger.get_logger()
-
-# Windows and MacOS
-if os.name != 'nt' and platform.system() != 'Darwin':  # Resource module not available for Windows
-    import resource
-
-# Settings
-np.seterr(all='ignore')  # No runtime warnings
-
-
-def tmpfile(prefix, direc):
-    """Returns the path to a newly created temporary file."""
-    return tempfile.mktemp(prefix=prefix, suffix='.pdb', dir=direc)
-
-
-def is_lig(hetid):
-    """Checks if a PDB compound can be excluded as a small molecule ligand"""
-    h = hetid.upper()
-    return not (h == 'HOH' or h in config.UNSUPPORTED)
-
-
-def extract_pdbid(string):
-    """Use regular expressions to get a PDB ID from a string"""
-    p = re.compile("[0-9][0-9a-z]{3}")
-    m = p.search(string.lower())
-    try:
-        return m.group()
-    except AttributeError:
-        return "UnknownProtein"
-
-
-def whichrestype(atom):
-    """Returns the residue name of an Pybel or OpenBabel atom."""
-    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom
-    return atom.GetResidue().GetName() if atom.GetResidue() is not None else None
-
-
-def whichresnumber(atom):
-    """Returns the residue number of an Pybel or OpenBabel atom (numbering as in original PDB file)."""
-    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom
-    return atom.GetResidue().GetNum() if atom.GetResidue() is not None else None
-
-
-def whichchain(atom):
-    """Returns the residue number of an PyBel or OpenBabel atom."""
-    atom = atom if not isinstance(atom, Atom) else atom.OBAtom  # Convert to OpenBabel Atom
-    return atom.GetResidue().GetChain() if atom.GetResidue() is not None else None
-
-
-#########################
-# Mathematical operations
-#########################
-
-
-def euclidean3d(v1, v2):
-    """Faster implementation of euclidean distance for the 3D case."""
-    if not len(v1) == 3 and len(v2) == 3:
-        return None
-    return np.sqrt((v1[0] - v2[0]) ** 2 + (v1[1] - v2[1]) ** 2 + (v1[2] - v2[2]) ** 2)
-
-
-def vector(p1, p2):
-    """Vector from p1 to p2.
-    :param p1: coordinates of point p1
-    :param p2: coordinates of point p2
-    :returns : numpy array with vector coordinates
-    """
-    return None if len(p1) != len(p2) else np.array([p2[i] - p1[i] for i in range(len(p1))])
-
-
-def vecangle(v1, v2, deg=True):
-    """Calculate the angle between two vectors
-    :param v1: coordinates of vector v1
-    :param v2: coordinates of vector v2
-    :param deg: whether to return degrees or radians
-    :returns : angle in degree or rad
-    """
-    if np.array_equal(v1, v2):
-        return 0.0
-    dm = np.dot(v1, v2)
-    cm = np.linalg.norm(v1) * np.linalg.norm(v2)
-    angle = np.arccos(dm / cm)  # Round here to prevent floating point errors
-    return np.degrees([angle, ])[0] if deg else angle
-
-
-def normalize_vector(v):
-    """Take a vector and return the normalized vector
-    :param v: a vector v
-    :returns : normalized vector v
-    """
-    norm = np.linalg.norm(v)
-    return v / norm if not norm == 0 else v
-
-
-def centroid(coo):
-    """Calculates the centroid from a 3D point cloud and returns the coordinates
-    :param coo: Array of coordinate arrays
-    :returns : centroid coordinates as list
-    """
-    return list(map(np.mean, (([c[0] for c in coo]), ([c[1] for c in coo]), ([c[2] for c in coo]))))
-
-
-def projection(pnormal1, ppoint, tpoint):
-    """Calculates the centroid from a 3D point cloud and returns the coordinates
-    :param pnormal1: normal of plane
-    :param ppoint: coordinates of point in the plane
-    :param tpoint: coordinates of point to be projected
-    :returns : coordinates of point orthogonally projected on the plane
-    """
-    # Choose the plane normal pointing to the point to be projected
-    pnormal2 = [coo * (-1) for coo in pnormal1]
-    d1 = euclidean3d(tpoint, pnormal1 + ppoint)
-    d2 = euclidean3d(tpoint, pnormal2 + ppoint)
-    pnormal = pnormal1 if d1 < d2 else pnormal2
-    # Calculate the projection of tpoint to the plane
-    sn = -np.dot(pnormal, vector(ppoint, tpoint))
-    sd = np.dot(pnormal, pnormal)
-    sb = sn / sd
-    return [c1 + c2 for c1, c2 in zip(tpoint, [sb * pn for pn in pnormal])]
-
-
-def cluster_doubles(double_list):
-    """Given a list of doubles, they are clustered if they share one element
-    :param double_list: list of doubles
-    :returns : list of clusters (tuples)
-    """
-    location = {}  # hashtable of which cluster each element is in
-    clusters = []
-    # Go through each double
-    for t in double_list:
-        a, b = t[0], t[1]
-        # If they both are already in different clusters, merge the clusters
-        if a in location and b in location:
-            if location[a] != location[b]:
-                if location[a] < location[b]:
-                    clusters[location[a]] = clusters[location[a]].union(clusters[location[b]])  # Merge clusters
-                    clusters = clusters[:location[b]] + clusters[location[b] + 1:]
-                else:
-                    clusters[location[b]] = clusters[location[b]].union(clusters[location[a]])  # Merge clusters
-                    clusters = clusters[:location[a]] + clusters[location[a] + 1:]
-                # Rebuild index of locations for each element as they have changed now
-                location = {}
-                for i, cluster in enumerate(clusters):
-                    for c in cluster:
-                        location[c] = i
-        else:
-            # If a is already in a cluster, add b to that cluster
-            if a in location:
-                clusters[location[a]].add(b)
-                location[b] = location[a]
-            # If b is already in a cluster, add a to that cluster
-            if b in location:
-                clusters[location[b]].add(a)
-                location[a] = location[b]
-            # If neither a nor b is in any cluster, create a new one with a and b
-            if not (b in location and a in location):
-                clusters.append(set(t))
-                location[a] = len(clusters) - 1
-                location[b] = len(clusters) - 1
-    return map(tuple, clusters)
-
-
-#################
-# File operations
-#################
-
-def tilde_expansion(folder_path):
-    """Tilde expansion, i.e. converts '~' in paths into <value of $HOME>."""
-    return os.path.expanduser(folder_path) if '~' in folder_path else folder_path
-
-
-def folder_exists(folder_path):
-    """Checks if a folder exists"""
-    return os.path.exists(folder_path)
-
-
-def create_folder_if_not_exists(folder_path):
-    """Creates a folder if it does not exists."""
-    folder_path = tilde_expansion(folder_path)
-    folder_path = "".join([folder_path, '/']) if not folder_path[-1] == '/' else folder_path
-    direc = os.path.dirname(folder_path)
-    if not folder_exists(direc):
-        os.makedirs(direc)
-
-
-def cmd_exists(c):
-    return subprocess.call("type " + c, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) == 0
-
-
-################
-# PyMOL-specific
-################
-
-
-def initialize_pymol(options):
-    """Initializes PyMOL"""
-    import pymol
-    # Pass standard arguments of function to prevent PyMOL from printing out PDB headers (workaround)
-    pymol.finish_launching(args=['pymol', options, '-K'])
-    pymol.cmd.reinitialize()
-
-
-def start_pymol(quiet=False, options='-p', run=False):
-    """Starts up PyMOL and sets general options. Quiet mode suppresses all PyMOL output.
-    Command line options can be passed as the second argument."""
-    import pymol
-    pymol.pymol_argv = ['pymol', '%s' % options] + sys.argv[1:]
-    if run:
-        initialize_pymol(options)
-    if quiet:
-        pymol.cmd.feedback('disable', 'all', 'everything')
-
-
-def nucleotide_linkage(residues):
-    """Support for DNA/RNA ligands by finding missing covalent linkages to stitch DNA/RNA together."""
-
-    nuc_covalent = []
-    #######################################
-    # Basic support for RNA/DNA as ligand #
-    #######################################
-    nucleotides = ['A', 'C', 'T', 'G', 'U', 'DA', 'DC', 'DT', 'DG', 'DU']
-    dna_rna = {}  # Dictionary of DNA/RNA residues by chain
-    covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
-    # Create missing covlinkage entries for DNA/RNA
-    for ligand in residues:
-        resname, chain, pos = ligand
-        if resname in nucleotides:
-            if chain not in dna_rna:
-                dna_rna[chain] = [(resname, pos), ]
-            else:
-                dna_rna[chain].append((resname, pos))
-    for chain in dna_rna:
-        nuc_list = dna_rna[chain]
-        for i, nucleotide in enumerate(nuc_list):
-            if not i == len(nuc_list) - 1:
-                name, pos = nucleotide
-                nextnucleotide = nuc_list[i + 1]
-                nextname, nextpos = nextnucleotide
-                newlink = covlinkage(id1=name, chain1=chain, pos1=pos, conf1='',
-                                     id2=nextname, chain2=chain, pos2=nextpos, conf2='')
-                nuc_covalent.append(newlink)
-
-    return nuc_covalent
-
-
-def ring_is_planar(ring, r_atoms):
-    """Given a set of ring atoms, check if the ring is sufficiently planar
-    to be considered aromatic"""
-    normals = []
-    for a in r_atoms:
-        adj = pybel.ob.OBAtomAtomIter(a.OBAtom)
-        # Check for neighboring atoms in the ring
-        n_coords = [pybel.Atom(neigh).coords for neigh in adj if ring.IsMember(neigh)]
-        vec1, vec2 = vector(a.coords, n_coords[0]), vector(a.coords, n_coords[1])
-        normals.append(np.cross(vec1, vec2))
-    # Given all normals of ring atoms and their neighbors, the angle between any has to be 5.0 deg or less
-    for n1, n2 in itertools.product(normals, repeat=2):
-        arom_angle = vecangle(n1, n2)
-        if all([arom_angle > config.AROMATIC_PLANARITY, arom_angle < 180.0 - config.AROMATIC_PLANARITY]):
-            return False
-    return True
-
-
-def classify_by_name(names):
-    """Classify a (composite) ligand by the HETID(s)"""
-    if len(names) > 3:  # Polymer
-        if len(set(config.RNA).intersection(set(names))) != 0:
-            ligtype = 'RNA'
-        elif len(set(config.DNA).intersection(set(names))) != 0:
-            ligtype = 'DNA'
-        else:
-            ligtype = "POLYMER"
-    else:
-        ligtype = 'SMALLMOLECULE'
-
-    for name in names:
-        if name in config.METAL_IONS:
-            if len(names) == 1:
-                ligtype = 'ION'
-            else:
-                if "ION" not in ligtype:
-                    ligtype += '+ION'
-    return ligtype
-
-
-def sort_members_by_importance(members):
-    """Sort the members of a composite ligand according to two criteria:
-    1. Split up in main and ion group. Ion groups are located behind the main group.
-    2. Within each group, sort by chain and position."""
-    main = [x for x in members if x[0] not in config.METAL_IONS]
-    ion = [x for x in members if x[0] in config.METAL_IONS]
-    sorted_main = sorted(main, key=lambda x: (x[1], x[2]))
-    sorted_main = sorted(main, key=lambda x: (x[1], x[2]))
-    sorted_ion = sorted(ion, key=lambda x: (x[1], x[2]))
-    return sorted_main + sorted_ion
-
-
-def get_isomorphisms(reference, lig):
-    """Get all isomorphisms of the ligand."""
-    query = pybel.ob.CompileMoleculeQuery(reference.OBMol)
-    mappr = pybel.ob.OBIsomorphismMapper.GetInstance(query)
-    if all:
-        isomorphs = pybel.ob.vvpairUIntUInt()
-        mappr.MapAll(lig.OBMol, isomorphs)
-    else:
-        isomorphs = pybel.ob.vpairUIntUInt()
-        mappr.MapFirst(lig.OBMol, isomorphs)
-        isomorphs = [isomorphs]
-    logger.debug(f'number of isomorphisms: {len(isomorphs)}')
-    # @todo Check which isomorphism to take
-    return isomorphs
-
-
-def canonicalize(lig, preserve_bond_order=False):
-    """Get the canonical atom order for the ligand."""
-    atomorder = None
-    # Get canonical atom order
-
-    lig = pybel.ob.OBMol(lig.OBMol)
-    if not preserve_bond_order:
-        for bond in pybel.ob.OBMolBondIter(lig):
-            if bond.GetBondOrder() != 1:
-                bond.SetBondOrder(1)
-    lig.DeleteData(pybel.ob.StereoData)
-    lig = pybel.Molecule(lig)
-    testcan = lig.write(format='can')
-    try:
-        pybel.readstring('can', testcan)
-        reference = pybel.readstring('can', testcan)
-    except IOError:
-        testcan, reference = '', ''
-    if testcan != '':
-        reference.removeh()
-        isomorphs = get_isomorphisms(reference, lig)  # isomorphs now holds all isomorphisms within the molecule
-        if not len(isomorphs) == 0:
-            smi_dict = {}
-            smi_to_can = isomorphs[0]
-            for x in smi_to_can:
-                smi_dict[int(x[1]) + 1] = int(x[0]) + 1
-            atomorder = [smi_dict[x + 1] for x in range(len(lig.atoms))]
-        else:
-            atomorder = None
-    return atomorder
-
-
-def int32_to_negative(int32):
-    """Checks if a suspicious number (e.g. ligand position) is in fact a negative number represented as a
-    32 bit integer and returns the actual number.
-    """
-    dct = {}
-    if int32 == 4294967295:  # Special case in some structures (note, this is just a workaround)
-        return -1
-    for i in range(-1000, -1):
-        dct[np.uint32(i)] = i
-    if int32 in dct:
-        return dct[int32]
-    else:
-        return int32
-
-
-def read_pdb(pdbfname, as_string=False):
-    """Reads a given PDB file and returns a Pybel Molecule."""
-    pybel.ob.obErrorLog.StopLogging()  # Suppress all OpenBabel warnings
-    if os.name != 'nt':  # Resource module not available for Windows
-        maxsize = resource.getrlimit(resource.RLIMIT_STACK)[-1]
-        resource.setrlimit(resource.RLIMIT_STACK, (min(2 ** 28, maxsize), maxsize))
-    sys.setrecursionlimit(10 ** 5)  # increase Python recursion limit
-    return readmol(pdbfname, as_string=as_string)
-
-
-def read(fil):
-    """Returns a file handler and detects gzipped files."""
-    if os.path.splitext(fil)[-1] == '.gz':
-        return gzip.open(fil, 'rb')
-    elif os.path.splitext(fil)[-1] == '.zip':
-        zf = zipfile.ZipFile(fil, 'r')
-        return zf.open(zf.infolist()[0].filename)
-    else:
-        return open(fil, 'r')
-
-
-def readmol(path, as_string=False):
-    """Reads the given molecule file and returns the corresponding Pybel molecule as well as the input file type.
-    In contrast to the standard Pybel implementation, the file is closed properly."""
-    supported_formats = ['pdb']
-    # Fix for Windows-generated files: Remove carriage return characters
-    if "\r" in path and as_string:
-        path = path.replace('\r', '')
-
-    for sformat in supported_formats:
-        obc = pybel.ob.OBConversion()
-        obc.SetInFormat(sformat)
-        logger.debug(f'detected {sformat} as format, trying to read file with OpenBabel')
-
-        # Read molecules with single bond information
-        if as_string:
-            try:
-                mymol = pybel.readstring(sformat, path)
-            except IOError:
-                logger.error('no valid file format provided')
-                sys.exit(1)
-        else:
-            read_file = pybel.readfile(format=sformat, filename=path, opt={"s": None})
-            try:
-                mymol = next(read_file)
-            except StopIteration:
-                logger.error('file contains no valid molecules')
-                sys.exit(1)
-
-        logger.debug('molecule successfully read')
-
-        # Assign multiple bonds
-        mymol.OBMol.PerceiveBondOrders()
-        return mymol, sformat
-
-    logger.error('no valid file format provided')
-    sys.exit(1)