From 4be08f7bdd77991e9e453c1cda863c3f20c338d5 Mon Sep 17 00:00:00 2001
From: Navan Chauhan <navanchauhan@gmail.com>
Date: Thu, 2 Jul 2020 20:48:33 +0530
Subject: initial commit

---
 plip/structure/preparation.py | 1483 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1483 insertions(+)
 create mode 100644 plip/structure/preparation.py

(limited to 'plip/structure/preparation.py')

diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py
new file mode 100644
index 0000000..6b63d63
--- /dev/null
+++ b/plip/structure/preparation.py
@@ -0,0 +1,1483 @@
+import itertools
+import os
+import re
+import tempfile
+from collections import namedtuple
+from operator import itemgetter
+
+import numpy as np
+from openbabel import pybel
+
+from plip.basic import config, logger
+from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name
+from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar
+from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize
+from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance
+from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative
+from plip.structure.detection import halogen, pication, water_bridges, metal_complexation
+from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge
+
+logger = logger.get_logger()
+
+
+class PDBParser:
+    def __init__(self, pdbpath, as_string):
+        self.as_string = as_string
+        self.pdbpath = pdbpath
+        self.num_fixed_lines = 0
+        self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2")
+        self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb()
+
+    def parse_pdb(self):
+        """Extracts additional information from PDB files.
+        I. When reading in a PDB file, OpenBabel numbers ATOMS and HETATOMS continously.
+        In PDB files, TER records are also counted, leading to a different numbering system.
+        This functions reads in a PDB file and provides a mapping as a dictionary.
+        II. Additionally, it returns a list of modified residues.
+        III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified
+        IV. Alternative conformations
+        """
+        if self.as_string:
+            fil = self.pdbpath.rstrip('\n').split('\n')  # Removing trailing newline character
+        else:
+            f = read(self.pdbpath)
+            fil = f.readlines()
+            f.close()
+        corrected_lines = []
+        i, j = 0, 0  # idx and PDB numbering
+        d = {}
+        modres = set()
+        covalent = []
+        alt = []
+        previous_ter = False
+
+        # Standard without fixing
+        if not config.NOFIX:
+            if not config.PLUGIN_MODE:
+                lastnum = 0  # Atom numbering (has to be consecutive)
+                other_models = False
+                for line in fil:
+                    if not other_models:  # Only consider the first model in an NRM structure
+                        corrected_line, newnum = self.fix_pdbline(line, lastnum)
+                        if corrected_line is not None:
+                            if corrected_line.startswith('MODEL'):
+                                try:  # Get number of MODEL (1,2,3)
+                                    model_num = int(corrected_line[10:14])
+                                    if model_num > 1:  # MODEL 2,3,4 etc.
+                                        other_models = True
+                                except ValueError:
+                                    logger.debug(f'ignoring invalid MODEL entry: {corrected_line}')
+                            corrected_lines.append(corrected_line)
+                            lastnum = newnum
+                corrected_pdb = ''.join(corrected_lines)
+            else:
+                corrected_pdb = self.pdbpath
+                corrected_lines = fil
+        else:
+            corrected_pdb = self.pdbpath
+            corrected_lines = fil
+
+        for line in corrected_lines:
+            if line.startswith(("ATOM", "HETATM")):
+                # Retrieve alternate conformations
+                atomid, location = int(line[6:11]), line[16]
+                location = 'A' if location == ' ' else location
+                if location != 'A':
+                    alt.append(atomid)
+
+                if not previous_ter:
+                    i += 1
+                    j += 1
+                else:
+                    i += 1
+                    j += 2
+                d[i] = j
+                previous_ter = False
+            # Numbering Changes at TER records
+            if line.startswith("TER"):
+                previous_ter = True
+            # Get modified residues
+            if line.startswith("MODRES"):
+                modres.add(line[12:15].strip())
+            # Get covalent linkages between ligands
+            if line.startswith("LINK"):
+                covalent.append(self.get_linkage(line))
+        return d, modres, covalent, alt, corrected_pdb
+
+    def fix_pdbline(self, pdbline, lastnum):
+        """Fix a PDB line if information is missing."""
+        pdbqt_conversion = {
+            "HD": "H", "HS": "H", "NA": "N",
+            "NS": "N", "OA": "O", "OS": "O", "SA": "S"}
+        fixed = False
+        new_num = 0
+        forbidden_characters = "[^a-zA-Z0-9_]"
+        pdbline = pdbline.strip('\n')
+        # Some MD / Docking tools produce empty lines, leading to segfaults
+        if len(pdbline.strip()) == 0:
+            self.num_fixed_lines += 1
+            return None, lastnum
+        if len(pdbline) > 100:  # Should be 80 long
+            self.num_fixed_lines += 1
+            return None, lastnum
+        # TER Entries also have continuing numbering, consider them as well
+        if pdbline.startswith('TER'):
+            new_num = lastnum + 1
+        if pdbline.startswith('ATOM'):
+            new_num = lastnum + 1
+            current_num = int(pdbline[6:11])
+            resnum = pdbline[22:27].strip()
+            resname = pdbline[17:21].strip()
+            # Invalid residue number
+            try:
+                int(resnum)
+            except ValueError:
+                pdbline = pdbline[:22] + '   0 ' + pdbline[27:]
+                fixed = True
+            # Invalid characters in residue name
+            if re.match(forbidden_characters, resname.strip()):
+                pdbline = pdbline[:17] + 'UNK ' + pdbline[21:]
+                fixed = True
+            if lastnum + 1 != current_num:
+                pdbline = pdbline[:6] + (5 - len(str(new_num))) * ' ' + str(new_num) + ' ' + pdbline[12:]
+                fixed = True
+            # No chain assigned
+            if pdbline[21] == ' ':
+                pdbline = pdbline[:21] + 'A' + pdbline[22:]
+                fixed = True
+            if pdbline.endswith('H'):
+                self.num_fixed_lines += 1
+                return None, lastnum
+            # Sometimes, converted PDB structures contain PDBQT atom types. Fix that.
+            for pdbqttype in pdbqt_conversion:
+                if pdbline.strip().endswith(pdbqttype):
+                    pdbline = pdbline.strip()[:-2] + ' ' + pdbqt_conversion[pdbqttype] + '\n'
+                    self.num_fixed_lines += 1
+        if pdbline.startswith('HETATM'):
+            new_num = lastnum + 1
+            try:
+                current_num = int(pdbline[6:11])
+            except ValueError:
+                current_num = None
+                logger.debug(f'invalid HETATM entry: {pdbline}')
+            if lastnum + 1 != current_num:
+                pdbline = pdbline[:6] + (5 - len(str(new_num))) * ' ' + str(new_num) + ' ' + pdbline[12:]
+                fixed = True
+            # No chain assigned or number assigned as chain
+            if pdbline[21] == ' ':
+                pdbline = pdbline[:21] + 'Z' + pdbline[22:]
+                fixed = True
+            # No residue number assigned
+            if pdbline[23:26] == '   ':
+                pdbline = pdbline[:23] + '999' + pdbline[26:]
+                fixed = True
+            # Non-standard Ligand Names
+            ligname = pdbline[17:21].strip()
+            if len(ligname) > 3:
+                pdbline = pdbline[:17] + ligname[:3] + ' ' + pdbline[21:]
+                fixed = True
+            if re.match(forbidden_characters, ligname.strip()):
+                pdbline = pdbline[:17] + 'LIG ' + pdbline[21:]
+                fixed = True
+            if len(ligname.strip()) == 0:
+                pdbline = pdbline[:17] + 'LIG ' + pdbline[21:]
+                fixed = True
+            if pdbline.endswith('H'):
+                self.num_fixed_lines += 1
+                return None, lastnum
+            # Sometimes, converted PDB structures contain PDBQT atom types. Fix that.
+            for pdbqttype in pdbqt_conversion:
+                if pdbline.strip().endswith(pdbqttype):
+                    pdbline = pdbline.strip()[:-2] + ' ' + pdbqt_conversion[pdbqttype] + ' '
+                    self.num_fixed_lines += 1
+        self.num_fixed_lines += 1 if fixed else 0
+        return pdbline + '\n', max(new_num, lastnum)
+
+    def get_linkage(self, line):
+        """Get the linkage information from a LINK entry PDB line."""
+        conf1, id1, chain1, pos1 = line[16].strip(), line[17:20].strip(), line[21].strip(), int(line[22:26])
+        conf2, id2, chain2, pos2 = line[46].strip(), line[47:50].strip(), line[51].strip(), int(line[52:56])
+        return self.covlinkage(id1=id1, chain1=chain1, pos1=pos1, conf1=conf1,
+                               id2=id2, chain2=chain2, pos2=pos2, conf2=conf2)
+
+
+class LigandFinder:
+    def __init__(self, proteincomplex, altconf, modres, covalent, mapper):
+        self.lignames_all = None
+        self.lignames_kept = None
+        self.water = None
+        self.proteincomplex = proteincomplex
+        self.altconformations = altconf
+        self.modresidues = modres
+        self.covalent = covalent
+        self.mapper = mapper
+        self.ligands = self.getligs()
+        self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept))))
+
+    def getpeptides(self, chain):
+        """If peptide ligand chains are defined via the command line options,
+        try to extract the underlying ligand formed by all residues in the
+        given chain without water
+        """
+        all_from_chain = [o for o in pybel.ob.OBResidueIter(
+            self.proteincomplex.OBMol) if o.GetChain() == chain]  # All residues from chain
+        if len(all_from_chain) == 0:
+            return None
+        else:
+            non_water = [o for o in all_from_chain if not o.GetResidueProperty(9)]
+            ligand = self.extract_ligand(non_water)
+            return ligand
+
+    def getligs(self):
+        """Get all ligands from a PDB file and prepare them for analysis.
+        Returns all non-empty ligands.
+        """
+
+        if config.PEPTIDES == [] and config.INTRA is None:
+            # Extract small molecule ligands (default)
+            ligands = []
+
+            # Filter for ligands using lists
+            ligand_residues, self.lignames_all, self.water = self.filter_for_ligands()
+
+            all_res_dict = {(a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues}
+            self.lignames_kept = list(set([a.GetName() for a in ligand_residues]))
+
+            if not config.BREAKCOMPOSITE:
+                #  Update register of covalent links with those between DNA/RNA subunits
+                self.covalent += nucleotide_linkage(all_res_dict)
+                #  Find fragment linked by covalent bonds
+                res_kmers = self.identify_kmers(all_res_dict)
+            else:
+                res_kmers = [[a, ] for a in ligand_residues]
+            logger.debug(f'{len(res_kmers)} ligand kmer(s) detected for closer inspection')
+            for kmer in res_kmers:  # iterate over all ligands and extract molecules + information
+                if len(kmer) > config.MAX_COMPOSITE_LENGTH:
+                    logger.debug(
+                        f'ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)')
+                else:
+                    ligands.append(self.extract_ligand(kmer))
+
+        else:
+            # Extract peptides from given chains
+            self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)]
+            if config.PEPTIDES:
+                peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES]
+            elif config.INTRA is not None:
+                peptide_ligands = [self.getpeptides(config.INTRA), ]
+
+            ligands = [p for p in peptide_ligands if p is not None]
+            self.covalent, self.lignames_kept, self.lignames_all = [], [], set()
+
+        return [lig for lig in ligands if len(lig.mol.atoms) != 0]
+
+    def extract_ligand(self, kmer):
+        """Extract the ligand by copying atoms and bonds and assign all information necessary for later steps."""
+        data = namedtuple('ligand', 'mol hetid chain position water members longname type atomorder can_to_pdb')
+        members = [(res.GetName(), res.GetChain(), int32_to_negative(res.GetNum())) for res in kmer]
+        members = sort_members_by_importance(members)
+        rname, rchain, rnum = members[0]
+        logger.debug(f'finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements')
+        names = [x[0] for x in members]
+        longname = '-'.join([x[0] for x in members])
+
+        if config.PEPTIDES:
+            ligtype = 'PEPTIDE'
+        elif config.INTRA is not None:
+            ligtype = 'INTRA'
+        else:
+            # Classify a ligand by its HETID(s)
+            ligtype = classify_by_name(names)
+        logger.debug(f'ligand classified as {ligtype}')
+
+        hetatoms = set()
+        for obresidue in kmer:
+            hetatoms_res = set([(obatom.GetIdx(), obatom) for obatom in pybel.ob.OBResidueAtomIter(obresidue)
+                                if obatom.GetAtomicNum() != 1])
+
+            if not config.ALTLOC:
+                # Remove alternative conformations (standard -> True)
+                hetatoms_res = set([atm for atm in hetatoms_res
+                                    if not self.mapper.mapid(atm[0], mtype='protein',
+                                                             to='internal') in self.altconformations])
+            hetatoms.update(hetatoms_res)
+        logger.debug(f'hetero atoms determined (n={len(hetatoms)})')
+
+        hetatoms = dict(hetatoms)  # make it a dict with idx as key and OBAtom as value
+        lig = pybel.ob.OBMol()  # new ligand mol
+        neighbours = dict()
+        for obatom in hetatoms.values():  # iterate over atom objects
+            idx = obatom.GetIdx()
+            lig.AddAtom(obatom)
+            # ids of all neighbours of obatom
+            neighbours[idx] = set([neighbour_atom.GetIdx() for neighbour_atom
+                                   in pybel.ob.OBAtomAtomIter(obatom)]) & set(hetatoms.keys())
+        logger.debug(f'atom neighbours mapped')
+
+        ##############################################################
+        # map the old atom idx of OBMol to the new idx of the ligand #
+        ##############################################################
+
+        newidx = dict(zip(hetatoms.keys(), [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)]))
+        mapold = dict(zip(newidx.values(), newidx))
+        # copy the bonds
+        for obatom in hetatoms:
+            for neighbour_atom in neighbours[obatom]:
+                bond = hetatoms[obatom].GetBond(hetatoms[neighbour_atom])
+                lig.AddBond(newidx[obatom], newidx[neighbour_atom], bond.GetBondOrder())
+        lig = pybel.Molecule(lig)
+
+        # For kmers, the representative ids are chosen (first residue of kmer)
+        lig.data.update({'Name': rname, 'Chain': rchain, 'ResNr': rnum})
+
+        # Check if a negative residue number is represented as a 32 bit integer
+        if rnum > 10 ** 5:
+            rnum = int32_to_negative(rnum)
+
+        lig.title = ':'.join((rname, rchain, str(rnum)))
+        self.mapper.ligandmaps[lig.title] = mapold
+
+        logger.debug('renumerated molecule generated')
+
+        if not config.NOPDBCANMAP:
+            atomorder = canonicalize(lig)
+        else:
+            atomorder = None
+
+        can_to_pdb = {}
+        if atomorder is not None:
+            can_to_pdb = {atomorder[key - 1]: mapold[key] for key in mapold}
+
+        ligand = data(mol=lig, hetid=rname, chain=rchain, position=rnum, water=self.water,
+                      members=members, longname=longname, type=ligtype, atomorder=atomorder,
+                      can_to_pdb=can_to_pdb)
+        return ligand
+
+    def is_het_residue(self, obres):
+        """Given an OBResidue, determines if the residue is indeed a possible ligand
+        in the PDB file"""
+        if not obres.GetResidueProperty(0):
+            # If the residue is NOT amino (0)
+            # It can be amino_nucleo, coenzme, ion, nucleo, protein, purine, pyrimidine, solvent
+            # In these cases, it is a ligand candidate
+            return True
+        else:
+            # Here, the residue is classified as amino
+            # Amino acids can still be ligands, so we check for HETATM entries
+            # Only residues with at least one HETATM entry are processed as ligands
+            het_atoms = []
+            for atm in pybel.ob.OBResidueAtomIter(obres):
+                het_atoms.append(obres.IsHetAtom(atm))
+            if True in het_atoms:
+                return True
+        return False
+
+    def filter_for_ligands(self):
+        """Given an OpenBabel Molecule, get all ligands, their names, and water"""
+
+        candidates1 = [o for o in pybel.ob.OBResidueIter(
+            self.proteincomplex.OBMol) if not o.GetResidueProperty(9) and self.is_het_residue(o)]
+
+        if config.DNARECEPTOR:  # If DNA is the receptor, don't consider DNA as a ligand
+            candidates1 = [res for res in candidates1 if res.GetName() not in config.DNA + config.RNA]
+        all_lignames = set([a.GetName() for a in candidates1])
+
+        water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)]
+        # Filter out non-ligands
+        if not config.KEEPMOD:  # Keep modified residues as ligands
+            candidates2 = [a for a in candidates1 if is_lig(a.GetName()) and a.GetName() not in self.modresidues]
+        else:
+            candidates2 = [a for a in candidates1 if is_lig(a.GetName())]
+        logger.debug(f'{len(candidates2)} ligand(s) after first filtering step')
+
+        ############################################
+        # Filtering by counting and artifacts list #
+        ############################################
+        artifacts = []
+        unique_ligs = set(a.GetName() for a in candidates2)
+        for ulig in unique_ligs:
+            # Discard if appearing 15 times or more and is possible artifact
+            if ulig in config.biolip_list and [a.GetName() for a in candidates2].count(ulig) >= 15:
+                artifacts.append(ulig)
+
+        selected_ligands = [a for a in candidates2 if a.GetName() not in artifacts]
+
+        return selected_ligands, all_lignames, water
+
+    def identify_kmers(self, residues):
+        """Using the covalent linkage information, find out which fragments/subunits form a ligand."""
+
+        # Remove all those not considered by ligands and pairings including alternate conformations
+        ligdoubles = [[(link.id1, link.chain1, link.pos1),
+                       (link.id2, link.chain2, link.pos2)] for link in
+                      [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept
+                       and c.conf1 in ['A', ''] and c.conf2 in ['A', '']
+                       and (c.id1, c.chain1, c.pos1) in residues
+                       and (c.id2, c.chain2, c.pos2) in residues]]
+        kmers = cluster_doubles(ligdoubles)
+        if not kmers:  # No ligand kmers, just normal independent ligands
+            return [[residues[res]] for res in residues]
+
+        else:
+            # res_kmers contains clusters of covalently bound ligand residues (kmer ligands)
+            res_kmers = [[residues[res] for res in kmer] for kmer in kmers]
+
+            # In this case, add other ligands which are not part of a kmer
+            in_kmer = []
+            for res_kmer in res_kmers:
+                for res in res_kmer:
+                    in_kmer.append((res.GetName(), res.GetChain(), res.GetNum()))
+            for res in residues:
+                if res not in in_kmer:
+                    newres = [residues[res], ]
+                    res_kmers.append(newres)
+            return res_kmers
+
+
+class Mapper:
+    """Provides functions for mapping atom IDs in the correct way"""
+
+    def __init__(self):
+        self.proteinmap = None  # Map internal atom IDs of protein residues to original PDB Atom IDs
+        self.ligandmaps = {}  # Map IDs of new ligand molecules to internal IDs (or PDB IDs?)
+        self.original_structure = None
+
+    def mapid(self, idx, mtype, bsid=None, to='original'):  # Mapping to original IDs is standard for ligands
+        if mtype == 'reversed':  # Needed to map internal ID back to original protein ID
+            return self.reversed_proteinmap[idx]
+        if mtype == 'protein':
+            return self.proteinmap[idx]
+        elif mtype == 'ligand':
+            if to == 'internal':
+                return self.ligandmaps[bsid][idx]
+            elif to == 'original':
+                return self.proteinmap[self.ligandmaps[bsid][idx]]
+
+    def id_to_atom(self, idx):
+        """Returns the atom for a given original ligand ID.
+        To do this, the ID is mapped to the protein first and then the atom returned.
+        """
+        mapped_idx = self.mapid(idx, 'reversed')
+        return pybel.Atom(self.original_structure.GetAtom(mapped_idx))
+
+
+class Mol:
+    def __init__(self, altconf, mapper, mtype, bsid):
+        self.mtype = mtype
+        self.bsid = bsid
+        self.rings = None
+        self.hydroph_atoms = None
+        self.charged = None
+        self.hbond_don_atom_pairs = None
+        self.hbond_acc_atoms = None
+        self.altconf = altconf
+        self.Mapper = mapper
+
+    def hydrophobic_atoms(self, all_atoms):
+        """Select all carbon atoms which have only carbons and/or hydrogens as direct neighbors."""
+        atom_set = []
+        data = namedtuple('hydrophobic', 'atom orig_atom orig_idx')
+        atm = [a for a in all_atoms if a.atomicnum == 6 and set([natom.GetAtomicNum() for natom
+                                                                 in pybel.ob.OBAtomAtomIter(a.OBAtom)]).issubset(
+            {1, 6})]
+        for atom in atm:
+            orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid)
+            orig_atom = self.Mapper.id_to_atom(orig_idx)
+            if atom.idx not in self.altconf:
+                atom_set.append(data(atom=atom, orig_atom=orig_atom, orig_idx=orig_idx))
+        return atom_set
+
+    def find_hba(self, all_atoms):
+        """Find all possible hydrogen bond acceptors"""
+        data = namedtuple('hbondacceptor', 'a a_orig_atom a_orig_idx type')
+        a_set = []
+        for atom in filter(lambda at: at.OBAtom.IsHbondAcceptor(), all_atoms):
+            if atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf:  # Exclude halogen atoms
+                a_orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid)
+                a_orig_atom = self.Mapper.id_to_atom(a_orig_idx)
+                a_set.append(data(a=atom, a_orig_atom=a_orig_atom, a_orig_idx=a_orig_idx, type='regular'))
+        a_set = sorted(a_set, key=lambda x: x.a_orig_idx)
+        return a_set
+
+    def find_hbd(self, all_atoms, hydroph_atoms):
+        """Find all possible strong and weak hydrogen bonds donors (all hydrophobic C-H pairings)"""
+        donor_pairs = []
+        data = namedtuple('hbonddonor', 'd d_orig_atom d_orig_idx h type')
+        for donor in [a for a in all_atoms if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf]:
+            in_ring = False
+            if not in_ring:
+                for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(donor.OBAtom) if a.IsHbondDonorH()]:
+                    d_orig_idx = self.Mapper.mapid(donor.idx, mtype=self.mtype, bsid=self.bsid)
+                    d_orig_atom = self.Mapper.id_to_atom(d_orig_idx)
+                    donor_pairs.append(data(d=donor, d_orig_atom=d_orig_atom, d_orig_idx=d_orig_idx,
+                                            h=pybel.Atom(adj_atom), type='regular'))
+        for carbon in hydroph_atoms:
+            for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom) if a.GetAtomicNum() == 1]:
+                d_orig_idx = self.Mapper.mapid(carbon.atom.idx, mtype=self.mtype, bsid=self.bsid)
+                d_orig_atom = self.Mapper.id_to_atom(d_orig_idx)
+                donor_pairs.append(data(d=carbon, d_orig_atom=d_orig_atom,
+                                        d_orig_idx=d_orig_idx, h=pybel.Atom(adj_atom), type='weak'))
+        donor_pairs = sorted(donor_pairs, key=lambda x: (x.d_orig_idx, x.h.idx))
+        return donor_pairs
+
+    def find_rings(self, mol, all_atoms):
+        """Find rings and return only aromatic.
+        Rings have to be sufficiently planar OR be detected by OpenBabel as aromatic."""
+        data = namedtuple('aromatic_ring', 'atoms orig_atoms atoms_orig_idx normal obj center type')
+        rings = []
+        aromatic_amino = ['TYR', 'TRP', 'HIS', 'PHE']
+        ring_candidates = mol.OBMol.GetSSSR()
+        logger.debug(f'number of aromatic ring candidates: {len(ring_candidates)}')
+        # Check here first for ligand rings not being detected as aromatic by Babel and check for planarity
+        for ring in ring_candidates:
+            r_atoms = [a for a in all_atoms if ring.IsMember(a.OBAtom)]
+            r_atoms = sorted(r_atoms, key=lambda x: x.idx)
+            if 4 < len(r_atoms) <= 6:
+                res = list(set([whichrestype(a) for a in r_atoms]))
+                # re-sort ring atoms for only ligands, because HETATM numbering is not canonical in OpenBabel
+                if res[0] == 'UNL':
+                    ligand_orig_idx = [self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms]
+                    sort_order = np.argsort(np.array(ligand_orig_idx))
+                    r_atoms = [r_atoms[i] for i in sort_order]
+                if ring.IsAromatic() or res[0] in aromatic_amino or ring_is_planar(ring, r_atoms):
+                    # Causes segfault with OpenBabel 2.3.2, so deactivated
+                    # typ = ring.GetType() if not ring.GetType() == '' else 'unknown'
+                    # Alternative typing
+                    ring_type = '%s-membered' % len(r_atoms)
+                    ring_atms = [r_atoms[a].coords for a in [0, 2, 4]]  # Probe atoms for normals, assuming planarity
+                    ringv1 = vector(ring_atms[0], ring_atms[1])
+                    ringv2 = vector(ring_atms[2], ring_atms[0])
+                    atoms_orig_idx = [self.Mapper.mapid(r_atom.idx, mtype=self.mtype,
+                                                        bsid=self.bsid) for r_atom in r_atoms]
+                    orig_atoms = [self.Mapper.id_to_atom(idx) for idx in atoms_orig_idx]
+                    rings.append(data(atoms=r_atoms,
+                                      orig_atoms=orig_atoms,
+                                      atoms_orig_idx=atoms_orig_idx,
+                                      normal=normalize_vector(np.cross(ringv1, ringv2)),
+                                      obj=ring,
+                                      center=centroid([ra.coords for ra in r_atoms]),
+                                      type=ring_type))
+        return rings
+
+    def get_hydrophobic_atoms(self):
+        return self.hydroph_atoms
+
+    def get_hba(self):
+        return self.hbond_acc_atoms
+
+    def get_hbd(self):
+        return [don_pair for don_pair in self.hbond_don_atom_pairs if don_pair.type == 'regular']
+
+    def get_weak_hbd(self):
+        return [don_pair for don_pair in self.hbond_don_atom_pairs if don_pair.type == 'weak']
+
+    def get_pos_charged(self):
+        return [charge for charge in self.charged if charge.type == 'positive']
+
+    def get_neg_charged(self):
+        return [charge for charge in self.charged if charge.type == 'negative']
+
+
+class PLInteraction:
+    """Class to store a ligand, a protein and their interactions."""
+
+    def __init__(self, lig_obj, bs_obj, protcomplex):
+        """Detect all interactions when initializing"""
+        self.ligand = lig_obj
+        self.lig_members = lig_obj.members
+        self.pdbid = protcomplex.pymol_name
+        self.bindingsite = bs_obj
+        self.Mapper = protcomplex.Mapper
+        self.output_path = protcomplex.output_path
+        self.altconf = protcomplex.altconf
+        # #@todo Refactor code to combine different directionality
+
+        self.saltbridge_lneg = saltbridge(self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True)
+        self.saltbridge_pneg = saltbridge(self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False)
+
+        self.all_hbonds_ldon = hbonds(self.bindingsite.get_hba(),
+                                      self.ligand.get_hbd(), False, 'strong')
+        self.all_hbonds_pdon = hbonds(self.ligand.get_hba(),
+                                      self.bindingsite.get_hbd(), True, 'strong')
+
+        self.hbonds_ldon = self.refine_hbonds_ldon(self.all_hbonds_ldon, self.saltbridge_lneg,
+                                                   self.saltbridge_pneg)
+        self.hbonds_pdon = self.refine_hbonds_pdon(self.all_hbonds_pdon, self.saltbridge_lneg,
+                                                   self.saltbridge_pneg)
+
+        self.pistacking = pistacking(self.bindingsite.rings, self.ligand.rings)
+
+        self.all_pi_cation_laro = pication(self.ligand.rings, self.bindingsite.get_pos_charged(), True)
+        self.pication_paro = pication(self.bindingsite.rings, self.ligand.get_pos_charged(), False)
+
+        self.pication_laro = self.refine_pi_cation_laro(self.all_pi_cation_laro, self.pistacking)
+
+        self.all_hydrophobic_contacts = hydrophobic_interactions(self.bindingsite.get_hydrophobic_atoms(),
+                                                                 self.ligand.get_hydrophobic_atoms())
+        self.hydrophobic_contacts = self.refine_hydrophobic(self.all_hydrophobic_contacts, self.pistacking)
+        self.halogen_bonds = halogen(self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don)
+        self.water_bridges = water_bridges(self.bindingsite.get_hba(), self.ligand.get_hba(),
+                                           self.bindingsite.get_hbd(), self.ligand.get_hbd(),
+                                           self.ligand.water)
+
+        self.water_bridges = self.refine_water_bridges(self.water_bridges, self.hbonds_ldon, self.hbonds_pdon)
+
+        self.metal_complexes = metal_complexation(self.ligand.metals, self.ligand.metal_binding,
+                                                  self.bindingsite.metal_binding)
+
+        self.all_itypes = self.saltbridge_lneg + self.saltbridge_pneg + self.hbonds_pdon
+        self.all_itypes = self.all_itypes + self.hbonds_ldon + self.pistacking + self.pication_laro + self.pication_paro
+        self.all_itypes = self.all_itypes + self.hydrophobic_contacts + self.halogen_bonds + self.water_bridges
+        self.all_itypes = self.all_itypes + self.metal_complexes
+
+        self.no_interactions = all(len(i) == 0 for i in self.all_itypes)
+        self.unpaired_hba, self.unpaired_hbd, self.unpaired_hal = self.find_unpaired_ligand()
+        self.unpaired_hba_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid)
+                                      for atom in self.unpaired_hba]
+        self.unpaired_hbd_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid)
+                                      for atom in self.unpaired_hbd]
+        self.unpaired_hal_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid)
+                                      for atom in self.unpaired_hal]
+        self.num_unpaired_hba, self.num_unpaired_hbd = len(self.unpaired_hba), len(self.unpaired_hbd)
+        self.num_unpaired_hal = len(self.unpaired_hal)
+
+        # Exclude empty chains (coming from ligand as a target, from metal complexes)
+        self.interacting_chains = sorted(list(set([i.reschain for i in self.all_itypes
+                                                   if i.reschain not in [' ', None]])))
+
+        # Get all interacting residues, excluding ligand and water molecules
+        self.interacting_res = list(set([''.join([str(i.resnr), str(i.reschain)]) for i in self.all_itypes
+                                         if i.restype not in ['LIG', 'HOH']]))
+        if len(self.interacting_res) != 0:
+            logger.info(
+                f'ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}')
+            interactions_list = []
+            num_saltbridges = len(self.saltbridge_lneg + self.saltbridge_pneg)
+            num_hbonds = len(self.hbonds_ldon + self.hbonds_pdon)
+            num_pication = len(self.pication_laro + self.pication_paro)
+            num_pistack = len(self.pistacking)
+            num_halogen = len(self.halogen_bonds)
+            num_waterbridges = len(self.water_bridges)
+            if num_saltbridges != 0:
+                interactions_list.append('%i salt bridge(s)' % num_saltbridges)
+            if num_hbonds != 0:
+                interactions_list.append('%i hydrogen bond(s)' % num_hbonds)
+            if num_pication != 0:
+                interactions_list.append('%i pi-cation interaction(s)' % num_pication)
+            if num_pistack != 0:
+                interactions_list.append('%i pi-stacking(s)' % num_pistack)
+            if num_halogen != 0:
+                interactions_list.append('%i halogen bond(s)' % num_halogen)
+            if num_waterbridges != 0:
+                interactions_list.append('%i water bridge(s)' % num_waterbridges)
+            if not len(interactions_list) == 0:
+                logger.info(f'complex uses {interactions_list}')
+        else:
+            logger.info('no interactions for this ligand')
+
+    def find_unpaired_ligand(self):
+        """Identify unpaired functional in groups in ligands, involving H-Bond donors, acceptors, halogen bond donors.
+        """
+        unpaired_hba, unpaired_hbd, unpaired_hal = [], [], []
+        # Unpaired hydrogen bond acceptors/donors in ligand (not used for hydrogen bonds/water, salt bridges/mcomplex)
+        involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [hbond.d.idx for hbond in self.hbonds_ldon]
+        [[involved_atoms.append(atom.idx) for atom in sb.negative.atoms] for sb in self.saltbridge_lneg]
+        [[involved_atoms.append(atom.idx) for atom in sb.positive.atoms] for sb in self.saltbridge_pneg]
+        [involved_atoms.append(wb.a.idx) for wb in self.water_bridges if wb.protisdon]
+        [involved_atoms.append(wb.d.idx) for wb in self.water_bridges if not wb.protisdon]
+        [involved_atoms.append(mcomplex.target.atom.idx) for mcomplex in self.metal_complexes
+         if mcomplex.location == 'ligand']
+        for atom in [hba.a for hba in self.ligand.get_hba()]:
+            if atom.idx not in involved_atoms:
+                unpaired_hba.append(atom)
+        for atom in [hbd.d for hbd in self.ligand.get_hbd()]:
+            if atom.idx not in involved_atoms:
+                unpaired_hbd.append(atom)
+
+        # unpaired halogen bond donors in ligand (not used for the previous + halogen bonds)
+        [involved_atoms.append(atom.don.x.idx) for atom in self.halogen_bonds]
+        for atom in [haldon.x for haldon in self.ligand.halogenbond_don]:
+            if atom.idx not in involved_atoms:
+                unpaired_hal.append(atom)
+
+        return unpaired_hba, unpaired_hbd, unpaired_hal
+
+    def refine_hydrophobic(self, all_h, pistacks):
+        """Apply several rules to reduce the number of hydrophobic interactions."""
+        sel = {}
+        #  1. Rings interacting via stacking can't have additional hydrophobic contacts between each other.
+        for pistack, h in itertools.product(pistacks, all_h):
+            h1, h2 = h.bsatom.idx, h.ligatom.idx
+            brs, lrs = [p1.idx for p1 in pistack.proteinring.atoms], [p2.idx for p2 in pistack.ligandring.atoms]
+            if h1 in brs and h2 in lrs:
+                sel[(h1, h2)] = "EXCLUDE"
+        hydroph = [h for h in all_h if not (h.bsatom.idx, h.ligatom.idx) in sel]
+        sel2 = {}
+        #  2. If a ligand atom interacts with several binding site atoms in the same residue,
+        #  keep only the one with the closest distance
+        for h in hydroph:
+            if not (h.ligatom.idx, h.resnr) in sel2:
+                sel2[(h.ligatom.idx, h.resnr)] = h
+            else:
+                if sel2[(h.ligatom.idx, h.resnr)].distance > h.distance:
+                    sel2[(h.ligatom.idx, h.resnr)] = h
+        hydroph = [h for h in sel2.values()]
+        hydroph_final = []
+        bsclust = {}
+        #  3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist
+        for h in hydroph:
+            if h.bsatom.idx not in bsclust:
+                bsclust[h.bsatom.idx] = [h, ]
+            else:
+                bsclust[h.bsatom.idx].append(h)
+
+        idx_to_h = {}
+        for bs in [a for a in bsclust if len(bsclust[a]) == 1]:
+            hydroph_final.append(bsclust[bs][0])
+
+        # A list of tuples with the idx of an atom and one of its neighbours is created
+        for bs in [a for a in bsclust if not len(bsclust[a]) == 1]:
+            tuples = []
+            all_idx = [i.ligatom.idx for i in bsclust[bs]]
+            for b in bsclust[bs]:
+                idx = b.ligatom.idx
+                neigh = [na for na in pybel.ob.OBAtomAtomIter(b.ligatom.OBAtom)]
+                for n in neigh:
+                    n_idx = n.GetIdx()
+                    if n_idx in all_idx:
+                        if n_idx < idx:
+                            tuples.append((n_idx, idx))
+                        else:
+                            tuples.append((idx, n_idx))
+                        idx_to_h[idx] = b
+
+            tuples = list(set(tuples))
+            tuples = sorted(tuples, key=itemgetter(1))
+            clusters = cluster_doubles(tuples)  # Cluster connected atoms (i.e. find hydrophobic patches)
+
+            for cluster in clusters:
+                min_dist = float('inf')
+                min_h = None
+                for atm_idx in cluster:
+                    h = idx_to_h[atm_idx]
+                    if h.distance < min_dist:
+                        min_dist = h.distance
+                        min_h = h
+                hydroph_final.append(min_h)
+        before, reduced = len(all_h), len(hydroph_final)
+        if not before == 0 and not before == reduced:
+            logger.info(f'reduced number of hydrophobic contacts from {before} to {reduced}')
+        return hydroph_final
+
+    def refine_hbonds_ldon(self, all_hbonds, salt_lneg, salt_pneg):
+        """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds."""
+        i_set = {}
+        for hbond in all_hbonds:
+            i_set[hbond] = False
+            for salt in salt_pneg:
+                protidx, ligidx = [at.idx for at in salt.negative.atoms], [at.idx for at in salt.positive.atoms]
+                if hbond.d.idx in ligidx and hbond.a.idx in protidx:
+                    i_set[hbond] = True
+            for salt in salt_lneg:
+                protidx, ligidx = [at.idx for at in salt.positive.atoms], [at.idx for at in salt.negative.atoms]
+                if hbond.d.idx in ligidx and hbond.a.idx in protidx:
+                    i_set[hbond] = True
+
+        # Allow only one hydrogen bond per donor, select interaction with larger donor angle
+        second_set = {}
+        hbls = [k for k in i_set.keys() if not i_set[k]]
+        for hbl in hbls:
+            if hbl.d.idx not in second_set:
+                second_set[hbl.d.idx] = (hbl.angle, hbl)
+            else:
+                if second_set[hbl.d.idx][0] < hbl.angle:
+                    second_set[hbl.d.idx] = (hbl.angle, hbl)
+        return [hb[1] for hb in second_set.values()]
+
+    def refine_hbonds_pdon(self, all_hbonds, salt_lneg, salt_pneg):
+        """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds with
+        atoms of the same group.
+        """
+        i_set = {}
+        for hbond in all_hbonds:
+            i_set[hbond] = False
+            for salt in salt_lneg:
+                protidx, ligidx = [at.idx for at in salt.positive.atoms], [at.idx for at in salt.negative.atoms]
+                if hbond.a.idx in ligidx and hbond.d.idx in protidx:
+                    i_set[hbond] = True
+            for salt in salt_pneg:
+                protidx, ligidx = [at.idx for at in salt.negative.atoms], [at.idx for at in salt.positive.atoms]
+                if hbond.a.idx in ligidx and hbond.d.idx in protidx:
+                    i_set[hbond] = True
+
+        # Allow only one hydrogen bond per donor, select interaction with larger donor angle
+        second_set = {}
+        hbps = [k for k in i_set.keys() if not i_set[k]]
+        for hbp in hbps:
+            if hbp.d.idx not in second_set:
+                second_set[hbp.d.idx] = (hbp.angle, hbp)
+            else:
+                if second_set[hbp.d.idx][0] < hbp.angle:
+                    second_set[hbp.d.idx] = (hbp.angle, hbp)
+        return [hb[1] for hb in second_set.values()]
+
+    def refine_pi_cation_laro(self, all_picat, stacks):
+        """Just important for constellations with histidine involved. If the histidine ring is positioned in stacking
+        position to an aromatic ring in the ligand, there is in most cases stacking and pi-cation interaction reported
+        as histidine also carries a positive charge in the ring. For such cases, only report stacking.
+        """
+        i_set = []
+        for picat in all_picat:
+            exclude = False
+            for stack in stacks:
+                if whichrestype(stack.proteinring.atoms[0]) == 'HIS' and picat.ring.obj == stack.ligandring.obj:
+                    exclude = True
+            if not exclude:
+                i_set.append(picat)
+        return i_set
+
+    def refine_water_bridges(self, wbridges, hbonds_ldon, hbonds_pdon):
+        """A donor atom already forming a hydrogen bond is not allowed to form a water bridge. Each water molecule
+        can only be donor for two water bridges, selecting the constellation with the omega angle closest to 110 deg."""
+        donor_atoms_hbonds = [hb.d.idx for hb in hbonds_ldon + hbonds_pdon]
+        wb_dict = {}
+        wb_dict2 = {}
+        omega = 110.0
+
+        # Just one hydrogen bond per donor atom
+        for wbridge in [wb for wb in wbridges if wb.d.idx not in donor_atoms_hbonds]:
+            if (wbridge.water.idx, wbridge.a.idx) not in wb_dict:
+                wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge
+            else:
+                if abs(omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle) < abs(omega - wbridge.w_angle):
+                    wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge
+        for wb_tuple in wb_dict:
+            water, acceptor = wb_tuple
+            if water not in wb_dict2:
+                wb_dict2[water] = [(abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]), ]
+            elif len(wb_dict2[water]) == 1:
+                wb_dict2[water].append((abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]))
+                wb_dict2[water] = sorted(wb_dict2[water], key=lambda x: x[0])
+            else:
+                if wb_dict2[water][1][0] < abs(omega - wb_dict[wb_tuple].w_angle):
+                    wb_dict2[water] = [wb_dict2[water][0], (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple])]
+
+        filtered_wb = []
+        for fwbridges in wb_dict2.values():
+            [filtered_wb.append(fwb[1]) for fwb in fwbridges]
+        return filtered_wb
+
+
+class BindingSite(Mol):
+    def __init__(self, atoms, protcomplex, cclass, altconf, min_dist, mapper):
+        """Find all relevant parts which could take part in interactions"""
+        Mol.__init__(self, altconf, mapper, mtype='protein', bsid=None)
+        self.complex = cclass
+        self.full_mol = protcomplex
+        self.all_atoms = atoms
+        self.min_dist = min_dist  # Minimum distance of bs res to ligand
+        self.bs_res = list(set([''.join([str(whichresnumber(a)), whichchain(a)]) for a in self.all_atoms]))  # e.g. 47A
+        self.rings = self.find_rings(self.full_mol, self.all_atoms)
+        self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms)
+        self.hbond_acc_atoms = self.find_hba(self.all_atoms)
+        self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms)
+        self.charged = self.find_charged(self.full_mol)
+        self.halogenbond_acc = self.find_hal(self.all_atoms)
+        self.metal_binding = self.find_metal_binding(self.full_mol)
+
+    def find_hal(self, atoms):
+        """Look for halogen bond acceptors (Y-{O|P|N|S}, with Y=C,P,S)"""
+        data = namedtuple('hal_acceptor', 'o o_orig_idx y y_orig_idx')
+        a_set = []
+        # All oxygens, nitrogen, sulfurs with neighboring carbon, phosphor, nitrogen or sulfur
+        for a in [at for at in atoms if at.atomicnum in [8, 7, 16]]:
+            n_atoms = [na for na in pybel.ob.OBAtomAtomIter(a.OBAtom) if na.GetAtomicNum() in [6, 7, 15, 16]]
+            if len(n_atoms) == 1:  # Proximal atom
+                o_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
+                y_orig_idx = self.Mapper.mapid(n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid)
+                a_set.append(data(o=a, o_orig_idx=o_orig_idx, y=pybel.Atom(n_atoms[0]), y_orig_idx=y_orig_idx))
+        return a_set
+
+    def find_charged(self, mol):
+        """Looks for positive charges in arginine, histidine or lysine, for negative in aspartic and glutamic acid."""
+        data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain')
+        a_set = []
+        # Iterate through all residue, exclude those in chains defined as peptides
+        for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]:
+            if config.INTRA is not None:
+                if res.GetChain() != config.INTRA:
+                    continue
+            a_contributing = []
+            a_contributing_orig_idx = []
+            if res.GetName() in ('ARG', 'HIS', 'LYS'):  # Arginine, Histidine or Lysine have charged sidechains
+                for a in pybel.ob.OBResidueAtomIter(res):
+                    if a.GetType().startswith('N') and res.GetAtomProperty(a, 8) \
+                            and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf:
+                        a_contributing.append(pybel.Atom(a))
+                        a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein'))
+                if not len(a_contributing) == 0:
+                    a_set.append(data(atoms=a_contributing,
+                                      atoms_orig_idx=a_contributing_orig_idx,
+                                      type='positive',
+                                      center=centroid([ac.coords for ac in a_contributing]),
+                                      restype=res.GetName(),
+                                      resnr=res.GetNum(),
+                                      reschain=res.GetChain()))
+            if res.GetName() in ('GLU', 'ASP'):  # Aspartic or Glutamic Acid
+                for a in pybel.ob.OBResidueAtomIter(res):
+                    if a.GetType().startswith('O') and res.GetAtomProperty(a, 8) \
+                            and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf:
+                        a_contributing.append(pybel.Atom(a))
+                        a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein'))
+                if not len(a_contributing) == 0:
+                    a_set.append(data(atoms=a_contributing,
+                                      atoms_orig_idx=a_contributing_orig_idx,
+                                      type='negative',
+                                      center=centroid([ac.coords for ac in a_contributing]),
+                                      restype=res.GetName(),
+                                      resnr=res.GetNum(),
+                                      reschain=res.GetChain()))
+        return a_set
+
+    def find_metal_binding(self, mol):
+        """Looks for atoms that could possibly be involved in chelating a metal ion.
+        This can be any main chain oxygen atom or oxygen, nitrogen and sulfur from specific amino acids"""
+        data = namedtuple('metal_binding', 'atom atom_orig_idx type restype resnr reschain location')
+        a_set = []
+        for res in pybel.ob.OBResidueIter(mol.OBMol):
+            restype, reschain, resnr = res.GetName().upper(), res.GetChain(), res.GetNum()
+            if restype in ['ASP', 'GLU', 'SER', 'THR', 'TYR']:  # Look for oxygens here
+                for a in pybel.ob.OBResidueAtomIter(res):
+                    if a.GetType().startswith('O') and res.GetAtomProperty(a, 8) \
+                            and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf:
+                        atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid)
+                        a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='O', restype=restype,
+                                          resnr=resnr, reschain=reschain,
+                                          location='protein.sidechain'))
+            if restype == 'HIS':  # Look for nitrogen here
+                for a in pybel.ob.OBResidueAtomIter(res):
+                    if a.GetType().startswith('N') and res.GetAtomProperty(a, 8) \
+                            and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf:
+                        atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid)
+                        a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='N', restype=restype,
+                                          resnr=resnr, reschain=reschain,
+                                          location='protein.sidechain'))
+            if restype == 'CYS':  # Look for sulfur here
+                for a in pybel.ob.OBResidueAtomIter(res):
+                    if a.GetType().startswith('S') and res.GetAtomProperty(a, 8) \
+                            and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf:
+                        atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid)
+                        a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='S', restype=restype,
+                                          resnr=resnr, reschain=reschain,
+                                          location='protein.sidechain'))
+            for a in pybel.ob.OBResidueAtomIter(res):  # All main chain oxygens
+                if a.GetType().startswith('O') and res.GetAtomProperty(a, 2) \
+                        and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf and restype != 'HOH':
+                    atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid)
+                    a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='O', restype=res.GetName(),
+                                      resnr=res.GetNum(), reschain=res.GetChain(),
+                                      location='protein.mainchain'))
+        return a_set
+
+
+class Ligand(Mol):
+    def __init__(self, cclass, ligand):
+        altconf = cclass.altconf
+        self.hetid, self.chain, self.position = ligand.hetid, ligand.chain, ligand.position
+        self.bsid = ':'.join([self.hetid, self.chain, str(self.position)])
+        Mol.__init__(self, altconf, cclass.Mapper, mtype='ligand', bsid=self.bsid)
+        self.members = ligand.members
+        self.longname = ligand.longname
+        self.type = ligand.type
+        self.complex = cclass
+        self.molecule = ligand.mol  # Pybel Molecule
+        self.smiles = self.molecule.write(format='can')  # SMILES String
+        self.inchikey = self.molecule.write(format='inchikey')
+        self.can_to_pdb = ligand.can_to_pdb
+        if not len(self.smiles) == 0:
+            self.smiles = self.smiles.split()[0]
+        else:
+            logger.warning(f'could not write SMILES for ligand {ligand}')
+            self.smiles = ''
+        self.heavy_atoms = self.molecule.OBMol.NumHvyAtoms()  # Heavy atoms count
+        self.all_atoms = self.molecule.atoms
+        self.atmdict = {l.idx: l for l in self.all_atoms}
+        self.rings = self.find_rings(self.molecule, self.all_atoms)
+        self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms)
+        self.hbond_acc_atoms = self.find_hba(self.all_atoms)
+        self.num_rings = len(self.rings)
+        if self.num_rings != 0:
+            logger.info(f'contains {self.num_rings} aromatic ring(s)')
+        descvalues = self.molecule.calcdesc()
+        self.molweight, self.logp = float(descvalues['MW']), float(descvalues['logP'])
+        self.num_rot_bonds = int(self.molecule.OBMol.NumRotors())
+        self.atomorder = ligand.atomorder
+
+        ##########################################################
+        # Special Case for hydrogen bond acceptor identification #
+        ##########################################################
+
+        self.inverse_mapping = {v: k for k, v in self.Mapper.ligandmaps[self.bsid].items()}
+        self.pdb_to_idx_mapping = {v: k for k, v in self.Mapper.proteinmap.items()}
+        self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms)
+
+        ######
+        donor_pairs = []
+        data = namedtuple('hbonddonor', 'd d_orig_atom d_orig_idx h type')
+        for donor in self.all_atoms:
+            pdbidx = self.Mapper.mapid(donor.idx, mtype='ligand', bsid=self.bsid, to='original')
+            d = cclass.atoms[self.pdb_to_idx_mapping[pdbidx]]
+            if d.OBAtom.IsHbondDonor():
+                for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH()]:
+                    d_orig_atom = self.Mapper.id_to_atom(pdbidx)
+                    donor_pairs.append(data(d=donor, d_orig_atom=d_orig_atom, d_orig_idx=pdbidx,
+                                            h=pybel.Atom(adj_atom), type='regular'))
+        self.hbond_don_atom_pairs = donor_pairs
+        #######
+
+        self.charged = self.find_charged(self.all_atoms)
+        self.centroid = centroid([a.coords for a in self.all_atoms])
+        self.max_dist_to_center = max((euclidean3d(self.centroid, a.coords) for a in self.all_atoms))
+        self.water = []
+        data = namedtuple('water', 'oxy oxy_orig_idx')
+        for hoh in ligand.water:
+            oxy = None
+            for at in pybel.ob.OBResidueAtomIter(hoh):
+                if at.GetAtomicNum() == 8 and at.GetIdx() not in self.altconf:
+                    oxy = pybel.Atom(at)
+            # There are some cases where there is no oxygen in a water residue, ignore those
+            if not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)]) == {1} and oxy is not None:
+                if euclidean3d(self.centroid, oxy.coords) < self.max_dist_to_center + config.BS_DIST:
+                    oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype='protein')
+                    self.water.append(data(oxy=oxy, oxy_orig_idx=oxy_orig_idx))
+        self.halogenbond_don = self.find_hal(self.all_atoms)
+        self.metal_binding = self.find_metal_binding(self.all_atoms, self.water)
+        self.metals = []
+        data = namedtuple('metal', 'm orig_m m_orig_idx')
+        for a in [a for a in self.all_atoms if a.type.upper() in config.METAL_IONS]:
+            m_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
+            orig_m = self.Mapper.id_to_atom(m_orig_idx)
+            self.metals.append(data(m=a, m_orig_idx=m_orig_idx, orig_m=orig_m))
+        self.num_hba, self.num_hbd = len(self.hbond_acc_atoms), len(self.hbond_don_atom_pairs)
+        self.num_hal = len(self.halogenbond_don)
+
+    def get_canonical_num(self, atomnum):
+        """Converts internal atom ID into canonical atom ID. Agrees with Canonical SMILES in XML."""
+        return self.atomorder[atomnum - 1]
+
+    def is_functional_group(self, atom, group):
+        """Given a pybel atom, look up if it belongs to a function group"""
+        n_atoms = [a_neighbor.GetAtomicNum() for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom)]
+
+        if group in ['quartamine', 'tertamine'] and atom.atomicnum == 7:  # Nitrogen
+            # It's a nitrogen, so could be a protonated amine or quaternary ammonium
+            if '1' not in n_atoms and len(n_atoms) == 4:
+                return True if group == 'quartamine' else False  # It's a quat. ammonium (N with 4 residues != H)
+            elif atom.OBAtom.GetHyb() == 3 and len(n_atoms) >= 3:
+                return True if group == 'tertamine' else False  # It's sp3-hybridized, so could pick up an hydrogen
+            else:
+                return False
+
+        if group in ['sulfonium', 'sulfonicacid', 'sulfate'] and atom.atomicnum == 16:  # Sulfur
+            if '1' not in n_atoms and len(n_atoms) == 3:  # It's a sulfonium (S with 3 residues != H)
+                return True if group == 'sulfonium' else False
+            elif n_atoms.count(8) == 3:  # It's a sulfonate or sulfonic acid
+                return True if group == 'sulfonicacid' else False
+            elif n_atoms.count(8) == 4:  # It's a sulfate
+                return True if group == 'sulfate' else False
+
+        if group == 'phosphate' and atom.atomicnum == 15:  # Phosphor
+            if set(n_atoms) == {8}:  # It's a phosphate
+                return True
+
+        if group in ['carboxylate', 'guanidine'] and atom.atomicnum == 6:  # It's a carbon atom
+            if n_atoms.count(8) == 2 and n_atoms.count(6) == 1:  # It's a carboxylate group
+                return True if group == 'carboxylate' else False
+            elif n_atoms.count(7) == 3 and len(n_atoms) == 3:  # It's a guanidine group
+                nitro_partners = []
+                for nitro in pybel.ob.OBAtomAtomIter(atom.OBAtom):
+                    nitro_partners.append(len([b_neighbor for b_neighbor in pybel.ob.OBAtomAtomIter(nitro)]))
+                if min(nitro_partners) == 1:  # One nitrogen is only connected to the carbon, can pick up a H
+                    return True if group == 'guanidine' else False
+
+        if group == 'halocarbon' and atom.atomicnum in [9, 17, 35, 53]:  # Halogen atoms
+            n_atoms = [na for na in pybel.ob.OBAtomAtomIter(atom.OBAtom) if na.GetAtomicNum() == 6]
+            if len(n_atoms) == 1:  # Halocarbon
+                return True
+        else:
+            return False
+
+    def find_hal(self, atoms):
+        """Look for halogen bond donors (X-C, with X=F, Cl, Br, I)"""
+        data = namedtuple('hal_donor', 'x orig_x x_orig_idx c c_orig_idx')
+        a_set = []
+        for a in atoms:
+            if self.is_functional_group(a, 'halocarbon'):
+                n_atoms = [na for na in pybel.ob.OBAtomAtomIter(a.OBAtom) if na.GetAtomicNum() == 6]
+                x_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
+                orig_x = self.Mapper.id_to_atom(x_orig_idx)
+                c_orig_idx = [self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid) for na in n_atoms]
+                a_set.append(data(x=a, orig_x=orig_x, x_orig_idx=x_orig_idx,
+                                  c=pybel.Atom(n_atoms[0]), c_orig_idx=c_orig_idx))
+        if len(a_set) != 0:
+            logger.info(f'ligand contains {len(a_set)} halogen atom(s)')
+        return a_set
+
+    def find_charged(self, all_atoms):
+        """Identify all positively charged groups in a ligand. This search is not exhaustive, as the cases can be quite
+        diverse. The typical cases seem to be protonated amines, quaternary ammoinium and sulfonium
+        as mentioned in 'Cation-pi interactions in ligand recognition and catalysis' (Zacharias et al., 2002)).
+        Identify negatively charged groups in the ligand.
+        """
+        data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup')
+        a_set = []
+        for a in all_atoms:
+            a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid)
+            a_orig = self.Mapper.id_to_atom(a_orig_idx)
+            if self.is_functional_group(a, 'quartamine'):
+                a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive',
+                                  center=list(a.coords), fgroup='quartamine'))
+            elif self.is_functional_group(a, 'tertamine'):
+                a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive',
+                                  center=list(a.coords),
+                                  fgroup='tertamine'))
+            if self.is_functional_group(a, 'sulfonium'):
+                a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive',
+                                  center=list(a.coords),
+                                  fgroup='sulfonium'))
+            if self.is_functional_group(a, 'phosphate'):
+                a_contributing = [a, ]
+                a_contributing_orig_idx = [a_orig_idx, ]
+                [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)]
+                [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid))
+                 for neighbor in a_contributing]
+                orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx]
+                a_set.append(
+                    data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx,
+                         type='negative',
+                         center=a.coords, fgroup='phosphate'))
+            if self.is_functional_group(a, 'sulfonicacid'):
+                a_contributing = [a, ]
+                a_contributing_orig_idx = [a_orig_idx, ]
+                [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) if
+                 neighbor.GetAtomicNum() == 8]
+                [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid))
+                 for neighbor in a_contributing]
+                orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx]
+                a_set.append(
+                    data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx,
+                         type='negative',
+                         center=a.coords, fgroup='sulfonicacid'))
+            elif self.is_functional_group(a, 'sulfate'):
+                a_contributing = [a, ]
+                a_contributing_orig_idx = [a_orig_idx, ]
+                [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid))
+                 for neighbor in a_contributing]
+                [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)]
+                orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx]
+                a_set.append(
+                    data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx,
+                         type='negative',
+                         center=a.coords, fgroup='sulfate'))
+            if self.is_functional_group(a, 'carboxylate'):
+                a_contributing = [pybel.Atom(neighbor) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
+                                  if neighbor.GetAtomicNum() == 8]
+                a_contributing_orig_idx = [self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)
+                                           for neighbor in a_contributing]
+                orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx]
+                a_set.append(
+                    data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx,
+                         type='negative',
+                         center=centroid([a.coords for a in a_contributing]), fgroup='carboxylate'))
+            elif self.is_functional_group(a, 'guanidine'):
+                a_contributing = [pybel.Atom(neighbor) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)
+                                  if neighbor.GetAtomicNum() == 7]
+                a_contributing_orig_idx = [self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)
+                                           for neighbor in a_contributing]
+                orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx]
+                a_set.append(
+                    data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx,
+                         type='positive',
+                         center=a.coords, fgroup='guanidine'))
+        return a_set
+
+    def find_metal_binding(self, lig_atoms, water_oxygens):
+        """Looks for atoms that could possibly be involved in binding a metal ion.
+        This can be any water oxygen, as well as oxygen from carboxylate, phophoryl, phenolate, alcohol;
+        nitrogen from imidazole; sulfur from thiolate.
+        """
+        a_set = []
+        data = namedtuple('metal_binding', 'atom orig_atom atom_orig_idx type fgroup restype resnr reschain location')
+        for oxygen in water_oxygens:
+            a_set.append(data(atom=oxygen.oxy, atom_orig_idx=oxygen.oxy_orig_idx, type='O', fgroup='water',
+                              restype=whichrestype(oxygen.oxy), resnr=whichresnumber(oxygen.oxy),
+                              reschain=whichchain(oxygen.oxy), location='water',
+                              orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx)))
+        # #@todo Refactor code
+        for a in lig_atoms:
+            a_orig_idx = self.Mapper.mapid(a.idx, mtype='ligand', bsid=self.bsid)
+            n_atoms = pybel.ob.OBAtomAtomIter(a.OBAtom)  # Neighboring atoms
+            # All atomic numbers of neighboring atoms
+            n_atoms_atomicnum = [n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom)]
+            if a.atomicnum == 8:  # Oxygen
+                if n_atoms_atomicnum.count('1') == 1 and len(n_atoms_atomicnum) == 2:  # Oxygen in alcohol (R-[O]-H)
+                    a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='O', fgroup='alcohol',
+                                      restype=self.hetid, resnr=self.position, reschain=self.chain,
+                                      location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+                if True in [n.IsAromatic() for n in n_atoms] and not a.OBAtom.IsAromatic():  # Phenolate oxygen
+                    a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='O', fgroup='phenolate',
+                                      restype=self.hetid, resnr=self.position, reschain=self.chain,
+                                      location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+            if a.atomicnum == 6:  # It's a carbon atom
+                if n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1:  # It's a carboxylate group
+                    for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]:
+                        neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid)
+                        a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O',
+                                          fgroup='carboxylate',
+                                          restype=self.hetid,
+                                          resnr=self.position, reschain=self.chain,
+                                          location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+            if a.atomicnum == 15:  # It's a phosphor atom
+                if n_atoms_atomicnum.count(8) >= 3:  # It's a phosphoryl
+                    for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]:
+                        neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid)
+                        a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O',
+                                          fgroup='phosphoryl',
+                                          restype=self.hetid,
+                                          resnr=self.position, reschain=self.chain,
+                                          location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+                if n_atoms_atomicnum.count(8) == 2:  # It's another phosphor-containing group #@todo (correct name?)
+                    for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]:
+                        neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid)
+                        a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O',
+                                          fgroup='phosphor.other', restype=self.hetid,
+                                          resnr=self.position,
+                                          reschain=self.chain, location='ligand',
+                                          orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+            if a.atomicnum == 7:  # It's a nitrogen atom
+                if n_atoms_atomicnum.count(6) == 2:  # It's imidazole/pyrrole or similar
+                    a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='N', fgroup='imidazole/pyrrole',
+                                      restype=self.hetid, resnr=self.position, reschain=self.chain,
+                                      location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+            if a.atomicnum == 16:  # It's a sulfur atom
+                if True in [n.IsAromatic() for n in n_atoms] and not a.OBAtom.IsAromatic():  # Thiolate
+                    a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='S', fgroup='thiolate',
+                                      restype=self.hetid, resnr=self.position, reschain=self.chain,
+                                      location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+                if set(n_atoms_atomicnum) == {26}:  # Sulfur in Iron sulfur cluster
+                    a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='S', fgroup='iron-sulfur.cluster',
+                                      restype=self.hetid, resnr=self.position, reschain=self.chain,
+                                      location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx)))
+
+        return a_set
+
+
+class PDBComplex:
+    """Contains a collection of objects associated with a PDB complex, i.e. one or several ligands and their binding
+    sites as well as information about the pliprofiler between them. Provides functions to load and prepare input files
+    such as PDB files.
+    """
+
+    def __init__(self):
+        self.interaction_sets = {}  # Dictionary with site identifiers as keys and object as value
+        self.protcomplex = None
+        self.filetype = None
+        self.atoms = {}  # Dictionary of Pybel atoms, accessible by their idx
+        self.sourcefiles = {}
+        self.information = {}
+        self.corrected_pdb = ''
+        self._output_path = tempfile.gettempdir()
+        self.pymol_name = None
+        self.modres = set()
+        self.resis = []
+        self.altconf = []  # Atom idx of atoms with alternate conformations
+        self.covalent = []  # Covalent linkages between ligands and protein residues/other ligands
+        self.excluded = []  # Excluded ligands
+        self.Mapper = Mapper()
+        self.ligands = []
+
+    def __str__(self):
+        formatted_lig_names = [":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands]
+        return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join(
+            [lig for lig in formatted_lig_names])
+
+    def load_pdb(self, pdbpath, as_string=False):
+        """Loads a pdb file with protein AND ligand(s), separates and prepares them.
+        If specified 'as_string', the input is a PDB string instead of a path."""
+        if as_string:
+            self.sourcefiles['pdbcomplex.original'] = None
+            self.sourcefiles['pdbcomplex'] = None
+            self.sourcefiles['pdbstring'] = pdbpath
+        else:
+            self.sourcefiles['pdbcomplex.original'] = pdbpath
+            self.sourcefiles['pdbcomplex'] = pdbpath
+        self.information['pdbfixes'] = False
+        pdbparser = PDBParser(pdbpath, as_string=as_string)  # Parse PDB file to find errors and get additional data
+        # #@todo Refactor and rename here
+        self.Mapper.proteinmap = pdbparser.proteinmap
+        self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()}
+        self.modres = pdbparser.modres
+        self.covalent = pdbparser.covalent
+        self.altconf = pdbparser.altconformations
+        self.corrected_pdb = pdbparser.corrected_pdb
+
+        if not config.PLUGIN_MODE:
+            if pdbparser.num_fixed_lines > 0:
+                logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file')
+                # Save modified PDB file
+                if not as_string:
+                    basename = os.path.basename(pdbpath).split('.')[0]
+                else:
+                    basename = "from_stdin"
+                pdbpath_fixed = tmpfile(prefix='plipfixed.' + basename + '_', direc=self.output_path)
+                create_folder_if_not_exists(self.output_path)
+                self.sourcefiles['pdbcomplex'] = pdbpath_fixed
+                self.corrected_pdb = re.sub(r'[^\x00-\x7F]+', ' ', self.corrected_pdb)  # Strip non-unicode chars
+                if not config.NOFIXFILE:  # Only write to file if this option is not activated
+                    with open(pdbpath_fixed, 'w') as f:
+                        f.write(self.corrected_pdb)
+                self.information['pdbfixes'] = True
+
+        if not as_string:
+            self.sourcefiles['filename'] = os.path.basename(self.sourcefiles['pdbcomplex'])
+        self.protcomplex, self.filetype = read_pdb(self.corrected_pdb, as_string=True)
+
+        # Update the model in the Mapper class instance
+        self.Mapper.original_structure = self.protcomplex.OBMol
+        logger.info('PDB structure successfully read')
+
+        # Determine (temporary) PyMOL Name from Filename
+        self.pymol_name = pdbpath.split('/')[-1].split('.')[0] + '-Protein'
+        # Replace characters causing problems in PyMOL
+        self.pymol_name = self.pymol_name.replace(' ', '').replace('(', '').replace(')', '').replace('-', '_')
+        # But if possible, name it after PDBID in Header
+        if 'HEADER' in self.protcomplex.data:  # If the PDB file has a proper header
+            potential_name = self.protcomplex.data['HEADER'][56:60].lower()
+            if extract_pdbid(potential_name) != 'UnknownProtein':
+                self.pymol_name = potential_name
+        logger.debug(f'PyMOL name set as: {self.pymol_name}')
+
+        # Extract and prepare ligands
+        ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper)
+        self.ligands = ligandfinder.ligands
+        self.excluded = ligandfinder.excluded
+
+        # decide whether to add polar hydrogens
+        if not config.NOHYDRO:
+            if not as_string:
+                basename = os.path.basename(pdbpath).split('.')[0]
+            else:
+                basename = "from_stdin"
+            self.protcomplex.OBMol.AddPolarHydrogens()
+            output_path = os.path.join(self._output_path, f'{basename}_protonated.pdb')
+            self.protcomplex.write('pdb', output_path, overwrite=True)
+            logger.info(f'protonated structure written to {output_path}')
+        else:
+            logger.warning('no polar hydrogens will be assigned (make sure your structure contains hydrogens)')
+
+        for atm in self.protcomplex:
+            self.atoms[atm.idx] = atm
+
+        if len(self.excluded) != 0:
+            logger.info(f'excluded molecules as ligands: {self.excluded}')
+
+        if config.DNARECEPTOR:
+            self.resis = [obres for obres in pybel.ob.OBResidueIter(
+                self.protcomplex.OBMol) if obres.GetName() in config.DNA + config.RNA]
+        else:
+            self.resis = [obres for obres in pybel.ob.OBResidueIter(
+                self.protcomplex.OBMol) if obres.GetResidueProperty(0)]
+
+        num_ligs = len(self.ligands)
+        if num_ligs == 1:
+            logger.info('analyzing one ligand')
+        elif num_ligs > 1:
+            logger.info(f'analyzing {num_ligs} ligands')
+        else:
+            logger.info(f'structure contains no ligands')
+
+    def analyze(self):
+        """Triggers analysis of all complexes in structure"""
+        for ligand in self.ligands:
+            self.characterize_complex(ligand)
+
+    def characterize_complex(self, ligand):
+        """Handles all basic functions for characterizing the interactions for one ligand"""
+
+        single_sites = []
+        for member in ligand.members:
+            single_sites.append(':'.join([str(x) for x in member]))
+        site = ' + '.join(single_sites)
+        site = site if not len(site) > 20 else site[:20] + '...'
+        longname = ligand.longname if not len(ligand.longname) > 20 else ligand.longname[:20] + '...'
+        ligtype = 'unspecified type' if ligand.type == 'UNSPECIFIED' else ligand.type
+        ligtext = f'{longname} [{ligtype}] -- {site}'
+        logger.info(f'processing ligand {ligtext}')
+        if ligtype == 'PEPTIDE':
+            logger.info(f'chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode')
+        if ligtype == 'INTRA':
+            logger.info(f'chain {ligand.chain} will be processed in [INTRA-CHAIN] mode')
+        any_in_biolip = len(set([x[0] for x in ligand.members]).intersection(config.biolip_list)) != 0
+
+        if ligtype not in ['POLYMER', 'DNA', 'ION', 'DNA+ION', 'RNA+ION', 'SMALLMOLECULE+ION'] and any_in_biolip:
+            logger.info('may be biologically irrelevant')
+
+        lig_obj = Ligand(self, ligand)
+        cutoff = lig_obj.max_dist_to_center + config.BS_DIST
+        bs_res = self.extract_bs(cutoff, lig_obj.centroid, self.resis)
+        # Get a list of all atoms belonging to the binding site, search by idx
+        bs_atoms = [self.atoms[idx] for idx in [i for i in self.atoms.keys()
+                                                if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res]
+                    if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf]
+        if ligand.type == 'PEPTIDE':
+            # If peptide, don't consider the peptide chain as part of the protein binding site
+            bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain]
+        if ligand.type == 'INTRA':
+            # Interactions within the chain
+            bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain]
+        bs_atoms_refined = []
+
+        # Create hash with BSRES -> (MINDIST_TO_LIG, AA_TYPE)
+        # and refine binding site atom selection with exact threshold
+        min_dist = {}
+        for r in bs_atoms:
+            bs_res_id = ''.join([str(whichresnumber(r)), whichchain(r)])
+            for l in ligand.mol.atoms:
+                distance = euclidean3d(r.coords, l.coords)
+                if bs_res_id not in min_dist:
+                    min_dist[bs_res_id] = (distance, whichrestype(r))
+                elif min_dist[bs_res_id][0] > distance:
+                    min_dist[bs_res_id] = (distance, whichrestype(r))
+                if distance <= config.BS_DIST and r not in bs_atoms_refined:
+                    bs_atoms_refined.append(r)
+        num_bs_atoms = len(bs_atoms_refined)
+        logger.info(f'binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})')
+
+        bs_obj = BindingSite(bs_atoms_refined, self.protcomplex, self, self.altconf, min_dist, self.Mapper)
+        pli_obj = PLInteraction(lig_obj, bs_obj, self)
+        self.interaction_sets[ligand.mol.title] = pli_obj
+
+    def extract_bs(self, cutoff, ligcentroid, resis):
+        """Return list of ids from residues belonging to the binding site"""
+        return [obres.GetIdx() for obres in resis if self.res_belongs_to_bs(obres, cutoff, ligcentroid)]
+
+    def res_belongs_to_bs(self, res, cutoff, ligcentroid):
+        """Check for each residue if its centroid is within a certain distance to the ligand centroid.
+        Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)"""
+        rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)])
+        # Check geometry
+        near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False
+        # Check chain membership
+        restricted_chain = True if res.GetChain() in config.PEPTIDES else False
+        return (near_enough and not restricted_chain)
+
+    def get_atom(self, idx):
+        return self.atoms[idx]
+
+    @property
+    def output_path(self):
+        return self._output_path
+
+    @output_path.setter
+    def output_path(self, path):
+        self._output_path = tilde_expansion(path)
-- 
cgit v1.2.3