diff options
Diffstat (limited to 'plip/structure')
| -rw-r--r-- | plip/structure/__init__.py | 0 | ||||
| -rw-r--r-- | plip/structure/detection.py | 479 | ||||
| -rw-r--r-- | plip/structure/preparation.py | 2315 | 
3 files changed, 0 insertions, 2794 deletions
| diff --git a/plip/structure/__init__.py b/plip/structure/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/plip/structure/__init__.py +++ /dev/null diff --git a/plip/structure/detection.py b/plip/structure/detection.py deleted file mode 100644 index 71fec63..0000000 --- a/plip/structure/detection.py +++ /dev/null @@ -1,479 +0,0 @@ -import itertools -from collections import defaultdict -from collections import namedtuple - -import numpy as np -from openbabel.openbabel import OBAtomAtomIter - -from plip.basic import config, logger -from plip.basic.supplemental import vecangle, vector, euclidean3d, projection -from plip.basic.supplemental import whichresnumber, whichrestype, whichchain - -logger = logger.get_logger() - -def filter_contacts(pairings): -    """Filter interactions by two criteria: -    1. No interactions between the same residue (important for intra mode). -    2. No duplicate interactions (A with B and B with A, also important for intra mode).""" -    if not config.INTRA: -        return pairings -    filtered1_pairings = [p for p in pairings if (p.resnr, p.reschain) != (p.resnr_l, p.reschain_l)] -    already_considered = [] -    filtered2_pairings = [] -    for contact in filtered1_pairings: -        try: -            dist = 'D{}'.format(round(contact.distance, 2)) -        except AttributeError: -            try: -                dist = 'D{}'.format(round(contact.distance_ah, 2)) -            except AttributeError: -                dist = 'D{}'.format(round(contact.distance_aw, 2)) -        res1, res2 = ''.join([str(contact.resnr), contact.reschain]), ''.join( -            [str(contact.resnr_l), contact.reschain_l]) -        data = {res1, res2, dist} -        if data not in already_considered: -            filtered2_pairings.append(contact) -            already_considered.append(data) -    return filtered2_pairings - - -################################################## -# FUNCTIONS FOR DETECTION OF SPECIFIC INTERACTIONS -################################################## - -def hydrophobic_interactions(atom_set_a, atom_set_b): -    """Detection of hydrophobic pliprofiler between atom_set_a (binding site) and atom_set_b (ligand). -    Definition: All pairs of qualified carbon atoms within a distance of HYDROPH_DIST_MAX -    """ -    data = namedtuple('hydroph_interaction', 'bsatom bsatom_orig_idx ligatom ligatom_orig_idx ' -                                             'distance restype resnr reschain restype_l, resnr_l, reschain_l') -    pairings = [] -    for a, b in itertools.product(atom_set_a, atom_set_b): -        if a.orig_idx == b.orig_idx: -            continue -        e = euclidean3d(a.atom.coords, b.atom.coords) -        if not config.MIN_DIST < e < config.HYDROPH_DIST_MAX: -            continue -        restype, resnr, reschain = whichrestype(a.atom), whichresnumber(a.atom), whichchain(a.atom) -        restype_l, resnr_l, reschain_l = whichrestype(b.orig_atom), whichresnumber(b.orig_atom), whichchain(b.orig_atom) -        contact = data(bsatom=a.atom, bsatom_orig_idx=a.orig_idx, ligatom=b.atom, ligatom_orig_idx=b.orig_idx, -                       distance=e, restype=restype, resnr=resnr, -                       reschain=reschain, restype_l=restype_l, -                       resnr_l=resnr_l, reschain_l=reschain_l) -        pairings.append(contact) -    return filter_contacts(pairings) - - -def hbonds(acceptors, donor_pairs, protisdon, typ): -    """Detection of hydrogen bonds between sets of acceptors and donor pairs. -    Definition: All pairs of hydrogen bond acceptor and donors with -    donor hydrogens and acceptor showing a distance within HBOND DIST MIN and HBOND DIST MAX -    and donor angles above HBOND_DON_ANGLE_MIN -    """ -    data = namedtuple('hbond', 'a a_orig_idx d d_orig_idx h distance_ah distance_ad angle type protisdon resnr ' -                               'restype reschain resnr_l restype_l reschain_l sidechain atype dtype') -    pairings = [] -    for acc, don in itertools.product(acceptors, donor_pairs): -        if not typ == 'strong': -            continue -        # Regular (strong) hydrogen bonds -        dist_ah = euclidean3d(acc.a.coords, don.h.coords) -        dist_ad = euclidean3d(acc.a.coords, don.d.coords) -        if not config.MIN_DIST < dist_ad < config.HBOND_DIST_MAX: -            continue -        vec1, vec2 = vector(don.h.coords, don.d.coords), vector(don.h.coords, acc.a.coords) -        v = vecangle(vec1, vec2) -        if not v > config.HBOND_DON_ANGLE_MIN: -            continue -        protatom = don.d.OBAtom if protisdon else acc.a.OBAtom -        ligatom = don.d.OBAtom if not protisdon else acc.a.OBAtom -        is_sidechain_hbond = protatom.GetResidue().GetAtomProperty(protatom, 8)  # Check if sidechain atom -        resnr = whichresnumber(don.d) if protisdon else whichresnumber(acc.a) -        resnr_l = whichresnumber(acc.a_orig_atom) if protisdon else whichresnumber(don.d_orig_atom) -        restype = whichrestype(don.d) if protisdon else whichrestype(acc.a) -        restype_l = whichrestype(acc.a_orig_atom) if protisdon else whichrestype(don.d_orig_atom) -        reschain = whichchain(don.d) if protisdon else whichchain(acc.a) -        rechain_l = whichchain(acc.a_orig_atom) if protisdon else whichchain(don.d_orig_atom) -        # Next line prevents H-Bonds within amino acids in intermolecular interactions -        if config.INTRA is not None and whichresnumber(don.d) == whichresnumber(acc.a): -            continue -        # Next line prevents backbone-backbone H-Bonds -        if config.INTRA is not None and protatom.GetResidue().GetAtomProperty(protatom, -                                                                              8) and ligatom.GetResidue().GetAtomProperty( -                ligatom, 8): -            continue -        contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, d=don.d, d_orig_idx=don.d_orig_idx, h=don.h, -                       distance_ah=dist_ah, distance_ad=dist_ad, angle=v, type=typ, protisdon=protisdon, -                       resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, -                       restype_l=restype_l, reschain_l=rechain_l, sidechain=is_sidechain_hbond, -                       atype=acc.a.type, dtype=don.d.type) -        pairings.append(contact) -    return filter_contacts(pairings) - - -def pistacking(rings_bs, rings_lig): -    """Return all pi-stackings between the given aromatic ring systems in receptor and ligand.""" -    data = namedtuple( -        'pistack', -        'proteinring ligandring distance angle offset type restype resnr reschain restype_l resnr_l reschain_l') -    pairings = [] -    for r, l in itertools.product(rings_bs, rings_lig): -        # DISTANCE AND RING ANGLE CALCULATION -        d = euclidean3d(r.center, l.center) -        b = vecangle(r.normal, l.normal) -        a = min(b, 180 - b if not 180 - b < 0 else b)  # Smallest of two angles, depending on direction of normal - -        # RING CENTER OFFSET CALCULATION (project each ring center into the other ring) -        proj1 = projection(l.normal, l.center, r.center) -        proj2 = projection(r.normal, r.center, l.center) -        offset = min(euclidean3d(proj1, l.center), euclidean3d(proj2, r.center)) - -        # RECEPTOR DATA -        resnr, restype, reschain = whichresnumber(r.atoms[0]), whichrestype(r.atoms[0]), whichchain(r.atoms[0]) -        resnr_l, restype_l, reschain_l = whichresnumber(l.orig_atoms[0]), whichrestype( -            l.orig_atoms[0]), whichchain(l.orig_atoms[0]) - -        # SELECTION BY DISTANCE, ANGLE AND OFFSET -        passed = False -        if not config.MIN_DIST < d < config.PISTACK_DIST_MAX: -            continue -        if 0 < a < config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: -            ptype = 'P' -            passed = True -        if 90 - config.PISTACK_ANG_DEV < a < 90 + config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: -            ptype = 'T' -            passed = True -        if passed: -            contact = data(proteinring=r, ligandring=l, distance=d, angle=a, offset=offset, -                           type=ptype, resnr=resnr, restype=restype, reschain=reschain, -                           resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) -            pairings.append(contact) -    return filter_contacts(pairings) - - -def pication(rings, pos_charged, protcharged): -    """Return all pi-Cation interaction between aromatic rings and positively charged groups. -    For tertiary and quaternary amines, check also the angle between the ring and the nitrogen. -    """ -    data = namedtuple( -        'pication', 'ring charge distance offset type restype resnr reschain restype_l resnr_l reschain_l protcharged') -    pairings = [] -    if len(rings) == 0 or len(pos_charged) == 0: -        return pairings -    for ring in rings: -        c = ring.center -        for p in pos_charged: -            d = euclidean3d(c, p.center) -            # Project the center of charge into the ring and measure distance to ring center -            proj = projection(ring.normal, ring.center, p.center) -            offset = euclidean3d(proj, ring.center) -            if not config.MIN_DIST < d < config.PICATION_DIST_MAX or not offset < config.PISTACK_OFFSET_MAX: -                continue -            if type(p).__name__ == 'lcharge' and p.fgroup == 'tertamine': -                # Special case here if the ligand has a tertiary amine, check an additional angle -                # Otherwise, we might have have a pi-cation interaction 'through' the ligand -                n_atoms = [a_neighbor for a_neighbor in OBAtomAtomIter(p.atoms[0].OBAtom)] -                n_atoms_coords = [(a.x(), a.y(), a.z()) for a in n_atoms] -                amine_normal = np.cross(vector(n_atoms_coords[0], n_atoms_coords[1]), -                                        vector(n_atoms_coords[2], n_atoms_coords[0])) -                b = vecangle(ring.normal, amine_normal) -                # Smallest of two angles, depending on direction of normal -                a = min(b, 180 - b if not 180 - b < 0 else b) -                if not a > 30.0: -                    resnr, restype = whichresnumber(ring.atoms[0]), whichrestype(ring.atoms[0]) -                    reschain = whichchain(ring.atoms[0]) -                    resnr_l, restype_l = whichresnumber(p.orig_atoms[0]), whichrestype(p.orig_atoms[0]) -                    reschain_l = whichchain(p.orig_atoms[0]) -                    contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', -                                   restype=restype, resnr=resnr, reschain=reschain, -                                   restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, -                                   protcharged=protcharged) -                    pairings.append(contact) -                break -            resnr = whichresnumber(p.atoms[0]) if protcharged else whichresnumber(ring.atoms[0]) -            resnr_l = whichresnumber(ring.orig_atoms[0]) if protcharged else whichresnumber(p.orig_atoms[0]) -            restype = whichrestype(p.atoms[0]) if protcharged else whichrestype(ring.atoms[0]) -            restype_l = whichrestype(ring.orig_atoms[0]) if protcharged else whichrestype(p.orig_atoms[0]) -            reschain = whichchain(p.atoms[0]) if protcharged else whichchain(ring.atoms[0]) -            reschain_l = whichchain(ring.orig_atoms[0]) if protcharged else whichchain(p.orig_atoms[0]) -            contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', restype=restype, -                           resnr=resnr, reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, -                           reschain_l=reschain_l, protcharged=protcharged) -            pairings.append(contact) -    return filter_contacts(pairings) - - -def saltbridge(poscenter, negcenter, protispos): -    """Detect all salt bridges (pliprofiler between centers of positive and negative charge)""" -    data = namedtuple( -        'saltbridge', 'positive negative distance protispos resnr restype reschain resnr_l restype_l reschain_l') -    pairings = [] -    for pc, nc in itertools.product(poscenter, negcenter): -        if not config.MIN_DIST < euclidean3d(pc.center, nc.center) < config.SALTBRIDGE_DIST_MAX: -            continue -        resnr = pc.resnr if protispos else nc.resnr -        resnr_l = whichresnumber(nc.orig_atoms[0]) if protispos else whichresnumber(pc.orig_atoms[0]) -        restype = pc.restype if protispos else nc.restype -        restype_l = whichrestype(nc.orig_atoms[0]) if protispos else whichrestype(pc.orig_atoms[0]) -        reschain = pc.reschain if protispos else nc.reschain -        reschain_l = whichchain(nc.orig_atoms[0]) if protispos else whichchain(pc.orig_atoms[0]) -        contact = data(positive=pc, negative=nc, distance=euclidean3d(pc.center, nc.center), protispos=protispos, -                       resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, restype_l=restype_l, -                       reschain_l=reschain_l) -        pairings.append(contact) -    return filter_contacts(pairings) - - -def halogen(acceptor, donor): -    """Detect all halogen bonds of the type Y-O...X-C""" -    data = namedtuple('halogenbond', 'acc acc_orig_idx don don_orig_idx distance don_angle acc_angle restype ' -                                     'resnr reschain restype_l resnr_l reschain_l donortype acctype sidechain') -    pairings = [] -    for acc, don in itertools.product(acceptor, donor): -        dist = euclidean3d(acc.o.coords, don.x.coords) -        if not config.MIN_DIST < dist < config.HALOGEN_DIST_MAX: -            continue -        vec1, vec2 = vector(acc.o.coords, acc.y.coords), vector(acc.o.coords, don.x.coords) -        vec3, vec4 = vector(don.x.coords, acc.o.coords), vector(don.x.coords, don.c.coords) -        acc_angle, don_angle = vecangle(vec1, vec2), vecangle(vec3, vec4) -        is_sidechain_hal = acc.o.OBAtom.GetResidue().GetAtomProperty(acc.o.OBAtom, 8)  # Check if sidechain atom -        if not config.HALOGEN_ACC_ANGLE - config.HALOGEN_ANGLE_DEV < acc_angle \ -               < config.HALOGEN_ACC_ANGLE + config.HALOGEN_ANGLE_DEV: -            continue -        if not config.HALOGEN_DON_ANGLE - config.HALOGEN_ANGLE_DEV < don_angle \ -               < config.HALOGEN_DON_ANGLE + config.HALOGEN_ANGLE_DEV: -            continue -        restype, reschain, resnr = whichrestype(acc.o), whichchain(acc.o), whichresnumber(acc.o) -        restype_l, reschain_l, resnr_l = whichrestype(don.orig_x), whichchain(don.orig_x), whichresnumber(don.orig_x) -        contact = data(acc=acc, acc_orig_idx=acc.o_orig_idx, don=don, don_orig_idx=don.x_orig_idx, -                       distance=dist, don_angle=don_angle, acc_angle=acc_angle, -                       restype=restype, resnr=resnr, -                       reschain=reschain, restype_l=restype_l, -                       reschain_l=reschain_l, resnr_l=resnr_l, donortype=don.x.OBAtom.GetType(), acctype=acc.o.type, -                       sidechain=is_sidechain_hal) -        pairings.append(contact) -    return filter_contacts(pairings) - - -def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): -    """Find water-bridged hydrogen bonds between ligand and protein. For now only considers bridged of first degree.""" -    data = namedtuple('waterbridge', 'a a_orig_idx atype d d_orig_idx dtype h water water_orig_idx distance_aw ' -                                     'distance_dw d_angle w_angle type resnr restype reschain resnr_l restype_l reschain_l protisdon') -    pairings = [] -    # First find all acceptor-water pairs with distance within d -    # and all donor-water pairs with distance within d and angle greater theta -    lig_aw, prot_aw, lig_dw, prot_hw = [], [], [], [] -    for w in water: -        for acc1 in lig_hba: -            dist = euclidean3d(acc1.a.coords, w.oxy.coords) -            if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST: -                lig_aw.append((acc1, w, dist)) -        for acc2 in bs_hba: -            dist = euclidean3d(acc2.a.coords, w.oxy.coords) -            if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST: -                prot_aw.append((acc2, w, dist)) -        for don1 in lig_hbd: -            dist = euclidean3d(don1.d.coords, w.oxy.coords) -            d_angle = vecangle(vector(don1.h.coords, don1.d.coords), vector(don1.h.coords, w.oxy.coords)) -            if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ -                    and d_angle > config.WATER_BRIDGE_THETA_MIN: -                lig_dw.append((don1, w, dist, d_angle)) -        for don2 in bs_hbd: -            dist = euclidean3d(don2.d.coords, w.oxy.coords) -            d_angle = vecangle(vector(don2.h.coords, don2.d.coords), vector(don2.h.coords, w.oxy.coords)) -            if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ -                    and d_angle > config.WATER_BRIDGE_THETA_MIN: -                prot_hw.append((don2, w, dist, d_angle)) - -    for l, p in itertools.product(lig_aw, prot_hw): -        acc, wl, distance_aw = l -        don, wd, distance_dw, d_angle = p -        if not wl.oxy == wd.oxy: -            continue -        # Same water molecule and angle within omega -        w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) -        if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: -            continue -        resnr, reschain, restype = whichresnumber(don.d), whichchain(don.d), whichrestype(don.d) -        resnr_l, reschain_l, restype_l = whichresnumber(acc.a_orig_atom), whichchain( -            acc.a_orig_atom), whichrestype(acc.a_orig_atom) -        contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, -                       dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, -                       distance_aw=distance_aw, distance_dw=distance_dw, d_angle=d_angle, w_angle=w_angle, -                       type='first_deg', resnr=resnr, restype=restype, -                       reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, protisdon=True) -        pairings.append(contact) -    for p, l in itertools.product(prot_aw, lig_dw): -        acc, wl, distance_aw = p -        don, wd, distance_dw, d_angle = l -        if not wl.oxy == wd.oxy: -            continue -        # Same water molecule and angle within omega -        w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) -        if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: -            continue -        resnr, reschain, restype = whichresnumber(acc.a), whichchain(acc.a), whichrestype(acc.a) -        resnr_l, reschain_l, restype_l = whichresnumber(don.d_orig_atom), whichchain( -            don.d_orig_atom), whichrestype(don.d_orig_atom) -        contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, -                       dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, -                       distance_aw=distance_aw, distance_dw=distance_dw, -                       d_angle=d_angle, w_angle=w_angle, type='first_deg', resnr=resnr, -                       restype=restype, reschain=reschain, -                       restype_l=restype_l, reschain_l=reschain_l, resnr_l=resnr_l, protisdon=False) -        pairings.append(contact) -    return filter_contacts(pairings) - - -def metal_complexation(metals, metal_binding_lig, metal_binding_bs): -    """Find all metal complexes between metals and appropriate groups in both protein and ligand, as well as water""" -    data = namedtuple('metal_complex', 'metal metal_orig_idx metal_type target target_orig_idx target_type ' -                                       'coordination_num distance resnr restype ' -                                       'reschain  restype_l reschain_l resnr_l location rms, geometry num_partners complexnum') -    pairings_dict = {} -    pairings = [] -    # #@todo Refactor -    metal_to_id = {} -    metal_to_orig_atom = {} -    for metal, target in itertools.product(metals, metal_binding_lig + metal_binding_bs): -        distance = euclidean3d(metal.m.coords, target.atom.coords) -        if not distance < config.METAL_DIST_MAX: -            continue -        if metal.m not in pairings_dict: -            pairings_dict[metal.m] = [(target, distance), ] -            metal_to_id[metal.m] = metal.m_orig_idx -            metal_to_orig_atom[metal.m] = metal.orig_m -        else: -            pairings_dict[metal.m].append((target, distance)) -    for cnum, metal in enumerate(pairings_dict): -        rms = 0.0 -        excluded = [] -        # cnum +1 being the complex number -        contact_pairs = pairings_dict[metal] -        num_targets = len(contact_pairs) -        vectors_dict = defaultdict(list) -        for contact_pair in contact_pairs: -            target, distance = contact_pair -            vectors_dict[target.atom.idx].append(vector(metal.coords, target.atom.coords)) - -        # Listing of coordination numbers and their geometries -        configs = {2: ['linear', ], -                   3: ['trigonal.planar', 'trigonal.pyramidal'], -                   4: ['tetrahedral', 'square.planar'], -                   5: ['trigonal.bipyramidal', 'square.pyramidal'], -                   6: ['octahedral', ]} - -        # Angle signatures for each geometry (as seen from each target atom) -        ideal_angles = {'linear': [[180.0]] * 2, -                        'trigonal.planar': [[120.0, 120.0]] * 3, -                        'trigonal.pyramidal': [[109.5, 109.5]] * 3, -                        'tetrahedral': [[109.5, 109.5, 109.5, 109.5]] * 4, -                        'square.planar': [[90.0, 90.0, 90.0, 90.0]] * 4, -                        'trigonal.bipyramidal': [[120.0, 120.0, 90.0, 90.0]] * 3 + [[90.0, 90.0, 90.0, 180.0]] * 2, -                        'square.pyramidal': [[90.0, 90.0, 90.0, 180.0]] * 4 + [[90.0, 90.0, 90.0, 90.0]], -                        'octahedral': [[90.0, 90.0, 90.0, 90.0, 180.0]] * 6} -        angles_dict = {} - -        for target in vectors_dict: -            cur_vector = vectors_dict[target] -            other_vectors = [] -            for t in vectors_dict: -                if not t == target: -                    [other_vectors.append(x) for x in vectors_dict[t]] -            angles = [vecangle(pair[0], pair[1]) for pair in itertools.product(cur_vector, other_vectors)] -            angles_dict[target] = angles - -        all_total = []  # Record fit information for each geometry tested -        gdata = namedtuple('gdata', 'geometry rms coordination excluded diff_targets')  # Geometry Data -        # Can't specify geometry with only one target -        if num_targets == 1: -            final_geom = 'NA' -            final_coo = 1 -            excluded = [] -            rms = 0.0 -        else: -            for coo in sorted(configs, reverse=True):  # Start with highest coordination number -                geometries = configs[coo] -                for geometry in geometries: -                    signature = ideal_angles[geometry]  # Set of ideal angles for geometry, from each perspective -                    geometry_total = 0 -                    geometry_scores = []  # All scores for one geometry (from all subsignatures) -                    used_up_targets = []  # Use each target just once for a subsignature -                    not_used = [] -                    coo_diff = num_targets - coo  # How many more observed targets are there? - -                    # Find best match for each subsignature -                    for subsignature in signature:  # Ideal angles from one perspective -                        best_target = None  # There's one best-matching target for each subsignature -                        best_target_score = 999 - -                        for k, target in enumerate(angles_dict): -                            if target not in used_up_targets: -                                observed_angles = angles_dict[target]  # Observed angles from perspective of one target -                                single_target_scores = [] -                                used_up_observed_angles = [] -                                for i, ideal_angle in enumerate(subsignature): -                                    # For each angle in the signature, find the best-matching observed angle -                                    best_match = None -                                    best_match_diff = 999 -                                    for j, observed_angle in enumerate(observed_angles): -                                        if j not in used_up_observed_angles: -                                            diff = abs(ideal_angle - observed_angle) -                                            if diff < best_match_diff: -                                                best_match_diff = diff -                                                best_match = j -                                    if best_match is not None: -                                        used_up_observed_angles.append(best_match) -                                        single_target_scores.append(best_match_diff) -                                # Calculate RMS for target angles -                                target_total = sum([x ** 2 for x in single_target_scores]) ** 0.5  # Tot. score targ/sig -                                if target_total < best_target_score: -                                    best_target_score = target_total -                                    best_target = target - -                        used_up_targets.append(best_target) -                        geometry_scores.append(best_target_score) -                        # Total score is mean of RMS values -                        geometry_total = np.mean(geometry_scores) -                    # Record the targets not used for excluding them when deciding for a final geometry -                    [not_used.append(target) for target in angles_dict if target not in used_up_targets] -                    all_total.append(gdata(geometry=geometry, rms=geometry_total, coordination=coo, -                                           excluded=not_used, diff_targets=coo_diff)) - -        # Make a decision here. Starting with the geometry with lowest difference in ideal and observed partners ... -        # Check if the difference between the RMS to the next best solution is not larger than 0.5 -        if not num_targets == 1:  # Can't decide for any geoemtry in that case -            all_total = sorted(all_total, key=lambda x: abs(x.diff_targets)) -            for i, total in enumerate(all_total): -                next_total = all_total[i + 1] -                this_rms, next_rms = total.rms, next_total.rms -                diff_to_next = next_rms - this_rms -                if diff_to_next > 0.5: -                    final_geom, final_coo, rms, excluded = total.geometry, total.coordination, total.rms, total.excluded -                    break -                elif next_total.rms < 3.5: -                    final_geom, final_coo, = next_total.geometry, next_total.coordination -                    rms, excluded = next_total.rms, next_total.excluded -                    break -                elif i == len(all_total) - 2: -                    final_geom, final_coo, rms, excluded = "NA", "NA", float('nan'), [] -                    break - -        # Record all contact pairing, excluding those with targets superfluous for chosen geometry -        only_water = set([x[0].location for x in contact_pairs]) == {'water'} -        if not only_water:  # No complex if just with water as targets -            logger.info(f'metal ion {metal.type} complexed with {final_geom} geometry (coo. number {final_coo}/ {num_targets} observed)') -            for contact_pair in contact_pairs: -                target, distance = contact_pair -                if target.atom.idx not in excluded: -                    metal_orig_atom = metal_to_orig_atom[metal] -                    restype_l, reschain_l, resnr_l = whichrestype(metal_orig_atom), whichchain( -                        metal_orig_atom), whichresnumber(metal_orig_atom) -                    contact = data(metal=metal, metal_orig_idx=metal_to_id[metal], metal_type=metal.type, -                                   target=target, target_orig_idx=target.atom_orig_idx, target_type=target.type, -                                   coordination_num=final_coo, distance=distance, resnr=target.resnr, -                                   restype=target.restype, reschain=target.reschain, location=target.location, -                                   rms=rms, geometry=final_geom, num_partners=num_targets, complexnum=cnum + 1, -                                   resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) -                    pairings.append(contact) -    return filter_contacts(pairings) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py deleted file mode 100644 index 32083c7..0000000 --- a/plip/structure/preparation.py +++ /dev/null @@ -1,2315 +0,0 @@ -import itertools -import os -import re -import tempfile -from collections import namedtuple -from operator import itemgetter - -import numpy as np -from openbabel import pybel - -from plip.basic import config, logger -from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name -from plip.basic.supplemental import ( -    cluster_doubles, -    is_lig, -    normalize_vector, -    vector, -    ring_is_planar, -) -from plip.basic.supplemental import ( -    extract_pdbid, -    read_pdb, -    create_folder_if_not_exists, -    canonicalize, -) -from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance -from plip.basic.supplemental import ( -    whichchain, -    whichrestype, -    whichresnumber, -    euclidean3d, -    int32_to_negative, -) -from plip.structure.detection import ( -    halogen, -    pication, -    water_bridges, -    metal_complexation, -) -from plip.structure.detection import ( -    hydrophobic_interactions, -    pistacking, -    hbonds, -    saltbridge, -) - -logger = logger.get_logger() - - -class PDBParser: -    def __init__(self, pdbpath, as_string): -        self.as_string = as_string -        self.pdbpath = pdbpath -        self.num_fixed_lines = 0 -        self.covlinkage = namedtuple( -            "covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2" -        ) -        ( -            self.proteinmap, -            self.modres, -            self.covalent, -            self.altconformations, -            self.corrected_pdb, -        ) = self.parse_pdb() - -    def parse_pdb(self): -        """Extracts additional information from PDB files. -        I. When reading in a PDB file, OpenBabel numbers ATOMS and HETATOMS continously. -        In PDB files, TER records are also counted, leading to a different numbering system. -        This functions reads in a PDB file and provides a mapping as a dictionary. -        II. Additionally, it returns a list of modified residues. -        III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified -        IV. Alternative conformations -        """ -        if self.as_string: -            fil = self.pdbpath.rstrip("\n").split( -                "\n" -            )  # Removing trailing newline character -        else: -            f = read(self.pdbpath) -            fil = f.readlines() -            f.close() -        corrected_lines = [] -        i, j = 0, 0  # idx and PDB numbering -        d = {} -        modres = set() -        covalent = [] -        alt = [] -        previous_ter = False - -        # Standard without fixing -        if not config.NOFIX: -            if not config.PLUGIN_MODE: -                lastnum = 0  # Atom numbering (has to be consecutive) -                other_models = False -                for line in fil: -                    if ( -                        not other_models -                    ):  # Only consider the first model in an NRM structure -                        corrected_line, newnum = self.fix_pdbline(line, lastnum) -                        if corrected_line is not None: -                            if corrected_line.startswith("MODEL"): -                                try:  # Get number of MODEL (1,2,3) -                                    model_num = int(corrected_line[10:14]) -                                    if model_num > 1:  # MODEL 2,3,4 etc. -                                        other_models = True -                                except ValueError: -                                    logger.debug( -                                        f"ignoring invalid MODEL entry: {corrected_line}" -                                    ) -                            corrected_lines.append(corrected_line) -                            lastnum = newnum -                corrected_pdb = "".join(corrected_lines) -            else: -                corrected_pdb = self.pdbpath -                corrected_lines = fil -        else: -            corrected_pdb = self.pdbpath -            corrected_lines = fil - -        for line in corrected_lines: -            if line.startswith(("ATOM", "HETATM")): -                # Retrieve alternate conformations -                atomid, location = int(line[6:11]), line[16] -                location = "A" if location == " " else location -                if location != "A": -                    alt.append(atomid) - -                if not previous_ter: -                    i += 1 -                    j += 1 -                else: -                    i += 1 -                    j += 2 -                d[i] = j -                previous_ter = False -            # Numbering Changes at TER records -            if line.startswith("TER"): -                previous_ter = True -            # Get modified residues -            if line.startswith("MODRES"): -                modres.add(line[12:15].strip()) -            # Get covalent linkages between ligands -            if line.startswith("LINK"): -                covalent.append(self.get_linkage(line)) -        return d, modres, covalent, alt, corrected_pdb - -    def fix_pdbline(self, pdbline, lastnum): -        """Fix a PDB line if information is missing.""" -        pdbqt_conversion = { -            "HD": "H", -            "HS": "H", -            "NA": "N", -            "NS": "N", -            "OA": "O", -            "OS": "O", -            "SA": "S", -        } -        fixed = False -        new_num = 0 -        forbidden_characters = "[^a-zA-Z0-9_]" -        pdbline = pdbline.strip("\n") -        # Some MD / Docking tools produce empty lines, leading to segfaults -        if len(pdbline.strip()) == 0: -            self.num_fixed_lines += 1 -            return None, lastnum -        if len(pdbline) > 100:  # Should be 80 long -            self.num_fixed_lines += 1 -            return None, lastnum -        # TER Entries also have continuing numbering, consider them as well -        if pdbline.startswith("TER"): -            new_num = lastnum + 1 -        if pdbline.startswith("ATOM"): -            new_num = lastnum + 1 -            current_num = int(pdbline[6:11]) -            resnum = pdbline[22:27].strip() -            resname = pdbline[17:21].strip() -            # Invalid residue number -            try: -                int(resnum) -            except ValueError: -                pdbline = pdbline[:22] + "   0 " + pdbline[27:] -                fixed = True -            # Invalid characters in residue name -            if re.match(forbidden_characters, resname.strip()): -                pdbline = pdbline[:17] + "UNK " + pdbline[21:] -                fixed = True -            if lastnum + 1 != current_num: -                pdbline = ( -                    pdbline[:6] -                    + (5 - len(str(new_num))) * " " -                    + str(new_num) -                    + " " -                    + pdbline[12:] -                ) -                fixed = True -            # No chain assigned -            if pdbline[21] == " ": -                pdbline = pdbline[:21] + "A" + pdbline[22:] -                fixed = True -            if pdbline.endswith("H"): -                self.num_fixed_lines += 1 -                return None, lastnum -            # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. -            for pdbqttype in pdbqt_conversion: -                if pdbline.strip().endswith(pdbqttype): -                    pdbline = ( -                        pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + "\n" -                    ) -                    self.num_fixed_lines += 1 -        if pdbline.startswith("HETATM"): -            new_num = lastnum + 1 -            try: -                current_num = int(pdbline[6:11]) -            except ValueError: -                current_num = None -                logger.debug(f"invalid HETATM entry: {pdbline}") -            if lastnum + 1 != current_num: -                pdbline = ( -                    pdbline[:6] -                    + (5 - len(str(new_num))) * " " -                    + str(new_num) -                    + " " -                    + pdbline[12:] -                ) -                fixed = True -            # No chain assigned or number assigned as chain -            if pdbline[21] == " ": -                pdbline = pdbline[:21] + "Z" + pdbline[22:] -                fixed = True -            # No residue number assigned -            if pdbline[23:26] == "   ": -                pdbline = pdbline[:23] + "999" + pdbline[26:] -                fixed = True -            # Non-standard Ligand Names -            ligname = pdbline[17:21].strip() -            if len(ligname) > 3: -                pdbline = pdbline[:17] + ligname[:3] + " " + pdbline[21:] -                fixed = True -            if re.match(forbidden_characters, ligname.strip()): -                pdbline = pdbline[:17] + "LIG " + pdbline[21:] -                fixed = True -            if len(ligname.strip()) == 0: -                pdbline = pdbline[:17] + "LIG " + pdbline[21:] -                fixed = True -            if pdbline.endswith("H"): -                self.num_fixed_lines += 1 -                return None, lastnum -            # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. -            for pdbqttype in pdbqt_conversion: -                if pdbline.strip().endswith(pdbqttype): -                    pdbline = ( -                        pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + " " -                    ) -                    self.num_fixed_lines += 1 -        self.num_fixed_lines += 1 if fixed else 0 -        return pdbline + "\n", max(new_num, lastnum) - -    def get_linkage(self, line): -        """Get the linkage information from a LINK entry PDB line.""" -        conf1, id1, chain1, pos1 = ( -            line[16].strip(), -            line[17:20].strip(), -            line[21].strip(), -            int(line[22:26]), -        ) -        conf2, id2, chain2, pos2 = ( -            line[46].strip(), -            line[47:50].strip(), -            line[51].strip(), -            int(line[52:56]), -        ) -        return self.covlinkage( -            id1=id1, -            chain1=chain1, -            pos1=pos1, -            conf1=conf1, -            id2=id2, -            chain2=chain2, -            pos2=pos2, -            conf2=conf2, -        ) - - -class LigandFinder: -    def __init__(self, proteincomplex, altconf, modres, covalent, mapper): -        self.lignames_all = None -        self.lignames_kept = None -        self.water = None -        self.proteincomplex = proteincomplex -        self.altconformations = altconf -        self.modresidues = modres -        self.covalent = covalent -        self.mapper = mapper -        self.ligands = self.getligs() -        self.excluded = sorted( -            list(self.lignames_all.difference(set(self.lignames_kept))) -        ) - -    def getpeptides(self, chain): -        """If peptide ligand chains are defined via the command line options, -        try to extract the underlying ligand formed by all residues in the -        given chain without water -        """ -        all_from_chain = [ -            o -            for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) -            if o.GetChain() == chain -        ]  # All residues from chain -        if len(all_from_chain) == 0: -            return None -        else: -            non_water = [o for o in all_from_chain if not o.GetResidueProperty(9)] -            ligand = self.extract_ligand(non_water) -            return ligand - -    def getligs(self): -        """Get all ligands from a PDB file and prepare them for analysis. -        Returns all non-empty ligands. -        """ - -        if config.PEPTIDES == [] and config.INTRA is None: -            # Extract small molecule ligands (default) -            ligands = [] - -            # Filter for ligands using lists -            ligand_residues, self.lignames_all, self.water = self.filter_for_ligands() - -            all_res_dict = { -                (a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues -            } -            self.lignames_kept = list(set([a.GetName() for a in ligand_residues])) - -            if not config.BREAKCOMPOSITE: -                #  Update register of covalent links with those between DNA/RNA subunits -                self.covalent += nucleotide_linkage(all_res_dict) -                #  Find fragment linked by covalent bonds -                res_kmers = self.identify_kmers(all_res_dict) -            else: -                res_kmers = [[a,] for a in ligand_residues] -            logger.debug( -                f"{len(res_kmers)} ligand kmer(s) detected for closer inspection" -            ) -            for ( -                kmer -            ) in ( -                res_kmers -            ):  # iterate over all ligands and extract molecules + information -                if len(kmer) > config.MAX_COMPOSITE_LENGTH: -                    logger.debug( -                        f"ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)" -                    ) -                else: -                    ligands.append(self.extract_ligand(kmer)) - -        else: -            # Extract peptides from given chains -            self.water = [ -                o -                for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) -                if o.GetResidueProperty(9) -            ] -            if config.PEPTIDES: -                peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES] -            elif config.INTRA is not None: -                peptide_ligands = [ -                    self.getpeptides(config.INTRA), -                ] - -            ligands = [p for p in peptide_ligands if p is not None] -            self.covalent, self.lignames_kept, self.lignames_all = [], [], set() - -        return [lig for lig in ligands if len(lig.mol.atoms) != 0] - -    def extract_ligand(self, kmer): -        """Extract the ligand by copying atoms and bonds and assign all information necessary for later steps.""" -        data = namedtuple( -            "ligand", -            "mol hetid chain position water members longname type atomorder can_to_pdb", -        ) -        members = [ -            (res.GetName(), res.GetChain(), int32_to_negative(res.GetNum())) -            for res in kmer -        ] -        members = sort_members_by_importance(members) -        rname, rchain, rnum = members[0] -        logger.debug( -            f"finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements" -        ) -        names = [x[0] for x in members] -        longname = "-".join([x[0] for x in members]) - -        if config.PEPTIDES: -            ligtype = "PEPTIDE" -        elif config.INTRA is not None: -            ligtype = "INTRA" -        else: -            # Classify a ligand by its HETID(s) -            ligtype = classify_by_name(names) -        logger.debug(f"ligand classified as {ligtype}") - -        hetatoms = dict() -        for obresidue in kmer: -            cur_hetatoms = { -                obatom.GetIdx(): obatom -                for obatom in pybel.ob.OBResidueAtomIter(obresidue) -                if obatom.GetAtomicNum() != 1 -            } -            if not config.ALTLOC: -                # Remove alternative conformations (standard -> True) -                ids_to_remove = [ -                    atom_id -                    for atom_id in hetatoms.keys() -                    if self.mapper.mapid(atom_id, mtype="protein", to="internal") -                    in self.altconformations -                ] -                for atom_id in ids_to_remove: -                    del cur_hetatoms[atom_id] -            hetatoms.update(cur_hetatoms) -        logger.debug(f"hetero atoms determined (n={len(hetatoms)})") - -        lig = pybel.ob.OBMol()  # new ligand mol -        neighbours = dict() -        for obatom in hetatoms.values():  # iterate over atom objects -            idx = obatom.GetIdx() -            lig.AddAtom(obatom) -            # ids of all neighbours of obatom -            neighbours[idx] = set( -                [ -                    neighbour_atom.GetIdx() -                    for neighbour_atom in pybel.ob.OBAtomAtomIter(obatom) -                ] -            ) & set(hetatoms.keys()) -        logger.debug(f"atom neighbours mapped") - -        ############################################################## -        # map the old atom idx of OBMol to the new idx of the ligand # -        ############################################################## - -        newidx = dict( -            zip( -                hetatoms.keys(), -                [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)], -            ) -        ) -        mapold = dict(zip(newidx.values(), newidx)) -        # copy the bonds -        for obatom in hetatoms: -            for neighbour_atom in neighbours[obatom]: -                bond = hetatoms[obatom].GetBond(hetatoms[neighbour_atom]) -                lig.AddBond(newidx[obatom], newidx[neighbour_atom], bond.GetBondOrder()) -        lig = pybel.Molecule(lig) - -        # For kmers, the representative ids are chosen (first residue of kmer) -        lig.data.update({"Name": rname, "Chain": rchain, "ResNr": rnum}) - -        # Check if a negative residue number is represented as a 32 bit integer -        if rnum > 10 ** 5: -            rnum = int32_to_negative(rnum) - -        lig.title = ":".join((rname, rchain, str(rnum))) -        self.mapper.ligandmaps[lig.title] = mapold - -        logger.debug("renumerated molecule generated") - -        if not config.NOPDBCANMAP: -            atomorder = canonicalize(lig) -        else: -            atomorder = None - -        can_to_pdb = {} -        if atomorder is not None: -            can_to_pdb = {atomorder[key - 1]: mapold[key] for key in mapold} - -        ligand = data( -            mol=lig, -            hetid=rname, -            chain=rchain, -            position=rnum, -            water=self.water, -            members=members, -            longname=longname, -            type=ligtype, -            atomorder=atomorder, -            can_to_pdb=can_to_pdb, -        ) -        return ligand - -    def is_het_residue(self, obres): -        """Given an OBResidue, determines if the residue is indeed a possible ligand -        in the PDB file""" -        if not obres.GetResidueProperty(0): -            # If the residue is NOT amino (0) -            # It can be amino_nucleo, coenzme, ion, nucleo, protein, purine, pyrimidine, solvent -            # In these cases, it is a ligand candidate -            return True -        else: -            # Here, the residue is classified as amino -            # Amino acids can still be ligands, so we check for HETATM entries -            # Only residues with at least one HETATM entry are processed as ligands -            het_atoms = [] -            for atm in pybel.ob.OBResidueAtomIter(obres): -                het_atoms.append(obres.IsHetAtom(atm)) -            if True in het_atoms: -                return True -        return False - -    def filter_for_ligands(self): -        """Given an OpenBabel Molecule, get all ligands, their names, and water""" - -        candidates1 = [ -            o -            for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) -            if not o.GetResidueProperty(9) and self.is_het_residue(o) -        ] - -        if config.DNARECEPTOR:  # If DNA is the receptor, don't consider DNA as a ligand -            candidates1 = [ -                res -                for res in candidates1 -                if res.GetName() not in config.DNA + config.RNA -            ] -        all_lignames = set([a.GetName() for a in candidates1]) - -        water = [ -            o -            for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) -            if o.GetResidueProperty(9) -        ] -        # Filter out non-ligands -        if not config.KEEPMOD:  # Keep modified residues as ligands -            candidates2 = [ -                a -                for a in candidates1 -                if is_lig(a.GetName()) and a.GetName() not in self.modresidues -            ] -        else: -            candidates2 = [a for a in candidates1 if is_lig(a.GetName())] -        logger.debug(f"{len(candidates2)} ligand(s) after first filtering step") - -        ############################################ -        # Filtering by counting and artifacts list # -        ############################################ -        artifacts = [] -        unique_ligs = set(a.GetName() for a in candidates2) -        for ulig in unique_ligs: -            # Discard if appearing 15 times or more and is possible artifact -            if ( -                ulig in config.biolip_list -                and [a.GetName() for a in candidates2].count(ulig) >= 15 -            ): -                artifacts.append(ulig) - -        selected_ligands = [a for a in candidates2 if a.GetName() not in artifacts] - -        return selected_ligands, all_lignames, water - -    def identify_kmers(self, residues): -        """Using the covalent linkage information, find out which fragments/subunits form a ligand.""" - -        # Remove all those not considered by ligands and pairings including alternate conformations -        ligdoubles = [ -            [(link.id1, link.chain1, link.pos1), (link.id2, link.chain2, link.pos2)] -            for link in [ -                c -                for c in self.covalent -                if c.id1 in self.lignames_kept -                and c.id2 in self.lignames_kept -                and c.conf1 in ["A", ""] -                and c.conf2 in ["A", ""] -                and (c.id1, c.chain1, c.pos1) in residues -                and (c.id2, c.chain2, c.pos2) in residues -            ] -        ] -        kmers = cluster_doubles(ligdoubles) -        if not kmers:  # No ligand kmers, just normal independent ligands -            return [[residues[res]] for res in residues] - -        else: -            # res_kmers contains clusters of covalently bound ligand residues (kmer ligands) -            res_kmers = [[residues[res] for res in kmer] for kmer in kmers] - -            # In this case, add other ligands which are not part of a kmer -            in_kmer = [] -            for res_kmer in res_kmers: -                for res in res_kmer: -                    in_kmer.append((res.GetName(), res.GetChain(), res.GetNum())) -            for res in residues: -                if res not in in_kmer: -                    newres = [ -                        residues[res], -                    ] -                    res_kmers.append(newres) -            return res_kmers - - -class Mapper: -    """Provides functions for mapping atom IDs in the correct way""" - -    def __init__(self): -        self.proteinmap = ( -            None  # Map internal atom IDs of protein residues to original PDB Atom IDs -        ) -        self.ligandmaps = ( -            {} -        )  # Map IDs of new ligand molecules to internal IDs (or PDB IDs?) -        self.original_structure = None - -    def mapid( -        self, idx, mtype, bsid=None, to="original" -    ):  # Mapping to original IDs is standard for ligands -        if mtype == "reversed":  # Needed to map internal ID back to original protein ID -            return self.reversed_proteinmap[idx] -        if mtype == "protein": -            return self.proteinmap[idx] -        elif mtype == "ligand": -            if to == "internal": -                return self.ligandmaps[bsid][idx] -            elif to == "original": -                return self.proteinmap[self.ligandmaps[bsid][idx]] - -    def id_to_atom(self, idx): -        """Returns the atom for a given original ligand ID. -        To do this, the ID is mapped to the protein first and then the atom returned. -        """ -        mapped_idx = self.mapid(idx, "reversed") -        return pybel.Atom(self.original_structure.GetAtom(mapped_idx)) - - -class Mol: -    def __init__(self, altconf, mapper, mtype, bsid): -        self.mtype = mtype -        self.bsid = bsid -        self.rings = None -        self.hydroph_atoms = None -        self.charged = None -        self.hbond_don_atom_pairs = None -        self.hbond_acc_atoms = None -        self.altconf = altconf -        self.Mapper = mapper - -    def hydrophobic_atoms(self, all_atoms): -        """Select all carbon atoms which have only carbons and/or hydrogens as direct neighbors.""" -        atom_set = [] -        data = namedtuple("hydrophobic", "atom orig_atom orig_idx") -        atm = [ -            a -            for a in all_atoms -            if a.atomicnum == 6 -            and set( -                [natom.GetAtomicNum() for natom in pybel.ob.OBAtomAtomIter(a.OBAtom)] -            ).issubset({1, 6}) -        ] -        for atom in atm: -            orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid) -            orig_atom = self.Mapper.id_to_atom(orig_idx) -            if atom.idx not in self.altconf: -                atom_set.append(data(atom=atom, orig_atom=orig_atom, orig_idx=orig_idx)) -        return atom_set - -    def find_hba(self, all_atoms): -        """Find all possible hydrogen bond acceptors""" -        data = namedtuple("hbondacceptor", "a a_orig_atom a_orig_idx type") -        a_set = [] -        for atom in filter(lambda at: at.OBAtom.IsHbondAcceptor(), all_atoms): -            if ( -                atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf -            ):  # Exclude halogen atoms -                a_orig_idx = self.Mapper.mapid( -                    atom.idx, mtype=self.mtype, bsid=self.bsid -                ) -                a_orig_atom = self.Mapper.id_to_atom(a_orig_idx) -                a_set.append( -                    data( -                        a=atom, -                        a_orig_atom=a_orig_atom, -                        a_orig_idx=a_orig_idx, -                        type="regular", -                    ) -                ) -        a_set = sorted(a_set, key=lambda x: x.a_orig_idx) -        return a_set - -    def find_hbd(self, all_atoms, hydroph_atoms): -        """Find all possible strong and weak hydrogen bonds donors (all hydrophobic C-H pairings)""" -        donor_pairs = [] -        data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type") -        for donor in [ -            a -            for a in all_atoms -            if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf -        ]: -            in_ring = False -            if not in_ring: -                for adj_atom in [ -                    a -                    for a in pybel.ob.OBAtomAtomIter(donor.OBAtom) -                    if a.IsHbondDonorH() -                ]: -                    d_orig_idx = self.Mapper.mapid( -                        donor.idx, mtype=self.mtype, bsid=self.bsid -                    ) -                    d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) -                    donor_pairs.append( -                        data( -                            d=donor, -                            d_orig_atom=d_orig_atom, -                            d_orig_idx=d_orig_idx, -                            h=pybel.Atom(adj_atom), -                            type="regular", -                        ) -                    ) -        for carbon in hydroph_atoms: -            for adj_atom in [ -                a -                for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom) -                if a.GetAtomicNum() == 1 -            ]: -                d_orig_idx = self.Mapper.mapid( -                    carbon.atom.idx, mtype=self.mtype, bsid=self.bsid -                ) -                d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) -                donor_pairs.append( -                    data( -                        d=carbon, -                        d_orig_atom=d_orig_atom, -                        d_orig_idx=d_orig_idx, -                        h=pybel.Atom(adj_atom), -                        type="weak", -                    ) -                ) -        donor_pairs = sorted(donor_pairs, key=lambda x: (x.d_orig_idx, x.h.idx)) -        return donor_pairs - -    def find_rings(self, mol, all_atoms): -        """Find rings and return only aromatic. -        Rings have to be sufficiently planar OR be detected by OpenBabel as aromatic.""" -        data = namedtuple( -            "aromatic_ring", "atoms orig_atoms atoms_orig_idx normal obj center type" -        ) -        rings = [] -        aromatic_amino = ["TYR", "TRP", "HIS", "PHE"] -        ring_candidates = mol.OBMol.GetSSSR() -        logger.debug(f"number of aromatic ring candidates: {len(ring_candidates)}") -        # Check here first for ligand rings not being detected as aromatic by Babel and check for planarity -        for ring in ring_candidates: -            r_atoms = [a for a in all_atoms if ring.IsMember(a.OBAtom)] -            r_atoms = sorted(r_atoms, key=lambda x: x.idx) -            if 4 < len(r_atoms) <= 6: -                res = list(set([whichrestype(a) for a in r_atoms])) -                # re-sort ring atoms for only ligands, because HETATM numbering is not canonical in OpenBabel -                if res[0] == "UNL": -                    ligand_orig_idx = [ -                        self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms -                    ] -                    sort_order = np.argsort(np.array(ligand_orig_idx)) -                    r_atoms = [r_atoms[i] for i in sort_order] -                if ( -                    ring.IsAromatic() -                    or res[0] in aromatic_amino -                    or ring_is_planar(ring, r_atoms) -                ): -                    # Causes segfault with OpenBabel 2.3.2, so deactivated -                    # typ = ring.GetType() if not ring.GetType() == '' else 'unknown' -                    # Alternative typing -                    ring_type = "%s-membered" % len(r_atoms) -                    ring_atms = [ -                        r_atoms[a].coords for a in [0, 2, 4] -                    ]  # Probe atoms for normals, assuming planarity -                    ringv1 = vector(ring_atms[0], ring_atms[1]) -                    ringv2 = vector(ring_atms[2], ring_atms[0]) -                    atoms_orig_idx = [ -                        self.Mapper.mapid(r_atom.idx, mtype=self.mtype, bsid=self.bsid) -                        for r_atom in r_atoms -                    ] -                    orig_atoms = [self.Mapper.id_to_atom(idx) for idx in atoms_orig_idx] -                    rings.append( -                        data( -                            atoms=r_atoms, -                            orig_atoms=orig_atoms, -                            atoms_orig_idx=atoms_orig_idx, -                            normal=normalize_vector(np.cross(ringv1, ringv2)), -                            obj=ring, -                            center=centroid([ra.coords for ra in r_atoms]), -                            type=ring_type, -                        ) -                    ) -        return rings - -    def get_hydrophobic_atoms(self): -        return self.hydroph_atoms - -    def get_hba(self): -        return self.hbond_acc_atoms - -    def get_hbd(self): -        return [ -            don_pair -            for don_pair in self.hbond_don_atom_pairs -            if don_pair.type == "regular" -        ] - -    def get_weak_hbd(self): -        return [ -            don_pair -            for don_pair in self.hbond_don_atom_pairs -            if don_pair.type == "weak" -        ] - -    def get_pos_charged(self): -        return [charge for charge in self.charged if charge.type == "positive"] - -    def get_neg_charged(self): -        return [charge for charge in self.charged if charge.type == "negative"] - - -class PLInteraction: -    """Class to store a ligand, a protein and their interactions.""" - -    def __init__(self, lig_obj, bs_obj, protcomplex): -        """Detect all interactions when initializing""" -        self.ligand = lig_obj -        self.lig_members = lig_obj.members -        self.pdbid = protcomplex.pymol_name -        self.bindingsite = bs_obj -        self.Mapper = protcomplex.Mapper -        self.output_path = protcomplex.output_path -        self.altconf = protcomplex.altconf -        # #@todo Refactor code to combine different directionality - -        self.saltbridge_lneg = saltbridge( -            self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True -        ) -        self.saltbridge_pneg = saltbridge( -            self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False -        ) - -        self.all_hbonds_ldon = hbonds( -            self.bindingsite.get_hba(), self.ligand.get_hbd(), False, "strong" -        ) -        self.all_hbonds_pdon = hbonds( -            self.ligand.get_hba(), self.bindingsite.get_hbd(), True, "strong" -        ) - -        self.hbonds_ldon = self.refine_hbonds_ldon( -            self.all_hbonds_ldon, self.saltbridge_lneg, self.saltbridge_pneg -        ) -        self.hbonds_pdon = self.refine_hbonds_pdon( -            self.all_hbonds_pdon, self.saltbridge_lneg, self.saltbridge_pneg -        ) - -        self.pistacking = pistacking(self.bindingsite.rings, self.ligand.rings) - -        self.all_pi_cation_laro = pication( -            self.ligand.rings, self.bindingsite.get_pos_charged(), True -        ) -        self.pication_paro = pication( -            self.bindingsite.rings, self.ligand.get_pos_charged(), False -        ) - -        self.pication_laro = self.refine_pi_cation_laro( -            self.all_pi_cation_laro, self.pistacking -        ) - -        self.all_hydrophobic_contacts = hydrophobic_interactions( -            self.bindingsite.get_hydrophobic_atoms(), -            self.ligand.get_hydrophobic_atoms(), -        ) -        self.hydrophobic_contacts = self.refine_hydrophobic( -            self.all_hydrophobic_contacts, self.pistacking -        ) -        self.halogen_bonds = halogen( -            self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don -        ) -        self.water_bridges = water_bridges( -            self.bindingsite.get_hba(), -            self.ligand.get_hba(), -            self.bindingsite.get_hbd(), -            self.ligand.get_hbd(), -            self.ligand.water, -        ) - -        self.water_bridges = self.refine_water_bridges( -            self.water_bridges, self.hbonds_ldon, self.hbonds_pdon -        ) - -        self.metal_complexes = metal_complexation( -            self.ligand.metals, -            self.ligand.metal_binding, -            self.bindingsite.metal_binding, -        ) - -        self.all_itypes = self.saltbridge_lneg + self.saltbridge_pneg + self.hbonds_pdon -        self.all_itypes = ( -            self.all_itypes -            + self.hbonds_ldon -            + self.pistacking -            + self.pication_laro -            + self.pication_paro -        ) -        self.all_itypes = ( -            self.all_itypes -            + self.hydrophobic_contacts -            + self.halogen_bonds -            + self.water_bridges -        ) -        self.all_itypes = self.all_itypes + self.metal_complexes - -        self.no_interactions = all(len(i) == 0 for i in self.all_itypes) -        ( -            self.unpaired_hba, -            self.unpaired_hbd, -            self.unpaired_hal, -        ) = self.find_unpaired_ligand() -        self.unpaired_hba_orig_idx = [ -            self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) -            for atom in self.unpaired_hba -        ] -        self.unpaired_hbd_orig_idx = [ -            self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) -            for atom in self.unpaired_hbd -        ] -        self.unpaired_hal_orig_idx = [ -            self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) -            for atom in self.unpaired_hal -        ] -        self.num_unpaired_hba, self.num_unpaired_hbd = ( -            len(self.unpaired_hba), -            len(self.unpaired_hbd), -        ) -        self.num_unpaired_hal = len(self.unpaired_hal) - -        # Exclude empty chains (coming from ligand as a target, from metal complexes) -        self.interacting_chains = sorted( -            list( -                set( -                    [ -                        i.reschain -                        for i in self.all_itypes -                        if i.reschain not in [" ", None] -                    ] -                ) -            ) -        ) - -        # Get all interacting residues, excluding ligand and water molecules -        self.interacting_res = list( -            set( -                [ -                    "".join([str(i.resnr), str(i.reschain)]) -                    for i in self.all_itypes -                    if i.restype not in ["LIG", "HOH"] -                ] -            ) -        ) -        if len(self.interacting_res) != 0: -            logger.info( -                f"ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}" -            ) -            interactions_list = [] -            num_saltbridges = len(self.saltbridge_lneg + self.saltbridge_pneg) -            num_hbonds = len(self.hbonds_ldon + self.hbonds_pdon) -            num_pication = len(self.pication_laro + self.pication_paro) -            num_pistack = len(self.pistacking) -            num_halogen = len(self.halogen_bonds) -            num_waterbridges = len(self.water_bridges) -            if num_saltbridges != 0: -                interactions_list.append("%i salt bridge(s)" % num_saltbridges) -            if num_hbonds != 0: -                interactions_list.append("%i hydrogen bond(s)" % num_hbonds) -            if num_pication != 0: -                interactions_list.append("%i pi-cation interaction(s)" % num_pication) -            if num_pistack != 0: -                interactions_list.append("%i pi-stacking(s)" % num_pistack) -            if num_halogen != 0: -                interactions_list.append("%i halogen bond(s)" % num_halogen) -            if num_waterbridges != 0: -                interactions_list.append("%i water bridge(s)" % num_waterbridges) -            if not len(interactions_list) == 0: -                logger.info(f"complex uses {interactions_list}") -        else: -            logger.info("no interactions for this ligand") - -    def find_unpaired_ligand(self): -        """Identify unpaired functional in groups in ligands, involving H-Bond donors, acceptors, halogen bond donors. -        """ -        unpaired_hba, unpaired_hbd, unpaired_hal = [], [], [] -        # Unpaired hydrogen bond acceptors/donors in ligand (not used for hydrogen bonds/water, salt bridges/mcomplex) -        involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [ -            hbond.d.idx for hbond in self.hbonds_ldon -        ] -        [ -            [involved_atoms.append(atom.idx) for atom in sb.negative.atoms] -            for sb in self.saltbridge_lneg -        ] -        [ -            [involved_atoms.append(atom.idx) for atom in sb.positive.atoms] -            for sb in self.saltbridge_pneg -        ] -        [involved_atoms.append(wb.a.idx) for wb in self.water_bridges if wb.protisdon] -        [ -            involved_atoms.append(wb.d.idx) -            for wb in self.water_bridges -            if not wb.protisdon -        ] -        [ -            involved_atoms.append(mcomplex.target.atom.idx) -            for mcomplex in self.metal_complexes -            if mcomplex.location == "ligand" -        ] -        for atom in [hba.a for hba in self.ligand.get_hba()]: -            if atom.idx not in involved_atoms: -                unpaired_hba.append(atom) -        for atom in [hbd.d for hbd in self.ligand.get_hbd()]: -            if atom.idx not in involved_atoms: -                unpaired_hbd.append(atom) - -        # unpaired halogen bond donors in ligand (not used for the previous + halogen bonds) -        [involved_atoms.append(atom.don.x.idx) for atom in self.halogen_bonds] -        for atom in [haldon.x for haldon in self.ligand.halogenbond_don]: -            if atom.idx not in involved_atoms: -                unpaired_hal.append(atom) - -        return unpaired_hba, unpaired_hbd, unpaired_hal - -    def refine_hydrophobic(self, all_h, pistacks): -        """Apply several rules to reduce the number of hydrophobic interactions.""" -        sel = {} -        #  1. Rings interacting via stacking can't have additional hydrophobic contacts between each other. -        for pistack, h in itertools.product(pistacks, all_h): -            h1, h2 = h.bsatom.idx, h.ligatom.idx -            brs, lrs = ( -                [p1.idx for p1 in pistack.proteinring.atoms], -                [p2.idx for p2 in pistack.ligandring.atoms], -            ) -            if h1 in brs and h2 in lrs: -                sel[(h1, h2)] = "EXCLUDE" -        hydroph = [h for h in all_h if not (h.bsatom.idx, h.ligatom.idx) in sel] -        sel2 = {} -        #  2. If a ligand atom interacts with several binding site atoms in the same residue, -        #  keep only the one with the closest distance -        for h in hydroph: -            if not (h.ligatom.idx, h.resnr) in sel2: -                sel2[(h.ligatom.idx, h.resnr)] = h -            else: -                if sel2[(h.ligatom.idx, h.resnr)].distance > h.distance: -                    sel2[(h.ligatom.idx, h.resnr)] = h -        hydroph = [h for h in sel2.values()] -        hydroph_final = [] -        bsclust = {} -        #  3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist -        for h in hydroph: -            if h.bsatom.idx not in bsclust: -                bsclust[h.bsatom.idx] = [ -                    h, -                ] -            else: -                bsclust[h.bsatom.idx].append(h) - -        idx_to_h = {} -        for bs in [a for a in bsclust if len(bsclust[a]) == 1]: -            hydroph_final.append(bsclust[bs][0]) - -        # A list of tuples with the idx of an atom and one of its neighbours is created -        for bs in [a for a in bsclust if not len(bsclust[a]) == 1]: -            tuples = [] -            all_idx = [i.ligatom.idx for i in bsclust[bs]] -            for b in bsclust[bs]: -                idx = b.ligatom.idx -                neigh = [na for na in pybel.ob.OBAtomAtomIter(b.ligatom.OBAtom)] -                for n in neigh: -                    n_idx = n.GetIdx() -                    if n_idx in all_idx: -                        if n_idx < idx: -                            tuples.append((n_idx, idx)) -                        else: -                            tuples.append((idx, n_idx)) -                        idx_to_h[idx] = b - -            tuples = list(set(tuples)) -            tuples = sorted(tuples, key=itemgetter(1)) -            clusters = cluster_doubles( -                tuples -            )  # Cluster connected atoms (i.e. find hydrophobic patches) - -            for cluster in clusters: -                min_dist = float("inf") -                min_h = None -                for atm_idx in cluster: -                    h = idx_to_h[atm_idx] -                    if h.distance < min_dist: -                        min_dist = h.distance -                        min_h = h -                hydroph_final.append(min_h) -        before, reduced = len(all_h), len(hydroph_final) -        if not before == 0 and not before == reduced: -            logger.info( -                f"reduced number of hydrophobic contacts from {before} to {reduced}" -            ) -        return hydroph_final - -    def refine_hbonds_ldon(self, all_hbonds, salt_lneg, salt_pneg): -        """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds.""" -        i_set = {} -        for hbond in all_hbonds: -            i_set[hbond] = False -            for salt in salt_pneg: -                protidx, ligidx = ( -                    [at.idx for at in salt.negative.atoms], -                    [at.idx for at in salt.positive.atoms], -                ) -                if hbond.d.idx in ligidx and hbond.a.idx in protidx: -                    i_set[hbond] = True -            for salt in salt_lneg: -                protidx, ligidx = ( -                    [at.idx for at in salt.positive.atoms], -                    [at.idx for at in salt.negative.atoms], -                ) -                if hbond.d.idx in ligidx and hbond.a.idx in protidx: -                    i_set[hbond] = True - -        # Allow only one hydrogen bond per donor, select interaction with larger donor angle -        second_set = {} -        hbls = [k for k in i_set.keys() if not i_set[k]] -        for hbl in hbls: -            if hbl.d.idx not in second_set: -                second_set[hbl.d.idx] = (hbl.angle, hbl) -            else: -                if second_set[hbl.d.idx][0] < hbl.angle: -                    second_set[hbl.d.idx] = (hbl.angle, hbl) -        return [hb[1] for hb in second_set.values()] - -    def refine_hbonds_pdon(self, all_hbonds, salt_lneg, salt_pneg): -        """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds with -        atoms of the same group. -        """ -        i_set = {} -        for hbond in all_hbonds: -            i_set[hbond] = False -            for salt in salt_lneg: -                protidx, ligidx = ( -                    [at.idx for at in salt.positive.atoms], -                    [at.idx for at in salt.negative.atoms], -                ) -                if hbond.a.idx in ligidx and hbond.d.idx in protidx: -                    i_set[hbond] = True -            for salt in salt_pneg: -                protidx, ligidx = ( -                    [at.idx for at in salt.negative.atoms], -                    [at.idx for at in salt.positive.atoms], -                ) -                if hbond.a.idx in ligidx and hbond.d.idx in protidx: -                    i_set[hbond] = True - -        # Allow only one hydrogen bond per donor, select interaction with larger donor angle -        second_set = {} -        hbps = [k for k in i_set.keys() if not i_set[k]] -        for hbp in hbps: -            if hbp.d.idx not in second_set: -                second_set[hbp.d.idx] = (hbp.angle, hbp) -            else: -                if second_set[hbp.d.idx][0] < hbp.angle: -                    second_set[hbp.d.idx] = (hbp.angle, hbp) -        return [hb[1] for hb in second_set.values()] - -    def refine_pi_cation_laro(self, all_picat, stacks): -        """Just important for constellations with histidine involved. If the histidine ring is positioned in stacking -        position to an aromatic ring in the ligand, there is in most cases stacking and pi-cation interaction reported -        as histidine also carries a positive charge in the ring. For such cases, only report stacking. -        """ -        i_set = [] -        for picat in all_picat: -            exclude = False -            for stack in stacks: -                if ( -                    whichrestype(stack.proteinring.atoms[0]) == "HIS" -                    and picat.ring.obj == stack.ligandring.obj -                ): -                    exclude = True -            if not exclude: -                i_set.append(picat) -        return i_set - -    def refine_water_bridges(self, wbridges, hbonds_ldon, hbonds_pdon): -        """A donor atom already forming a hydrogen bond is not allowed to form a water bridge. Each water molecule -        can only be donor for two water bridges, selecting the constellation with the omega angle closest to 110 deg.""" -        donor_atoms_hbonds = [hb.d.idx for hb in hbonds_ldon + hbonds_pdon] -        wb_dict = {} -        wb_dict2 = {} -        omega = 110.0 - -        # Just one hydrogen bond per donor atom -        for wbridge in [wb for wb in wbridges if wb.d.idx not in donor_atoms_hbonds]: -            if (wbridge.water.idx, wbridge.a.idx) not in wb_dict: -                wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge -            else: -                if abs( -                    omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle -                ) < abs(omega - wbridge.w_angle): -                    wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge -        for wb_tuple in wb_dict: -            water, acceptor = wb_tuple -            if water not in wb_dict2: -                wb_dict2[water] = [ -                    (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]), -                ] -            elif len(wb_dict2[water]) == 1: -                wb_dict2[water].append( -                    (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]) -                ) -                wb_dict2[water] = sorted(wb_dict2[water], key=lambda x: x[0]) -            else: -                if wb_dict2[water][1][0] < abs(omega - wb_dict[wb_tuple].w_angle): -                    wb_dict2[water] = [ -                        wb_dict2[water][0], -                        (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple]), -                    ] - -        filtered_wb = [] -        for fwbridges in wb_dict2.values(): -            [filtered_wb.append(fwb[1]) for fwb in fwbridges] -        return filtered_wb - - -class BindingSite(Mol): -    def __init__(self, atoms, protcomplex, cclass, altconf, min_dist, mapper): -        """Find all relevant parts which could take part in interactions""" -        Mol.__init__(self, altconf, mapper, mtype="protein", bsid=None) -        self.complex = cclass -        self.full_mol = protcomplex -        self.all_atoms = atoms -        self.min_dist = min_dist  # Minimum distance of bs res to ligand -        self.bs_res = list( -            set( -                [ -                    "".join([str(whichresnumber(a)), whichchain(a)]) -                    for a in self.all_atoms -                ] -            ) -        )  # e.g. 47A -        self.rings = self.find_rings(self.full_mol, self.all_atoms) -        self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) -        self.hbond_acc_atoms = self.find_hba(self.all_atoms) -        self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) -        self.charged = self.find_charged(self.full_mol) -        self.halogenbond_acc = self.find_hal(self.all_atoms) -        self.metal_binding = self.find_metal_binding(self.full_mol) - -    def find_hal(self, atoms): -        """Look for halogen bond acceptors (Y-{O|P|N|S}, with Y=C,P,S)""" -        data = namedtuple("hal_acceptor", "o o_orig_idx y y_orig_idx") -        a_set = [] -        # All oxygens, nitrogen, sulfurs with neighboring carbon, phosphor, nitrogen or sulfur -        for a in [at for at in atoms if at.atomicnum in [8, 7, 16]]: -            n_atoms = [ -                na -                for na in pybel.ob.OBAtomAtomIter(a.OBAtom) -                if na.GetAtomicNum() in [6, 7, 15, 16] -            ] -            if len(n_atoms) == 1:  # Proximal atom -                o_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) -                y_orig_idx = self.Mapper.mapid( -                    n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid -                ) -                a_set.append( -                    data( -                        o=a, -                        o_orig_idx=o_orig_idx, -                        y=pybel.Atom(n_atoms[0]), -                        y_orig_idx=y_orig_idx, -                    ) -                ) -        return a_set - -    def find_charged(self, mol): -        """Looks for positive charges in arginine, histidine or lysine, for negative in aspartic and glutamic acid.""" -        data = namedtuple( -            "pcharge", "atoms atoms_orig_idx type center restype resnr reschain" -        ) -        a_set = [] -        # Iterate through all residue, exclude those in chains defined as peptides -        for res in [ -            r -            for r in pybel.ob.OBResidueIter(mol.OBMol) -            if not r.GetChain() in config.PEPTIDES -        ]: -            if config.INTRA is not None: -                if res.GetChain() != config.INTRA: -                    continue -            a_contributing = [] -            a_contributing_orig_idx = [] -            if res.GetName() in ( -                "ARG", -                "HIS", -                "LYS", -            ):  # Arginine, Histidine or Lysine have charged sidechains -                for a in pybel.ob.OBResidueAtomIter(res): -                    if ( -                        a.GetType().startswith("N") -                        and res.GetAtomProperty(a, 8) -                        and not self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        in self.altconf -                    ): -                        a_contributing.append(pybel.Atom(a)) -                        a_contributing_orig_idx.append( -                            self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        ) -                if not len(a_contributing) == 0: -                    a_set.append( -                        data( -                            atoms=a_contributing, -                            atoms_orig_idx=a_contributing_orig_idx, -                            type="positive", -                            center=centroid([ac.coords for ac in a_contributing]), -                            restype=res.GetName(), -                            resnr=res.GetNum(), -                            reschain=res.GetChain(), -                        ) -                    ) -            if res.GetName() in ("GLU", "ASP"):  # Aspartic or Glutamic Acid -                for a in pybel.ob.OBResidueAtomIter(res): -                    if ( -                        a.GetType().startswith("O") -                        and res.GetAtomProperty(a, 8) -                        and not self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        in self.altconf -                    ): -                        a_contributing.append(pybel.Atom(a)) -                        a_contributing_orig_idx.append( -                            self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        ) -                if not len(a_contributing) == 0: -                    a_set.append( -                        data( -                            atoms=a_contributing, -                            atoms_orig_idx=a_contributing_orig_idx, -                            type="negative", -                            center=centroid([ac.coords for ac in a_contributing]), -                            restype=res.GetName(), -                            resnr=res.GetNum(), -                            reschain=res.GetChain(), -                        ) -                    ) -        return a_set - -    def find_metal_binding(self, mol): -        """Looks for atoms that could possibly be involved in chelating a metal ion. -        This can be any main chain oxygen atom or oxygen, nitrogen and sulfur from specific amino acids""" -        data = namedtuple( -            "metal_binding", "atom atom_orig_idx type restype resnr reschain location" -        ) -        a_set = [] -        for res in pybel.ob.OBResidueIter(mol.OBMol): -            restype, reschain, resnr = ( -                res.GetName().upper(), -                res.GetChain(), -                res.GetNum(), -            ) -            if restype in ["ASP", "GLU", "SER", "THR", "TYR"]:  # Look for oxygens here -                for a in pybel.ob.OBResidueAtomIter(res): -                    if ( -                        a.GetType().startswith("O") -                        and res.GetAtomProperty(a, 8) -                        and not self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        in self.altconf -                    ): -                        atom_orig_idx = self.Mapper.mapid( -                            a.GetIdx(), mtype=self.mtype, bsid=self.bsid -                        ) -                        a_set.append( -                            data( -                                atom=pybel.Atom(a), -                                atom_orig_idx=atom_orig_idx, -                                type="O", -                                restype=restype, -                                resnr=resnr, -                                reschain=reschain, -                                location="protein.sidechain", -                            ) -                        ) -            if restype == "HIS":  # Look for nitrogen here -                for a in pybel.ob.OBResidueAtomIter(res): -                    if ( -                        a.GetType().startswith("N") -                        and res.GetAtomProperty(a, 8) -                        and not self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        in self.altconf -                    ): -                        atom_orig_idx = self.Mapper.mapid( -                            a.GetIdx(), mtype=self.mtype, bsid=self.bsid -                        ) -                        a_set.append( -                            data( -                                atom=pybel.Atom(a), -                                atom_orig_idx=atom_orig_idx, -                                type="N", -                                restype=restype, -                                resnr=resnr, -                                reschain=reschain, -                                location="protein.sidechain", -                            ) -                        ) -            if restype == "CYS":  # Look for sulfur here -                for a in pybel.ob.OBResidueAtomIter(res): -                    if ( -                        a.GetType().startswith("S") -                        and res.GetAtomProperty(a, 8) -                        and not self.Mapper.mapid(a.GetIdx(), mtype="protein") -                        in self.altconf -                    ): -                        atom_orig_idx = self.Mapper.mapid( -                            a.GetIdx(), mtype=self.mtype, bsid=self.bsid -                        ) -                        a_set.append( -                            data( -                                atom=pybel.Atom(a), -                                atom_orig_idx=atom_orig_idx, -                                type="S", -                                restype=restype, -                                resnr=resnr, -                                reschain=reschain, -                                location="protein.sidechain", -                            ) -                        ) -            for a in pybel.ob.OBResidueAtomIter(res):  # All main chain oxygens -                if ( -                    a.GetType().startswith("O") -                    and res.GetAtomProperty(a, 2) -                    and not self.Mapper.mapid(a.GetIdx(), mtype="protein") -                    in self.altconf -                    and restype != "HOH" -                ): -                    atom_orig_idx = self.Mapper.mapid( -                        a.GetIdx(), mtype=self.mtype, bsid=self.bsid -                    ) -                    a_set.append( -                        data( -                            atom=pybel.Atom(a), -                            atom_orig_idx=atom_orig_idx, -                            type="O", -                            restype=res.GetName(), -                            resnr=res.GetNum(), -                            reschain=res.GetChain(), -                            location="protein.mainchain", -                        ) -                    ) -        return a_set - - -class Ligand(Mol): -    def __init__(self, cclass, ligand): -        altconf = cclass.altconf -        self.hetid, self.chain, self.position = ( -            ligand.hetid, -            ligand.chain, -            ligand.position, -        ) -        self.bsid = ":".join([self.hetid, self.chain, str(self.position)]) -        Mol.__init__(self, altconf, cclass.Mapper, mtype="ligand", bsid=self.bsid) -        self.members = ligand.members -        self.longname = ligand.longname -        self.type = ligand.type -        self.complex = cclass -        self.molecule = ligand.mol  # Pybel Molecule -        self.smiles = self.molecule.write(format="can")  # SMILES String -        self.inchikey = self.molecule.write(format="inchikey") -        self.can_to_pdb = ligand.can_to_pdb -        if not len(self.smiles) == 0: -            self.smiles = self.smiles.split()[0] -        else: -            logger.warning(f"could not write SMILES for ligand {ligand}") -            self.smiles = "" -        self.heavy_atoms = self.molecule.OBMol.NumHvyAtoms()  # Heavy atoms count -        self.all_atoms = self.molecule.atoms -        self.atmdict = {l.idx: l for l in self.all_atoms} -        self.rings = self.find_rings(self.molecule, self.all_atoms) -        self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) -        self.hbond_acc_atoms = self.find_hba(self.all_atoms) -        self.num_rings = len(self.rings) -        if self.num_rings != 0: -            logger.info(f"contains {self.num_rings} aromatic ring(s)") -        descvalues = self.molecule.calcdesc() -        self.molweight, self.logp = float(descvalues["MW"]), float(descvalues["logP"]) -        self.num_rot_bonds = int(self.molecule.OBMol.NumRotors()) -        self.atomorder = ligand.atomorder - -        ########################################################## -        # Special Case for hydrogen bond acceptor identification # -        ########################################################## - -        self.inverse_mapping = { -            v: k for k, v in self.Mapper.ligandmaps[self.bsid].items() -        } -        self.pdb_to_idx_mapping = {v: k for k, v in self.Mapper.proteinmap.items()} -        self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) - -        ###### -        donor_pairs = [] -        data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type") -        for donor in self.all_atoms: -            pdbidx = self.Mapper.mapid( -                donor.idx, mtype="ligand", bsid=self.bsid, to="original" -            ) -            d = cclass.atoms[self.pdb_to_idx_mapping[pdbidx]] -            if d.OBAtom.IsHbondDonor(): -                for adj_atom in [ -                    a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH() -                ]: -                    d_orig_atom = self.Mapper.id_to_atom(pdbidx) -                    donor_pairs.append( -                        data( -                            d=donor, -                            d_orig_atom=d_orig_atom, -                            d_orig_idx=pdbidx, -                            h=pybel.Atom(adj_atom), -                            type="regular", -                        ) -                    ) -        self.hbond_don_atom_pairs = donor_pairs -        ####### - -        self.charged = self.find_charged(self.all_atoms) -        self.centroid = centroid([a.coords for a in self.all_atoms]) -        self.max_dist_to_center = max( -            (euclidean3d(self.centroid, a.coords) for a in self.all_atoms) -        ) -        self.water = [] -        data = namedtuple("water", "oxy oxy_orig_idx") -        for hoh in ligand.water: -            oxy = None -            for at in pybel.ob.OBResidueAtomIter(hoh): -                if at.GetAtomicNum() == 8 and at.GetIdx() not in self.altconf: -                    oxy = pybel.Atom(at) -            # There are some cases where there is no oxygen in a water residue, ignore those -            if ( -                not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)]) -                == {1} -                and oxy is not None -            ): -                if ( -                    euclidean3d(self.centroid, oxy.coords) -                    < self.max_dist_to_center + config.BS_DIST -                ): -                    oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype="protein") -                    self.water.append(data(oxy=oxy, oxy_orig_idx=oxy_orig_idx)) -        self.halogenbond_don = self.find_hal(self.all_atoms) -        self.metal_binding = self.find_metal_binding(self.all_atoms, self.water) -        self.metals = [] -        data = namedtuple("metal", "m orig_m m_orig_idx") -        for a in [a for a in self.all_atoms if a.type.upper() in config.METAL_IONS]: -            m_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) -            orig_m = self.Mapper.id_to_atom(m_orig_idx) -            self.metals.append(data(m=a, m_orig_idx=m_orig_idx, orig_m=orig_m)) -        self.num_hba, self.num_hbd = ( -            len(self.hbond_acc_atoms), -            len(self.hbond_don_atom_pairs), -        ) -        self.num_hal = len(self.halogenbond_don) - -    def get_canonical_num(self, atomnum): -        """Converts internal atom ID into canonical atom ID. Agrees with Canonical SMILES in XML.""" -        return self.atomorder[atomnum - 1] - -    def is_functional_group(self, atom, group): -        """Given a pybel atom, look up if it belongs to a function group""" -        n_atoms = [ -            a_neighbor.GetAtomicNum() -            for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom) -        ] - -        if group in ["quartamine", "tertamine"] and atom.atomicnum == 7:  # Nitrogen -            # It's a nitrogen, so could be a protonated amine or quaternary ammonium -            if "1" not in n_atoms and len(n_atoms) == 4: -                return ( -                    True if group == "quartamine" else False -                )  # It's a quat. ammonium (N with 4 residues != H) -            elif atom.OBAtom.GetHyb() == 3 and len(n_atoms) >= 3: -                return ( -                    True if group == "tertamine" else False -                )  # It's sp3-hybridized, so could pick up an hydrogen -            else: -                return False - -        if ( -            group in ["sulfonium", "sulfonicacid", "sulfate"] and atom.atomicnum == 16 -        ):  # Sulfur -            if ( -                "1" not in n_atoms and len(n_atoms) == 3 -            ):  # It's a sulfonium (S with 3 residues != H) -                return True if group == "sulfonium" else False -            elif n_atoms.count(8) == 3:  # It's a sulfonate or sulfonic acid -                return True if group == "sulfonicacid" else False -            elif n_atoms.count(8) == 4:  # It's a sulfate -                return True if group == "sulfate" else False - -        if group == "phosphate" and atom.atomicnum == 15:  # Phosphor -            if set(n_atoms) == {8}:  # It's a phosphate -                return True - -        if ( -            group in ["carboxylate", "guanidine"] and atom.atomicnum == 6 -        ):  # It's a carbon atom -            if ( -                n_atoms.count(8) == 2 and n_atoms.count(6) == 1 -            ):  # It's a carboxylate group -                return True if group == "carboxylate" else False -            elif n_atoms.count(7) == 3 and len(n_atoms) == 3:  # It's a guanidine group -                nitro_partners = [] -                for nitro in pybel.ob.OBAtomAtomIter(atom.OBAtom): -                    nitro_partners.append( -                        len( -                            [ -                                b_neighbor -                                for b_neighbor in pybel.ob.OBAtomAtomIter(nitro) -                            ] -                        ) -                    ) -                if ( -                    min(nitro_partners) == 1 -                ):  # One nitrogen is only connected to the carbon, can pick up a H -                    return True if group == "guanidine" else False - -        if group == "halocarbon" and atom.atomicnum in [9, 17, 35, 53]:  # Halogen atoms -            n_atoms = [ -                na -                for na in pybel.ob.OBAtomAtomIter(atom.OBAtom) -                if na.GetAtomicNum() == 6 -            ] -            if len(n_atoms) == 1:  # Halocarbon -                return True -        else: -            return False - -    def find_hal(self, atoms): -        """Look for halogen bond donors (X-C, with X=F, Cl, Br, I)""" -        data = namedtuple("hal_donor", "x orig_x x_orig_idx c c_orig_idx") -        a_set = [] -        for a in atoms: -            if self.is_functional_group(a, "halocarbon"): -                n_atoms = [ -                    na -                    for na in pybel.ob.OBAtomAtomIter(a.OBAtom) -                    if na.GetAtomicNum() == 6 -                ] -                x_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) -                orig_x = self.Mapper.id_to_atom(x_orig_idx) -                c_orig_idx = [ -                    self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid) -                    for na in n_atoms -                ] -                a_set.append( -                    data( -                        x=a, -                        orig_x=orig_x, -                        x_orig_idx=x_orig_idx, -                        c=pybel.Atom(n_atoms[0]), -                        c_orig_idx=c_orig_idx, -                    ) -                ) -        if len(a_set) != 0: -            logger.info(f"ligand contains {len(a_set)} halogen atom(s)") -        return a_set - -    def find_charged(self, all_atoms): -        """Identify all positively charged groups in a ligand. This search is not exhaustive, as the cases can be quite -        diverse. The typical cases seem to be protonated amines, quaternary ammoinium and sulfonium -        as mentioned in 'Cation-pi interactions in ligand recognition and catalysis' (Zacharias et al., 2002)). -        Identify negatively charged groups in the ligand. -        """ -        data = namedtuple( -            "lcharge", "atoms orig_atoms atoms_orig_idx type center fgroup" -        ) -        a_set = [] -        for a in all_atoms: -            a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) -            a_orig = self.Mapper.id_to_atom(a_orig_idx) -            if self.is_functional_group(a, "quartamine"): -                a_set.append( -                    data( -                        atoms=[a,], -                        orig_atoms=[a_orig,], -                        atoms_orig_idx=[a_orig_idx,], -                        type="positive", -                        center=list(a.coords), -                        fgroup="quartamine", -                    ) -                ) -            elif self.is_functional_group(a, "tertamine"): -                a_set.append( -                    data( -                        atoms=[a,], -                        orig_atoms=[a_orig,], -                        atoms_orig_idx=[a_orig_idx,], -                        type="positive", -                        center=list(a.coords), -                        fgroup="tertamine", -                    ) -                ) -            if self.is_functional_group(a, "sulfonium"): -                a_set.append( -                    data( -                        atoms=[a,], -                        orig_atoms=[a_orig,], -                        atoms_orig_idx=[a_orig_idx,], -                        type="positive", -                        center=list(a.coords), -                        fgroup="sulfonium", -                    ) -                ) -            if self.is_functional_group(a, "phosphate"): -                a_contributing = [ -                    a, -                ] -                a_contributing_orig_idx = [ -                    a_orig_idx, -                ] -                [ -                    a_contributing.append(pybel.Atom(neighbor)) -                    for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) -                ] -                [ -                    a_contributing_orig_idx.append( -                        self.Mapper.mapid( -                            neighbor.idx, mtype=self.mtype, bsid=self.bsid -                        ) -                    ) -                    for neighbor in a_contributing -                ] -                orig_contributing = [ -                    self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx -                ] -                a_set.append( -                    data( -                        atoms=a_contributing, -                        orig_atoms=orig_contributing, -                        atoms_orig_idx=a_contributing_orig_idx, -                        type="negative", -                        center=a.coords, -                        fgroup="phosphate", -                    ) -                ) -            if self.is_functional_group(a, "sulfonicacid"): -                a_contributing = [ -                    a, -                ] -                a_contributing_orig_idx = [ -                    a_orig_idx, -                ] -                [ -                    a_contributing.append(pybel.Atom(neighbor)) -                    for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) -                    if neighbor.GetAtomicNum() == 8 -                ] -                [ -                    a_contributing_orig_idx.append( -                        self.Mapper.mapid( -                            neighbor.idx, mtype=self.mtype, bsid=self.bsid -                        ) -                    ) -                    for neighbor in a_contributing -                ] -                orig_contributing = [ -                    self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx -                ] -                a_set.append( -                    data( -                        atoms=a_contributing, -                        orig_atoms=orig_contributing, -                        atoms_orig_idx=a_contributing_orig_idx, -                        type="negative", -                        center=a.coords, -                        fgroup="sulfonicacid", -                    ) -                ) -            elif self.is_functional_group(a, "sulfate"): -                a_contributing = [ -                    a, -                ] -                a_contributing_orig_idx = [ -                    a_orig_idx, -                ] -                [ -                    a_contributing_orig_idx.append( -                        self.Mapper.mapid( -                            neighbor.idx, mtype=self.mtype, bsid=self.bsid -                        ) -                    ) -                    for neighbor in a_contributing -                ] -                [ -                    a_contributing.append(pybel.Atom(neighbor)) -                    for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) -                ] -                orig_contributing = [ -                    self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx -                ] -                a_set.append( -                    data( -                        atoms=a_contributing, -                        orig_atoms=orig_contributing, -                        atoms_orig_idx=a_contributing_orig_idx, -                        type="negative", -                        center=a.coords, -                        fgroup="sulfate", -                    ) -                ) -            if self.is_functional_group(a, "carboxylate"): -                a_contributing = [ -                    pybel.Atom(neighbor) -                    for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) -                    if neighbor.GetAtomicNum() == 8 -                ] -                a_contributing_orig_idx = [ -                    self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) -                    for neighbor in a_contributing -                ] -                orig_contributing = [ -                    self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx -                ] -                a_set.append( -                    data( -                        atoms=a_contributing, -                        orig_atoms=orig_contributing, -                        atoms_orig_idx=a_contributing_orig_idx, -                        type="negative", -                        center=centroid([a.coords for a in a_contributing]), -                        fgroup="carboxylate", -                    ) -                ) -            elif self.is_functional_group(a, "guanidine"): -                a_contributing = [ -                    pybel.Atom(neighbor) -                    for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) -                    if neighbor.GetAtomicNum() == 7 -                ] -                a_contributing_orig_idx = [ -                    self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) -                    for neighbor in a_contributing -                ] -                orig_contributing = [ -                    self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx -                ] -                a_set.append( -                    data( -                        atoms=a_contributing, -                        orig_atoms=orig_contributing, -                        atoms_orig_idx=a_contributing_orig_idx, -                        type="positive", -                        center=a.coords, -                        fgroup="guanidine", -                    ) -                ) -        return a_set - -    def find_metal_binding(self, lig_atoms, water_oxygens): -        """Looks for atoms that could possibly be involved in binding a metal ion. -        This can be any water oxygen, as well as oxygen from carboxylate, phophoryl, phenolate, alcohol; -        nitrogen from imidazole; sulfur from thiolate. -        """ -        a_set = [] -        data = namedtuple( -            "metal_binding", -            "atom orig_atom atom_orig_idx type fgroup restype resnr reschain location", -        ) -        for oxygen in water_oxygens: -            a_set.append( -                data( -                    atom=oxygen.oxy, -                    atom_orig_idx=oxygen.oxy_orig_idx, -                    type="O", -                    fgroup="water", -                    restype=whichrestype(oxygen.oxy), -                    resnr=whichresnumber(oxygen.oxy), -                    reschain=whichchain(oxygen.oxy), -                    location="water", -                    orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx), -                ) -            ) -        # #@todo Refactor code -        for a in lig_atoms: -            a_orig_idx = self.Mapper.mapid(a.idx, mtype="ligand", bsid=self.bsid) -            n_atoms = pybel.ob.OBAtomAtomIter(a.OBAtom)  # Neighboring atoms -            # All atomic numbers of neighboring atoms -            n_atoms_atomicnum = [ -                n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom) -            ] -            if a.atomicnum == 8:  # Oxygen -                if ( -                    n_atoms_atomicnum.count("1") == 1 and len(n_atoms_atomicnum) == 2 -                ):  # Oxygen in alcohol (R-[O]-H) -                    a_set.append( -                        data( -                            atom=a, -                            atom_orig_idx=a_orig_idx, -                            type="O", -                            fgroup="alcohol", -                            restype=self.hetid, -                            resnr=self.position, -                            reschain=self.chain, -                            location="ligand", -                            orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                        ) -                    ) -                if ( -                    True in [n.IsAromatic() for n in n_atoms] -                    and not a.OBAtom.IsAromatic() -                ):  # Phenolate oxygen -                    a_set.append( -                        data( -                            atom=a, -                            atom_orig_idx=a_orig_idx, -                            type="O", -                            fgroup="phenolate", -                            restype=self.hetid, -                            resnr=self.position, -                            reschain=self.chain, -                            location="ligand", -                            orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                        ) -                    ) -            if a.atomicnum == 6:  # It's a carbon atom -                if ( -                    n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1 -                ):  # It's a carboxylate group -                    for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: -                        neighbor_orig_idx = self.Mapper.mapid( -                            neighbor.GetIdx(), mtype="ligand", bsid=self.bsid -                        ) -                        a_set.append( -                            data( -                                atom=pybel.Atom(neighbor), -                                atom_orig_idx=neighbor_orig_idx, -                                type="O", -                                fgroup="carboxylate", -                                restype=self.hetid, -                                resnr=self.position, -                                reschain=self.chain, -                                location="ligand", -                                orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                            ) -                        ) -            if a.atomicnum == 15:  # It's a phosphor atom -                if n_atoms_atomicnum.count(8) >= 3:  # It's a phosphoryl -                    for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: -                        neighbor_orig_idx = self.Mapper.mapid( -                            neighbor.GetIdx(), mtype="ligand", bsid=self.bsid -                        ) -                        a_set.append( -                            data( -                                atom=pybel.Atom(neighbor), -                                atom_orig_idx=neighbor_orig_idx, -                                type="O", -                                fgroup="phosphoryl", -                                restype=self.hetid, -                                resnr=self.position, -                                reschain=self.chain, -                                location="ligand", -                                orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                            ) -                        ) -                if ( -                    n_atoms_atomicnum.count(8) == 2 -                ):  # It's another phosphor-containing group #@todo (correct name?) -                    for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: -                        neighbor_orig_idx = self.Mapper.mapid( -                            neighbor.GetIdx(), mtype="ligand", bsid=self.bsid -                        ) -                        a_set.append( -                            data( -                                atom=pybel.Atom(neighbor), -                                atom_orig_idx=neighbor_orig_idx, -                                type="O", -                                fgroup="phosphor.other", -                                restype=self.hetid, -                                resnr=self.position, -                                reschain=self.chain, -                                location="ligand", -                                orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                            ) -                        ) -            if a.atomicnum == 7:  # It's a nitrogen atom -                if n_atoms_atomicnum.count(6) == 2:  # It's imidazole/pyrrole or similar -                    a_set.append( -                        data( -                            atom=a, -                            atom_orig_idx=a_orig_idx, -                            type="N", -                            fgroup="imidazole/pyrrole", -                            restype=self.hetid, -                            resnr=self.position, -                            reschain=self.chain, -                            location="ligand", -                            orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                        ) -                    ) -            if a.atomicnum == 16:  # It's a sulfur atom -                if ( -                    True in [n.IsAromatic() for n in n_atoms] -                    and not a.OBAtom.IsAromatic() -                ):  # Thiolate -                    a_set.append( -                        data( -                            atom=a, -                            atom_orig_idx=a_orig_idx, -                            type="S", -                            fgroup="thiolate", -                            restype=self.hetid, -                            resnr=self.position, -                            reschain=self.chain, -                            location="ligand", -                            orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                        ) -                    ) -                if set(n_atoms_atomicnum) == {26}:  # Sulfur in Iron sulfur cluster -                    a_set.append( -                        data( -                            atom=a, -                            atom_orig_idx=a_orig_idx, -                            type="S", -                            fgroup="iron-sulfur.cluster", -                            restype=self.hetid, -                            resnr=self.position, -                            reschain=self.chain, -                            location="ligand", -                            orig_atom=self.Mapper.id_to_atom(a_orig_idx), -                        ) -                    ) - -        return a_set - - -class PDBComplex: -    """Contains a collection of objects associated with a PDB complex, i.e. one or several ligands and their binding -    sites as well as information about the pliprofiler between them. Provides functions to load and prepare input files -    such as PDB files. -    """ - -    def __init__(self): -        self.interaction_sets = ( -            {} -        )  # Dictionary with site identifiers as keys and object as value -        self.protcomplex = None -        self.filetype = None -        self.atoms = {}  # Dictionary of Pybel atoms, accessible by their idx -        self.sourcefiles = {} -        self.information = {} -        self.corrected_pdb = "" -        self._output_path = tempfile.gettempdir() -        self.pymol_name = None -        self.modres = set() -        self.resis = [] -        self.altconf = []  # Atom idx of atoms with alternate conformations -        self.covalent = ( -            [] -        )  # Covalent linkages between ligands and protein residues/other ligands -        self.excluded = []  # Excluded ligands -        self.Mapper = Mapper() -        self.ligands = [] - -    def __str__(self): -        formatted_lig_names = [ -            ":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands -        ] -        return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join( -            [lig for lig in formatted_lig_names] -        ) - -    def load_pdb(self, pdbpath, as_string=False): -        """Loads a pdb file with protein AND ligand(s), separates and prepares them. -        If specified 'as_string', the input is a PDB string instead of a path.""" -        if as_string: -            self.sourcefiles["pdbcomplex.original"] = None -            self.sourcefiles["pdbcomplex"] = None -            self.sourcefiles["pdbstring"] = pdbpath -        else: -            self.sourcefiles["pdbcomplex.original"] = pdbpath -            self.sourcefiles["pdbcomplex"] = pdbpath -        self.information["pdbfixes"] = False -        pdbparser = PDBParser( -            pdbpath, as_string=as_string -        )  # Parse PDB file to find errors and get additional data -        # #@todo Refactor and rename here -        self.Mapper.proteinmap = pdbparser.proteinmap -        self.Mapper.reversed_proteinmap = { -            v: k for k, v in self.Mapper.proteinmap.items() -        } -        self.modres = pdbparser.modres -        self.covalent = pdbparser.covalent -        self.altconf = pdbparser.altconformations -        self.corrected_pdb = pdbparser.corrected_pdb - -        if not config.PLUGIN_MODE: -            if pdbparser.num_fixed_lines > 0: -                logger.info( -                    f"{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file" -                ) -                # Save modified PDB file -                if not as_string: -                    basename = os.path.basename(pdbpath).split(".")[0] -                else: -                    basename = "from_stdin" -                pdbpath_fixed = tmpfile( -                    prefix="plipfixed." + basename + "_", direc=self.output_path -                ) -                create_folder_if_not_exists(self.output_path) -                self.sourcefiles["pdbcomplex"] = pdbpath_fixed -                self.corrected_pdb = re.sub( -                    r"[^\x00-\x7F]+", " ", self.corrected_pdb -                )  # Strip non-unicode chars -                if ( -                    not config.NOFIXFILE -                ):  # Only write to file if this option is not activated -                    with open(pdbpath_fixed, "w") as f: -                        f.write(self.corrected_pdb) -                self.information["pdbfixes"] = True - -        if not as_string: -            self.sourcefiles["filename"] = os.path.basename( -                self.sourcefiles["pdbcomplex"] -            ) -        self.protcomplex, self.filetype = read_pdb(self.corrected_pdb, as_string=True) - -        # Update the model in the Mapper class instance -        self.Mapper.original_structure = self.protcomplex.OBMol -        logger.info("PDB structure successfully read") - -        # Determine (temporary) PyMOL Name from Filename -        self.pymol_name = pdbpath.split("/")[-1].split(".")[0] + "-Protein" -        # Replace characters causing problems in PyMOL -        self.pymol_name = ( -            self.pymol_name.replace(" ", "") -            .replace("(", "") -            .replace(")", "") -            .replace("-", "_") -        ) -        # But if possible, name it after PDBID in Header -        if "HEADER" in self.protcomplex.data:  # If the PDB file has a proper header -            potential_name = self.protcomplex.data["HEADER"][56:60].lower() -            if extract_pdbid(potential_name) != "UnknownProtein": -                self.pymol_name = potential_name -        logger.debug(f"PyMOL name set as: {self.pymol_name}") - -        # Extract and prepare ligands -        ligandfinder = LigandFinder( -            self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper -        ) -        self.ligands = ligandfinder.ligands -        self.excluded = ligandfinder.excluded - -        # decide whether to add polar hydrogens -        if not config.NOHYDRO: -            if not as_string: -                basename = os.path.basename(pdbpath).split(".")[0] -            else: -                basename = "from_stdin" -            self.protcomplex.OBMol.AddPolarHydrogens() -            output_path = os.path.join(self._output_path, f"{basename}_protonated.pdb") -            self.protcomplex.write("pdb", output_path, overwrite=True) -            logger.info(f"protonated structure written to {output_path}") -        else: -            logger.warning( -                "no polar hydrogens will be assigned (make sure your structure contains hydrogens)" -            ) - -        for atm in self.protcomplex: -            self.atoms[atm.idx] = atm - -        if len(self.excluded) != 0: -            logger.info(f"excluded molecules as ligands: {self.excluded}") - -        if config.DNARECEPTOR: -            self.resis = [ -                obres -                for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol) -                if obres.GetName() in config.DNA + config.RNA -            ] -        else: -            self.resis = [ -                obres -                for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol) -                if obres.GetResidueProperty(0) -            ] - -        num_ligs = len(self.ligands) -        if num_ligs == 1: -            logger.info("analyzing one ligand") -        elif num_ligs > 1: -            logger.info(f"analyzing {num_ligs} ligands") -        else: -            logger.info(f"structure contains no ligands") - -    def analyze(self): -        """Triggers analysis of all complexes in structure""" -        for ligand in self.ligands: -            self.characterize_complex(ligand) - -    def characterize_complex(self, ligand): -        """Handles all basic functions for characterizing the interactions for one ligand""" - -        single_sites = [] -        for member in ligand.members: -            single_sites.append(":".join([str(x) for x in member])) -        site = " + ".join(single_sites) -        site = site if not len(site) > 20 else site[:20] + "..." -        longname = ( -            ligand.longname -            if not len(ligand.longname) > 20 -            else ligand.longname[:20] + "..." -        ) -        ligtype = "unspecified type" if ligand.type == "UNSPECIFIED" else ligand.type -        ligtext = f"{longname} [{ligtype}] -- {site}" -        logger.info(f"processing ligand {ligtext}") -        if ligtype == "PEPTIDE": -            logger.info( -                f"chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode" -            ) -        if ligtype == "INTRA": -            logger.info(f"chain {ligand.chain} will be processed in [INTRA-CHAIN] mode") -        any_in_biolip = ( -            len(set([x[0] for x in ligand.members]).intersection(config.biolip_list)) -            != 0 -        ) - -        if ( -            ligtype -            not in ["POLYMER", "DNA", "ION", "DNA+ION", "RNA+ION", "SMALLMOLECULE+ION"] -            and any_in_biolip -        ): -            logger.info("may be biologically irrelevant") - -        lig_obj = Ligand(self, ligand) -        cutoff = lig_obj.max_dist_to_center + config.BS_DIST -        bs_res = self.extract_bs(cutoff, lig_obj.centroid, self.resis) -        # Get a list of all atoms belonging to the binding site, search by idx -        bs_atoms = [ -            self.atoms[idx] -            for idx in [ -                i -                for i in self.atoms.keys() -                if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res -            ] -            if idx in self.Mapper.proteinmap -            and self.Mapper.mapid(idx, mtype="protein") not in self.altconf -        ] -        if ligand.type == "PEPTIDE": -            # If peptide, don't consider the peptide chain as part of the protein binding site -            bs_atoms = [ -                a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain -            ] -        if ligand.type == "INTRA": -            # Interactions within the chain -            bs_atoms = [ -                a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain -            ] -        bs_atoms_refined = [] - -        # Create hash with BSRES -> (MINDIST_TO_LIG, AA_TYPE) -        # and refine binding site atom selection with exact threshold -        min_dist = {} -        for r in bs_atoms: -            bs_res_id = "".join([str(whichresnumber(r)), whichchain(r)]) -            for l in ligand.mol.atoms: -                distance = euclidean3d(r.coords, l.coords) -                if bs_res_id not in min_dist: -                    min_dist[bs_res_id] = (distance, whichrestype(r)) -                elif min_dist[bs_res_id][0] > distance: -                    min_dist[bs_res_id] = (distance, whichrestype(r)) -                if distance <= config.BS_DIST and r not in bs_atoms_refined: -                    bs_atoms_refined.append(r) -        num_bs_atoms = len(bs_atoms_refined) -        logger.info( -            f"binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})" -        ) - -        bs_obj = BindingSite( -            bs_atoms_refined, -            self.protcomplex, -            self, -            self.altconf, -            min_dist, -            self.Mapper, -        ) -        pli_obj = PLInteraction(lig_obj, bs_obj, self) -        self.interaction_sets[ligand.mol.title] = pli_obj - -    def extract_bs(self, cutoff, ligcentroid, resis): -        """Return list of ids from residues belonging to the binding site""" -        return [ -            obres.GetIdx() -            for obres in resis -            if self.res_belongs_to_bs(obres, cutoff, ligcentroid) -        ] - -    def res_belongs_to_bs(self, res, cutoff, ligcentroid): -        """Check for each residue if its centroid is within a certain distance to the ligand centroid. -        Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)""" -        rescentroid = centroid( -            [(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)] -        ) -        # Check geometry -        near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False -        # Check chain membership -        restricted_chain = True if res.GetChain() in config.PEPTIDES else False -        return near_enough and not restricted_chain - -    def get_atom(self, idx): -        return self.atoms[idx] - -    @property -    def output_path(self): -        return self._output_path - -    @output_path.setter -    def output_path(self, path): -        self._output_path = tilde_expansion(path) | 
