From 4be08f7bdd77991e9e453c1cda863c3f20c338d5 Mon Sep 17 00:00:00 2001 From: Navan Chauhan Date: Thu, 2 Jul 2020 20:48:33 +0530 Subject: initial commit --- plip/structure/__init__.py | 0 plip/structure/detection.py | 479 +++++++++++++ plip/structure/preparation.py | 1483 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1962 insertions(+) create mode 100644 plip/structure/__init__.py create mode 100644 plip/structure/detection.py create mode 100644 plip/structure/preparation.py (limited to 'plip/structure') diff --git a/plip/structure/__init__.py b/plip/structure/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/plip/structure/detection.py b/plip/structure/detection.py new file mode 100644 index 0000000..71fec63 --- /dev/null +++ b/plip/structure/detection.py @@ -0,0 +1,479 @@ +import itertools +from collections import defaultdict +from collections import namedtuple + +import numpy as np +from openbabel.openbabel import OBAtomAtomIter + +from plip.basic import config, logger +from plip.basic.supplemental import vecangle, vector, euclidean3d, projection +from plip.basic.supplemental import whichresnumber, whichrestype, whichchain + +logger = logger.get_logger() + +def filter_contacts(pairings): + """Filter interactions by two criteria: + 1. No interactions between the same residue (important for intra mode). + 2. No duplicate interactions (A with B and B with A, also important for intra mode).""" + if not config.INTRA: + return pairings + filtered1_pairings = [p for p in pairings if (p.resnr, p.reschain) != (p.resnr_l, p.reschain_l)] + already_considered = [] + filtered2_pairings = [] + for contact in filtered1_pairings: + try: + dist = 'D{}'.format(round(contact.distance, 2)) + except AttributeError: + try: + dist = 'D{}'.format(round(contact.distance_ah, 2)) + except AttributeError: + dist = 'D{}'.format(round(contact.distance_aw, 2)) + res1, res2 = ''.join([str(contact.resnr), contact.reschain]), ''.join( + [str(contact.resnr_l), contact.reschain_l]) + data = {res1, res2, dist} + if data not in already_considered: + filtered2_pairings.append(contact) + already_considered.append(data) + return filtered2_pairings + + +################################################## +# FUNCTIONS FOR DETECTION OF SPECIFIC INTERACTIONS +################################################## + +def hydrophobic_interactions(atom_set_a, atom_set_b): + """Detection of hydrophobic pliprofiler between atom_set_a (binding site) and atom_set_b (ligand). + Definition: All pairs of qualified carbon atoms within a distance of HYDROPH_DIST_MAX + """ + data = namedtuple('hydroph_interaction', 'bsatom bsatom_orig_idx ligatom ligatom_orig_idx ' + 'distance restype resnr reschain restype_l, resnr_l, reschain_l') + pairings = [] + for a, b in itertools.product(atom_set_a, atom_set_b): + if a.orig_idx == b.orig_idx: + continue + e = euclidean3d(a.atom.coords, b.atom.coords) + if not config.MIN_DIST < e < config.HYDROPH_DIST_MAX: + continue + restype, resnr, reschain = whichrestype(a.atom), whichresnumber(a.atom), whichchain(a.atom) + restype_l, resnr_l, reschain_l = whichrestype(b.orig_atom), whichresnumber(b.orig_atom), whichchain(b.orig_atom) + contact = data(bsatom=a.atom, bsatom_orig_idx=a.orig_idx, ligatom=b.atom, ligatom_orig_idx=b.orig_idx, + distance=e, restype=restype, resnr=resnr, + reschain=reschain, restype_l=restype_l, + resnr_l=resnr_l, reschain_l=reschain_l) + pairings.append(contact) + return filter_contacts(pairings) + + +def hbonds(acceptors, donor_pairs, protisdon, typ): + """Detection of hydrogen bonds between sets of acceptors and donor pairs. + Definition: All pairs of hydrogen bond acceptor and donors with + donor hydrogens and acceptor showing a distance within HBOND DIST MIN and HBOND DIST MAX + and donor angles above HBOND_DON_ANGLE_MIN + """ + data = namedtuple('hbond', 'a a_orig_idx d d_orig_idx h distance_ah distance_ad angle type protisdon resnr ' + 'restype reschain resnr_l restype_l reschain_l sidechain atype dtype') + pairings = [] + for acc, don in itertools.product(acceptors, donor_pairs): + if not typ == 'strong': + continue + # Regular (strong) hydrogen bonds + dist_ah = euclidean3d(acc.a.coords, don.h.coords) + dist_ad = euclidean3d(acc.a.coords, don.d.coords) + if not config.MIN_DIST < dist_ad < config.HBOND_DIST_MAX: + continue + vec1, vec2 = vector(don.h.coords, don.d.coords), vector(don.h.coords, acc.a.coords) + v = vecangle(vec1, vec2) + if not v > config.HBOND_DON_ANGLE_MIN: + continue + protatom = don.d.OBAtom if protisdon else acc.a.OBAtom + ligatom = don.d.OBAtom if not protisdon else acc.a.OBAtom + is_sidechain_hbond = protatom.GetResidue().GetAtomProperty(protatom, 8) # Check if sidechain atom + resnr = whichresnumber(don.d) if protisdon else whichresnumber(acc.a) + resnr_l = whichresnumber(acc.a_orig_atom) if protisdon else whichresnumber(don.d_orig_atom) + restype = whichrestype(don.d) if protisdon else whichrestype(acc.a) + restype_l = whichrestype(acc.a_orig_atom) if protisdon else whichrestype(don.d_orig_atom) + reschain = whichchain(don.d) if protisdon else whichchain(acc.a) + rechain_l = whichchain(acc.a_orig_atom) if protisdon else whichchain(don.d_orig_atom) + # Next line prevents H-Bonds within amino acids in intermolecular interactions + if config.INTRA is not None and whichresnumber(don.d) == whichresnumber(acc.a): + continue + # Next line prevents backbone-backbone H-Bonds + if config.INTRA is not None and protatom.GetResidue().GetAtomProperty(protatom, + 8) and ligatom.GetResidue().GetAtomProperty( + ligatom, 8): + continue + contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, d=don.d, d_orig_idx=don.d_orig_idx, h=don.h, + distance_ah=dist_ah, distance_ad=dist_ad, angle=v, type=typ, protisdon=protisdon, + resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, + restype_l=restype_l, reschain_l=rechain_l, sidechain=is_sidechain_hbond, + atype=acc.a.type, dtype=don.d.type) + pairings.append(contact) + return filter_contacts(pairings) + + +def pistacking(rings_bs, rings_lig): + """Return all pi-stackings between the given aromatic ring systems in receptor and ligand.""" + data = namedtuple( + 'pistack', + 'proteinring ligandring distance angle offset type restype resnr reschain restype_l resnr_l reschain_l') + pairings = [] + for r, l in itertools.product(rings_bs, rings_lig): + # DISTANCE AND RING ANGLE CALCULATION + d = euclidean3d(r.center, l.center) + b = vecangle(r.normal, l.normal) + a = min(b, 180 - b if not 180 - b < 0 else b) # Smallest of two angles, depending on direction of normal + + # RING CENTER OFFSET CALCULATION (project each ring center into the other ring) + proj1 = projection(l.normal, l.center, r.center) + proj2 = projection(r.normal, r.center, l.center) + offset = min(euclidean3d(proj1, l.center), euclidean3d(proj2, r.center)) + + # RECEPTOR DATA + resnr, restype, reschain = whichresnumber(r.atoms[0]), whichrestype(r.atoms[0]), whichchain(r.atoms[0]) + resnr_l, restype_l, reschain_l = whichresnumber(l.orig_atoms[0]), whichrestype( + l.orig_atoms[0]), whichchain(l.orig_atoms[0]) + + # SELECTION BY DISTANCE, ANGLE AND OFFSET + passed = False + if not config.MIN_DIST < d < config.PISTACK_DIST_MAX: + continue + if 0 < a < config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: + ptype = 'P' + passed = True + if 90 - config.PISTACK_ANG_DEV < a < 90 + config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: + ptype = 'T' + passed = True + if passed: + contact = data(proteinring=r, ligandring=l, distance=d, angle=a, offset=offset, + type=ptype, resnr=resnr, restype=restype, reschain=reschain, + resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) + pairings.append(contact) + return filter_contacts(pairings) + + +def pication(rings, pos_charged, protcharged): + """Return all pi-Cation interaction between aromatic rings and positively charged groups. + For tertiary and quaternary amines, check also the angle between the ring and the nitrogen. + """ + data = namedtuple( + 'pication', 'ring charge distance offset type restype resnr reschain restype_l resnr_l reschain_l protcharged') + pairings = [] + if len(rings) == 0 or len(pos_charged) == 0: + return pairings + for ring in rings: + c = ring.center + for p in pos_charged: + d = euclidean3d(c, p.center) + # Project the center of charge into the ring and measure distance to ring center + proj = projection(ring.normal, ring.center, p.center) + offset = euclidean3d(proj, ring.center) + if not config.MIN_DIST < d < config.PICATION_DIST_MAX or not offset < config.PISTACK_OFFSET_MAX: + continue + if type(p).__name__ == 'lcharge' and p.fgroup == 'tertamine': + # Special case here if the ligand has a tertiary amine, check an additional angle + # Otherwise, we might have have a pi-cation interaction 'through' the ligand + n_atoms = [a_neighbor for a_neighbor in OBAtomAtomIter(p.atoms[0].OBAtom)] + n_atoms_coords = [(a.x(), a.y(), a.z()) for a in n_atoms] + amine_normal = np.cross(vector(n_atoms_coords[0], n_atoms_coords[1]), + vector(n_atoms_coords[2], n_atoms_coords[0])) + b = vecangle(ring.normal, amine_normal) + # Smallest of two angles, depending on direction of normal + a = min(b, 180 - b if not 180 - b < 0 else b) + if not a > 30.0: + resnr, restype = whichresnumber(ring.atoms[0]), whichrestype(ring.atoms[0]) + reschain = whichchain(ring.atoms[0]) + resnr_l, restype_l = whichresnumber(p.orig_atoms[0]), whichrestype(p.orig_atoms[0]) + reschain_l = whichchain(p.orig_atoms[0]) + contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', + restype=restype, resnr=resnr, reschain=reschain, + restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, + protcharged=protcharged) + pairings.append(contact) + break + resnr = whichresnumber(p.atoms[0]) if protcharged else whichresnumber(ring.atoms[0]) + resnr_l = whichresnumber(ring.orig_atoms[0]) if protcharged else whichresnumber(p.orig_atoms[0]) + restype = whichrestype(p.atoms[0]) if protcharged else whichrestype(ring.atoms[0]) + restype_l = whichrestype(ring.orig_atoms[0]) if protcharged else whichrestype(p.orig_atoms[0]) + reschain = whichchain(p.atoms[0]) if protcharged else whichchain(ring.atoms[0]) + reschain_l = whichchain(ring.orig_atoms[0]) if protcharged else whichchain(p.orig_atoms[0]) + contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', restype=restype, + resnr=resnr, reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, + reschain_l=reschain_l, protcharged=protcharged) + pairings.append(contact) + return filter_contacts(pairings) + + +def saltbridge(poscenter, negcenter, protispos): + """Detect all salt bridges (pliprofiler between centers of positive and negative charge)""" + data = namedtuple( + 'saltbridge', 'positive negative distance protispos resnr restype reschain resnr_l restype_l reschain_l') + pairings = [] + for pc, nc in itertools.product(poscenter, negcenter): + if not config.MIN_DIST < euclidean3d(pc.center, nc.center) < config.SALTBRIDGE_DIST_MAX: + continue + resnr = pc.resnr if protispos else nc.resnr + resnr_l = whichresnumber(nc.orig_atoms[0]) if protispos else whichresnumber(pc.orig_atoms[0]) + restype = pc.restype if protispos else nc.restype + restype_l = whichrestype(nc.orig_atoms[0]) if protispos else whichrestype(pc.orig_atoms[0]) + reschain = pc.reschain if protispos else nc.reschain + reschain_l = whichchain(nc.orig_atoms[0]) if protispos else whichchain(pc.orig_atoms[0]) + contact = data(positive=pc, negative=nc, distance=euclidean3d(pc.center, nc.center), protispos=protispos, + resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, restype_l=restype_l, + reschain_l=reschain_l) + pairings.append(contact) + return filter_contacts(pairings) + + +def halogen(acceptor, donor): + """Detect all halogen bonds of the type Y-O...X-C""" + data = namedtuple('halogenbond', 'acc acc_orig_idx don don_orig_idx distance don_angle acc_angle restype ' + 'resnr reschain restype_l resnr_l reschain_l donortype acctype sidechain') + pairings = [] + for acc, don in itertools.product(acceptor, donor): + dist = euclidean3d(acc.o.coords, don.x.coords) + if not config.MIN_DIST < dist < config.HALOGEN_DIST_MAX: + continue + vec1, vec2 = vector(acc.o.coords, acc.y.coords), vector(acc.o.coords, don.x.coords) + vec3, vec4 = vector(don.x.coords, acc.o.coords), vector(don.x.coords, don.c.coords) + acc_angle, don_angle = vecangle(vec1, vec2), vecangle(vec3, vec4) + is_sidechain_hal = acc.o.OBAtom.GetResidue().GetAtomProperty(acc.o.OBAtom, 8) # Check if sidechain atom + if not config.HALOGEN_ACC_ANGLE - config.HALOGEN_ANGLE_DEV < acc_angle \ + < config.HALOGEN_ACC_ANGLE + config.HALOGEN_ANGLE_DEV: + continue + if not config.HALOGEN_DON_ANGLE - config.HALOGEN_ANGLE_DEV < don_angle \ + < config.HALOGEN_DON_ANGLE + config.HALOGEN_ANGLE_DEV: + continue + restype, reschain, resnr = whichrestype(acc.o), whichchain(acc.o), whichresnumber(acc.o) + restype_l, reschain_l, resnr_l = whichrestype(don.orig_x), whichchain(don.orig_x), whichresnumber(don.orig_x) + contact = data(acc=acc, acc_orig_idx=acc.o_orig_idx, don=don, don_orig_idx=don.x_orig_idx, + distance=dist, don_angle=don_angle, acc_angle=acc_angle, + restype=restype, resnr=resnr, + reschain=reschain, restype_l=restype_l, + reschain_l=reschain_l, resnr_l=resnr_l, donortype=don.x.OBAtom.GetType(), acctype=acc.o.type, + sidechain=is_sidechain_hal) + pairings.append(contact) + return filter_contacts(pairings) + + +def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): + """Find water-bridged hydrogen bonds between ligand and protein. For now only considers bridged of first degree.""" + data = namedtuple('waterbridge', 'a a_orig_idx atype d d_orig_idx dtype h water water_orig_idx distance_aw ' + 'distance_dw d_angle w_angle type resnr restype reschain resnr_l restype_l reschain_l protisdon') + pairings = [] + # First find all acceptor-water pairs with distance within d + # and all donor-water pairs with distance within d and angle greater theta + lig_aw, prot_aw, lig_dw, prot_hw = [], [], [], [] + for w in water: + for acc1 in lig_hba: + dist = euclidean3d(acc1.a.coords, w.oxy.coords) + if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST: + lig_aw.append((acc1, w, dist)) + for acc2 in bs_hba: + dist = euclidean3d(acc2.a.coords, w.oxy.coords) + if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST: + prot_aw.append((acc2, w, dist)) + for don1 in lig_hbd: + dist = euclidean3d(don1.d.coords, w.oxy.coords) + d_angle = vecangle(vector(don1.h.coords, don1.d.coords), vector(don1.h.coords, w.oxy.coords)) + if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ + and d_angle > config.WATER_BRIDGE_THETA_MIN: + lig_dw.append((don1, w, dist, d_angle)) + for don2 in bs_hbd: + dist = euclidean3d(don2.d.coords, w.oxy.coords) + d_angle = vecangle(vector(don2.h.coords, don2.d.coords), vector(don2.h.coords, w.oxy.coords)) + if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ + and d_angle > config.WATER_BRIDGE_THETA_MIN: + prot_hw.append((don2, w, dist, d_angle)) + + for l, p in itertools.product(lig_aw, prot_hw): + acc, wl, distance_aw = l + don, wd, distance_dw, d_angle = p + if not wl.oxy == wd.oxy: + continue + # Same water molecule and angle within omega + w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) + if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: + continue + resnr, reschain, restype = whichresnumber(don.d), whichchain(don.d), whichrestype(don.d) + resnr_l, reschain_l, restype_l = whichresnumber(acc.a_orig_atom), whichchain( + acc.a_orig_atom), whichrestype(acc.a_orig_atom) + contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, + dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, + distance_aw=distance_aw, distance_dw=distance_dw, d_angle=d_angle, w_angle=w_angle, + type='first_deg', resnr=resnr, restype=restype, + reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, protisdon=True) + pairings.append(contact) + for p, l in itertools.product(prot_aw, lig_dw): + acc, wl, distance_aw = p + don, wd, distance_dw, d_angle = l + if not wl.oxy == wd.oxy: + continue + # Same water molecule and angle within omega + w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) + if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: + continue + resnr, reschain, restype = whichresnumber(acc.a), whichchain(acc.a), whichrestype(acc.a) + resnr_l, reschain_l, restype_l = whichresnumber(don.d_orig_atom), whichchain( + don.d_orig_atom), whichrestype(don.d_orig_atom) + contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, + dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, + distance_aw=distance_aw, distance_dw=distance_dw, + d_angle=d_angle, w_angle=w_angle, type='first_deg', resnr=resnr, + restype=restype, reschain=reschain, + restype_l=restype_l, reschain_l=reschain_l, resnr_l=resnr_l, protisdon=False) + pairings.append(contact) + return filter_contacts(pairings) + + +def metal_complexation(metals, metal_binding_lig, metal_binding_bs): + """Find all metal complexes between metals and appropriate groups in both protein and ligand, as well as water""" + data = namedtuple('metal_complex', 'metal metal_orig_idx metal_type target target_orig_idx target_type ' + 'coordination_num distance resnr restype ' + 'reschain restype_l reschain_l resnr_l location rms, geometry num_partners complexnum') + pairings_dict = {} + pairings = [] + # #@todo Refactor + metal_to_id = {} + metal_to_orig_atom = {} + for metal, target in itertools.product(metals, metal_binding_lig + metal_binding_bs): + distance = euclidean3d(metal.m.coords, target.atom.coords) + if not distance < config.METAL_DIST_MAX: + continue + if metal.m not in pairings_dict: + pairings_dict[metal.m] = [(target, distance), ] + metal_to_id[metal.m] = metal.m_orig_idx + metal_to_orig_atom[metal.m] = metal.orig_m + else: + pairings_dict[metal.m].append((target, distance)) + for cnum, metal in enumerate(pairings_dict): + rms = 0.0 + excluded = [] + # cnum +1 being the complex number + contact_pairs = pairings_dict[metal] + num_targets = len(contact_pairs) + vectors_dict = defaultdict(list) + for contact_pair in contact_pairs: + target, distance = contact_pair + vectors_dict[target.atom.idx].append(vector(metal.coords, target.atom.coords)) + + # Listing of coordination numbers and their geometries + configs = {2: ['linear', ], + 3: ['trigonal.planar', 'trigonal.pyramidal'], + 4: ['tetrahedral', 'square.planar'], + 5: ['trigonal.bipyramidal', 'square.pyramidal'], + 6: ['octahedral', ]} + + # Angle signatures for each geometry (as seen from each target atom) + ideal_angles = {'linear': [[180.0]] * 2, + 'trigonal.planar': [[120.0, 120.0]] * 3, + 'trigonal.pyramidal': [[109.5, 109.5]] * 3, + 'tetrahedral': [[109.5, 109.5, 109.5, 109.5]] * 4, + 'square.planar': [[90.0, 90.0, 90.0, 90.0]] * 4, + 'trigonal.bipyramidal': [[120.0, 120.0, 90.0, 90.0]] * 3 + [[90.0, 90.0, 90.0, 180.0]] * 2, + 'square.pyramidal': [[90.0, 90.0, 90.0, 180.0]] * 4 + [[90.0, 90.0, 90.0, 90.0]], + 'octahedral': [[90.0, 90.0, 90.0, 90.0, 180.0]] * 6} + angles_dict = {} + + for target in vectors_dict: + cur_vector = vectors_dict[target] + other_vectors = [] + for t in vectors_dict: + if not t == target: + [other_vectors.append(x) for x in vectors_dict[t]] + angles = [vecangle(pair[0], pair[1]) for pair in itertools.product(cur_vector, other_vectors)] + angles_dict[target] = angles + + all_total = [] # Record fit information for each geometry tested + gdata = namedtuple('gdata', 'geometry rms coordination excluded diff_targets') # Geometry Data + # Can't specify geometry with only one target + if num_targets == 1: + final_geom = 'NA' + final_coo = 1 + excluded = [] + rms = 0.0 + else: + for coo in sorted(configs, reverse=True): # Start with highest coordination number + geometries = configs[coo] + for geometry in geometries: + signature = ideal_angles[geometry] # Set of ideal angles for geometry, from each perspective + geometry_total = 0 + geometry_scores = [] # All scores for one geometry (from all subsignatures) + used_up_targets = [] # Use each target just once for a subsignature + not_used = [] + coo_diff = num_targets - coo # How many more observed targets are there? + + # Find best match for each subsignature + for subsignature in signature: # Ideal angles from one perspective + best_target = None # There's one best-matching target for each subsignature + best_target_score = 999 + + for k, target in enumerate(angles_dict): + if target not in used_up_targets: + observed_angles = angles_dict[target] # Observed angles from perspective of one target + single_target_scores = [] + used_up_observed_angles = [] + for i, ideal_angle in enumerate(subsignature): + # For each angle in the signature, find the best-matching observed angle + best_match = None + best_match_diff = 999 + for j, observed_angle in enumerate(observed_angles): + if j not in used_up_observed_angles: + diff = abs(ideal_angle - observed_angle) + if diff < best_match_diff: + best_match_diff = diff + best_match = j + if best_match is not None: + used_up_observed_angles.append(best_match) + single_target_scores.append(best_match_diff) + # Calculate RMS for target angles + target_total = sum([x ** 2 for x in single_target_scores]) ** 0.5 # Tot. score targ/sig + if target_total < best_target_score: + best_target_score = target_total + best_target = target + + used_up_targets.append(best_target) + geometry_scores.append(best_target_score) + # Total score is mean of RMS values + geometry_total = np.mean(geometry_scores) + # Record the targets not used for excluding them when deciding for a final geometry + [not_used.append(target) for target in angles_dict if target not in used_up_targets] + all_total.append(gdata(geometry=geometry, rms=geometry_total, coordination=coo, + excluded=not_used, diff_targets=coo_diff)) + + # Make a decision here. Starting with the geometry with lowest difference in ideal and observed partners ... + # Check if the difference between the RMS to the next best solution is not larger than 0.5 + if not num_targets == 1: # Can't decide for any geoemtry in that case + all_total = sorted(all_total, key=lambda x: abs(x.diff_targets)) + for i, total in enumerate(all_total): + next_total = all_total[i + 1] + this_rms, next_rms = total.rms, next_total.rms + diff_to_next = next_rms - this_rms + if diff_to_next > 0.5: + final_geom, final_coo, rms, excluded = total.geometry, total.coordination, total.rms, total.excluded + break + elif next_total.rms < 3.5: + final_geom, final_coo, = next_total.geometry, next_total.coordination + rms, excluded = next_total.rms, next_total.excluded + break + elif i == len(all_total) - 2: + final_geom, final_coo, rms, excluded = "NA", "NA", float('nan'), [] + break + + # Record all contact pairing, excluding those with targets superfluous for chosen geometry + only_water = set([x[0].location for x in contact_pairs]) == {'water'} + if not only_water: # No complex if just with water as targets + logger.info(f'metal ion {metal.type} complexed with {final_geom} geometry (coo. number {final_coo}/ {num_targets} observed)') + for contact_pair in contact_pairs: + target, distance = contact_pair + if target.atom.idx not in excluded: + metal_orig_atom = metal_to_orig_atom[metal] + restype_l, reschain_l, resnr_l = whichrestype(metal_orig_atom), whichchain( + metal_orig_atom), whichresnumber(metal_orig_atom) + contact = data(metal=metal, metal_orig_idx=metal_to_id[metal], metal_type=metal.type, + target=target, target_orig_idx=target.atom_orig_idx, target_type=target.type, + coordination_num=final_coo, distance=distance, resnr=target.resnr, + restype=target.restype, reschain=target.reschain, location=target.location, + rms=rms, geometry=final_geom, num_partners=num_targets, complexnum=cnum + 1, + resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) + pairings.append(contact) + return filter_contacts(pairings) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py new file mode 100644 index 0000000..6b63d63 --- /dev/null +++ b/plip/structure/preparation.py @@ -0,0 +1,1483 @@ +import itertools +import os +import re +import tempfile +from collections import namedtuple +from operator import itemgetter + +import numpy as np +from openbabel import pybel + +from plip.basic import config, logger +from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name +from plip.basic.supplemental import cluster_doubles, is_lig, normalize_vector, vector, ring_is_planar +from plip.basic.supplemental import extract_pdbid, read_pdb, create_folder_if_not_exists, canonicalize +from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance +from plip.basic.supplemental import whichchain, whichrestype, whichresnumber, euclidean3d, int32_to_negative +from plip.structure.detection import halogen, pication, water_bridges, metal_complexation +from plip.structure.detection import hydrophobic_interactions, pistacking, hbonds, saltbridge + +logger = logger.get_logger() + + +class PDBParser: + def __init__(self, pdbpath, as_string): + self.as_string = as_string + self.pdbpath = pdbpath + self.num_fixed_lines = 0 + self.covlinkage = namedtuple("covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2") + self.proteinmap, self.modres, self.covalent, self.altconformations, self.corrected_pdb = self.parse_pdb() + + def parse_pdb(self): + """Extracts additional information from PDB files. + I. When reading in a PDB file, OpenBabel numbers ATOMS and HETATOMS continously. + In PDB files, TER records are also counted, leading to a different numbering system. + This functions reads in a PDB file and provides a mapping as a dictionary. + II. Additionally, it returns a list of modified residues. + III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified + IV. Alternative conformations + """ + if self.as_string: + fil = self.pdbpath.rstrip('\n').split('\n') # Removing trailing newline character + else: + f = read(self.pdbpath) + fil = f.readlines() + f.close() + corrected_lines = [] + i, j = 0, 0 # idx and PDB numbering + d = {} + modres = set() + covalent = [] + alt = [] + previous_ter = False + + # Standard without fixing + if not config.NOFIX: + if not config.PLUGIN_MODE: + lastnum = 0 # Atom numbering (has to be consecutive) + other_models = False + for line in fil: + if not other_models: # Only consider the first model in an NRM structure + corrected_line, newnum = self.fix_pdbline(line, lastnum) + if corrected_line is not None: + if corrected_line.startswith('MODEL'): + try: # Get number of MODEL (1,2,3) + model_num = int(corrected_line[10:14]) + if model_num > 1: # MODEL 2,3,4 etc. + other_models = True + except ValueError: + logger.debug(f'ignoring invalid MODEL entry: {corrected_line}') + corrected_lines.append(corrected_line) + lastnum = newnum + corrected_pdb = ''.join(corrected_lines) + else: + corrected_pdb = self.pdbpath + corrected_lines = fil + else: + corrected_pdb = self.pdbpath + corrected_lines = fil + + for line in corrected_lines: + if line.startswith(("ATOM", "HETATM")): + # Retrieve alternate conformations + atomid, location = int(line[6:11]), line[16] + location = 'A' if location == ' ' else location + if location != 'A': + alt.append(atomid) + + if not previous_ter: + i += 1 + j += 1 + else: + i += 1 + j += 2 + d[i] = j + previous_ter = False + # Numbering Changes at TER records + if line.startswith("TER"): + previous_ter = True + # Get modified residues + if line.startswith("MODRES"): + modres.add(line[12:15].strip()) + # Get covalent linkages between ligands + if line.startswith("LINK"): + covalent.append(self.get_linkage(line)) + return d, modres, covalent, alt, corrected_pdb + + def fix_pdbline(self, pdbline, lastnum): + """Fix a PDB line if information is missing.""" + pdbqt_conversion = { + "HD": "H", "HS": "H", "NA": "N", + "NS": "N", "OA": "O", "OS": "O", "SA": "S"} + fixed = False + new_num = 0 + forbidden_characters = "[^a-zA-Z0-9_]" + pdbline = pdbline.strip('\n') + # Some MD / Docking tools produce empty lines, leading to segfaults + if len(pdbline.strip()) == 0: + self.num_fixed_lines += 1 + return None, lastnum + if len(pdbline) > 100: # Should be 80 long + self.num_fixed_lines += 1 + return None, lastnum + # TER Entries also have continuing numbering, consider them as well + if pdbline.startswith('TER'): + new_num = lastnum + 1 + if pdbline.startswith('ATOM'): + new_num = lastnum + 1 + current_num = int(pdbline[6:11]) + resnum = pdbline[22:27].strip() + resname = pdbline[17:21].strip() + # Invalid residue number + try: + int(resnum) + except ValueError: + pdbline = pdbline[:22] + ' 0 ' + pdbline[27:] + fixed = True + # Invalid characters in residue name + if re.match(forbidden_characters, resname.strip()): + pdbline = pdbline[:17] + 'UNK ' + pdbline[21:] + fixed = True + if lastnum + 1 != current_num: + pdbline = pdbline[:6] + (5 - len(str(new_num))) * ' ' + str(new_num) + ' ' + pdbline[12:] + fixed = True + # No chain assigned + if pdbline[21] == ' ': + pdbline = pdbline[:21] + 'A' + pdbline[22:] + fixed = True + if pdbline.endswith('H'): + self.num_fixed_lines += 1 + return None, lastnum + # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. + for pdbqttype in pdbqt_conversion: + if pdbline.strip().endswith(pdbqttype): + pdbline = pdbline.strip()[:-2] + ' ' + pdbqt_conversion[pdbqttype] + '\n' + self.num_fixed_lines += 1 + if pdbline.startswith('HETATM'): + new_num = lastnum + 1 + try: + current_num = int(pdbline[6:11]) + except ValueError: + current_num = None + logger.debug(f'invalid HETATM entry: {pdbline}') + if lastnum + 1 != current_num: + pdbline = pdbline[:6] + (5 - len(str(new_num))) * ' ' + str(new_num) + ' ' + pdbline[12:] + fixed = True + # No chain assigned or number assigned as chain + if pdbline[21] == ' ': + pdbline = pdbline[:21] + 'Z' + pdbline[22:] + fixed = True + # No residue number assigned + if pdbline[23:26] == ' ': + pdbline = pdbline[:23] + '999' + pdbline[26:] + fixed = True + # Non-standard Ligand Names + ligname = pdbline[17:21].strip() + if len(ligname) > 3: + pdbline = pdbline[:17] + ligname[:3] + ' ' + pdbline[21:] + fixed = True + if re.match(forbidden_characters, ligname.strip()): + pdbline = pdbline[:17] + 'LIG ' + pdbline[21:] + fixed = True + if len(ligname.strip()) == 0: + pdbline = pdbline[:17] + 'LIG ' + pdbline[21:] + fixed = True + if pdbline.endswith('H'): + self.num_fixed_lines += 1 + return None, lastnum + # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. + for pdbqttype in pdbqt_conversion: + if pdbline.strip().endswith(pdbqttype): + pdbline = pdbline.strip()[:-2] + ' ' + pdbqt_conversion[pdbqttype] + ' ' + self.num_fixed_lines += 1 + self.num_fixed_lines += 1 if fixed else 0 + return pdbline + '\n', max(new_num, lastnum) + + def get_linkage(self, line): + """Get the linkage information from a LINK entry PDB line.""" + conf1, id1, chain1, pos1 = line[16].strip(), line[17:20].strip(), line[21].strip(), int(line[22:26]) + conf2, id2, chain2, pos2 = line[46].strip(), line[47:50].strip(), line[51].strip(), int(line[52:56]) + return self.covlinkage(id1=id1, chain1=chain1, pos1=pos1, conf1=conf1, + id2=id2, chain2=chain2, pos2=pos2, conf2=conf2) + + +class LigandFinder: + def __init__(self, proteincomplex, altconf, modres, covalent, mapper): + self.lignames_all = None + self.lignames_kept = None + self.water = None + self.proteincomplex = proteincomplex + self.altconformations = altconf + self.modresidues = modres + self.covalent = covalent + self.mapper = mapper + self.ligands = self.getligs() + self.excluded = sorted(list(self.lignames_all.difference(set(self.lignames_kept)))) + + def getpeptides(self, chain): + """If peptide ligand chains are defined via the command line options, + try to extract the underlying ligand formed by all residues in the + given chain without water + """ + all_from_chain = [o for o in pybel.ob.OBResidueIter( + self.proteincomplex.OBMol) if o.GetChain() == chain] # All residues from chain + if len(all_from_chain) == 0: + return None + else: + non_water = [o for o in all_from_chain if not o.GetResidueProperty(9)] + ligand = self.extract_ligand(non_water) + return ligand + + def getligs(self): + """Get all ligands from a PDB file and prepare them for analysis. + Returns all non-empty ligands. + """ + + if config.PEPTIDES == [] and config.INTRA is None: + # Extract small molecule ligands (default) + ligands = [] + + # Filter for ligands using lists + ligand_residues, self.lignames_all, self.water = self.filter_for_ligands() + + all_res_dict = {(a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues} + self.lignames_kept = list(set([a.GetName() for a in ligand_residues])) + + if not config.BREAKCOMPOSITE: + # Update register of covalent links with those between DNA/RNA subunits + self.covalent += nucleotide_linkage(all_res_dict) + # Find fragment linked by covalent bonds + res_kmers = self.identify_kmers(all_res_dict) + else: + res_kmers = [[a, ] for a in ligand_residues] + logger.debug(f'{len(res_kmers)} ligand kmer(s) detected for closer inspection') + for kmer in res_kmers: # iterate over all ligands and extract molecules + information + if len(kmer) > config.MAX_COMPOSITE_LENGTH: + logger.debug( + f'ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)') + else: + ligands.append(self.extract_ligand(kmer)) + + else: + # Extract peptides from given chains + self.water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)] + if config.PEPTIDES: + peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES] + elif config.INTRA is not None: + peptide_ligands = [self.getpeptides(config.INTRA), ] + + ligands = [p for p in peptide_ligands if p is not None] + self.covalent, self.lignames_kept, self.lignames_all = [], [], set() + + return [lig for lig in ligands if len(lig.mol.atoms) != 0] + + def extract_ligand(self, kmer): + """Extract the ligand by copying atoms and bonds and assign all information necessary for later steps.""" + data = namedtuple('ligand', 'mol hetid chain position water members longname type atomorder can_to_pdb') + members = [(res.GetName(), res.GetChain(), int32_to_negative(res.GetNum())) for res in kmer] + members = sort_members_by_importance(members) + rname, rchain, rnum = members[0] + logger.debug(f'finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements') + names = [x[0] for x in members] + longname = '-'.join([x[0] for x in members]) + + if config.PEPTIDES: + ligtype = 'PEPTIDE' + elif config.INTRA is not None: + ligtype = 'INTRA' + else: + # Classify a ligand by its HETID(s) + ligtype = classify_by_name(names) + logger.debug(f'ligand classified as {ligtype}') + + hetatoms = set() + for obresidue in kmer: + hetatoms_res = set([(obatom.GetIdx(), obatom) for obatom in pybel.ob.OBResidueAtomIter(obresidue) + if obatom.GetAtomicNum() != 1]) + + if not config.ALTLOC: + # Remove alternative conformations (standard -> True) + hetatoms_res = set([atm for atm in hetatoms_res + if not self.mapper.mapid(atm[0], mtype='protein', + to='internal') in self.altconformations]) + hetatoms.update(hetatoms_res) + logger.debug(f'hetero atoms determined (n={len(hetatoms)})') + + hetatoms = dict(hetatoms) # make it a dict with idx as key and OBAtom as value + lig = pybel.ob.OBMol() # new ligand mol + neighbours = dict() + for obatom in hetatoms.values(): # iterate over atom objects + idx = obatom.GetIdx() + lig.AddAtom(obatom) + # ids of all neighbours of obatom + neighbours[idx] = set([neighbour_atom.GetIdx() for neighbour_atom + in pybel.ob.OBAtomAtomIter(obatom)]) & set(hetatoms.keys()) + logger.debug(f'atom neighbours mapped') + + ############################################################## + # map the old atom idx of OBMol to the new idx of the ligand # + ############################################################## + + newidx = dict(zip(hetatoms.keys(), [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)])) + mapold = dict(zip(newidx.values(), newidx)) + # copy the bonds + for obatom in hetatoms: + for neighbour_atom in neighbours[obatom]: + bond = hetatoms[obatom].GetBond(hetatoms[neighbour_atom]) + lig.AddBond(newidx[obatom], newidx[neighbour_atom], bond.GetBondOrder()) + lig = pybel.Molecule(lig) + + # For kmers, the representative ids are chosen (first residue of kmer) + lig.data.update({'Name': rname, 'Chain': rchain, 'ResNr': rnum}) + + # Check if a negative residue number is represented as a 32 bit integer + if rnum > 10 ** 5: + rnum = int32_to_negative(rnum) + + lig.title = ':'.join((rname, rchain, str(rnum))) + self.mapper.ligandmaps[lig.title] = mapold + + logger.debug('renumerated molecule generated') + + if not config.NOPDBCANMAP: + atomorder = canonicalize(lig) + else: + atomorder = None + + can_to_pdb = {} + if atomorder is not None: + can_to_pdb = {atomorder[key - 1]: mapold[key] for key in mapold} + + ligand = data(mol=lig, hetid=rname, chain=rchain, position=rnum, water=self.water, + members=members, longname=longname, type=ligtype, atomorder=atomorder, + can_to_pdb=can_to_pdb) + return ligand + + def is_het_residue(self, obres): + """Given an OBResidue, determines if the residue is indeed a possible ligand + in the PDB file""" + if not obres.GetResidueProperty(0): + # If the residue is NOT amino (0) + # It can be amino_nucleo, coenzme, ion, nucleo, protein, purine, pyrimidine, solvent + # In these cases, it is a ligand candidate + return True + else: + # Here, the residue is classified as amino + # Amino acids can still be ligands, so we check for HETATM entries + # Only residues with at least one HETATM entry are processed as ligands + het_atoms = [] + for atm in pybel.ob.OBResidueAtomIter(obres): + het_atoms.append(obres.IsHetAtom(atm)) + if True in het_atoms: + return True + return False + + def filter_for_ligands(self): + """Given an OpenBabel Molecule, get all ligands, their names, and water""" + + candidates1 = [o for o in pybel.ob.OBResidueIter( + self.proteincomplex.OBMol) if not o.GetResidueProperty(9) and self.is_het_residue(o)] + + if config.DNARECEPTOR: # If DNA is the receptor, don't consider DNA as a ligand + candidates1 = [res for res in candidates1 if res.GetName() not in config.DNA + config.RNA] + all_lignames = set([a.GetName() for a in candidates1]) + + water = [o for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) if o.GetResidueProperty(9)] + # Filter out non-ligands + if not config.KEEPMOD: # Keep modified residues as ligands + candidates2 = [a for a in candidates1 if is_lig(a.GetName()) and a.GetName() not in self.modresidues] + else: + candidates2 = [a for a in candidates1 if is_lig(a.GetName())] + logger.debug(f'{len(candidates2)} ligand(s) after first filtering step') + + ############################################ + # Filtering by counting and artifacts list # + ############################################ + artifacts = [] + unique_ligs = set(a.GetName() for a in candidates2) + for ulig in unique_ligs: + # Discard if appearing 15 times or more and is possible artifact + if ulig in config.biolip_list and [a.GetName() for a in candidates2].count(ulig) >= 15: + artifacts.append(ulig) + + selected_ligands = [a for a in candidates2 if a.GetName() not in artifacts] + + return selected_ligands, all_lignames, water + + def identify_kmers(self, residues): + """Using the covalent linkage information, find out which fragments/subunits form a ligand.""" + + # Remove all those not considered by ligands and pairings including alternate conformations + ligdoubles = [[(link.id1, link.chain1, link.pos1), + (link.id2, link.chain2, link.pos2)] for link in + [c for c in self.covalent if c.id1 in self.lignames_kept and c.id2 in self.lignames_kept + and c.conf1 in ['A', ''] and c.conf2 in ['A', ''] + and (c.id1, c.chain1, c.pos1) in residues + and (c.id2, c.chain2, c.pos2) in residues]] + kmers = cluster_doubles(ligdoubles) + if not kmers: # No ligand kmers, just normal independent ligands + return [[residues[res]] for res in residues] + + else: + # res_kmers contains clusters of covalently bound ligand residues (kmer ligands) + res_kmers = [[residues[res] for res in kmer] for kmer in kmers] + + # In this case, add other ligands which are not part of a kmer + in_kmer = [] + for res_kmer in res_kmers: + for res in res_kmer: + in_kmer.append((res.GetName(), res.GetChain(), res.GetNum())) + for res in residues: + if res not in in_kmer: + newres = [residues[res], ] + res_kmers.append(newres) + return res_kmers + + +class Mapper: + """Provides functions for mapping atom IDs in the correct way""" + + def __init__(self): + self.proteinmap = None # Map internal atom IDs of protein residues to original PDB Atom IDs + self.ligandmaps = {} # Map IDs of new ligand molecules to internal IDs (or PDB IDs?) + self.original_structure = None + + def mapid(self, idx, mtype, bsid=None, to='original'): # Mapping to original IDs is standard for ligands + if mtype == 'reversed': # Needed to map internal ID back to original protein ID + return self.reversed_proteinmap[idx] + if mtype == 'protein': + return self.proteinmap[idx] + elif mtype == 'ligand': + if to == 'internal': + return self.ligandmaps[bsid][idx] + elif to == 'original': + return self.proteinmap[self.ligandmaps[bsid][idx]] + + def id_to_atom(self, idx): + """Returns the atom for a given original ligand ID. + To do this, the ID is mapped to the protein first and then the atom returned. + """ + mapped_idx = self.mapid(idx, 'reversed') + return pybel.Atom(self.original_structure.GetAtom(mapped_idx)) + + +class Mol: + def __init__(self, altconf, mapper, mtype, bsid): + self.mtype = mtype + self.bsid = bsid + self.rings = None + self.hydroph_atoms = None + self.charged = None + self.hbond_don_atom_pairs = None + self.hbond_acc_atoms = None + self.altconf = altconf + self.Mapper = mapper + + def hydrophobic_atoms(self, all_atoms): + """Select all carbon atoms which have only carbons and/or hydrogens as direct neighbors.""" + atom_set = [] + data = namedtuple('hydrophobic', 'atom orig_atom orig_idx') + atm = [a for a in all_atoms if a.atomicnum == 6 and set([natom.GetAtomicNum() for natom + in pybel.ob.OBAtomAtomIter(a.OBAtom)]).issubset( + {1, 6})] + for atom in atm: + orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid) + orig_atom = self.Mapper.id_to_atom(orig_idx) + if atom.idx not in self.altconf: + atom_set.append(data(atom=atom, orig_atom=orig_atom, orig_idx=orig_idx)) + return atom_set + + def find_hba(self, all_atoms): + """Find all possible hydrogen bond acceptors""" + data = namedtuple('hbondacceptor', 'a a_orig_atom a_orig_idx type') + a_set = [] + for atom in filter(lambda at: at.OBAtom.IsHbondAcceptor(), all_atoms): + if atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf: # Exclude halogen atoms + a_orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid) + a_orig_atom = self.Mapper.id_to_atom(a_orig_idx) + a_set.append(data(a=atom, a_orig_atom=a_orig_atom, a_orig_idx=a_orig_idx, type='regular')) + a_set = sorted(a_set, key=lambda x: x.a_orig_idx) + return a_set + + def find_hbd(self, all_atoms, hydroph_atoms): + """Find all possible strong and weak hydrogen bonds donors (all hydrophobic C-H pairings)""" + donor_pairs = [] + data = namedtuple('hbonddonor', 'd d_orig_atom d_orig_idx h type') + for donor in [a for a in all_atoms if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf]: + in_ring = False + if not in_ring: + for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(donor.OBAtom) if a.IsHbondDonorH()]: + d_orig_idx = self.Mapper.mapid(donor.idx, mtype=self.mtype, bsid=self.bsid) + d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) + donor_pairs.append(data(d=donor, d_orig_atom=d_orig_atom, d_orig_idx=d_orig_idx, + h=pybel.Atom(adj_atom), type='regular')) + for carbon in hydroph_atoms: + for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom) if a.GetAtomicNum() == 1]: + d_orig_idx = self.Mapper.mapid(carbon.atom.idx, mtype=self.mtype, bsid=self.bsid) + d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) + donor_pairs.append(data(d=carbon, d_orig_atom=d_orig_atom, + d_orig_idx=d_orig_idx, h=pybel.Atom(adj_atom), type='weak')) + donor_pairs = sorted(donor_pairs, key=lambda x: (x.d_orig_idx, x.h.idx)) + return donor_pairs + + def find_rings(self, mol, all_atoms): + """Find rings and return only aromatic. + Rings have to be sufficiently planar OR be detected by OpenBabel as aromatic.""" + data = namedtuple('aromatic_ring', 'atoms orig_atoms atoms_orig_idx normal obj center type') + rings = [] + aromatic_amino = ['TYR', 'TRP', 'HIS', 'PHE'] + ring_candidates = mol.OBMol.GetSSSR() + logger.debug(f'number of aromatic ring candidates: {len(ring_candidates)}') + # Check here first for ligand rings not being detected as aromatic by Babel and check for planarity + for ring in ring_candidates: + r_atoms = [a for a in all_atoms if ring.IsMember(a.OBAtom)] + r_atoms = sorted(r_atoms, key=lambda x: x.idx) + if 4 < len(r_atoms) <= 6: + res = list(set([whichrestype(a) for a in r_atoms])) + # re-sort ring atoms for only ligands, because HETATM numbering is not canonical in OpenBabel + if res[0] == 'UNL': + ligand_orig_idx = [self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms] + sort_order = np.argsort(np.array(ligand_orig_idx)) + r_atoms = [r_atoms[i] for i in sort_order] + if ring.IsAromatic() or res[0] in aromatic_amino or ring_is_planar(ring, r_atoms): + # Causes segfault with OpenBabel 2.3.2, so deactivated + # typ = ring.GetType() if not ring.GetType() == '' else 'unknown' + # Alternative typing + ring_type = '%s-membered' % len(r_atoms) + ring_atms = [r_atoms[a].coords for a in [0, 2, 4]] # Probe atoms for normals, assuming planarity + ringv1 = vector(ring_atms[0], ring_atms[1]) + ringv2 = vector(ring_atms[2], ring_atms[0]) + atoms_orig_idx = [self.Mapper.mapid(r_atom.idx, mtype=self.mtype, + bsid=self.bsid) for r_atom in r_atoms] + orig_atoms = [self.Mapper.id_to_atom(idx) for idx in atoms_orig_idx] + rings.append(data(atoms=r_atoms, + orig_atoms=orig_atoms, + atoms_orig_idx=atoms_orig_idx, + normal=normalize_vector(np.cross(ringv1, ringv2)), + obj=ring, + center=centroid([ra.coords for ra in r_atoms]), + type=ring_type)) + return rings + + def get_hydrophobic_atoms(self): + return self.hydroph_atoms + + def get_hba(self): + return self.hbond_acc_atoms + + def get_hbd(self): + return [don_pair for don_pair in self.hbond_don_atom_pairs if don_pair.type == 'regular'] + + def get_weak_hbd(self): + return [don_pair for don_pair in self.hbond_don_atom_pairs if don_pair.type == 'weak'] + + def get_pos_charged(self): + return [charge for charge in self.charged if charge.type == 'positive'] + + def get_neg_charged(self): + return [charge for charge in self.charged if charge.type == 'negative'] + + +class PLInteraction: + """Class to store a ligand, a protein and their interactions.""" + + def __init__(self, lig_obj, bs_obj, protcomplex): + """Detect all interactions when initializing""" + self.ligand = lig_obj + self.lig_members = lig_obj.members + self.pdbid = protcomplex.pymol_name + self.bindingsite = bs_obj + self.Mapper = protcomplex.Mapper + self.output_path = protcomplex.output_path + self.altconf = protcomplex.altconf + # #@todo Refactor code to combine different directionality + + self.saltbridge_lneg = saltbridge(self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True) + self.saltbridge_pneg = saltbridge(self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False) + + self.all_hbonds_ldon = hbonds(self.bindingsite.get_hba(), + self.ligand.get_hbd(), False, 'strong') + self.all_hbonds_pdon = hbonds(self.ligand.get_hba(), + self.bindingsite.get_hbd(), True, 'strong') + + self.hbonds_ldon = self.refine_hbonds_ldon(self.all_hbonds_ldon, self.saltbridge_lneg, + self.saltbridge_pneg) + self.hbonds_pdon = self.refine_hbonds_pdon(self.all_hbonds_pdon, self.saltbridge_lneg, + self.saltbridge_pneg) + + self.pistacking = pistacking(self.bindingsite.rings, self.ligand.rings) + + self.all_pi_cation_laro = pication(self.ligand.rings, self.bindingsite.get_pos_charged(), True) + self.pication_paro = pication(self.bindingsite.rings, self.ligand.get_pos_charged(), False) + + self.pication_laro = self.refine_pi_cation_laro(self.all_pi_cation_laro, self.pistacking) + + self.all_hydrophobic_contacts = hydrophobic_interactions(self.bindingsite.get_hydrophobic_atoms(), + self.ligand.get_hydrophobic_atoms()) + self.hydrophobic_contacts = self.refine_hydrophobic(self.all_hydrophobic_contacts, self.pistacking) + self.halogen_bonds = halogen(self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don) + self.water_bridges = water_bridges(self.bindingsite.get_hba(), self.ligand.get_hba(), + self.bindingsite.get_hbd(), self.ligand.get_hbd(), + self.ligand.water) + + self.water_bridges = self.refine_water_bridges(self.water_bridges, self.hbonds_ldon, self.hbonds_pdon) + + self.metal_complexes = metal_complexation(self.ligand.metals, self.ligand.metal_binding, + self.bindingsite.metal_binding) + + self.all_itypes = self.saltbridge_lneg + self.saltbridge_pneg + self.hbonds_pdon + self.all_itypes = self.all_itypes + self.hbonds_ldon + self.pistacking + self.pication_laro + self.pication_paro + self.all_itypes = self.all_itypes + self.hydrophobic_contacts + self.halogen_bonds + self.water_bridges + self.all_itypes = self.all_itypes + self.metal_complexes + + self.no_interactions = all(len(i) == 0 for i in self.all_itypes) + self.unpaired_hba, self.unpaired_hbd, self.unpaired_hal = self.find_unpaired_ligand() + self.unpaired_hba_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid) + for atom in self.unpaired_hba] + self.unpaired_hbd_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid) + for atom in self.unpaired_hbd] + self.unpaired_hal_orig_idx = [self.Mapper.mapid(atom.idx, mtype='ligand', bsid=self.ligand.bsid) + for atom in self.unpaired_hal] + self.num_unpaired_hba, self.num_unpaired_hbd = len(self.unpaired_hba), len(self.unpaired_hbd) + self.num_unpaired_hal = len(self.unpaired_hal) + + # Exclude empty chains (coming from ligand as a target, from metal complexes) + self.interacting_chains = sorted(list(set([i.reschain for i in self.all_itypes + if i.reschain not in [' ', None]]))) + + # Get all interacting residues, excluding ligand and water molecules + self.interacting_res = list(set([''.join([str(i.resnr), str(i.reschain)]) for i in self.all_itypes + if i.restype not in ['LIG', 'HOH']])) + if len(self.interacting_res) != 0: + logger.info( + f'ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}') + interactions_list = [] + num_saltbridges = len(self.saltbridge_lneg + self.saltbridge_pneg) + num_hbonds = len(self.hbonds_ldon + self.hbonds_pdon) + num_pication = len(self.pication_laro + self.pication_paro) + num_pistack = len(self.pistacking) + num_halogen = len(self.halogen_bonds) + num_waterbridges = len(self.water_bridges) + if num_saltbridges != 0: + interactions_list.append('%i salt bridge(s)' % num_saltbridges) + if num_hbonds != 0: + interactions_list.append('%i hydrogen bond(s)' % num_hbonds) + if num_pication != 0: + interactions_list.append('%i pi-cation interaction(s)' % num_pication) + if num_pistack != 0: + interactions_list.append('%i pi-stacking(s)' % num_pistack) + if num_halogen != 0: + interactions_list.append('%i halogen bond(s)' % num_halogen) + if num_waterbridges != 0: + interactions_list.append('%i water bridge(s)' % num_waterbridges) + if not len(interactions_list) == 0: + logger.info(f'complex uses {interactions_list}') + else: + logger.info('no interactions for this ligand') + + def find_unpaired_ligand(self): + """Identify unpaired functional in groups in ligands, involving H-Bond donors, acceptors, halogen bond donors. + """ + unpaired_hba, unpaired_hbd, unpaired_hal = [], [], [] + # Unpaired hydrogen bond acceptors/donors in ligand (not used for hydrogen bonds/water, salt bridges/mcomplex) + involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [hbond.d.idx for hbond in self.hbonds_ldon] + [[involved_atoms.append(atom.idx) for atom in sb.negative.atoms] for sb in self.saltbridge_lneg] + [[involved_atoms.append(atom.idx) for atom in sb.positive.atoms] for sb in self.saltbridge_pneg] + [involved_atoms.append(wb.a.idx) for wb in self.water_bridges if wb.protisdon] + [involved_atoms.append(wb.d.idx) for wb in self.water_bridges if not wb.protisdon] + [involved_atoms.append(mcomplex.target.atom.idx) for mcomplex in self.metal_complexes + if mcomplex.location == 'ligand'] + for atom in [hba.a for hba in self.ligand.get_hba()]: + if atom.idx not in involved_atoms: + unpaired_hba.append(atom) + for atom in [hbd.d for hbd in self.ligand.get_hbd()]: + if atom.idx not in involved_atoms: + unpaired_hbd.append(atom) + + # unpaired halogen bond donors in ligand (not used for the previous + halogen bonds) + [involved_atoms.append(atom.don.x.idx) for atom in self.halogen_bonds] + for atom in [haldon.x for haldon in self.ligand.halogenbond_don]: + if atom.idx not in involved_atoms: + unpaired_hal.append(atom) + + return unpaired_hba, unpaired_hbd, unpaired_hal + + def refine_hydrophobic(self, all_h, pistacks): + """Apply several rules to reduce the number of hydrophobic interactions.""" + sel = {} + # 1. Rings interacting via stacking can't have additional hydrophobic contacts between each other. + for pistack, h in itertools.product(pistacks, all_h): + h1, h2 = h.bsatom.idx, h.ligatom.idx + brs, lrs = [p1.idx for p1 in pistack.proteinring.atoms], [p2.idx for p2 in pistack.ligandring.atoms] + if h1 in brs and h2 in lrs: + sel[(h1, h2)] = "EXCLUDE" + hydroph = [h for h in all_h if not (h.bsatom.idx, h.ligatom.idx) in sel] + sel2 = {} + # 2. If a ligand atom interacts with several binding site atoms in the same residue, + # keep only the one with the closest distance + for h in hydroph: + if not (h.ligatom.idx, h.resnr) in sel2: + sel2[(h.ligatom.idx, h.resnr)] = h + else: + if sel2[(h.ligatom.idx, h.resnr)].distance > h.distance: + sel2[(h.ligatom.idx, h.resnr)] = h + hydroph = [h for h in sel2.values()] + hydroph_final = [] + bsclust = {} + # 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist + for h in hydroph: + if h.bsatom.idx not in bsclust: + bsclust[h.bsatom.idx] = [h, ] + else: + bsclust[h.bsatom.idx].append(h) + + idx_to_h = {} + for bs in [a for a in bsclust if len(bsclust[a]) == 1]: + hydroph_final.append(bsclust[bs][0]) + + # A list of tuples with the idx of an atom and one of its neighbours is created + for bs in [a for a in bsclust if not len(bsclust[a]) == 1]: + tuples = [] + all_idx = [i.ligatom.idx for i in bsclust[bs]] + for b in bsclust[bs]: + idx = b.ligatom.idx + neigh = [na for na in pybel.ob.OBAtomAtomIter(b.ligatom.OBAtom)] + for n in neigh: + n_idx = n.GetIdx() + if n_idx in all_idx: + if n_idx < idx: + tuples.append((n_idx, idx)) + else: + tuples.append((idx, n_idx)) + idx_to_h[idx] = b + + tuples = list(set(tuples)) + tuples = sorted(tuples, key=itemgetter(1)) + clusters = cluster_doubles(tuples) # Cluster connected atoms (i.e. find hydrophobic patches) + + for cluster in clusters: + min_dist = float('inf') + min_h = None + for atm_idx in cluster: + h = idx_to_h[atm_idx] + if h.distance < min_dist: + min_dist = h.distance + min_h = h + hydroph_final.append(min_h) + before, reduced = len(all_h), len(hydroph_final) + if not before == 0 and not before == reduced: + logger.info(f'reduced number of hydrophobic contacts from {before} to {reduced}') + return hydroph_final + + def refine_hbonds_ldon(self, all_hbonds, salt_lneg, salt_pneg): + """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds.""" + i_set = {} + for hbond in all_hbonds: + i_set[hbond] = False + for salt in salt_pneg: + protidx, ligidx = [at.idx for at in salt.negative.atoms], [at.idx for at in salt.positive.atoms] + if hbond.d.idx in ligidx and hbond.a.idx in protidx: + i_set[hbond] = True + for salt in salt_lneg: + protidx, ligidx = [at.idx for at in salt.positive.atoms], [at.idx for at in salt.negative.atoms] + if hbond.d.idx in ligidx and hbond.a.idx in protidx: + i_set[hbond] = True + + # Allow only one hydrogen bond per donor, select interaction with larger donor angle + second_set = {} + hbls = [k for k in i_set.keys() if not i_set[k]] + for hbl in hbls: + if hbl.d.idx not in second_set: + second_set[hbl.d.idx] = (hbl.angle, hbl) + else: + if second_set[hbl.d.idx][0] < hbl.angle: + second_set[hbl.d.idx] = (hbl.angle, hbl) + return [hb[1] for hb in second_set.values()] + + def refine_hbonds_pdon(self, all_hbonds, salt_lneg, salt_pneg): + """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds with + atoms of the same group. + """ + i_set = {} + for hbond in all_hbonds: + i_set[hbond] = False + for salt in salt_lneg: + protidx, ligidx = [at.idx for at in salt.positive.atoms], [at.idx for at in salt.negative.atoms] + if hbond.a.idx in ligidx and hbond.d.idx in protidx: + i_set[hbond] = True + for salt in salt_pneg: + protidx, ligidx = [at.idx for at in salt.negative.atoms], [at.idx for at in salt.positive.atoms] + if hbond.a.idx in ligidx and hbond.d.idx in protidx: + i_set[hbond] = True + + # Allow only one hydrogen bond per donor, select interaction with larger donor angle + second_set = {} + hbps = [k for k in i_set.keys() if not i_set[k]] + for hbp in hbps: + if hbp.d.idx not in second_set: + second_set[hbp.d.idx] = (hbp.angle, hbp) + else: + if second_set[hbp.d.idx][0] < hbp.angle: + second_set[hbp.d.idx] = (hbp.angle, hbp) + return [hb[1] for hb in second_set.values()] + + def refine_pi_cation_laro(self, all_picat, stacks): + """Just important for constellations with histidine involved. If the histidine ring is positioned in stacking + position to an aromatic ring in the ligand, there is in most cases stacking and pi-cation interaction reported + as histidine also carries a positive charge in the ring. For such cases, only report stacking. + """ + i_set = [] + for picat in all_picat: + exclude = False + for stack in stacks: + if whichrestype(stack.proteinring.atoms[0]) == 'HIS' and picat.ring.obj == stack.ligandring.obj: + exclude = True + if not exclude: + i_set.append(picat) + return i_set + + def refine_water_bridges(self, wbridges, hbonds_ldon, hbonds_pdon): + """A donor atom already forming a hydrogen bond is not allowed to form a water bridge. Each water molecule + can only be donor for two water bridges, selecting the constellation with the omega angle closest to 110 deg.""" + donor_atoms_hbonds = [hb.d.idx for hb in hbonds_ldon + hbonds_pdon] + wb_dict = {} + wb_dict2 = {} + omega = 110.0 + + # Just one hydrogen bond per donor atom + for wbridge in [wb for wb in wbridges if wb.d.idx not in donor_atoms_hbonds]: + if (wbridge.water.idx, wbridge.a.idx) not in wb_dict: + wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge + else: + if abs(omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle) < abs(omega - wbridge.w_angle): + wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge + for wb_tuple in wb_dict: + water, acceptor = wb_tuple + if water not in wb_dict2: + wb_dict2[water] = [(abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]), ] + elif len(wb_dict2[water]) == 1: + wb_dict2[water].append((abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple])) + wb_dict2[water] = sorted(wb_dict2[water], key=lambda x: x[0]) + else: + if wb_dict2[water][1][0] < abs(omega - wb_dict[wb_tuple].w_angle): + wb_dict2[water] = [wb_dict2[water][0], (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple])] + + filtered_wb = [] + for fwbridges in wb_dict2.values(): + [filtered_wb.append(fwb[1]) for fwb in fwbridges] + return filtered_wb + + +class BindingSite(Mol): + def __init__(self, atoms, protcomplex, cclass, altconf, min_dist, mapper): + """Find all relevant parts which could take part in interactions""" + Mol.__init__(self, altconf, mapper, mtype='protein', bsid=None) + self.complex = cclass + self.full_mol = protcomplex + self.all_atoms = atoms + self.min_dist = min_dist # Minimum distance of bs res to ligand + self.bs_res = list(set([''.join([str(whichresnumber(a)), whichchain(a)]) for a in self.all_atoms])) # e.g. 47A + self.rings = self.find_rings(self.full_mol, self.all_atoms) + self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) + self.hbond_acc_atoms = self.find_hba(self.all_atoms) + self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) + self.charged = self.find_charged(self.full_mol) + self.halogenbond_acc = self.find_hal(self.all_atoms) + self.metal_binding = self.find_metal_binding(self.full_mol) + + def find_hal(self, atoms): + """Look for halogen bond acceptors (Y-{O|P|N|S}, with Y=C,P,S)""" + data = namedtuple('hal_acceptor', 'o o_orig_idx y y_orig_idx') + a_set = [] + # All oxygens, nitrogen, sulfurs with neighboring carbon, phosphor, nitrogen or sulfur + for a in [at for at in atoms if at.atomicnum in [8, 7, 16]]: + n_atoms = [na for na in pybel.ob.OBAtomAtomIter(a.OBAtom) if na.GetAtomicNum() in [6, 7, 15, 16]] + if len(n_atoms) == 1: # Proximal atom + o_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) + y_orig_idx = self.Mapper.mapid(n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid) + a_set.append(data(o=a, o_orig_idx=o_orig_idx, y=pybel.Atom(n_atoms[0]), y_orig_idx=y_orig_idx)) + return a_set + + def find_charged(self, mol): + """Looks for positive charges in arginine, histidine or lysine, for negative in aspartic and glutamic acid.""" + data = namedtuple('pcharge', 'atoms atoms_orig_idx type center restype resnr reschain') + a_set = [] + # Iterate through all residue, exclude those in chains defined as peptides + for res in [r for r in pybel.ob.OBResidueIter(mol.OBMol) if not r.GetChain() in config.PEPTIDES]: + if config.INTRA is not None: + if res.GetChain() != config.INTRA: + continue + a_contributing = [] + a_contributing_orig_idx = [] + if res.GetName() in ('ARG', 'HIS', 'LYS'): # Arginine, Histidine or Lysine have charged sidechains + for a in pybel.ob.OBResidueAtomIter(res): + if a.GetType().startswith('N') and res.GetAtomProperty(a, 8) \ + and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + a_contributing.append(pybel.Atom(a)) + a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein')) + if not len(a_contributing) == 0: + a_set.append(data(atoms=a_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type='positive', + center=centroid([ac.coords for ac in a_contributing]), + restype=res.GetName(), + resnr=res.GetNum(), + reschain=res.GetChain())) + if res.GetName() in ('GLU', 'ASP'): # Aspartic or Glutamic Acid + for a in pybel.ob.OBResidueAtomIter(res): + if a.GetType().startswith('O') and res.GetAtomProperty(a, 8) \ + and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + a_contributing.append(pybel.Atom(a)) + a_contributing_orig_idx.append(self.Mapper.mapid(a.GetIdx(), mtype='protein')) + if not len(a_contributing) == 0: + a_set.append(data(atoms=a_contributing, + atoms_orig_idx=a_contributing_orig_idx, + type='negative', + center=centroid([ac.coords for ac in a_contributing]), + restype=res.GetName(), + resnr=res.GetNum(), + reschain=res.GetChain())) + return a_set + + def find_metal_binding(self, mol): + """Looks for atoms that could possibly be involved in chelating a metal ion. + This can be any main chain oxygen atom or oxygen, nitrogen and sulfur from specific amino acids""" + data = namedtuple('metal_binding', 'atom atom_orig_idx type restype resnr reschain location') + a_set = [] + for res in pybel.ob.OBResidueIter(mol.OBMol): + restype, reschain, resnr = res.GetName().upper(), res.GetChain(), res.GetNum() + if restype in ['ASP', 'GLU', 'SER', 'THR', 'TYR']: # Look for oxygens here + for a in pybel.ob.OBResidueAtomIter(res): + if a.GetType().startswith('O') and res.GetAtomProperty(a, 8) \ + and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='O', restype=restype, + resnr=resnr, reschain=reschain, + location='protein.sidechain')) + if restype == 'HIS': # Look for nitrogen here + for a in pybel.ob.OBResidueAtomIter(res): + if a.GetType().startswith('N') and res.GetAtomProperty(a, 8) \ + and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='N', restype=restype, + resnr=resnr, reschain=reschain, + location='protein.sidechain')) + if restype == 'CYS': # Look for sulfur here + for a in pybel.ob.OBResidueAtomIter(res): + if a.GetType().startswith('S') and res.GetAtomProperty(a, 8) \ + and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf: + atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='S', restype=restype, + resnr=resnr, reschain=reschain, + location='protein.sidechain')) + for a in pybel.ob.OBResidueAtomIter(res): # All main chain oxygens + if a.GetType().startswith('O') and res.GetAtomProperty(a, 2) \ + and not self.Mapper.mapid(a.GetIdx(), mtype='protein') in self.altconf and restype != 'HOH': + atom_orig_idx = self.Mapper.mapid(a.GetIdx(), mtype=self.mtype, bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(a), atom_orig_idx=atom_orig_idx, type='O', restype=res.GetName(), + resnr=res.GetNum(), reschain=res.GetChain(), + location='protein.mainchain')) + return a_set + + +class Ligand(Mol): + def __init__(self, cclass, ligand): + altconf = cclass.altconf + self.hetid, self.chain, self.position = ligand.hetid, ligand.chain, ligand.position + self.bsid = ':'.join([self.hetid, self.chain, str(self.position)]) + Mol.__init__(self, altconf, cclass.Mapper, mtype='ligand', bsid=self.bsid) + self.members = ligand.members + self.longname = ligand.longname + self.type = ligand.type + self.complex = cclass + self.molecule = ligand.mol # Pybel Molecule + self.smiles = self.molecule.write(format='can') # SMILES String + self.inchikey = self.molecule.write(format='inchikey') + self.can_to_pdb = ligand.can_to_pdb + if not len(self.smiles) == 0: + self.smiles = self.smiles.split()[0] + else: + logger.warning(f'could not write SMILES for ligand {ligand}') + self.smiles = '' + self.heavy_atoms = self.molecule.OBMol.NumHvyAtoms() # Heavy atoms count + self.all_atoms = self.molecule.atoms + self.atmdict = {l.idx: l for l in self.all_atoms} + self.rings = self.find_rings(self.molecule, self.all_atoms) + self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) + self.hbond_acc_atoms = self.find_hba(self.all_atoms) + self.num_rings = len(self.rings) + if self.num_rings != 0: + logger.info(f'contains {self.num_rings} aromatic ring(s)') + descvalues = self.molecule.calcdesc() + self.molweight, self.logp = float(descvalues['MW']), float(descvalues['logP']) + self.num_rot_bonds = int(self.molecule.OBMol.NumRotors()) + self.atomorder = ligand.atomorder + + ########################################################## + # Special Case for hydrogen bond acceptor identification # + ########################################################## + + self.inverse_mapping = {v: k for k, v in self.Mapper.ligandmaps[self.bsid].items()} + self.pdb_to_idx_mapping = {v: k for k, v in self.Mapper.proteinmap.items()} + self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) + + ###### + donor_pairs = [] + data = namedtuple('hbonddonor', 'd d_orig_atom d_orig_idx h type') + for donor in self.all_atoms: + pdbidx = self.Mapper.mapid(donor.idx, mtype='ligand', bsid=self.bsid, to='original') + d = cclass.atoms[self.pdb_to_idx_mapping[pdbidx]] + if d.OBAtom.IsHbondDonor(): + for adj_atom in [a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH()]: + d_orig_atom = self.Mapper.id_to_atom(pdbidx) + donor_pairs.append(data(d=donor, d_orig_atom=d_orig_atom, d_orig_idx=pdbidx, + h=pybel.Atom(adj_atom), type='regular')) + self.hbond_don_atom_pairs = donor_pairs + ####### + + self.charged = self.find_charged(self.all_atoms) + self.centroid = centroid([a.coords for a in self.all_atoms]) + self.max_dist_to_center = max((euclidean3d(self.centroid, a.coords) for a in self.all_atoms)) + self.water = [] + data = namedtuple('water', 'oxy oxy_orig_idx') + for hoh in ligand.water: + oxy = None + for at in pybel.ob.OBResidueAtomIter(hoh): + if at.GetAtomicNum() == 8 and at.GetIdx() not in self.altconf: + oxy = pybel.Atom(at) + # There are some cases where there is no oxygen in a water residue, ignore those + if not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)]) == {1} and oxy is not None: + if euclidean3d(self.centroid, oxy.coords) < self.max_dist_to_center + config.BS_DIST: + oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype='protein') + self.water.append(data(oxy=oxy, oxy_orig_idx=oxy_orig_idx)) + self.halogenbond_don = self.find_hal(self.all_atoms) + self.metal_binding = self.find_metal_binding(self.all_atoms, self.water) + self.metals = [] + data = namedtuple('metal', 'm orig_m m_orig_idx') + for a in [a for a in self.all_atoms if a.type.upper() in config.METAL_IONS]: + m_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) + orig_m = self.Mapper.id_to_atom(m_orig_idx) + self.metals.append(data(m=a, m_orig_idx=m_orig_idx, orig_m=orig_m)) + self.num_hba, self.num_hbd = len(self.hbond_acc_atoms), len(self.hbond_don_atom_pairs) + self.num_hal = len(self.halogenbond_don) + + def get_canonical_num(self, atomnum): + """Converts internal atom ID into canonical atom ID. Agrees with Canonical SMILES in XML.""" + return self.atomorder[atomnum - 1] + + def is_functional_group(self, atom, group): + """Given a pybel atom, look up if it belongs to a function group""" + n_atoms = [a_neighbor.GetAtomicNum() for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom)] + + if group in ['quartamine', 'tertamine'] and atom.atomicnum == 7: # Nitrogen + # It's a nitrogen, so could be a protonated amine or quaternary ammonium + if '1' not in n_atoms and len(n_atoms) == 4: + return True if group == 'quartamine' else False # It's a quat. ammonium (N with 4 residues != H) + elif atom.OBAtom.GetHyb() == 3 and len(n_atoms) >= 3: + return True if group == 'tertamine' else False # It's sp3-hybridized, so could pick up an hydrogen + else: + return False + + if group in ['sulfonium', 'sulfonicacid', 'sulfate'] and atom.atomicnum == 16: # Sulfur + if '1' not in n_atoms and len(n_atoms) == 3: # It's a sulfonium (S with 3 residues != H) + return True if group == 'sulfonium' else False + elif n_atoms.count(8) == 3: # It's a sulfonate or sulfonic acid + return True if group == 'sulfonicacid' else False + elif n_atoms.count(8) == 4: # It's a sulfate + return True if group == 'sulfate' else False + + if group == 'phosphate' and atom.atomicnum == 15: # Phosphor + if set(n_atoms) == {8}: # It's a phosphate + return True + + if group in ['carboxylate', 'guanidine'] and atom.atomicnum == 6: # It's a carbon atom + if n_atoms.count(8) == 2 and n_atoms.count(6) == 1: # It's a carboxylate group + return True if group == 'carboxylate' else False + elif n_atoms.count(7) == 3 and len(n_atoms) == 3: # It's a guanidine group + nitro_partners = [] + for nitro in pybel.ob.OBAtomAtomIter(atom.OBAtom): + nitro_partners.append(len([b_neighbor for b_neighbor in pybel.ob.OBAtomAtomIter(nitro)])) + if min(nitro_partners) == 1: # One nitrogen is only connected to the carbon, can pick up a H + return True if group == 'guanidine' else False + + if group == 'halocarbon' and atom.atomicnum in [9, 17, 35, 53]: # Halogen atoms + n_atoms = [na for na in pybel.ob.OBAtomAtomIter(atom.OBAtom) if na.GetAtomicNum() == 6] + if len(n_atoms) == 1: # Halocarbon + return True + else: + return False + + def find_hal(self, atoms): + """Look for halogen bond donors (X-C, with X=F, Cl, Br, I)""" + data = namedtuple('hal_donor', 'x orig_x x_orig_idx c c_orig_idx') + a_set = [] + for a in atoms: + if self.is_functional_group(a, 'halocarbon'): + n_atoms = [na for na in pybel.ob.OBAtomAtomIter(a.OBAtom) if na.GetAtomicNum() == 6] + x_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) + orig_x = self.Mapper.id_to_atom(x_orig_idx) + c_orig_idx = [self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid) for na in n_atoms] + a_set.append(data(x=a, orig_x=orig_x, x_orig_idx=x_orig_idx, + c=pybel.Atom(n_atoms[0]), c_orig_idx=c_orig_idx)) + if len(a_set) != 0: + logger.info(f'ligand contains {len(a_set)} halogen atom(s)') + return a_set + + def find_charged(self, all_atoms): + """Identify all positively charged groups in a ligand. This search is not exhaustive, as the cases can be quite + diverse. The typical cases seem to be protonated amines, quaternary ammoinium and sulfonium + as mentioned in 'Cation-pi interactions in ligand recognition and catalysis' (Zacharias et al., 2002)). + Identify negatively charged groups in the ligand. + """ + data = namedtuple('lcharge', 'atoms orig_atoms atoms_orig_idx type center fgroup') + a_set = [] + for a in all_atoms: + a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) + a_orig = self.Mapper.id_to_atom(a_orig_idx) + if self.is_functional_group(a, 'quartamine'): + a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive', + center=list(a.coords), fgroup='quartamine')) + elif self.is_functional_group(a, 'tertamine'): + a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive', + center=list(a.coords), + fgroup='tertamine')) + if self.is_functional_group(a, 'sulfonium'): + a_set.append(data(atoms=[a, ], orig_atoms=[a_orig, ], atoms_orig_idx=[a_orig_idx, ], type='positive', + center=list(a.coords), + fgroup='sulfonium')) + if self.is_functional_group(a, 'phosphate'): + a_contributing = [a, ] + a_contributing_orig_idx = [a_orig_idx, ] + [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)] + [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)) + for neighbor in a_contributing] + orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + a_set.append( + data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, + type='negative', + center=a.coords, fgroup='phosphate')) + if self.is_functional_group(a, 'sulfonicacid'): + a_contributing = [a, ] + a_contributing_orig_idx = [a_orig_idx, ] + [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) if + neighbor.GetAtomicNum() == 8] + [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)) + for neighbor in a_contributing] + orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + a_set.append( + data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, + type='negative', + center=a.coords, fgroup='sulfonicacid')) + elif self.is_functional_group(a, 'sulfate'): + a_contributing = [a, ] + a_contributing_orig_idx = [a_orig_idx, ] + [a_contributing_orig_idx.append(self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid)) + for neighbor in a_contributing] + [a_contributing.append(pybel.Atom(neighbor)) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom)] + orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + a_set.append( + data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, + type='negative', + center=a.coords, fgroup='sulfate')) + if self.is_functional_group(a, 'carboxylate'): + a_contributing = [pybel.Atom(neighbor) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + if neighbor.GetAtomicNum() == 8] + a_contributing_orig_idx = [self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) + for neighbor in a_contributing] + orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + a_set.append( + data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, + type='negative', + center=centroid([a.coords for a in a_contributing]), fgroup='carboxylate')) + elif self.is_functional_group(a, 'guanidine'): + a_contributing = [pybel.Atom(neighbor) for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) + if neighbor.GetAtomicNum() == 7] + a_contributing_orig_idx = [self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) + for neighbor in a_contributing] + orig_contributing = [self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx] + a_set.append( + data(atoms=a_contributing, orig_atoms=orig_contributing, atoms_orig_idx=a_contributing_orig_idx, + type='positive', + center=a.coords, fgroup='guanidine')) + return a_set + + def find_metal_binding(self, lig_atoms, water_oxygens): + """Looks for atoms that could possibly be involved in binding a metal ion. + This can be any water oxygen, as well as oxygen from carboxylate, phophoryl, phenolate, alcohol; + nitrogen from imidazole; sulfur from thiolate. + """ + a_set = [] + data = namedtuple('metal_binding', 'atom orig_atom atom_orig_idx type fgroup restype resnr reschain location') + for oxygen in water_oxygens: + a_set.append(data(atom=oxygen.oxy, atom_orig_idx=oxygen.oxy_orig_idx, type='O', fgroup='water', + restype=whichrestype(oxygen.oxy), resnr=whichresnumber(oxygen.oxy), + reschain=whichchain(oxygen.oxy), location='water', + orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx))) + # #@todo Refactor code + for a in lig_atoms: + a_orig_idx = self.Mapper.mapid(a.idx, mtype='ligand', bsid=self.bsid) + n_atoms = pybel.ob.OBAtomAtomIter(a.OBAtom) # Neighboring atoms + # All atomic numbers of neighboring atoms + n_atoms_atomicnum = [n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom)] + if a.atomicnum == 8: # Oxygen + if n_atoms_atomicnum.count('1') == 1 and len(n_atoms_atomicnum) == 2: # Oxygen in alcohol (R-[O]-H) + a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='O', fgroup='alcohol', + restype=self.hetid, resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if True in [n.IsAromatic() for n in n_atoms] and not a.OBAtom.IsAromatic(): # Phenolate oxygen + a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='O', fgroup='phenolate', + restype=self.hetid, resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if a.atomicnum == 6: # It's a carbon atom + if n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1: # It's a carboxylate group + for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: + neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O', + fgroup='carboxylate', + restype=self.hetid, + resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if a.atomicnum == 15: # It's a phosphor atom + if n_atoms_atomicnum.count(8) >= 3: # It's a phosphoryl + for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: + neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O', + fgroup='phosphoryl', + restype=self.hetid, + resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if n_atoms_atomicnum.count(8) == 2: # It's another phosphor-containing group #@todo (correct name?) + for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: + neighbor_orig_idx = self.Mapper.mapid(neighbor.GetIdx(), mtype='ligand', bsid=self.bsid) + a_set.append(data(atom=pybel.Atom(neighbor), atom_orig_idx=neighbor_orig_idx, type='O', + fgroup='phosphor.other', restype=self.hetid, + resnr=self.position, + reschain=self.chain, location='ligand', + orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if a.atomicnum == 7: # It's a nitrogen atom + if n_atoms_atomicnum.count(6) == 2: # It's imidazole/pyrrole or similar + a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='N', fgroup='imidazole/pyrrole', + restype=self.hetid, resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if a.atomicnum == 16: # It's a sulfur atom + if True in [n.IsAromatic() for n in n_atoms] and not a.OBAtom.IsAromatic(): # Thiolate + a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='S', fgroup='thiolate', + restype=self.hetid, resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + if set(n_atoms_atomicnum) == {26}: # Sulfur in Iron sulfur cluster + a_set.append(data(atom=a, atom_orig_idx=a_orig_idx, type='S', fgroup='iron-sulfur.cluster', + restype=self.hetid, resnr=self.position, reschain=self.chain, + location='ligand', orig_atom=self.Mapper.id_to_atom(a_orig_idx))) + + return a_set + + +class PDBComplex: + """Contains a collection of objects associated with a PDB complex, i.e. one or several ligands and their binding + sites as well as information about the pliprofiler between them. Provides functions to load and prepare input files + such as PDB files. + """ + + def __init__(self): + self.interaction_sets = {} # Dictionary with site identifiers as keys and object as value + self.protcomplex = None + self.filetype = None + self.atoms = {} # Dictionary of Pybel atoms, accessible by their idx + self.sourcefiles = {} + self.information = {} + self.corrected_pdb = '' + self._output_path = tempfile.gettempdir() + self.pymol_name = None + self.modres = set() + self.resis = [] + self.altconf = [] # Atom idx of atoms with alternate conformations + self.covalent = [] # Covalent linkages between ligands and protein residues/other ligands + self.excluded = [] # Excluded ligands + self.Mapper = Mapper() + self.ligands = [] + + def __str__(self): + formatted_lig_names = [":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands] + return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join( + [lig for lig in formatted_lig_names]) + + def load_pdb(self, pdbpath, as_string=False): + """Loads a pdb file with protein AND ligand(s), separates and prepares them. + If specified 'as_string', the input is a PDB string instead of a path.""" + if as_string: + self.sourcefiles['pdbcomplex.original'] = None + self.sourcefiles['pdbcomplex'] = None + self.sourcefiles['pdbstring'] = pdbpath + else: + self.sourcefiles['pdbcomplex.original'] = pdbpath + self.sourcefiles['pdbcomplex'] = pdbpath + self.information['pdbfixes'] = False + pdbparser = PDBParser(pdbpath, as_string=as_string) # Parse PDB file to find errors and get additional data + # #@todo Refactor and rename here + self.Mapper.proteinmap = pdbparser.proteinmap + self.Mapper.reversed_proteinmap = {v: k for k, v in self.Mapper.proteinmap.items()} + self.modres = pdbparser.modres + self.covalent = pdbparser.covalent + self.altconf = pdbparser.altconformations + self.corrected_pdb = pdbparser.corrected_pdb + + if not config.PLUGIN_MODE: + if pdbparser.num_fixed_lines > 0: + logger.info(f'{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file') + # Save modified PDB file + if not as_string: + basename = os.path.basename(pdbpath).split('.')[0] + else: + basename = "from_stdin" + pdbpath_fixed = tmpfile(prefix='plipfixed.' + basename + '_', direc=self.output_path) + create_folder_if_not_exists(self.output_path) + self.sourcefiles['pdbcomplex'] = pdbpath_fixed + self.corrected_pdb = re.sub(r'[^\x00-\x7F]+', ' ', self.corrected_pdb) # Strip non-unicode chars + if not config.NOFIXFILE: # Only write to file if this option is not activated + with open(pdbpath_fixed, 'w') as f: + f.write(self.corrected_pdb) + self.information['pdbfixes'] = True + + if not as_string: + self.sourcefiles['filename'] = os.path.basename(self.sourcefiles['pdbcomplex']) + self.protcomplex, self.filetype = read_pdb(self.corrected_pdb, as_string=True) + + # Update the model in the Mapper class instance + self.Mapper.original_structure = self.protcomplex.OBMol + logger.info('PDB structure successfully read') + + # Determine (temporary) PyMOL Name from Filename + self.pymol_name = pdbpath.split('/')[-1].split('.')[0] + '-Protein' + # Replace characters causing problems in PyMOL + self.pymol_name = self.pymol_name.replace(' ', '').replace('(', '').replace(')', '').replace('-', '_') + # But if possible, name it after PDBID in Header + if 'HEADER' in self.protcomplex.data: # If the PDB file has a proper header + potential_name = self.protcomplex.data['HEADER'][56:60].lower() + if extract_pdbid(potential_name) != 'UnknownProtein': + self.pymol_name = potential_name + logger.debug(f'PyMOL name set as: {self.pymol_name}') + + # Extract and prepare ligands + ligandfinder = LigandFinder(self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper) + self.ligands = ligandfinder.ligands + self.excluded = ligandfinder.excluded + + # decide whether to add polar hydrogens + if not config.NOHYDRO: + if not as_string: + basename = os.path.basename(pdbpath).split('.')[0] + else: + basename = "from_stdin" + self.protcomplex.OBMol.AddPolarHydrogens() + output_path = os.path.join(self._output_path, f'{basename}_protonated.pdb') + self.protcomplex.write('pdb', output_path, overwrite=True) + logger.info(f'protonated structure written to {output_path}') + else: + logger.warning('no polar hydrogens will be assigned (make sure your structure contains hydrogens)') + + for atm in self.protcomplex: + self.atoms[atm.idx] = atm + + if len(self.excluded) != 0: + logger.info(f'excluded molecules as ligands: {self.excluded}') + + if config.DNARECEPTOR: + self.resis = [obres for obres in pybel.ob.OBResidueIter( + self.protcomplex.OBMol) if obres.GetName() in config.DNA + config.RNA] + else: + self.resis = [obres for obres in pybel.ob.OBResidueIter( + self.protcomplex.OBMol) if obres.GetResidueProperty(0)] + + num_ligs = len(self.ligands) + if num_ligs == 1: + logger.info('analyzing one ligand') + elif num_ligs > 1: + logger.info(f'analyzing {num_ligs} ligands') + else: + logger.info(f'structure contains no ligands') + + def analyze(self): + """Triggers analysis of all complexes in structure""" + for ligand in self.ligands: + self.characterize_complex(ligand) + + def characterize_complex(self, ligand): + """Handles all basic functions for characterizing the interactions for one ligand""" + + single_sites = [] + for member in ligand.members: + single_sites.append(':'.join([str(x) for x in member])) + site = ' + '.join(single_sites) + site = site if not len(site) > 20 else site[:20] + '...' + longname = ligand.longname if not len(ligand.longname) > 20 else ligand.longname[:20] + '...' + ligtype = 'unspecified type' if ligand.type == 'UNSPECIFIED' else ligand.type + ligtext = f'{longname} [{ligtype}] -- {site}' + logger.info(f'processing ligand {ligtext}') + if ligtype == 'PEPTIDE': + logger.info(f'chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode') + if ligtype == 'INTRA': + logger.info(f'chain {ligand.chain} will be processed in [INTRA-CHAIN] mode') + any_in_biolip = len(set([x[0] for x in ligand.members]).intersection(config.biolip_list)) != 0 + + if ligtype not in ['POLYMER', 'DNA', 'ION', 'DNA+ION', 'RNA+ION', 'SMALLMOLECULE+ION'] and any_in_biolip: + logger.info('may be biologically irrelevant') + + lig_obj = Ligand(self, ligand) + cutoff = lig_obj.max_dist_to_center + config.BS_DIST + bs_res = self.extract_bs(cutoff, lig_obj.centroid, self.resis) + # Get a list of all atoms belonging to the binding site, search by idx + bs_atoms = [self.atoms[idx] for idx in [i for i in self.atoms.keys() + if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res] + if idx in self.Mapper.proteinmap and self.Mapper.mapid(idx, mtype='protein') not in self.altconf] + if ligand.type == 'PEPTIDE': + # If peptide, don't consider the peptide chain as part of the protein binding site + bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain] + if ligand.type == 'INTRA': + # Interactions within the chain + bs_atoms = [a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain] + bs_atoms_refined = [] + + # Create hash with BSRES -> (MINDIST_TO_LIG, AA_TYPE) + # and refine binding site atom selection with exact threshold + min_dist = {} + for r in bs_atoms: + bs_res_id = ''.join([str(whichresnumber(r)), whichchain(r)]) + for l in ligand.mol.atoms: + distance = euclidean3d(r.coords, l.coords) + if bs_res_id not in min_dist: + min_dist[bs_res_id] = (distance, whichrestype(r)) + elif min_dist[bs_res_id][0] > distance: + min_dist[bs_res_id] = (distance, whichrestype(r)) + if distance <= config.BS_DIST and r not in bs_atoms_refined: + bs_atoms_refined.append(r) + num_bs_atoms = len(bs_atoms_refined) + logger.info(f'binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})') + + bs_obj = BindingSite(bs_atoms_refined, self.protcomplex, self, self.altconf, min_dist, self.Mapper) + pli_obj = PLInteraction(lig_obj, bs_obj, self) + self.interaction_sets[ligand.mol.title] = pli_obj + + def extract_bs(self, cutoff, ligcentroid, resis): + """Return list of ids from residues belonging to the binding site""" + return [obres.GetIdx() for obres in resis if self.res_belongs_to_bs(obres, cutoff, ligcentroid)] + + def res_belongs_to_bs(self, res, cutoff, ligcentroid): + """Check for each residue if its centroid is within a certain distance to the ligand centroid. + Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)""" + rescentroid = centroid([(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)]) + # Check geometry + near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False + # Check chain membership + restricted_chain = True if res.GetChain() in config.PEPTIDES else False + return (near_enough and not restricted_chain) + + def get_atom(self, idx): + return self.atoms[idx] + + @property + def output_path(self): + return self._output_path + + @output_path.setter + def output_path(self, path): + self._output_path = tilde_expansion(path) -- cgit v1.2.3