diff options
author | Navan Chauhan <navanchauhan@gmail.com> | 2020-09-11 16:18:38 +0530 |
---|---|---|
committer | Navan Chauhan <navanchauhan@gmail.com> | 2020-09-11 16:18:38 +0530 |
commit | 9dadfdb3332b073aaff508d126e90200ad09868d (patch) | |
tree | 1578ff007b42b2175691f3dab11566611ec44907 /plip/structure | |
parent | 004f4513c8cd5cfffbf69484fb39a3d7bc98bd49 (diff) |
removed downloaded plip
Diffstat (limited to 'plip/structure')
-rw-r--r-- | plip/structure/__init__.py | 0 | ||||
-rw-r--r-- | plip/structure/detection.py | 479 | ||||
-rw-r--r-- | plip/structure/preparation.py | 2315 |
3 files changed, 0 insertions, 2794 deletions
diff --git a/plip/structure/__init__.py b/plip/structure/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/plip/structure/__init__.py +++ /dev/null diff --git a/plip/structure/detection.py b/plip/structure/detection.py deleted file mode 100644 index 71fec63..0000000 --- a/plip/structure/detection.py +++ /dev/null @@ -1,479 +0,0 @@ -import itertools -from collections import defaultdict -from collections import namedtuple - -import numpy as np -from openbabel.openbabel import OBAtomAtomIter - -from plip.basic import config, logger -from plip.basic.supplemental import vecangle, vector, euclidean3d, projection -from plip.basic.supplemental import whichresnumber, whichrestype, whichchain - -logger = logger.get_logger() - -def filter_contacts(pairings): - """Filter interactions by two criteria: - 1. No interactions between the same residue (important for intra mode). - 2. No duplicate interactions (A with B and B with A, also important for intra mode).""" - if not config.INTRA: - return pairings - filtered1_pairings = [p for p in pairings if (p.resnr, p.reschain) != (p.resnr_l, p.reschain_l)] - already_considered = [] - filtered2_pairings = [] - for contact in filtered1_pairings: - try: - dist = 'D{}'.format(round(contact.distance, 2)) - except AttributeError: - try: - dist = 'D{}'.format(round(contact.distance_ah, 2)) - except AttributeError: - dist = 'D{}'.format(round(contact.distance_aw, 2)) - res1, res2 = ''.join([str(contact.resnr), contact.reschain]), ''.join( - [str(contact.resnr_l), contact.reschain_l]) - data = {res1, res2, dist} - if data not in already_considered: - filtered2_pairings.append(contact) - already_considered.append(data) - return filtered2_pairings - - -################################################## -# FUNCTIONS FOR DETECTION OF SPECIFIC INTERACTIONS -################################################## - -def hydrophobic_interactions(atom_set_a, atom_set_b): - """Detection of hydrophobic pliprofiler between atom_set_a (binding site) and atom_set_b (ligand). - Definition: All pairs of qualified carbon atoms within a distance of HYDROPH_DIST_MAX - """ - data = namedtuple('hydroph_interaction', 'bsatom bsatom_orig_idx ligatom ligatom_orig_idx ' - 'distance restype resnr reschain restype_l, resnr_l, reschain_l') - pairings = [] - for a, b in itertools.product(atom_set_a, atom_set_b): - if a.orig_idx == b.orig_idx: - continue - e = euclidean3d(a.atom.coords, b.atom.coords) - if not config.MIN_DIST < e < config.HYDROPH_DIST_MAX: - continue - restype, resnr, reschain = whichrestype(a.atom), whichresnumber(a.atom), whichchain(a.atom) - restype_l, resnr_l, reschain_l = whichrestype(b.orig_atom), whichresnumber(b.orig_atom), whichchain(b.orig_atom) - contact = data(bsatom=a.atom, bsatom_orig_idx=a.orig_idx, ligatom=b.atom, ligatom_orig_idx=b.orig_idx, - distance=e, restype=restype, resnr=resnr, - reschain=reschain, restype_l=restype_l, - resnr_l=resnr_l, reschain_l=reschain_l) - pairings.append(contact) - return filter_contacts(pairings) - - -def hbonds(acceptors, donor_pairs, protisdon, typ): - """Detection of hydrogen bonds between sets of acceptors and donor pairs. - Definition: All pairs of hydrogen bond acceptor and donors with - donor hydrogens and acceptor showing a distance within HBOND DIST MIN and HBOND DIST MAX - and donor angles above HBOND_DON_ANGLE_MIN - """ - data = namedtuple('hbond', 'a a_orig_idx d d_orig_idx h distance_ah distance_ad angle type protisdon resnr ' - 'restype reschain resnr_l restype_l reschain_l sidechain atype dtype') - pairings = [] - for acc, don in itertools.product(acceptors, donor_pairs): - if not typ == 'strong': - continue - # Regular (strong) hydrogen bonds - dist_ah = euclidean3d(acc.a.coords, don.h.coords) - dist_ad = euclidean3d(acc.a.coords, don.d.coords) - if not config.MIN_DIST < dist_ad < config.HBOND_DIST_MAX: - continue - vec1, vec2 = vector(don.h.coords, don.d.coords), vector(don.h.coords, acc.a.coords) - v = vecangle(vec1, vec2) - if not v > config.HBOND_DON_ANGLE_MIN: - continue - protatom = don.d.OBAtom if protisdon else acc.a.OBAtom - ligatom = don.d.OBAtom if not protisdon else acc.a.OBAtom - is_sidechain_hbond = protatom.GetResidue().GetAtomProperty(protatom, 8) # Check if sidechain atom - resnr = whichresnumber(don.d) if protisdon else whichresnumber(acc.a) - resnr_l = whichresnumber(acc.a_orig_atom) if protisdon else whichresnumber(don.d_orig_atom) - restype = whichrestype(don.d) if protisdon else whichrestype(acc.a) - restype_l = whichrestype(acc.a_orig_atom) if protisdon else whichrestype(don.d_orig_atom) - reschain = whichchain(don.d) if protisdon else whichchain(acc.a) - rechain_l = whichchain(acc.a_orig_atom) if protisdon else whichchain(don.d_orig_atom) - # Next line prevents H-Bonds within amino acids in intermolecular interactions - if config.INTRA is not None and whichresnumber(don.d) == whichresnumber(acc.a): - continue - # Next line prevents backbone-backbone H-Bonds - if config.INTRA is not None and protatom.GetResidue().GetAtomProperty(protatom, - 8) and ligatom.GetResidue().GetAtomProperty( - ligatom, 8): - continue - contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, d=don.d, d_orig_idx=don.d_orig_idx, h=don.h, - distance_ah=dist_ah, distance_ad=dist_ad, angle=v, type=typ, protisdon=protisdon, - resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, - restype_l=restype_l, reschain_l=rechain_l, sidechain=is_sidechain_hbond, - atype=acc.a.type, dtype=don.d.type) - pairings.append(contact) - return filter_contacts(pairings) - - -def pistacking(rings_bs, rings_lig): - """Return all pi-stackings between the given aromatic ring systems in receptor and ligand.""" - data = namedtuple( - 'pistack', - 'proteinring ligandring distance angle offset type restype resnr reschain restype_l resnr_l reschain_l') - pairings = [] - for r, l in itertools.product(rings_bs, rings_lig): - # DISTANCE AND RING ANGLE CALCULATION - d = euclidean3d(r.center, l.center) - b = vecangle(r.normal, l.normal) - a = min(b, 180 - b if not 180 - b < 0 else b) # Smallest of two angles, depending on direction of normal - - # RING CENTER OFFSET CALCULATION (project each ring center into the other ring) - proj1 = projection(l.normal, l.center, r.center) - proj2 = projection(r.normal, r.center, l.center) - offset = min(euclidean3d(proj1, l.center), euclidean3d(proj2, r.center)) - - # RECEPTOR DATA - resnr, restype, reschain = whichresnumber(r.atoms[0]), whichrestype(r.atoms[0]), whichchain(r.atoms[0]) - resnr_l, restype_l, reschain_l = whichresnumber(l.orig_atoms[0]), whichrestype( - l.orig_atoms[0]), whichchain(l.orig_atoms[0]) - - # SELECTION BY DISTANCE, ANGLE AND OFFSET - passed = False - if not config.MIN_DIST < d < config.PISTACK_DIST_MAX: - continue - if 0 < a < config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: - ptype = 'P' - passed = True - if 90 - config.PISTACK_ANG_DEV < a < 90 + config.PISTACK_ANG_DEV and offset < config.PISTACK_OFFSET_MAX: - ptype = 'T' - passed = True - if passed: - contact = data(proteinring=r, ligandring=l, distance=d, angle=a, offset=offset, - type=ptype, resnr=resnr, restype=restype, reschain=reschain, - resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) - pairings.append(contact) - return filter_contacts(pairings) - - -def pication(rings, pos_charged, protcharged): - """Return all pi-Cation interaction between aromatic rings and positively charged groups. - For tertiary and quaternary amines, check also the angle between the ring and the nitrogen. - """ - data = namedtuple( - 'pication', 'ring charge distance offset type restype resnr reschain restype_l resnr_l reschain_l protcharged') - pairings = [] - if len(rings) == 0 or len(pos_charged) == 0: - return pairings - for ring in rings: - c = ring.center - for p in pos_charged: - d = euclidean3d(c, p.center) - # Project the center of charge into the ring and measure distance to ring center - proj = projection(ring.normal, ring.center, p.center) - offset = euclidean3d(proj, ring.center) - if not config.MIN_DIST < d < config.PICATION_DIST_MAX or not offset < config.PISTACK_OFFSET_MAX: - continue - if type(p).__name__ == 'lcharge' and p.fgroup == 'tertamine': - # Special case here if the ligand has a tertiary amine, check an additional angle - # Otherwise, we might have have a pi-cation interaction 'through' the ligand - n_atoms = [a_neighbor for a_neighbor in OBAtomAtomIter(p.atoms[0].OBAtom)] - n_atoms_coords = [(a.x(), a.y(), a.z()) for a in n_atoms] - amine_normal = np.cross(vector(n_atoms_coords[0], n_atoms_coords[1]), - vector(n_atoms_coords[2], n_atoms_coords[0])) - b = vecangle(ring.normal, amine_normal) - # Smallest of two angles, depending on direction of normal - a = min(b, 180 - b if not 180 - b < 0 else b) - if not a > 30.0: - resnr, restype = whichresnumber(ring.atoms[0]), whichrestype(ring.atoms[0]) - reschain = whichchain(ring.atoms[0]) - resnr_l, restype_l = whichresnumber(p.orig_atoms[0]), whichrestype(p.orig_atoms[0]) - reschain_l = whichchain(p.orig_atoms[0]) - contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', - restype=restype, resnr=resnr, reschain=reschain, - restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, - protcharged=protcharged) - pairings.append(contact) - break - resnr = whichresnumber(p.atoms[0]) if protcharged else whichresnumber(ring.atoms[0]) - resnr_l = whichresnumber(ring.orig_atoms[0]) if protcharged else whichresnumber(p.orig_atoms[0]) - restype = whichrestype(p.atoms[0]) if protcharged else whichrestype(ring.atoms[0]) - restype_l = whichrestype(ring.orig_atoms[0]) if protcharged else whichrestype(p.orig_atoms[0]) - reschain = whichchain(p.atoms[0]) if protcharged else whichchain(ring.atoms[0]) - reschain_l = whichchain(ring.orig_atoms[0]) if protcharged else whichchain(p.orig_atoms[0]) - contact = data(ring=ring, charge=p, distance=d, offset=offset, type='regular', restype=restype, - resnr=resnr, reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, - reschain_l=reschain_l, protcharged=protcharged) - pairings.append(contact) - return filter_contacts(pairings) - - -def saltbridge(poscenter, negcenter, protispos): - """Detect all salt bridges (pliprofiler between centers of positive and negative charge)""" - data = namedtuple( - 'saltbridge', 'positive negative distance protispos resnr restype reschain resnr_l restype_l reschain_l') - pairings = [] - for pc, nc in itertools.product(poscenter, negcenter): - if not config.MIN_DIST < euclidean3d(pc.center, nc.center) < config.SALTBRIDGE_DIST_MAX: - continue - resnr = pc.resnr if protispos else nc.resnr - resnr_l = whichresnumber(nc.orig_atoms[0]) if protispos else whichresnumber(pc.orig_atoms[0]) - restype = pc.restype if protispos else nc.restype - restype_l = whichrestype(nc.orig_atoms[0]) if protispos else whichrestype(pc.orig_atoms[0]) - reschain = pc.reschain if protispos else nc.reschain - reschain_l = whichchain(nc.orig_atoms[0]) if protispos else whichchain(pc.orig_atoms[0]) - contact = data(positive=pc, negative=nc, distance=euclidean3d(pc.center, nc.center), protispos=protispos, - resnr=resnr, restype=restype, reschain=reschain, resnr_l=resnr_l, restype_l=restype_l, - reschain_l=reschain_l) - pairings.append(contact) - return filter_contacts(pairings) - - -def halogen(acceptor, donor): - """Detect all halogen bonds of the type Y-O...X-C""" - data = namedtuple('halogenbond', 'acc acc_orig_idx don don_orig_idx distance don_angle acc_angle restype ' - 'resnr reschain restype_l resnr_l reschain_l donortype acctype sidechain') - pairings = [] - for acc, don in itertools.product(acceptor, donor): - dist = euclidean3d(acc.o.coords, don.x.coords) - if not config.MIN_DIST < dist < config.HALOGEN_DIST_MAX: - continue - vec1, vec2 = vector(acc.o.coords, acc.y.coords), vector(acc.o.coords, don.x.coords) - vec3, vec4 = vector(don.x.coords, acc.o.coords), vector(don.x.coords, don.c.coords) - acc_angle, don_angle = vecangle(vec1, vec2), vecangle(vec3, vec4) - is_sidechain_hal = acc.o.OBAtom.GetResidue().GetAtomProperty(acc.o.OBAtom, 8) # Check if sidechain atom - if not config.HALOGEN_ACC_ANGLE - config.HALOGEN_ANGLE_DEV < acc_angle \ - < config.HALOGEN_ACC_ANGLE + config.HALOGEN_ANGLE_DEV: - continue - if not config.HALOGEN_DON_ANGLE - config.HALOGEN_ANGLE_DEV < don_angle \ - < config.HALOGEN_DON_ANGLE + config.HALOGEN_ANGLE_DEV: - continue - restype, reschain, resnr = whichrestype(acc.o), whichchain(acc.o), whichresnumber(acc.o) - restype_l, reschain_l, resnr_l = whichrestype(don.orig_x), whichchain(don.orig_x), whichresnumber(don.orig_x) - contact = data(acc=acc, acc_orig_idx=acc.o_orig_idx, don=don, don_orig_idx=don.x_orig_idx, - distance=dist, don_angle=don_angle, acc_angle=acc_angle, - restype=restype, resnr=resnr, - reschain=reschain, restype_l=restype_l, - reschain_l=reschain_l, resnr_l=resnr_l, donortype=don.x.OBAtom.GetType(), acctype=acc.o.type, - sidechain=is_sidechain_hal) - pairings.append(contact) - return filter_contacts(pairings) - - -def water_bridges(bs_hba, lig_hba, bs_hbd, lig_hbd, water): - """Find water-bridged hydrogen bonds between ligand and protein. For now only considers bridged of first degree.""" - data = namedtuple('waterbridge', 'a a_orig_idx atype d d_orig_idx dtype h water water_orig_idx distance_aw ' - 'distance_dw d_angle w_angle type resnr restype reschain resnr_l restype_l reschain_l protisdon') - pairings = [] - # First find all acceptor-water pairs with distance within d - # and all donor-water pairs with distance within d and angle greater theta - lig_aw, prot_aw, lig_dw, prot_hw = [], [], [], [] - for w in water: - for acc1 in lig_hba: - dist = euclidean3d(acc1.a.coords, w.oxy.coords) - if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST: - lig_aw.append((acc1, w, dist)) - for acc2 in bs_hba: - dist = euclidean3d(acc2.a.coords, w.oxy.coords) - if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST: - prot_aw.append((acc2, w, dist)) - for don1 in lig_hbd: - dist = euclidean3d(don1.d.coords, w.oxy.coords) - d_angle = vecangle(vector(don1.h.coords, don1.d.coords), vector(don1.h.coords, w.oxy.coords)) - if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ - and d_angle > config.WATER_BRIDGE_THETA_MIN: - lig_dw.append((don1, w, dist, d_angle)) - for don2 in bs_hbd: - dist = euclidean3d(don2.d.coords, w.oxy.coords) - d_angle = vecangle(vector(don2.h.coords, don2.d.coords), vector(don2.h.coords, w.oxy.coords)) - if config.WATER_BRIDGE_MINDIST <= dist <= config.WATER_BRIDGE_MAXDIST \ - and d_angle > config.WATER_BRIDGE_THETA_MIN: - prot_hw.append((don2, w, dist, d_angle)) - - for l, p in itertools.product(lig_aw, prot_hw): - acc, wl, distance_aw = l - don, wd, distance_dw, d_angle = p - if not wl.oxy == wd.oxy: - continue - # Same water molecule and angle within omega - w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) - if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: - continue - resnr, reschain, restype = whichresnumber(don.d), whichchain(don.d), whichrestype(don.d) - resnr_l, reschain_l, restype_l = whichresnumber(acc.a_orig_atom), whichchain( - acc.a_orig_atom), whichrestype(acc.a_orig_atom) - contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, - dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, - distance_aw=distance_aw, distance_dw=distance_dw, d_angle=d_angle, w_angle=w_angle, - type='first_deg', resnr=resnr, restype=restype, - reschain=reschain, restype_l=restype_l, resnr_l=resnr_l, reschain_l=reschain_l, protisdon=True) - pairings.append(contact) - for p, l in itertools.product(prot_aw, lig_dw): - acc, wl, distance_aw = p - don, wd, distance_dw, d_angle = l - if not wl.oxy == wd.oxy: - continue - # Same water molecule and angle within omega - w_angle = vecangle(vector(acc.a.coords, wl.oxy.coords), vector(wl.oxy.coords, don.h.coords)) - if not config.WATER_BRIDGE_OMEGA_MIN < w_angle < config.WATER_BRIDGE_OMEGA_MAX: - continue - resnr, reschain, restype = whichresnumber(acc.a), whichchain(acc.a), whichrestype(acc.a) - resnr_l, reschain_l, restype_l = whichresnumber(don.d_orig_atom), whichchain( - don.d_orig_atom), whichrestype(don.d_orig_atom) - contact = data(a=acc.a, a_orig_idx=acc.a_orig_idx, atype=acc.a.type, d=don.d, d_orig_idx=don.d_orig_idx, - dtype=don.d.type, h=don.h, water=wl.oxy, water_orig_idx=wl.oxy_orig_idx, - distance_aw=distance_aw, distance_dw=distance_dw, - d_angle=d_angle, w_angle=w_angle, type='first_deg', resnr=resnr, - restype=restype, reschain=reschain, - restype_l=restype_l, reschain_l=reschain_l, resnr_l=resnr_l, protisdon=False) - pairings.append(contact) - return filter_contacts(pairings) - - -def metal_complexation(metals, metal_binding_lig, metal_binding_bs): - """Find all metal complexes between metals and appropriate groups in both protein and ligand, as well as water""" - data = namedtuple('metal_complex', 'metal metal_orig_idx metal_type target target_orig_idx target_type ' - 'coordination_num distance resnr restype ' - 'reschain restype_l reschain_l resnr_l location rms, geometry num_partners complexnum') - pairings_dict = {} - pairings = [] - # #@todo Refactor - metal_to_id = {} - metal_to_orig_atom = {} - for metal, target in itertools.product(metals, metal_binding_lig + metal_binding_bs): - distance = euclidean3d(metal.m.coords, target.atom.coords) - if not distance < config.METAL_DIST_MAX: - continue - if metal.m not in pairings_dict: - pairings_dict[metal.m] = [(target, distance), ] - metal_to_id[metal.m] = metal.m_orig_idx - metal_to_orig_atom[metal.m] = metal.orig_m - else: - pairings_dict[metal.m].append((target, distance)) - for cnum, metal in enumerate(pairings_dict): - rms = 0.0 - excluded = [] - # cnum +1 being the complex number - contact_pairs = pairings_dict[metal] - num_targets = len(contact_pairs) - vectors_dict = defaultdict(list) - for contact_pair in contact_pairs: - target, distance = contact_pair - vectors_dict[target.atom.idx].append(vector(metal.coords, target.atom.coords)) - - # Listing of coordination numbers and their geometries - configs = {2: ['linear', ], - 3: ['trigonal.planar', 'trigonal.pyramidal'], - 4: ['tetrahedral', 'square.planar'], - 5: ['trigonal.bipyramidal', 'square.pyramidal'], - 6: ['octahedral', ]} - - # Angle signatures for each geometry (as seen from each target atom) - ideal_angles = {'linear': [[180.0]] * 2, - 'trigonal.planar': [[120.0, 120.0]] * 3, - 'trigonal.pyramidal': [[109.5, 109.5]] * 3, - 'tetrahedral': [[109.5, 109.5, 109.5, 109.5]] * 4, - 'square.planar': [[90.0, 90.0, 90.0, 90.0]] * 4, - 'trigonal.bipyramidal': [[120.0, 120.0, 90.0, 90.0]] * 3 + [[90.0, 90.0, 90.0, 180.0]] * 2, - 'square.pyramidal': [[90.0, 90.0, 90.0, 180.0]] * 4 + [[90.0, 90.0, 90.0, 90.0]], - 'octahedral': [[90.0, 90.0, 90.0, 90.0, 180.0]] * 6} - angles_dict = {} - - for target in vectors_dict: - cur_vector = vectors_dict[target] - other_vectors = [] - for t in vectors_dict: - if not t == target: - [other_vectors.append(x) for x in vectors_dict[t]] - angles = [vecangle(pair[0], pair[1]) for pair in itertools.product(cur_vector, other_vectors)] - angles_dict[target] = angles - - all_total = [] # Record fit information for each geometry tested - gdata = namedtuple('gdata', 'geometry rms coordination excluded diff_targets') # Geometry Data - # Can't specify geometry with only one target - if num_targets == 1: - final_geom = 'NA' - final_coo = 1 - excluded = [] - rms = 0.0 - else: - for coo in sorted(configs, reverse=True): # Start with highest coordination number - geometries = configs[coo] - for geometry in geometries: - signature = ideal_angles[geometry] # Set of ideal angles for geometry, from each perspective - geometry_total = 0 - geometry_scores = [] # All scores for one geometry (from all subsignatures) - used_up_targets = [] # Use each target just once for a subsignature - not_used = [] - coo_diff = num_targets - coo # How many more observed targets are there? - - # Find best match for each subsignature - for subsignature in signature: # Ideal angles from one perspective - best_target = None # There's one best-matching target for each subsignature - best_target_score = 999 - - for k, target in enumerate(angles_dict): - if target not in used_up_targets: - observed_angles = angles_dict[target] # Observed angles from perspective of one target - single_target_scores = [] - used_up_observed_angles = [] - for i, ideal_angle in enumerate(subsignature): - # For each angle in the signature, find the best-matching observed angle - best_match = None - best_match_diff = 999 - for j, observed_angle in enumerate(observed_angles): - if j not in used_up_observed_angles: - diff = abs(ideal_angle - observed_angle) - if diff < best_match_diff: - best_match_diff = diff - best_match = j - if best_match is not None: - used_up_observed_angles.append(best_match) - single_target_scores.append(best_match_diff) - # Calculate RMS for target angles - target_total = sum([x ** 2 for x in single_target_scores]) ** 0.5 # Tot. score targ/sig - if target_total < best_target_score: - best_target_score = target_total - best_target = target - - used_up_targets.append(best_target) - geometry_scores.append(best_target_score) - # Total score is mean of RMS values - geometry_total = np.mean(geometry_scores) - # Record the targets not used for excluding them when deciding for a final geometry - [not_used.append(target) for target in angles_dict if target not in used_up_targets] - all_total.append(gdata(geometry=geometry, rms=geometry_total, coordination=coo, - excluded=not_used, diff_targets=coo_diff)) - - # Make a decision here. Starting with the geometry with lowest difference in ideal and observed partners ... - # Check if the difference between the RMS to the next best solution is not larger than 0.5 - if not num_targets == 1: # Can't decide for any geoemtry in that case - all_total = sorted(all_total, key=lambda x: abs(x.diff_targets)) - for i, total in enumerate(all_total): - next_total = all_total[i + 1] - this_rms, next_rms = total.rms, next_total.rms - diff_to_next = next_rms - this_rms - if diff_to_next > 0.5: - final_geom, final_coo, rms, excluded = total.geometry, total.coordination, total.rms, total.excluded - break - elif next_total.rms < 3.5: - final_geom, final_coo, = next_total.geometry, next_total.coordination - rms, excluded = next_total.rms, next_total.excluded - break - elif i == len(all_total) - 2: - final_geom, final_coo, rms, excluded = "NA", "NA", float('nan'), [] - break - - # Record all contact pairing, excluding those with targets superfluous for chosen geometry - only_water = set([x[0].location for x in contact_pairs]) == {'water'} - if not only_water: # No complex if just with water as targets - logger.info(f'metal ion {metal.type} complexed with {final_geom} geometry (coo. number {final_coo}/ {num_targets} observed)') - for contact_pair in contact_pairs: - target, distance = contact_pair - if target.atom.idx not in excluded: - metal_orig_atom = metal_to_orig_atom[metal] - restype_l, reschain_l, resnr_l = whichrestype(metal_orig_atom), whichchain( - metal_orig_atom), whichresnumber(metal_orig_atom) - contact = data(metal=metal, metal_orig_idx=metal_to_id[metal], metal_type=metal.type, - target=target, target_orig_idx=target.atom_orig_idx, target_type=target.type, - coordination_num=final_coo, distance=distance, resnr=target.resnr, - restype=target.restype, reschain=target.reschain, location=target.location, - rms=rms, geometry=final_geom, num_partners=num_targets, complexnum=cnum + 1, - resnr_l=resnr_l, restype_l=restype_l, reschain_l=reschain_l) - pairings.append(contact) - return filter_contacts(pairings) diff --git a/plip/structure/preparation.py b/plip/structure/preparation.py deleted file mode 100644 index 32083c7..0000000 --- a/plip/structure/preparation.py +++ /dev/null @@ -1,2315 +0,0 @@ -import itertools -import os -import re -import tempfile -from collections import namedtuple -from operator import itemgetter - -import numpy as np -from openbabel import pybel - -from plip.basic import config, logger -from plip.basic.supplemental import centroid, tilde_expansion, tmpfile, classify_by_name -from plip.basic.supplemental import ( - cluster_doubles, - is_lig, - normalize_vector, - vector, - ring_is_planar, -) -from plip.basic.supplemental import ( - extract_pdbid, - read_pdb, - create_folder_if_not_exists, - canonicalize, -) -from plip.basic.supplemental import read, nucleotide_linkage, sort_members_by_importance -from plip.basic.supplemental import ( - whichchain, - whichrestype, - whichresnumber, - euclidean3d, - int32_to_negative, -) -from plip.structure.detection import ( - halogen, - pication, - water_bridges, - metal_complexation, -) -from plip.structure.detection import ( - hydrophobic_interactions, - pistacking, - hbonds, - saltbridge, -) - -logger = logger.get_logger() - - -class PDBParser: - def __init__(self, pdbpath, as_string): - self.as_string = as_string - self.pdbpath = pdbpath - self.num_fixed_lines = 0 - self.covlinkage = namedtuple( - "covlinkage", "id1 chain1 pos1 conf1 id2 chain2 pos2 conf2" - ) - ( - self.proteinmap, - self.modres, - self.covalent, - self.altconformations, - self.corrected_pdb, - ) = self.parse_pdb() - - def parse_pdb(self): - """Extracts additional information from PDB files. - I. When reading in a PDB file, OpenBabel numbers ATOMS and HETATOMS continously. - In PDB files, TER records are also counted, leading to a different numbering system. - This functions reads in a PDB file and provides a mapping as a dictionary. - II. Additionally, it returns a list of modified residues. - III. Furthermore, covalent linkages between ligands and protein residues/other ligands are identified - IV. Alternative conformations - """ - if self.as_string: - fil = self.pdbpath.rstrip("\n").split( - "\n" - ) # Removing trailing newline character - else: - f = read(self.pdbpath) - fil = f.readlines() - f.close() - corrected_lines = [] - i, j = 0, 0 # idx and PDB numbering - d = {} - modres = set() - covalent = [] - alt = [] - previous_ter = False - - # Standard without fixing - if not config.NOFIX: - if not config.PLUGIN_MODE: - lastnum = 0 # Atom numbering (has to be consecutive) - other_models = False - for line in fil: - if ( - not other_models - ): # Only consider the first model in an NRM structure - corrected_line, newnum = self.fix_pdbline(line, lastnum) - if corrected_line is not None: - if corrected_line.startswith("MODEL"): - try: # Get number of MODEL (1,2,3) - model_num = int(corrected_line[10:14]) - if model_num > 1: # MODEL 2,3,4 etc. - other_models = True - except ValueError: - logger.debug( - f"ignoring invalid MODEL entry: {corrected_line}" - ) - corrected_lines.append(corrected_line) - lastnum = newnum - corrected_pdb = "".join(corrected_lines) - else: - corrected_pdb = self.pdbpath - corrected_lines = fil - else: - corrected_pdb = self.pdbpath - corrected_lines = fil - - for line in corrected_lines: - if line.startswith(("ATOM", "HETATM")): - # Retrieve alternate conformations - atomid, location = int(line[6:11]), line[16] - location = "A" if location == " " else location - if location != "A": - alt.append(atomid) - - if not previous_ter: - i += 1 - j += 1 - else: - i += 1 - j += 2 - d[i] = j - previous_ter = False - # Numbering Changes at TER records - if line.startswith("TER"): - previous_ter = True - # Get modified residues - if line.startswith("MODRES"): - modres.add(line[12:15].strip()) - # Get covalent linkages between ligands - if line.startswith("LINK"): - covalent.append(self.get_linkage(line)) - return d, modres, covalent, alt, corrected_pdb - - def fix_pdbline(self, pdbline, lastnum): - """Fix a PDB line if information is missing.""" - pdbqt_conversion = { - "HD": "H", - "HS": "H", - "NA": "N", - "NS": "N", - "OA": "O", - "OS": "O", - "SA": "S", - } - fixed = False - new_num = 0 - forbidden_characters = "[^a-zA-Z0-9_]" - pdbline = pdbline.strip("\n") - # Some MD / Docking tools produce empty lines, leading to segfaults - if len(pdbline.strip()) == 0: - self.num_fixed_lines += 1 - return None, lastnum - if len(pdbline) > 100: # Should be 80 long - self.num_fixed_lines += 1 - return None, lastnum - # TER Entries also have continuing numbering, consider them as well - if pdbline.startswith("TER"): - new_num = lastnum + 1 - if pdbline.startswith("ATOM"): - new_num = lastnum + 1 - current_num = int(pdbline[6:11]) - resnum = pdbline[22:27].strip() - resname = pdbline[17:21].strip() - # Invalid residue number - try: - int(resnum) - except ValueError: - pdbline = pdbline[:22] + " 0 " + pdbline[27:] - fixed = True - # Invalid characters in residue name - if re.match(forbidden_characters, resname.strip()): - pdbline = pdbline[:17] + "UNK " + pdbline[21:] - fixed = True - if lastnum + 1 != current_num: - pdbline = ( - pdbline[:6] - + (5 - len(str(new_num))) * " " - + str(new_num) - + " " - + pdbline[12:] - ) - fixed = True - # No chain assigned - if pdbline[21] == " ": - pdbline = pdbline[:21] + "A" + pdbline[22:] - fixed = True - if pdbline.endswith("H"): - self.num_fixed_lines += 1 - return None, lastnum - # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. - for pdbqttype in pdbqt_conversion: - if pdbline.strip().endswith(pdbqttype): - pdbline = ( - pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + "\n" - ) - self.num_fixed_lines += 1 - if pdbline.startswith("HETATM"): - new_num = lastnum + 1 - try: - current_num = int(pdbline[6:11]) - except ValueError: - current_num = None - logger.debug(f"invalid HETATM entry: {pdbline}") - if lastnum + 1 != current_num: - pdbline = ( - pdbline[:6] - + (5 - len(str(new_num))) * " " - + str(new_num) - + " " - + pdbline[12:] - ) - fixed = True - # No chain assigned or number assigned as chain - if pdbline[21] == " ": - pdbline = pdbline[:21] + "Z" + pdbline[22:] - fixed = True - # No residue number assigned - if pdbline[23:26] == " ": - pdbline = pdbline[:23] + "999" + pdbline[26:] - fixed = True - # Non-standard Ligand Names - ligname = pdbline[17:21].strip() - if len(ligname) > 3: - pdbline = pdbline[:17] + ligname[:3] + " " + pdbline[21:] - fixed = True - if re.match(forbidden_characters, ligname.strip()): - pdbline = pdbline[:17] + "LIG " + pdbline[21:] - fixed = True - if len(ligname.strip()) == 0: - pdbline = pdbline[:17] + "LIG " + pdbline[21:] - fixed = True - if pdbline.endswith("H"): - self.num_fixed_lines += 1 - return None, lastnum - # Sometimes, converted PDB structures contain PDBQT atom types. Fix that. - for pdbqttype in pdbqt_conversion: - if pdbline.strip().endswith(pdbqttype): - pdbline = ( - pdbline.strip()[:-2] + " " + pdbqt_conversion[pdbqttype] + " " - ) - self.num_fixed_lines += 1 - self.num_fixed_lines += 1 if fixed else 0 - return pdbline + "\n", max(new_num, lastnum) - - def get_linkage(self, line): - """Get the linkage information from a LINK entry PDB line.""" - conf1, id1, chain1, pos1 = ( - line[16].strip(), - line[17:20].strip(), - line[21].strip(), - int(line[22:26]), - ) - conf2, id2, chain2, pos2 = ( - line[46].strip(), - line[47:50].strip(), - line[51].strip(), - int(line[52:56]), - ) - return self.covlinkage( - id1=id1, - chain1=chain1, - pos1=pos1, - conf1=conf1, - id2=id2, - chain2=chain2, - pos2=pos2, - conf2=conf2, - ) - - -class LigandFinder: - def __init__(self, proteincomplex, altconf, modres, covalent, mapper): - self.lignames_all = None - self.lignames_kept = None - self.water = None - self.proteincomplex = proteincomplex - self.altconformations = altconf - self.modresidues = modres - self.covalent = covalent - self.mapper = mapper - self.ligands = self.getligs() - self.excluded = sorted( - list(self.lignames_all.difference(set(self.lignames_kept))) - ) - - def getpeptides(self, chain): - """If peptide ligand chains are defined via the command line options, - try to extract the underlying ligand formed by all residues in the - given chain without water - """ - all_from_chain = [ - o - for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) - if o.GetChain() == chain - ] # All residues from chain - if len(all_from_chain) == 0: - return None - else: - non_water = [o for o in all_from_chain if not o.GetResidueProperty(9)] - ligand = self.extract_ligand(non_water) - return ligand - - def getligs(self): - """Get all ligands from a PDB file and prepare them for analysis. - Returns all non-empty ligands. - """ - - if config.PEPTIDES == [] and config.INTRA is None: - # Extract small molecule ligands (default) - ligands = [] - - # Filter for ligands using lists - ligand_residues, self.lignames_all, self.water = self.filter_for_ligands() - - all_res_dict = { - (a.GetName(), a.GetChain(), a.GetNum()): a for a in ligand_residues - } - self.lignames_kept = list(set([a.GetName() for a in ligand_residues])) - - if not config.BREAKCOMPOSITE: - # Update register of covalent links with those between DNA/RNA subunits - self.covalent += nucleotide_linkage(all_res_dict) - # Find fragment linked by covalent bonds - res_kmers = self.identify_kmers(all_res_dict) - else: - res_kmers = [[a,] for a in ligand_residues] - logger.debug( - f"{len(res_kmers)} ligand kmer(s) detected for closer inspection" - ) - for ( - kmer - ) in ( - res_kmers - ): # iterate over all ligands and extract molecules + information - if len(kmer) > config.MAX_COMPOSITE_LENGTH: - logger.debug( - f"ligand kmer(s) filtered out with a length of {len(kmer)} fragments ({config.MAX_COMPOSITE_LENGTH} allowed)" - ) - else: - ligands.append(self.extract_ligand(kmer)) - - else: - # Extract peptides from given chains - self.water = [ - o - for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) - if o.GetResidueProperty(9) - ] - if config.PEPTIDES: - peptide_ligands = [self.getpeptides(chain) for chain in config.PEPTIDES] - elif config.INTRA is not None: - peptide_ligands = [ - self.getpeptides(config.INTRA), - ] - - ligands = [p for p in peptide_ligands if p is not None] - self.covalent, self.lignames_kept, self.lignames_all = [], [], set() - - return [lig for lig in ligands if len(lig.mol.atoms) != 0] - - def extract_ligand(self, kmer): - """Extract the ligand by copying atoms and bonds and assign all information necessary for later steps.""" - data = namedtuple( - "ligand", - "mol hetid chain position water members longname type atomorder can_to_pdb", - ) - members = [ - (res.GetName(), res.GetChain(), int32_to_negative(res.GetNum())) - for res in kmer - ] - members = sort_members_by_importance(members) - rname, rchain, rnum = members[0] - logger.debug( - f"finalizing extraction for ligand {rname}:{rchain}:{rnum} with {len(kmer)} elements" - ) - names = [x[0] for x in members] - longname = "-".join([x[0] for x in members]) - - if config.PEPTIDES: - ligtype = "PEPTIDE" - elif config.INTRA is not None: - ligtype = "INTRA" - else: - # Classify a ligand by its HETID(s) - ligtype = classify_by_name(names) - logger.debug(f"ligand classified as {ligtype}") - - hetatoms = dict() - for obresidue in kmer: - cur_hetatoms = { - obatom.GetIdx(): obatom - for obatom in pybel.ob.OBResidueAtomIter(obresidue) - if obatom.GetAtomicNum() != 1 - } - if not config.ALTLOC: - # Remove alternative conformations (standard -> True) - ids_to_remove = [ - atom_id - for atom_id in hetatoms.keys() - if self.mapper.mapid(atom_id, mtype="protein", to="internal") - in self.altconformations - ] - for atom_id in ids_to_remove: - del cur_hetatoms[atom_id] - hetatoms.update(cur_hetatoms) - logger.debug(f"hetero atoms determined (n={len(hetatoms)})") - - lig = pybel.ob.OBMol() # new ligand mol - neighbours = dict() - for obatom in hetatoms.values(): # iterate over atom objects - idx = obatom.GetIdx() - lig.AddAtom(obatom) - # ids of all neighbours of obatom - neighbours[idx] = set( - [ - neighbour_atom.GetIdx() - for neighbour_atom in pybel.ob.OBAtomAtomIter(obatom) - ] - ) & set(hetatoms.keys()) - logger.debug(f"atom neighbours mapped") - - ############################################################## - # map the old atom idx of OBMol to the new idx of the ligand # - ############################################################## - - newidx = dict( - zip( - hetatoms.keys(), - [obatom.GetIdx() for obatom in pybel.ob.OBMolAtomIter(lig)], - ) - ) - mapold = dict(zip(newidx.values(), newidx)) - # copy the bonds - for obatom in hetatoms: - for neighbour_atom in neighbours[obatom]: - bond = hetatoms[obatom].GetBond(hetatoms[neighbour_atom]) - lig.AddBond(newidx[obatom], newidx[neighbour_atom], bond.GetBondOrder()) - lig = pybel.Molecule(lig) - - # For kmers, the representative ids are chosen (first residue of kmer) - lig.data.update({"Name": rname, "Chain": rchain, "ResNr": rnum}) - - # Check if a negative residue number is represented as a 32 bit integer - if rnum > 10 ** 5: - rnum = int32_to_negative(rnum) - - lig.title = ":".join((rname, rchain, str(rnum))) - self.mapper.ligandmaps[lig.title] = mapold - - logger.debug("renumerated molecule generated") - - if not config.NOPDBCANMAP: - atomorder = canonicalize(lig) - else: - atomorder = None - - can_to_pdb = {} - if atomorder is not None: - can_to_pdb = {atomorder[key - 1]: mapold[key] for key in mapold} - - ligand = data( - mol=lig, - hetid=rname, - chain=rchain, - position=rnum, - water=self.water, - members=members, - longname=longname, - type=ligtype, - atomorder=atomorder, - can_to_pdb=can_to_pdb, - ) - return ligand - - def is_het_residue(self, obres): - """Given an OBResidue, determines if the residue is indeed a possible ligand - in the PDB file""" - if not obres.GetResidueProperty(0): - # If the residue is NOT amino (0) - # It can be amino_nucleo, coenzme, ion, nucleo, protein, purine, pyrimidine, solvent - # In these cases, it is a ligand candidate - return True - else: - # Here, the residue is classified as amino - # Amino acids can still be ligands, so we check for HETATM entries - # Only residues with at least one HETATM entry are processed as ligands - het_atoms = [] - for atm in pybel.ob.OBResidueAtomIter(obres): - het_atoms.append(obres.IsHetAtom(atm)) - if True in het_atoms: - return True - return False - - def filter_for_ligands(self): - """Given an OpenBabel Molecule, get all ligands, their names, and water""" - - candidates1 = [ - o - for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) - if not o.GetResidueProperty(9) and self.is_het_residue(o) - ] - - if config.DNARECEPTOR: # If DNA is the receptor, don't consider DNA as a ligand - candidates1 = [ - res - for res in candidates1 - if res.GetName() not in config.DNA + config.RNA - ] - all_lignames = set([a.GetName() for a in candidates1]) - - water = [ - o - for o in pybel.ob.OBResidueIter(self.proteincomplex.OBMol) - if o.GetResidueProperty(9) - ] - # Filter out non-ligands - if not config.KEEPMOD: # Keep modified residues as ligands - candidates2 = [ - a - for a in candidates1 - if is_lig(a.GetName()) and a.GetName() not in self.modresidues - ] - else: - candidates2 = [a for a in candidates1 if is_lig(a.GetName())] - logger.debug(f"{len(candidates2)} ligand(s) after first filtering step") - - ############################################ - # Filtering by counting and artifacts list # - ############################################ - artifacts = [] - unique_ligs = set(a.GetName() for a in candidates2) - for ulig in unique_ligs: - # Discard if appearing 15 times or more and is possible artifact - if ( - ulig in config.biolip_list - and [a.GetName() for a in candidates2].count(ulig) >= 15 - ): - artifacts.append(ulig) - - selected_ligands = [a for a in candidates2 if a.GetName() not in artifacts] - - return selected_ligands, all_lignames, water - - def identify_kmers(self, residues): - """Using the covalent linkage information, find out which fragments/subunits form a ligand.""" - - # Remove all those not considered by ligands and pairings including alternate conformations - ligdoubles = [ - [(link.id1, link.chain1, link.pos1), (link.id2, link.chain2, link.pos2)] - for link in [ - c - for c in self.covalent - if c.id1 in self.lignames_kept - and c.id2 in self.lignames_kept - and c.conf1 in ["A", ""] - and c.conf2 in ["A", ""] - and (c.id1, c.chain1, c.pos1) in residues - and (c.id2, c.chain2, c.pos2) in residues - ] - ] - kmers = cluster_doubles(ligdoubles) - if not kmers: # No ligand kmers, just normal independent ligands - return [[residues[res]] for res in residues] - - else: - # res_kmers contains clusters of covalently bound ligand residues (kmer ligands) - res_kmers = [[residues[res] for res in kmer] for kmer in kmers] - - # In this case, add other ligands which are not part of a kmer - in_kmer = [] - for res_kmer in res_kmers: - for res in res_kmer: - in_kmer.append((res.GetName(), res.GetChain(), res.GetNum())) - for res in residues: - if res not in in_kmer: - newres = [ - residues[res], - ] - res_kmers.append(newres) - return res_kmers - - -class Mapper: - """Provides functions for mapping atom IDs in the correct way""" - - def __init__(self): - self.proteinmap = ( - None # Map internal atom IDs of protein residues to original PDB Atom IDs - ) - self.ligandmaps = ( - {} - ) # Map IDs of new ligand molecules to internal IDs (or PDB IDs?) - self.original_structure = None - - def mapid( - self, idx, mtype, bsid=None, to="original" - ): # Mapping to original IDs is standard for ligands - if mtype == "reversed": # Needed to map internal ID back to original protein ID - return self.reversed_proteinmap[idx] - if mtype == "protein": - return self.proteinmap[idx] - elif mtype == "ligand": - if to == "internal": - return self.ligandmaps[bsid][idx] - elif to == "original": - return self.proteinmap[self.ligandmaps[bsid][idx]] - - def id_to_atom(self, idx): - """Returns the atom for a given original ligand ID. - To do this, the ID is mapped to the protein first and then the atom returned. - """ - mapped_idx = self.mapid(idx, "reversed") - return pybel.Atom(self.original_structure.GetAtom(mapped_idx)) - - -class Mol: - def __init__(self, altconf, mapper, mtype, bsid): - self.mtype = mtype - self.bsid = bsid - self.rings = None - self.hydroph_atoms = None - self.charged = None - self.hbond_don_atom_pairs = None - self.hbond_acc_atoms = None - self.altconf = altconf - self.Mapper = mapper - - def hydrophobic_atoms(self, all_atoms): - """Select all carbon atoms which have only carbons and/or hydrogens as direct neighbors.""" - atom_set = [] - data = namedtuple("hydrophobic", "atom orig_atom orig_idx") - atm = [ - a - for a in all_atoms - if a.atomicnum == 6 - and set( - [natom.GetAtomicNum() for natom in pybel.ob.OBAtomAtomIter(a.OBAtom)] - ).issubset({1, 6}) - ] - for atom in atm: - orig_idx = self.Mapper.mapid(atom.idx, mtype=self.mtype, bsid=self.bsid) - orig_atom = self.Mapper.id_to_atom(orig_idx) - if atom.idx not in self.altconf: - atom_set.append(data(atom=atom, orig_atom=orig_atom, orig_idx=orig_idx)) - return atom_set - - def find_hba(self, all_atoms): - """Find all possible hydrogen bond acceptors""" - data = namedtuple("hbondacceptor", "a a_orig_atom a_orig_idx type") - a_set = [] - for atom in filter(lambda at: at.OBAtom.IsHbondAcceptor(), all_atoms): - if ( - atom.atomicnum not in [9, 17, 35, 53] and atom.idx not in self.altconf - ): # Exclude halogen atoms - a_orig_idx = self.Mapper.mapid( - atom.idx, mtype=self.mtype, bsid=self.bsid - ) - a_orig_atom = self.Mapper.id_to_atom(a_orig_idx) - a_set.append( - data( - a=atom, - a_orig_atom=a_orig_atom, - a_orig_idx=a_orig_idx, - type="regular", - ) - ) - a_set = sorted(a_set, key=lambda x: x.a_orig_idx) - return a_set - - def find_hbd(self, all_atoms, hydroph_atoms): - """Find all possible strong and weak hydrogen bonds donors (all hydrophobic C-H pairings)""" - donor_pairs = [] - data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type") - for donor in [ - a - for a in all_atoms - if a.OBAtom.IsHbondDonor() and a.idx not in self.altconf - ]: - in_ring = False - if not in_ring: - for adj_atom in [ - a - for a in pybel.ob.OBAtomAtomIter(donor.OBAtom) - if a.IsHbondDonorH() - ]: - d_orig_idx = self.Mapper.mapid( - donor.idx, mtype=self.mtype, bsid=self.bsid - ) - d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) - donor_pairs.append( - data( - d=donor, - d_orig_atom=d_orig_atom, - d_orig_idx=d_orig_idx, - h=pybel.Atom(adj_atom), - type="regular", - ) - ) - for carbon in hydroph_atoms: - for adj_atom in [ - a - for a in pybel.ob.OBAtomAtomIter(carbon.atom.OBAtom) - if a.GetAtomicNum() == 1 - ]: - d_orig_idx = self.Mapper.mapid( - carbon.atom.idx, mtype=self.mtype, bsid=self.bsid - ) - d_orig_atom = self.Mapper.id_to_atom(d_orig_idx) - donor_pairs.append( - data( - d=carbon, - d_orig_atom=d_orig_atom, - d_orig_idx=d_orig_idx, - h=pybel.Atom(adj_atom), - type="weak", - ) - ) - donor_pairs = sorted(donor_pairs, key=lambda x: (x.d_orig_idx, x.h.idx)) - return donor_pairs - - def find_rings(self, mol, all_atoms): - """Find rings and return only aromatic. - Rings have to be sufficiently planar OR be detected by OpenBabel as aromatic.""" - data = namedtuple( - "aromatic_ring", "atoms orig_atoms atoms_orig_idx normal obj center type" - ) - rings = [] - aromatic_amino = ["TYR", "TRP", "HIS", "PHE"] - ring_candidates = mol.OBMol.GetSSSR() - logger.debug(f"number of aromatic ring candidates: {len(ring_candidates)}") - # Check here first for ligand rings not being detected as aromatic by Babel and check for planarity - for ring in ring_candidates: - r_atoms = [a for a in all_atoms if ring.IsMember(a.OBAtom)] - r_atoms = sorted(r_atoms, key=lambda x: x.idx) - if 4 < len(r_atoms) <= 6: - res = list(set([whichrestype(a) for a in r_atoms])) - # re-sort ring atoms for only ligands, because HETATM numbering is not canonical in OpenBabel - if res[0] == "UNL": - ligand_orig_idx = [ - self.Mapper.ligandmaps[self.bsid][a.idx] for a in r_atoms - ] - sort_order = np.argsort(np.array(ligand_orig_idx)) - r_atoms = [r_atoms[i] for i in sort_order] - if ( - ring.IsAromatic() - or res[0] in aromatic_amino - or ring_is_planar(ring, r_atoms) - ): - # Causes segfault with OpenBabel 2.3.2, so deactivated - # typ = ring.GetType() if not ring.GetType() == '' else 'unknown' - # Alternative typing - ring_type = "%s-membered" % len(r_atoms) - ring_atms = [ - r_atoms[a].coords for a in [0, 2, 4] - ] # Probe atoms for normals, assuming planarity - ringv1 = vector(ring_atms[0], ring_atms[1]) - ringv2 = vector(ring_atms[2], ring_atms[0]) - atoms_orig_idx = [ - self.Mapper.mapid(r_atom.idx, mtype=self.mtype, bsid=self.bsid) - for r_atom in r_atoms - ] - orig_atoms = [self.Mapper.id_to_atom(idx) for idx in atoms_orig_idx] - rings.append( - data( - atoms=r_atoms, - orig_atoms=orig_atoms, - atoms_orig_idx=atoms_orig_idx, - normal=normalize_vector(np.cross(ringv1, ringv2)), - obj=ring, - center=centroid([ra.coords for ra in r_atoms]), - type=ring_type, - ) - ) - return rings - - def get_hydrophobic_atoms(self): - return self.hydroph_atoms - - def get_hba(self): - return self.hbond_acc_atoms - - def get_hbd(self): - return [ - don_pair - for don_pair in self.hbond_don_atom_pairs - if don_pair.type == "regular" - ] - - def get_weak_hbd(self): - return [ - don_pair - for don_pair in self.hbond_don_atom_pairs - if don_pair.type == "weak" - ] - - def get_pos_charged(self): - return [charge for charge in self.charged if charge.type == "positive"] - - def get_neg_charged(self): - return [charge for charge in self.charged if charge.type == "negative"] - - -class PLInteraction: - """Class to store a ligand, a protein and their interactions.""" - - def __init__(self, lig_obj, bs_obj, protcomplex): - """Detect all interactions when initializing""" - self.ligand = lig_obj - self.lig_members = lig_obj.members - self.pdbid = protcomplex.pymol_name - self.bindingsite = bs_obj - self.Mapper = protcomplex.Mapper - self.output_path = protcomplex.output_path - self.altconf = protcomplex.altconf - # #@todo Refactor code to combine different directionality - - self.saltbridge_lneg = saltbridge( - self.bindingsite.get_pos_charged(), self.ligand.get_neg_charged(), True - ) - self.saltbridge_pneg = saltbridge( - self.ligand.get_pos_charged(), self.bindingsite.get_neg_charged(), False - ) - - self.all_hbonds_ldon = hbonds( - self.bindingsite.get_hba(), self.ligand.get_hbd(), False, "strong" - ) - self.all_hbonds_pdon = hbonds( - self.ligand.get_hba(), self.bindingsite.get_hbd(), True, "strong" - ) - - self.hbonds_ldon = self.refine_hbonds_ldon( - self.all_hbonds_ldon, self.saltbridge_lneg, self.saltbridge_pneg - ) - self.hbonds_pdon = self.refine_hbonds_pdon( - self.all_hbonds_pdon, self.saltbridge_lneg, self.saltbridge_pneg - ) - - self.pistacking = pistacking(self.bindingsite.rings, self.ligand.rings) - - self.all_pi_cation_laro = pication( - self.ligand.rings, self.bindingsite.get_pos_charged(), True - ) - self.pication_paro = pication( - self.bindingsite.rings, self.ligand.get_pos_charged(), False - ) - - self.pication_laro = self.refine_pi_cation_laro( - self.all_pi_cation_laro, self.pistacking - ) - - self.all_hydrophobic_contacts = hydrophobic_interactions( - self.bindingsite.get_hydrophobic_atoms(), - self.ligand.get_hydrophobic_atoms(), - ) - self.hydrophobic_contacts = self.refine_hydrophobic( - self.all_hydrophobic_contacts, self.pistacking - ) - self.halogen_bonds = halogen( - self.bindingsite.halogenbond_acc, self.ligand.halogenbond_don - ) - self.water_bridges = water_bridges( - self.bindingsite.get_hba(), - self.ligand.get_hba(), - self.bindingsite.get_hbd(), - self.ligand.get_hbd(), - self.ligand.water, - ) - - self.water_bridges = self.refine_water_bridges( - self.water_bridges, self.hbonds_ldon, self.hbonds_pdon - ) - - self.metal_complexes = metal_complexation( - self.ligand.metals, - self.ligand.metal_binding, - self.bindingsite.metal_binding, - ) - - self.all_itypes = self.saltbridge_lneg + self.saltbridge_pneg + self.hbonds_pdon - self.all_itypes = ( - self.all_itypes - + self.hbonds_ldon - + self.pistacking - + self.pication_laro - + self.pication_paro - ) - self.all_itypes = ( - self.all_itypes - + self.hydrophobic_contacts - + self.halogen_bonds - + self.water_bridges - ) - self.all_itypes = self.all_itypes + self.metal_complexes - - self.no_interactions = all(len(i) == 0 for i in self.all_itypes) - ( - self.unpaired_hba, - self.unpaired_hbd, - self.unpaired_hal, - ) = self.find_unpaired_ligand() - self.unpaired_hba_orig_idx = [ - self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) - for atom in self.unpaired_hba - ] - self.unpaired_hbd_orig_idx = [ - self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) - for atom in self.unpaired_hbd - ] - self.unpaired_hal_orig_idx = [ - self.Mapper.mapid(atom.idx, mtype="ligand", bsid=self.ligand.bsid) - for atom in self.unpaired_hal - ] - self.num_unpaired_hba, self.num_unpaired_hbd = ( - len(self.unpaired_hba), - len(self.unpaired_hbd), - ) - self.num_unpaired_hal = len(self.unpaired_hal) - - # Exclude empty chains (coming from ligand as a target, from metal complexes) - self.interacting_chains = sorted( - list( - set( - [ - i.reschain - for i in self.all_itypes - if i.reschain not in [" ", None] - ] - ) - ) - ) - - # Get all interacting residues, excluding ligand and water molecules - self.interacting_res = list( - set( - [ - "".join([str(i.resnr), str(i.reschain)]) - for i in self.all_itypes - if i.restype not in ["LIG", "HOH"] - ] - ) - ) - if len(self.interacting_res) != 0: - logger.info( - f"ligand interacts with {len(self.interacting_res)} binding site residue(s) in chain(s) {self.interacting_chains}" - ) - interactions_list = [] - num_saltbridges = len(self.saltbridge_lneg + self.saltbridge_pneg) - num_hbonds = len(self.hbonds_ldon + self.hbonds_pdon) - num_pication = len(self.pication_laro + self.pication_paro) - num_pistack = len(self.pistacking) - num_halogen = len(self.halogen_bonds) - num_waterbridges = len(self.water_bridges) - if num_saltbridges != 0: - interactions_list.append("%i salt bridge(s)" % num_saltbridges) - if num_hbonds != 0: - interactions_list.append("%i hydrogen bond(s)" % num_hbonds) - if num_pication != 0: - interactions_list.append("%i pi-cation interaction(s)" % num_pication) - if num_pistack != 0: - interactions_list.append("%i pi-stacking(s)" % num_pistack) - if num_halogen != 0: - interactions_list.append("%i halogen bond(s)" % num_halogen) - if num_waterbridges != 0: - interactions_list.append("%i water bridge(s)" % num_waterbridges) - if not len(interactions_list) == 0: - logger.info(f"complex uses {interactions_list}") - else: - logger.info("no interactions for this ligand") - - def find_unpaired_ligand(self): - """Identify unpaired functional in groups in ligands, involving H-Bond donors, acceptors, halogen bond donors. - """ - unpaired_hba, unpaired_hbd, unpaired_hal = [], [], [] - # Unpaired hydrogen bond acceptors/donors in ligand (not used for hydrogen bonds/water, salt bridges/mcomplex) - involved_atoms = [hbond.a.idx for hbond in self.hbonds_pdon] + [ - hbond.d.idx for hbond in self.hbonds_ldon - ] - [ - [involved_atoms.append(atom.idx) for atom in sb.negative.atoms] - for sb in self.saltbridge_lneg - ] - [ - [involved_atoms.append(atom.idx) for atom in sb.positive.atoms] - for sb in self.saltbridge_pneg - ] - [involved_atoms.append(wb.a.idx) for wb in self.water_bridges if wb.protisdon] - [ - involved_atoms.append(wb.d.idx) - for wb in self.water_bridges - if not wb.protisdon - ] - [ - involved_atoms.append(mcomplex.target.atom.idx) - for mcomplex in self.metal_complexes - if mcomplex.location == "ligand" - ] - for atom in [hba.a for hba in self.ligand.get_hba()]: - if atom.idx not in involved_atoms: - unpaired_hba.append(atom) - for atom in [hbd.d for hbd in self.ligand.get_hbd()]: - if atom.idx not in involved_atoms: - unpaired_hbd.append(atom) - - # unpaired halogen bond donors in ligand (not used for the previous + halogen bonds) - [involved_atoms.append(atom.don.x.idx) for atom in self.halogen_bonds] - for atom in [haldon.x for haldon in self.ligand.halogenbond_don]: - if atom.idx not in involved_atoms: - unpaired_hal.append(atom) - - return unpaired_hba, unpaired_hbd, unpaired_hal - - def refine_hydrophobic(self, all_h, pistacks): - """Apply several rules to reduce the number of hydrophobic interactions.""" - sel = {} - # 1. Rings interacting via stacking can't have additional hydrophobic contacts between each other. - for pistack, h in itertools.product(pistacks, all_h): - h1, h2 = h.bsatom.idx, h.ligatom.idx - brs, lrs = ( - [p1.idx for p1 in pistack.proteinring.atoms], - [p2.idx for p2 in pistack.ligandring.atoms], - ) - if h1 in brs and h2 in lrs: - sel[(h1, h2)] = "EXCLUDE" - hydroph = [h for h in all_h if not (h.bsatom.idx, h.ligatom.idx) in sel] - sel2 = {} - # 2. If a ligand atom interacts with several binding site atoms in the same residue, - # keep only the one with the closest distance - for h in hydroph: - if not (h.ligatom.idx, h.resnr) in sel2: - sel2[(h.ligatom.idx, h.resnr)] = h - else: - if sel2[(h.ligatom.idx, h.resnr)].distance > h.distance: - sel2[(h.ligatom.idx, h.resnr)] = h - hydroph = [h for h in sel2.values()] - hydroph_final = [] - bsclust = {} - # 3. If a protein atom interacts with several neighboring ligand atoms, just keep the one with the closest dist - for h in hydroph: - if h.bsatom.idx not in bsclust: - bsclust[h.bsatom.idx] = [ - h, - ] - else: - bsclust[h.bsatom.idx].append(h) - - idx_to_h = {} - for bs in [a for a in bsclust if len(bsclust[a]) == 1]: - hydroph_final.append(bsclust[bs][0]) - - # A list of tuples with the idx of an atom and one of its neighbours is created - for bs in [a for a in bsclust if not len(bsclust[a]) == 1]: - tuples = [] - all_idx = [i.ligatom.idx for i in bsclust[bs]] - for b in bsclust[bs]: - idx = b.ligatom.idx - neigh = [na for na in pybel.ob.OBAtomAtomIter(b.ligatom.OBAtom)] - for n in neigh: - n_idx = n.GetIdx() - if n_idx in all_idx: - if n_idx < idx: - tuples.append((n_idx, idx)) - else: - tuples.append((idx, n_idx)) - idx_to_h[idx] = b - - tuples = list(set(tuples)) - tuples = sorted(tuples, key=itemgetter(1)) - clusters = cluster_doubles( - tuples - ) # Cluster connected atoms (i.e. find hydrophobic patches) - - for cluster in clusters: - min_dist = float("inf") - min_h = None - for atm_idx in cluster: - h = idx_to_h[atm_idx] - if h.distance < min_dist: - min_dist = h.distance - min_h = h - hydroph_final.append(min_h) - before, reduced = len(all_h), len(hydroph_final) - if not before == 0 and not before == reduced: - logger.info( - f"reduced number of hydrophobic contacts from {before} to {reduced}" - ) - return hydroph_final - - def refine_hbonds_ldon(self, all_hbonds, salt_lneg, salt_pneg): - """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds.""" - i_set = {} - for hbond in all_hbonds: - i_set[hbond] = False - for salt in salt_pneg: - protidx, ligidx = ( - [at.idx for at in salt.negative.atoms], - [at.idx for at in salt.positive.atoms], - ) - if hbond.d.idx in ligidx and hbond.a.idx in protidx: - i_set[hbond] = True - for salt in salt_lneg: - protidx, ligidx = ( - [at.idx for at in salt.positive.atoms], - [at.idx for at in salt.negative.atoms], - ) - if hbond.d.idx in ligidx and hbond.a.idx in protidx: - i_set[hbond] = True - - # Allow only one hydrogen bond per donor, select interaction with larger donor angle - second_set = {} - hbls = [k for k in i_set.keys() if not i_set[k]] - for hbl in hbls: - if hbl.d.idx not in second_set: - second_set[hbl.d.idx] = (hbl.angle, hbl) - else: - if second_set[hbl.d.idx][0] < hbl.angle: - second_set[hbl.d.idx] = (hbl.angle, hbl) - return [hb[1] for hb in second_set.values()] - - def refine_hbonds_pdon(self, all_hbonds, salt_lneg, salt_pneg): - """Refine selection of hydrogen bonds. Do not allow groups which already form salt bridges to form H-Bonds with - atoms of the same group. - """ - i_set = {} - for hbond in all_hbonds: - i_set[hbond] = False - for salt in salt_lneg: - protidx, ligidx = ( - [at.idx for at in salt.positive.atoms], - [at.idx for at in salt.negative.atoms], - ) - if hbond.a.idx in ligidx and hbond.d.idx in protidx: - i_set[hbond] = True - for salt in salt_pneg: - protidx, ligidx = ( - [at.idx for at in salt.negative.atoms], - [at.idx for at in salt.positive.atoms], - ) - if hbond.a.idx in ligidx and hbond.d.idx in protidx: - i_set[hbond] = True - - # Allow only one hydrogen bond per donor, select interaction with larger donor angle - second_set = {} - hbps = [k for k in i_set.keys() if not i_set[k]] - for hbp in hbps: - if hbp.d.idx not in second_set: - second_set[hbp.d.idx] = (hbp.angle, hbp) - else: - if second_set[hbp.d.idx][0] < hbp.angle: - second_set[hbp.d.idx] = (hbp.angle, hbp) - return [hb[1] for hb in second_set.values()] - - def refine_pi_cation_laro(self, all_picat, stacks): - """Just important for constellations with histidine involved. If the histidine ring is positioned in stacking - position to an aromatic ring in the ligand, there is in most cases stacking and pi-cation interaction reported - as histidine also carries a positive charge in the ring. For such cases, only report stacking. - """ - i_set = [] - for picat in all_picat: - exclude = False - for stack in stacks: - if ( - whichrestype(stack.proteinring.atoms[0]) == "HIS" - and picat.ring.obj == stack.ligandring.obj - ): - exclude = True - if not exclude: - i_set.append(picat) - return i_set - - def refine_water_bridges(self, wbridges, hbonds_ldon, hbonds_pdon): - """A donor atom already forming a hydrogen bond is not allowed to form a water bridge. Each water molecule - can only be donor for two water bridges, selecting the constellation with the omega angle closest to 110 deg.""" - donor_atoms_hbonds = [hb.d.idx for hb in hbonds_ldon + hbonds_pdon] - wb_dict = {} - wb_dict2 = {} - omega = 110.0 - - # Just one hydrogen bond per donor atom - for wbridge in [wb for wb in wbridges if wb.d.idx not in donor_atoms_hbonds]: - if (wbridge.water.idx, wbridge.a.idx) not in wb_dict: - wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge - else: - if abs( - omega - wb_dict[(wbridge.water.idx, wbridge.a.idx)].w_angle - ) < abs(omega - wbridge.w_angle): - wb_dict[(wbridge.water.idx, wbridge.a.idx)] = wbridge - for wb_tuple in wb_dict: - water, acceptor = wb_tuple - if water not in wb_dict2: - wb_dict2[water] = [ - (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]), - ] - elif len(wb_dict2[water]) == 1: - wb_dict2[water].append( - (abs(omega - wb_dict[wb_tuple].w_angle), wb_dict[wb_tuple]) - ) - wb_dict2[water] = sorted(wb_dict2[water], key=lambda x: x[0]) - else: - if wb_dict2[water][1][0] < abs(omega - wb_dict[wb_tuple].w_angle): - wb_dict2[water] = [ - wb_dict2[water][0], - (wb_dict[wb_tuple].w_angle, wb_dict[wb_tuple]), - ] - - filtered_wb = [] - for fwbridges in wb_dict2.values(): - [filtered_wb.append(fwb[1]) for fwb in fwbridges] - return filtered_wb - - -class BindingSite(Mol): - def __init__(self, atoms, protcomplex, cclass, altconf, min_dist, mapper): - """Find all relevant parts which could take part in interactions""" - Mol.__init__(self, altconf, mapper, mtype="protein", bsid=None) - self.complex = cclass - self.full_mol = protcomplex - self.all_atoms = atoms - self.min_dist = min_dist # Minimum distance of bs res to ligand - self.bs_res = list( - set( - [ - "".join([str(whichresnumber(a)), whichchain(a)]) - for a in self.all_atoms - ] - ) - ) # e.g. 47A - self.rings = self.find_rings(self.full_mol, self.all_atoms) - self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) - self.hbond_acc_atoms = self.find_hba(self.all_atoms) - self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) - self.charged = self.find_charged(self.full_mol) - self.halogenbond_acc = self.find_hal(self.all_atoms) - self.metal_binding = self.find_metal_binding(self.full_mol) - - def find_hal(self, atoms): - """Look for halogen bond acceptors (Y-{O|P|N|S}, with Y=C,P,S)""" - data = namedtuple("hal_acceptor", "o o_orig_idx y y_orig_idx") - a_set = [] - # All oxygens, nitrogen, sulfurs with neighboring carbon, phosphor, nitrogen or sulfur - for a in [at for at in atoms if at.atomicnum in [8, 7, 16]]: - n_atoms = [ - na - for na in pybel.ob.OBAtomAtomIter(a.OBAtom) - if na.GetAtomicNum() in [6, 7, 15, 16] - ] - if len(n_atoms) == 1: # Proximal atom - o_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) - y_orig_idx = self.Mapper.mapid( - n_atoms[0].GetIdx(), mtype=self.mtype, bsid=self.bsid - ) - a_set.append( - data( - o=a, - o_orig_idx=o_orig_idx, - y=pybel.Atom(n_atoms[0]), - y_orig_idx=y_orig_idx, - ) - ) - return a_set - - def find_charged(self, mol): - """Looks for positive charges in arginine, histidine or lysine, for negative in aspartic and glutamic acid.""" - data = namedtuple( - "pcharge", "atoms atoms_orig_idx type center restype resnr reschain" - ) - a_set = [] - # Iterate through all residue, exclude those in chains defined as peptides - for res in [ - r - for r in pybel.ob.OBResidueIter(mol.OBMol) - if not r.GetChain() in config.PEPTIDES - ]: - if config.INTRA is not None: - if res.GetChain() != config.INTRA: - continue - a_contributing = [] - a_contributing_orig_idx = [] - if res.GetName() in ( - "ARG", - "HIS", - "LYS", - ): # Arginine, Histidine or Lysine have charged sidechains - for a in pybel.ob.OBResidueAtomIter(res): - if ( - a.GetType().startswith("N") - and res.GetAtomProperty(a, 8) - and not self.Mapper.mapid(a.GetIdx(), mtype="protein") - in self.altconf - ): - a_contributing.append(pybel.Atom(a)) - a_contributing_orig_idx.append( - self.Mapper.mapid(a.GetIdx(), mtype="protein") - ) - if not len(a_contributing) == 0: - a_set.append( - data( - atoms=a_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="positive", - center=centroid([ac.coords for ac in a_contributing]), - restype=res.GetName(), - resnr=res.GetNum(), - reschain=res.GetChain(), - ) - ) - if res.GetName() in ("GLU", "ASP"): # Aspartic or Glutamic Acid - for a in pybel.ob.OBResidueAtomIter(res): - if ( - a.GetType().startswith("O") - and res.GetAtomProperty(a, 8) - and not self.Mapper.mapid(a.GetIdx(), mtype="protein") - in self.altconf - ): - a_contributing.append(pybel.Atom(a)) - a_contributing_orig_idx.append( - self.Mapper.mapid(a.GetIdx(), mtype="protein") - ) - if not len(a_contributing) == 0: - a_set.append( - data( - atoms=a_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="negative", - center=centroid([ac.coords for ac in a_contributing]), - restype=res.GetName(), - resnr=res.GetNum(), - reschain=res.GetChain(), - ) - ) - return a_set - - def find_metal_binding(self, mol): - """Looks for atoms that could possibly be involved in chelating a metal ion. - This can be any main chain oxygen atom or oxygen, nitrogen and sulfur from specific amino acids""" - data = namedtuple( - "metal_binding", "atom atom_orig_idx type restype resnr reschain location" - ) - a_set = [] - for res in pybel.ob.OBResidueIter(mol.OBMol): - restype, reschain, resnr = ( - res.GetName().upper(), - res.GetChain(), - res.GetNum(), - ) - if restype in ["ASP", "GLU", "SER", "THR", "TYR"]: # Look for oxygens here - for a in pybel.ob.OBResidueAtomIter(res): - if ( - a.GetType().startswith("O") - and res.GetAtomProperty(a, 8) - and not self.Mapper.mapid(a.GetIdx(), mtype="protein") - in self.altconf - ): - atom_orig_idx = self.Mapper.mapid( - a.GetIdx(), mtype=self.mtype, bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(a), - atom_orig_idx=atom_orig_idx, - type="O", - restype=restype, - resnr=resnr, - reschain=reschain, - location="protein.sidechain", - ) - ) - if restype == "HIS": # Look for nitrogen here - for a in pybel.ob.OBResidueAtomIter(res): - if ( - a.GetType().startswith("N") - and res.GetAtomProperty(a, 8) - and not self.Mapper.mapid(a.GetIdx(), mtype="protein") - in self.altconf - ): - atom_orig_idx = self.Mapper.mapid( - a.GetIdx(), mtype=self.mtype, bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(a), - atom_orig_idx=atom_orig_idx, - type="N", - restype=restype, - resnr=resnr, - reschain=reschain, - location="protein.sidechain", - ) - ) - if restype == "CYS": # Look for sulfur here - for a in pybel.ob.OBResidueAtomIter(res): - if ( - a.GetType().startswith("S") - and res.GetAtomProperty(a, 8) - and not self.Mapper.mapid(a.GetIdx(), mtype="protein") - in self.altconf - ): - atom_orig_idx = self.Mapper.mapid( - a.GetIdx(), mtype=self.mtype, bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(a), - atom_orig_idx=atom_orig_idx, - type="S", - restype=restype, - resnr=resnr, - reschain=reschain, - location="protein.sidechain", - ) - ) - for a in pybel.ob.OBResidueAtomIter(res): # All main chain oxygens - if ( - a.GetType().startswith("O") - and res.GetAtomProperty(a, 2) - and not self.Mapper.mapid(a.GetIdx(), mtype="protein") - in self.altconf - and restype != "HOH" - ): - atom_orig_idx = self.Mapper.mapid( - a.GetIdx(), mtype=self.mtype, bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(a), - atom_orig_idx=atom_orig_idx, - type="O", - restype=res.GetName(), - resnr=res.GetNum(), - reschain=res.GetChain(), - location="protein.mainchain", - ) - ) - return a_set - - -class Ligand(Mol): - def __init__(self, cclass, ligand): - altconf = cclass.altconf - self.hetid, self.chain, self.position = ( - ligand.hetid, - ligand.chain, - ligand.position, - ) - self.bsid = ":".join([self.hetid, self.chain, str(self.position)]) - Mol.__init__(self, altconf, cclass.Mapper, mtype="ligand", bsid=self.bsid) - self.members = ligand.members - self.longname = ligand.longname - self.type = ligand.type - self.complex = cclass - self.molecule = ligand.mol # Pybel Molecule - self.smiles = self.molecule.write(format="can") # SMILES String - self.inchikey = self.molecule.write(format="inchikey") - self.can_to_pdb = ligand.can_to_pdb - if not len(self.smiles) == 0: - self.smiles = self.smiles.split()[0] - else: - logger.warning(f"could not write SMILES for ligand {ligand}") - self.smiles = "" - self.heavy_atoms = self.molecule.OBMol.NumHvyAtoms() # Heavy atoms count - self.all_atoms = self.molecule.atoms - self.atmdict = {l.idx: l for l in self.all_atoms} - self.rings = self.find_rings(self.molecule, self.all_atoms) - self.hydroph_atoms = self.hydrophobic_atoms(self.all_atoms) - self.hbond_acc_atoms = self.find_hba(self.all_atoms) - self.num_rings = len(self.rings) - if self.num_rings != 0: - logger.info(f"contains {self.num_rings} aromatic ring(s)") - descvalues = self.molecule.calcdesc() - self.molweight, self.logp = float(descvalues["MW"]), float(descvalues["logP"]) - self.num_rot_bonds = int(self.molecule.OBMol.NumRotors()) - self.atomorder = ligand.atomorder - - ########################################################## - # Special Case for hydrogen bond acceptor identification # - ########################################################## - - self.inverse_mapping = { - v: k for k, v in self.Mapper.ligandmaps[self.bsid].items() - } - self.pdb_to_idx_mapping = {v: k for k, v in self.Mapper.proteinmap.items()} - self.hbond_don_atom_pairs = self.find_hbd(self.all_atoms, self.hydroph_atoms) - - ###### - donor_pairs = [] - data = namedtuple("hbonddonor", "d d_orig_atom d_orig_idx h type") - for donor in self.all_atoms: - pdbidx = self.Mapper.mapid( - donor.idx, mtype="ligand", bsid=self.bsid, to="original" - ) - d = cclass.atoms[self.pdb_to_idx_mapping[pdbidx]] - if d.OBAtom.IsHbondDonor(): - for adj_atom in [ - a for a in pybel.ob.OBAtomAtomIter(d.OBAtom) if a.IsHbondDonorH() - ]: - d_orig_atom = self.Mapper.id_to_atom(pdbidx) - donor_pairs.append( - data( - d=donor, - d_orig_atom=d_orig_atom, - d_orig_idx=pdbidx, - h=pybel.Atom(adj_atom), - type="regular", - ) - ) - self.hbond_don_atom_pairs = donor_pairs - ####### - - self.charged = self.find_charged(self.all_atoms) - self.centroid = centroid([a.coords for a in self.all_atoms]) - self.max_dist_to_center = max( - (euclidean3d(self.centroid, a.coords) for a in self.all_atoms) - ) - self.water = [] - data = namedtuple("water", "oxy oxy_orig_idx") - for hoh in ligand.water: - oxy = None - for at in pybel.ob.OBResidueAtomIter(hoh): - if at.GetAtomicNum() == 8 and at.GetIdx() not in self.altconf: - oxy = pybel.Atom(at) - # There are some cases where there is no oxygen in a water residue, ignore those - if ( - not set([at.GetAtomicNum() for at in pybel.ob.OBResidueAtomIter(hoh)]) - == {1} - and oxy is not None - ): - if ( - euclidean3d(self.centroid, oxy.coords) - < self.max_dist_to_center + config.BS_DIST - ): - oxy_orig_idx = self.Mapper.mapid(oxy.idx, mtype="protein") - self.water.append(data(oxy=oxy, oxy_orig_idx=oxy_orig_idx)) - self.halogenbond_don = self.find_hal(self.all_atoms) - self.metal_binding = self.find_metal_binding(self.all_atoms, self.water) - self.metals = [] - data = namedtuple("metal", "m orig_m m_orig_idx") - for a in [a for a in self.all_atoms if a.type.upper() in config.METAL_IONS]: - m_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) - orig_m = self.Mapper.id_to_atom(m_orig_idx) - self.metals.append(data(m=a, m_orig_idx=m_orig_idx, orig_m=orig_m)) - self.num_hba, self.num_hbd = ( - len(self.hbond_acc_atoms), - len(self.hbond_don_atom_pairs), - ) - self.num_hal = len(self.halogenbond_don) - - def get_canonical_num(self, atomnum): - """Converts internal atom ID into canonical atom ID. Agrees with Canonical SMILES in XML.""" - return self.atomorder[atomnum - 1] - - def is_functional_group(self, atom, group): - """Given a pybel atom, look up if it belongs to a function group""" - n_atoms = [ - a_neighbor.GetAtomicNum() - for a_neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom) - ] - - if group in ["quartamine", "tertamine"] and atom.atomicnum == 7: # Nitrogen - # It's a nitrogen, so could be a protonated amine or quaternary ammonium - if "1" not in n_atoms and len(n_atoms) == 4: - return ( - True if group == "quartamine" else False - ) # It's a quat. ammonium (N with 4 residues != H) - elif atom.OBAtom.GetHyb() == 3 and len(n_atoms) >= 3: - return ( - True if group == "tertamine" else False - ) # It's sp3-hybridized, so could pick up an hydrogen - else: - return False - - if ( - group in ["sulfonium", "sulfonicacid", "sulfate"] and atom.atomicnum == 16 - ): # Sulfur - if ( - "1" not in n_atoms and len(n_atoms) == 3 - ): # It's a sulfonium (S with 3 residues != H) - return True if group == "sulfonium" else False - elif n_atoms.count(8) == 3: # It's a sulfonate or sulfonic acid - return True if group == "sulfonicacid" else False - elif n_atoms.count(8) == 4: # It's a sulfate - return True if group == "sulfate" else False - - if group == "phosphate" and atom.atomicnum == 15: # Phosphor - if set(n_atoms) == {8}: # It's a phosphate - return True - - if ( - group in ["carboxylate", "guanidine"] and atom.atomicnum == 6 - ): # It's a carbon atom - if ( - n_atoms.count(8) == 2 and n_atoms.count(6) == 1 - ): # It's a carboxylate group - return True if group == "carboxylate" else False - elif n_atoms.count(7) == 3 and len(n_atoms) == 3: # It's a guanidine group - nitro_partners = [] - for nitro in pybel.ob.OBAtomAtomIter(atom.OBAtom): - nitro_partners.append( - len( - [ - b_neighbor - for b_neighbor in pybel.ob.OBAtomAtomIter(nitro) - ] - ) - ) - if ( - min(nitro_partners) == 1 - ): # One nitrogen is only connected to the carbon, can pick up a H - return True if group == "guanidine" else False - - if group == "halocarbon" and atom.atomicnum in [9, 17, 35, 53]: # Halogen atoms - n_atoms = [ - na - for na in pybel.ob.OBAtomAtomIter(atom.OBAtom) - if na.GetAtomicNum() == 6 - ] - if len(n_atoms) == 1: # Halocarbon - return True - else: - return False - - def find_hal(self, atoms): - """Look for halogen bond donors (X-C, with X=F, Cl, Br, I)""" - data = namedtuple("hal_donor", "x orig_x x_orig_idx c c_orig_idx") - a_set = [] - for a in atoms: - if self.is_functional_group(a, "halocarbon"): - n_atoms = [ - na - for na in pybel.ob.OBAtomAtomIter(a.OBAtom) - if na.GetAtomicNum() == 6 - ] - x_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) - orig_x = self.Mapper.id_to_atom(x_orig_idx) - c_orig_idx = [ - self.Mapper.mapid(na.GetIdx(), mtype=self.mtype, bsid=self.bsid) - for na in n_atoms - ] - a_set.append( - data( - x=a, - orig_x=orig_x, - x_orig_idx=x_orig_idx, - c=pybel.Atom(n_atoms[0]), - c_orig_idx=c_orig_idx, - ) - ) - if len(a_set) != 0: - logger.info(f"ligand contains {len(a_set)} halogen atom(s)") - return a_set - - def find_charged(self, all_atoms): - """Identify all positively charged groups in a ligand. This search is not exhaustive, as the cases can be quite - diverse. The typical cases seem to be protonated amines, quaternary ammoinium and sulfonium - as mentioned in 'Cation-pi interactions in ligand recognition and catalysis' (Zacharias et al., 2002)). - Identify negatively charged groups in the ligand. - """ - data = namedtuple( - "lcharge", "atoms orig_atoms atoms_orig_idx type center fgroup" - ) - a_set = [] - for a in all_atoms: - a_orig_idx = self.Mapper.mapid(a.idx, mtype=self.mtype, bsid=self.bsid) - a_orig = self.Mapper.id_to_atom(a_orig_idx) - if self.is_functional_group(a, "quartamine"): - a_set.append( - data( - atoms=[a,], - orig_atoms=[a_orig,], - atoms_orig_idx=[a_orig_idx,], - type="positive", - center=list(a.coords), - fgroup="quartamine", - ) - ) - elif self.is_functional_group(a, "tertamine"): - a_set.append( - data( - atoms=[a,], - orig_atoms=[a_orig,], - atoms_orig_idx=[a_orig_idx,], - type="positive", - center=list(a.coords), - fgroup="tertamine", - ) - ) - if self.is_functional_group(a, "sulfonium"): - a_set.append( - data( - atoms=[a,], - orig_atoms=[a_orig,], - atoms_orig_idx=[a_orig_idx,], - type="positive", - center=list(a.coords), - fgroup="sulfonium", - ) - ) - if self.is_functional_group(a, "phosphate"): - a_contributing = [ - a, - ] - a_contributing_orig_idx = [ - a_orig_idx, - ] - [ - a_contributing.append(pybel.Atom(neighbor)) - for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - ] - [ - a_contributing_orig_idx.append( - self.Mapper.mapid( - neighbor.idx, mtype=self.mtype, bsid=self.bsid - ) - ) - for neighbor in a_contributing - ] - orig_contributing = [ - self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx - ] - a_set.append( - data( - atoms=a_contributing, - orig_atoms=orig_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="negative", - center=a.coords, - fgroup="phosphate", - ) - ) - if self.is_functional_group(a, "sulfonicacid"): - a_contributing = [ - a, - ] - a_contributing_orig_idx = [ - a_orig_idx, - ] - [ - a_contributing.append(pybel.Atom(neighbor)) - for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - if neighbor.GetAtomicNum() == 8 - ] - [ - a_contributing_orig_idx.append( - self.Mapper.mapid( - neighbor.idx, mtype=self.mtype, bsid=self.bsid - ) - ) - for neighbor in a_contributing - ] - orig_contributing = [ - self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx - ] - a_set.append( - data( - atoms=a_contributing, - orig_atoms=orig_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="negative", - center=a.coords, - fgroup="sulfonicacid", - ) - ) - elif self.is_functional_group(a, "sulfate"): - a_contributing = [ - a, - ] - a_contributing_orig_idx = [ - a_orig_idx, - ] - [ - a_contributing_orig_idx.append( - self.Mapper.mapid( - neighbor.idx, mtype=self.mtype, bsid=self.bsid - ) - ) - for neighbor in a_contributing - ] - [ - a_contributing.append(pybel.Atom(neighbor)) - for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - ] - orig_contributing = [ - self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx - ] - a_set.append( - data( - atoms=a_contributing, - orig_atoms=orig_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="negative", - center=a.coords, - fgroup="sulfate", - ) - ) - if self.is_functional_group(a, "carboxylate"): - a_contributing = [ - pybel.Atom(neighbor) - for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - if neighbor.GetAtomicNum() == 8 - ] - a_contributing_orig_idx = [ - self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) - for neighbor in a_contributing - ] - orig_contributing = [ - self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx - ] - a_set.append( - data( - atoms=a_contributing, - orig_atoms=orig_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="negative", - center=centroid([a.coords for a in a_contributing]), - fgroup="carboxylate", - ) - ) - elif self.is_functional_group(a, "guanidine"): - a_contributing = [ - pybel.Atom(neighbor) - for neighbor in pybel.ob.OBAtomAtomIter(a.OBAtom) - if neighbor.GetAtomicNum() == 7 - ] - a_contributing_orig_idx = [ - self.Mapper.mapid(neighbor.idx, mtype=self.mtype, bsid=self.bsid) - for neighbor in a_contributing - ] - orig_contributing = [ - self.Mapper.id_to_atom(idx) for idx in a_contributing_orig_idx - ] - a_set.append( - data( - atoms=a_contributing, - orig_atoms=orig_contributing, - atoms_orig_idx=a_contributing_orig_idx, - type="positive", - center=a.coords, - fgroup="guanidine", - ) - ) - return a_set - - def find_metal_binding(self, lig_atoms, water_oxygens): - """Looks for atoms that could possibly be involved in binding a metal ion. - This can be any water oxygen, as well as oxygen from carboxylate, phophoryl, phenolate, alcohol; - nitrogen from imidazole; sulfur from thiolate. - """ - a_set = [] - data = namedtuple( - "metal_binding", - "atom orig_atom atom_orig_idx type fgroup restype resnr reschain location", - ) - for oxygen in water_oxygens: - a_set.append( - data( - atom=oxygen.oxy, - atom_orig_idx=oxygen.oxy_orig_idx, - type="O", - fgroup="water", - restype=whichrestype(oxygen.oxy), - resnr=whichresnumber(oxygen.oxy), - reschain=whichchain(oxygen.oxy), - location="water", - orig_atom=self.Mapper.id_to_atom(oxygen.oxy_orig_idx), - ) - ) - # #@todo Refactor code - for a in lig_atoms: - a_orig_idx = self.Mapper.mapid(a.idx, mtype="ligand", bsid=self.bsid) - n_atoms = pybel.ob.OBAtomAtomIter(a.OBAtom) # Neighboring atoms - # All atomic numbers of neighboring atoms - n_atoms_atomicnum = [ - n.GetAtomicNum() for n in pybel.ob.OBAtomAtomIter(a.OBAtom) - ] - if a.atomicnum == 8: # Oxygen - if ( - n_atoms_atomicnum.count("1") == 1 and len(n_atoms_atomicnum) == 2 - ): # Oxygen in alcohol (R-[O]-H) - a_set.append( - data( - atom=a, - atom_orig_idx=a_orig_idx, - type="O", - fgroup="alcohol", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if ( - True in [n.IsAromatic() for n in n_atoms] - and not a.OBAtom.IsAromatic() - ): # Phenolate oxygen - a_set.append( - data( - atom=a, - atom_orig_idx=a_orig_idx, - type="O", - fgroup="phenolate", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if a.atomicnum == 6: # It's a carbon atom - if ( - n_atoms_atomicnum.count(8) == 2 and n_atoms_atomicnum.count(6) == 1 - ): # It's a carboxylate group - for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: - neighbor_orig_idx = self.Mapper.mapid( - neighbor.GetIdx(), mtype="ligand", bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(neighbor), - atom_orig_idx=neighbor_orig_idx, - type="O", - fgroup="carboxylate", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if a.atomicnum == 15: # It's a phosphor atom - if n_atoms_atomicnum.count(8) >= 3: # It's a phosphoryl - for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: - neighbor_orig_idx = self.Mapper.mapid( - neighbor.GetIdx(), mtype="ligand", bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(neighbor), - atom_orig_idx=neighbor_orig_idx, - type="O", - fgroup="phosphoryl", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if ( - n_atoms_atomicnum.count(8) == 2 - ): # It's another phosphor-containing group #@todo (correct name?) - for neighbor in [n for n in n_atoms if n.GetAtomicNum() == 8]: - neighbor_orig_idx = self.Mapper.mapid( - neighbor.GetIdx(), mtype="ligand", bsid=self.bsid - ) - a_set.append( - data( - atom=pybel.Atom(neighbor), - atom_orig_idx=neighbor_orig_idx, - type="O", - fgroup="phosphor.other", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if a.atomicnum == 7: # It's a nitrogen atom - if n_atoms_atomicnum.count(6) == 2: # It's imidazole/pyrrole or similar - a_set.append( - data( - atom=a, - atom_orig_idx=a_orig_idx, - type="N", - fgroup="imidazole/pyrrole", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if a.atomicnum == 16: # It's a sulfur atom - if ( - True in [n.IsAromatic() for n in n_atoms] - and not a.OBAtom.IsAromatic() - ): # Thiolate - a_set.append( - data( - atom=a, - atom_orig_idx=a_orig_idx, - type="S", - fgroup="thiolate", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - if set(n_atoms_atomicnum) == {26}: # Sulfur in Iron sulfur cluster - a_set.append( - data( - atom=a, - atom_orig_idx=a_orig_idx, - type="S", - fgroup="iron-sulfur.cluster", - restype=self.hetid, - resnr=self.position, - reschain=self.chain, - location="ligand", - orig_atom=self.Mapper.id_to_atom(a_orig_idx), - ) - ) - - return a_set - - -class PDBComplex: - """Contains a collection of objects associated with a PDB complex, i.e. one or several ligands and their binding - sites as well as information about the pliprofiler between them. Provides functions to load and prepare input files - such as PDB files. - """ - - def __init__(self): - self.interaction_sets = ( - {} - ) # Dictionary with site identifiers as keys and object as value - self.protcomplex = None - self.filetype = None - self.atoms = {} # Dictionary of Pybel atoms, accessible by their idx - self.sourcefiles = {} - self.information = {} - self.corrected_pdb = "" - self._output_path = tempfile.gettempdir() - self.pymol_name = None - self.modres = set() - self.resis = [] - self.altconf = [] # Atom idx of atoms with alternate conformations - self.covalent = ( - [] - ) # Covalent linkages between ligands and protein residues/other ligands - self.excluded = [] # Excluded ligands - self.Mapper = Mapper() - self.ligands = [] - - def __str__(self): - formatted_lig_names = [ - ":".join([x.hetid, x.chain, str(x.position)]) for x in self.ligands - ] - return "Protein structure %s with ligands:\n" % (self.pymol_name) + "\n".join( - [lig for lig in formatted_lig_names] - ) - - def load_pdb(self, pdbpath, as_string=False): - """Loads a pdb file with protein AND ligand(s), separates and prepares them. - If specified 'as_string', the input is a PDB string instead of a path.""" - if as_string: - self.sourcefiles["pdbcomplex.original"] = None - self.sourcefiles["pdbcomplex"] = None - self.sourcefiles["pdbstring"] = pdbpath - else: - self.sourcefiles["pdbcomplex.original"] = pdbpath - self.sourcefiles["pdbcomplex"] = pdbpath - self.information["pdbfixes"] = False - pdbparser = PDBParser( - pdbpath, as_string=as_string - ) # Parse PDB file to find errors and get additional data - # #@todo Refactor and rename here - self.Mapper.proteinmap = pdbparser.proteinmap - self.Mapper.reversed_proteinmap = { - v: k for k, v in self.Mapper.proteinmap.items() - } - self.modres = pdbparser.modres - self.covalent = pdbparser.covalent - self.altconf = pdbparser.altconformations - self.corrected_pdb = pdbparser.corrected_pdb - - if not config.PLUGIN_MODE: - if pdbparser.num_fixed_lines > 0: - logger.info( - f"{pdbparser.num_fixed_lines} lines automatically fixed in PDB input file" - ) - # Save modified PDB file - if not as_string: - basename = os.path.basename(pdbpath).split(".")[0] - else: - basename = "from_stdin" - pdbpath_fixed = tmpfile( - prefix="plipfixed." + basename + "_", direc=self.output_path - ) - create_folder_if_not_exists(self.output_path) - self.sourcefiles["pdbcomplex"] = pdbpath_fixed - self.corrected_pdb = re.sub( - r"[^\x00-\x7F]+", " ", self.corrected_pdb - ) # Strip non-unicode chars - if ( - not config.NOFIXFILE - ): # Only write to file if this option is not activated - with open(pdbpath_fixed, "w") as f: - f.write(self.corrected_pdb) - self.information["pdbfixes"] = True - - if not as_string: - self.sourcefiles["filename"] = os.path.basename( - self.sourcefiles["pdbcomplex"] - ) - self.protcomplex, self.filetype = read_pdb(self.corrected_pdb, as_string=True) - - # Update the model in the Mapper class instance - self.Mapper.original_structure = self.protcomplex.OBMol - logger.info("PDB structure successfully read") - - # Determine (temporary) PyMOL Name from Filename - self.pymol_name = pdbpath.split("/")[-1].split(".")[0] + "-Protein" - # Replace characters causing problems in PyMOL - self.pymol_name = ( - self.pymol_name.replace(" ", "") - .replace("(", "") - .replace(")", "") - .replace("-", "_") - ) - # But if possible, name it after PDBID in Header - if "HEADER" in self.protcomplex.data: # If the PDB file has a proper header - potential_name = self.protcomplex.data["HEADER"][56:60].lower() - if extract_pdbid(potential_name) != "UnknownProtein": - self.pymol_name = potential_name - logger.debug(f"PyMOL name set as: {self.pymol_name}") - - # Extract and prepare ligands - ligandfinder = LigandFinder( - self.protcomplex, self.altconf, self.modres, self.covalent, self.Mapper - ) - self.ligands = ligandfinder.ligands - self.excluded = ligandfinder.excluded - - # decide whether to add polar hydrogens - if not config.NOHYDRO: - if not as_string: - basename = os.path.basename(pdbpath).split(".")[0] - else: - basename = "from_stdin" - self.protcomplex.OBMol.AddPolarHydrogens() - output_path = os.path.join(self._output_path, f"{basename}_protonated.pdb") - self.protcomplex.write("pdb", output_path, overwrite=True) - logger.info(f"protonated structure written to {output_path}") - else: - logger.warning( - "no polar hydrogens will be assigned (make sure your structure contains hydrogens)" - ) - - for atm in self.protcomplex: - self.atoms[atm.idx] = atm - - if len(self.excluded) != 0: - logger.info(f"excluded molecules as ligands: {self.excluded}") - - if config.DNARECEPTOR: - self.resis = [ - obres - for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol) - if obres.GetName() in config.DNA + config.RNA - ] - else: - self.resis = [ - obres - for obres in pybel.ob.OBResidueIter(self.protcomplex.OBMol) - if obres.GetResidueProperty(0) - ] - - num_ligs = len(self.ligands) - if num_ligs == 1: - logger.info("analyzing one ligand") - elif num_ligs > 1: - logger.info(f"analyzing {num_ligs} ligands") - else: - logger.info(f"structure contains no ligands") - - def analyze(self): - """Triggers analysis of all complexes in structure""" - for ligand in self.ligands: - self.characterize_complex(ligand) - - def characterize_complex(self, ligand): - """Handles all basic functions for characterizing the interactions for one ligand""" - - single_sites = [] - for member in ligand.members: - single_sites.append(":".join([str(x) for x in member])) - site = " + ".join(single_sites) - site = site if not len(site) > 20 else site[:20] + "..." - longname = ( - ligand.longname - if not len(ligand.longname) > 20 - else ligand.longname[:20] + "..." - ) - ligtype = "unspecified type" if ligand.type == "UNSPECIFIED" else ligand.type - ligtext = f"{longname} [{ligtype}] -- {site}" - logger.info(f"processing ligand {ligtext}") - if ligtype == "PEPTIDE": - logger.info( - f"chain {ligand.chain} will be processed in [PEPTIDE / INTER-CHAIN] mode" - ) - if ligtype == "INTRA": - logger.info(f"chain {ligand.chain} will be processed in [INTRA-CHAIN] mode") - any_in_biolip = ( - len(set([x[0] for x in ligand.members]).intersection(config.biolip_list)) - != 0 - ) - - if ( - ligtype - not in ["POLYMER", "DNA", "ION", "DNA+ION", "RNA+ION", "SMALLMOLECULE+ION"] - and any_in_biolip - ): - logger.info("may be biologically irrelevant") - - lig_obj = Ligand(self, ligand) - cutoff = lig_obj.max_dist_to_center + config.BS_DIST - bs_res = self.extract_bs(cutoff, lig_obj.centroid, self.resis) - # Get a list of all atoms belonging to the binding site, search by idx - bs_atoms = [ - self.atoms[idx] - for idx in [ - i - for i in self.atoms.keys() - if self.atoms[i].OBAtom.GetResidue().GetIdx() in bs_res - ] - if idx in self.Mapper.proteinmap - and self.Mapper.mapid(idx, mtype="protein") not in self.altconf - ] - if ligand.type == "PEPTIDE": - # If peptide, don't consider the peptide chain as part of the protein binding site - bs_atoms = [ - a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() != lig_obj.chain - ] - if ligand.type == "INTRA": - # Interactions within the chain - bs_atoms = [ - a for a in bs_atoms if a.OBAtom.GetResidue().GetChain() == lig_obj.chain - ] - bs_atoms_refined = [] - - # Create hash with BSRES -> (MINDIST_TO_LIG, AA_TYPE) - # and refine binding site atom selection with exact threshold - min_dist = {} - for r in bs_atoms: - bs_res_id = "".join([str(whichresnumber(r)), whichchain(r)]) - for l in ligand.mol.atoms: - distance = euclidean3d(r.coords, l.coords) - if bs_res_id not in min_dist: - min_dist[bs_res_id] = (distance, whichrestype(r)) - elif min_dist[bs_res_id][0] > distance: - min_dist[bs_res_id] = (distance, whichrestype(r)) - if distance <= config.BS_DIST and r not in bs_atoms_refined: - bs_atoms_refined.append(r) - num_bs_atoms = len(bs_atoms_refined) - logger.info( - f"binding site atoms in vicinity ({config.BS_DIST} A max. dist: {num_bs_atoms})" - ) - - bs_obj = BindingSite( - bs_atoms_refined, - self.protcomplex, - self, - self.altconf, - min_dist, - self.Mapper, - ) - pli_obj = PLInteraction(lig_obj, bs_obj, self) - self.interaction_sets[ligand.mol.title] = pli_obj - - def extract_bs(self, cutoff, ligcentroid, resis): - """Return list of ids from residues belonging to the binding site""" - return [ - obres.GetIdx() - for obres in resis - if self.res_belongs_to_bs(obres, cutoff, ligcentroid) - ] - - def res_belongs_to_bs(self, res, cutoff, ligcentroid): - """Check for each residue if its centroid is within a certain distance to the ligand centroid. - Additionally checks if a residue belongs to a chain restricted by the user (e.g. by defining a peptide chain)""" - rescentroid = centroid( - [(atm.x(), atm.y(), atm.z()) for atm in pybel.ob.OBResidueAtomIter(res)] - ) - # Check geometry - near_enough = True if euclidean3d(rescentroid, ligcentroid) < cutoff else False - # Check chain membership - restricted_chain = True if res.GetChain() in config.PEPTIDES else False - return near_enough and not restricted_chain - - def get_atom(self, idx): - return self.atoms[idx] - - @property - def output_path(self): - return self._output_path - - @output_path.setter - def output_path(self, path): - self._output_path = tilde_expansion(path) |