From cb07da2c692a6a15362d74afbd845ac8bdbfb085 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 20 Feb 2024 18:30:23 +0000 Subject: [PATCH 01/32] Added prototype functionality for performing region of interest (ROI) mapping for large molecules. This allows for the set up of protein FEP calculations (for example point mutations and covalent modifications). The merge code has also been modified to allow for multiple perturbable region of interests, which allows for multiple mutations FEP calculations to be run at the same time. --- .../Sandpit/Exscientia/Align/_align.py | 455 +++++++++++++++++- .../Sandpit/Exscientia/Align/_merge.py | 205 ++++---- 2 files changed, 540 insertions(+), 120 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index 27731e95d..e915aa4cc 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -29,6 +29,7 @@ "matchAtoms", "viewMapping", "rmsdAlign", + "roiMatch", "flexAlign", "merge", ] @@ -38,6 +39,9 @@ import subprocess as _subprocess import sys as _sys +# This logger is temporary. +from loguru import logger as _logger + from .._Utils import _try_import, _have_imported, _assert_imported import warnings as _warnings @@ -51,14 +55,16 @@ if _have_imported(_rdkit): from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS - from rdkit import RDLogger as _RDLogger + + # no idea why, but RD_logger cannot be imported from rdkit, only RDLogger can + from rdkit import RDLogger as _RD_logger # Disable RDKit warnings. - _RDLogger.DisableLog("rdApp.*") + _RD_logger.DisableLog("rdApp.*") else: _Chem = _rdkit _rdFMCS = _rdkit - _RDLogger = _rdkit + _RD_logger = _rdkit from sire.legacy import Base as _SireBase from sire.legacy import Maths as _SireMaths @@ -1061,6 +1067,404 @@ def matchAtoms( return mappings[0:matches] +# NOTE: This function is currently experimental and has not gone through +# rigorous validation. Proceed with caution. +def _get_backbone(molecule): + """ + Extract the backbone atoms from a molecule. + Backbone atoms are defined as N, CA, C atoms in a residue. + This function is not intended to be applied to ligands. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule from which to extract the backbone atoms. + + Returns + ------- + + atom_idx : dict + A dictionary of the absolute and relative indices of the backbone atoms. + + relative_backbone_atoms : list + A list of the extracted backbone atoms. + """ + absolute_backbone_atom_indices = [] + for atom in molecule.getAtoms(): + if atom.name() in ["N", "CA", "C"]: + absolute_backbone_atom_indices.append(atom.index()) + + relative_backbone_atoms = molecule.extract(absolute_backbone_atom_indices) + relative_backbone_atom_indices = [ + atom.index() for atom in relative_backbone_atoms.getAtoms() + ] + atom_idx = dict(zip(absolute_backbone_atom_indices, relative_backbone_atom_indices)) + return (atom_idx, relative_backbone_atoms) + + +# NOTE: This function is currently experimental and has not gone through +# rigorous validation. Proceed with caution. +def roiMatch( + molecule0, + molecule1, + roi, + force_backbone_match=False, +): + """ + Matching of two molecules based on a region of interest (ROI). + The ROI is defined as a list of residues in the molecule/protein. + The function will attempt to match the ROI in the two molecules and + return the mapping between the two molecules. Multiple ROIs can be + defined by providing a list of residues. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + roi : list + A list of regions/residues of interest in the molecule/protein. + + force_backbone_match : bool + If set to True, will force the backbone atoms to be matched which + is useful for ensuring a more stable match between the two molecules. + This is set to False by default. + + Returns + ------- + + mapping : dict + A dictionary of the mapping between the two molecules. + + roi_idx : list + A list of the indices of the atoms in the ROI. This can be multiple + nested lists if the ROI is defined as a list of residues. + TODO: change the return type of roi_idx to a dictionary of lists + + Notes + ----- + + The key assumption of this function is that the two molecules are + structurally identical except for the region of interest (ROI). The ROI + could be a point mutation, or a residue that has been covalently modified. + The function will attempt to match the atoms in the ROI based on the + maximum common substructure (MCS) algorithm. First, the ROI is extracted + from the two molecules and then the atoms in the ROI are matched using + BioSimSpace.Align.matchAtoms function. The function will return the + mapping between the two molecules. This "relative" mapping will then be + used to map the atoms in the ROI to the "absolute" indices in the molecule. + So for example the relative mapping could be {0: 3, 1: 2, 2: 5} and + the absolute mapping could be {100: 103, 101: 102, 102: 105}. This way we + can bypass the need to map the entire molecule and only focus on the ROI, + which is significantly faster for large molecules. The rest of the mapping + is then composed of atoms before the ROI (pre-ROI) and after the ROI + (post-ROI). Every time we map the atoms in the ROI, we append the ROI + mapping to the pre-ROI mapping, which will then be used as the pre-ROI + mapping for the next ROI in the list. + + Examples + -------- + + Find the best maximum common substructure mapping between two molecules + with a region of interest defined as a list of residues. + + >>> import BioSimSpace as BSS + >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12]) + + Find the mapping between two molecules with multiple regions of interest + + >>> import BioSimSpace as BSS + >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) + + Find the best maximum common substructure mapping between two molecules, + while forcing the backbone atoms to be matched. + + >>> import BioSimSpace as BSS + >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], force_backbone_match=True) + """ + + # Validate input + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if roi is None: + raise ValueError("residue of interest list is not provided.") + + roi_idx = [] + + _logger.debug(f"Number of mol A atoms: {molecule0.nAtoms()}") + _logger.debug(f"Number of mol B atoms: {molecule1.nAtoms()}") + + # Get the atoms before the ROI. + # This is being done so that when we map the atoms in ROI, we can append + # the ROI mapping to this pre-ROI mapping which will then be used as + # the pre-ROI mapping for the next ROI in the list, i.e. + # pre_roi_mapping = pre_roi_mapping + roi mapping + mapping to next ROI + pre_roi_molecule0 = molecule0.search(f"residue[0:{roi[0]}]") + pre_roi_atom_idx_molecule0 = [a.index() for a in pre_roi_molecule0.atoms()] + + pre_roi_molecule1 = molecule1.search(f"residue[0:{roi[0]}]") + pre_roi_atom_idx_molecule1 = [a.index() for a in pre_roi_molecule1.atoms()] + + pre_roi_mapping = dict(zip(pre_roi_atom_idx_molecule0, pre_roi_atom_idx_molecule1)) + + # Loop over the residues of interest + for i, res_idx in enumerate(roi): + _logger.debug(f"Starting match for ROI id: {res_idx}") + + molecule0_roi = molecule0.getResidues()[res_idx] + _logger.debug("Molecule0 ROI:") + _logger.debug(molecule0_roi) + + molecule1_roi = molecule1.getResidues()[res_idx] + _logger.debug("Molecule1 ROI:") + _logger.debug(molecule1_roi) + + # Don't allow for matching between the same residues + if ( + molecule0_roi.name() == molecule1_roi.name() + and molecule0_roi.nAtoms() == molecule1_roi.nAtoms() + ): + molecule0_atoms = [a.name() for a in molecule0_roi.getAtoms()] + molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] + if molecule0_atoms == molecule1_atoms: + raise ValueError( + """ROI between the two input molecules is the identical, + stopping atom matching""" + ) + + res0_idx = [a.index() for a in molecule0_roi] + _logger.debug(f"res0 indices: {res0_idx}") + res1_idx = [a.index() for a in molecule1_roi] + _logger.debug(f"res1 indices: {res1_idx}") + + # Append the ROI indices to the list + roi_idx.append([res0_idx, res1_idx]) + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + for a in res0_extracted.getAtoms(): + _logger.debug(f"res0 atom: {a}") + for b in res1_extracted.getAtoms(): + _logger.debug(f"res1 atom: {b}") + + # If force_backbone_match is enabled, + # we are going to use the backbone atoms as a prematch + if force_backbone_match: + _logger.debug("Forcing backbone match") + + backbone_res0_idx, backbone_res0_atoms = _get_backbone( + molecule0_roi.toMolecule() + ) + backbone_res1_idx, backbone_res1_atoms = _get_backbone( + molecule1_roi.toMolecule() + ) + + _logger.debug(f"Backbone res0 indices: {backbone_res0_idx}") + _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") + + relative_backbone_mapping = matchAtoms( + backbone_res0_atoms, backbone_res1_atoms + ) + + # Translate the relative mapping to the absolute indices. + # The lookup table contains the relative indices of the atoms + # that have been mapped from one residue to the other, for example + # [0, 2, 4, 5]. We can use these as positional indices to get the + # absolute indices of the atoms from the original residue. i.e. + # if the original residue had atom indices such as: + # [100, 101, 102, 103, 104, 105], we can use the lookup table to + # get [100, 102, 104, 105]. Doing the same for the other residue + # will give us an absolute mapping between the two residues. + res0_lookup_table = list(relative_backbone_mapping.keys()) + absolute_backbone_mapped_atoms_res0 = [ + list(backbone_res0_idx.keys())[i] for i in res0_lookup_table + ] + + res1_lookup_table = list(relative_backbone_mapping.values()) + absolute_backbone_mapped_atoms_res1 = [ + list(backbone_res1_idx.keys())[i] for i in res1_lookup_table + ] + + absolute_backbone_mapping = dict( + zip( + absolute_backbone_mapped_atoms_res0, + absolute_backbone_mapped_atoms_res1, + ) + ) + _logger.debug(f"Absolute backbone mapping: {absolute_backbone_mapping}") + + mapping = matchAtoms( + res0_extracted, res1_extracted, prematch=absolute_backbone_mapping + ) + else: + mapping = matchAtoms(res0_extracted, res1_extracted) + + _logger.debug(f"Mapping: {mapping}") + + # Check how many atoms got mapped from one residue to the other + _logger.debug( + f"Mapped {len(mapping.keys())} out of {len(res0_idx)} atoms for molecule0" + ) + _logger.debug( + f"Mapped {len(mapping.values())} out of {len(res1_idx)} atoms for molecule1" + ) + + # If the number of mapped atoms is not the same as the number of atoms + # in the ROI we need to use that molecule as the reference + # for the lookup table. + # NOTE: We don't really need the conditional statement here that checks + # if all of the atoms in the ROI have been mapped, because + # we are going to translate atoms relative indices to absolute ones + # and that case would be captured by the lookup table. + if len(mapping.keys()) != len(res0_idx): + _logger.debug("Some atoms for molecule0 in the ROI have not been mapped.") + _logger.debug("Using molecule0 as the reference for the lookup table.") + residue_lookup_table = list(mapping.keys()) + absolute_mapped_atoms_res0 = [res0_idx[i] for i in residue_lookup_table] + + # i.e this bit is unnecessary + else: + absolute_mapped_atoms_res0 = res0_idx + + if len(mapping.values()) != len(res1_idx): + _logger.debug("Some atoms for molecule1 in the ROI have not been mapped.") + _logger.debug("Using molecule1 as the reference for the lookup table.") + residue_lookup_table = list(mapping.values()) + absolute_mapped_atoms_res1 = [res1_idx[i] for i in residue_lookup_table] + + else: + absolute_mapped_atoms_res1 = res1_idx + + absolute_roi_mapping = dict( + zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) + ) + + # Check that pre_roi_atom_indices are not part of molecule ROI indices + # NOTE: This could be a costly operation, if pre_roi_atom_indices is + # large. + if any(i in pre_roi_atom_idx_molecule0 for i in res0_idx) or any( + i in pre_roi_atom_idx_molecule1 for i in res1_idx + ): + raise ValueError("Found atoms in pre ROI region that are part of the ROI.") + + # Now we have to think about what to do with the atom indices + # after the mapping as these are not going to be the same + # between the two molecules. + # If we are at the last residue of interest, we don't need to worry + # too much about the after ROI region as this region will be all of the + # molecule atoms after the last residue of interest. + # In the case when we are not at the last residue of interest, + # we need to map the atoms to the next ROI. + + if res_idx != roi[-1]: + + # If the next ROI residue index in the ROI list is next to + # the current ROI index, after_roi atom index list will be empty + # i.e. if we're currently at residue 10 and the next ROI is 11, + # we don't need to map the atoms. + # If we were at residue 10 and the next residue of interest is 12, + # we would need to map the atoms between residues 10 and 12. + if roi[i + 1] - roi[i] == 1: + _logger.debug( + "Next ROI is adjacent to the current ROI, no need to map atoms between the two." + ) + after_roi_atom_idx_molecule0 = [] + after_roi_atom_idx_molecule1 = [] + else: + after_roi_molecule0 = molecule0.search( + f"residue[{res_idx+1}:{roi[i+1]}]" + ) + after_roi_atom_idx_molecule0 = [ + a.index() for a in after_roi_molecule0.atoms() + ] + + after_roi_molecule1 = molecule1.search( + f"residue[{res_idx+1}:{roi[i+1]}]" + ) + after_roi_atom_idx_molecule1 = [ + b.index() for b in after_roi_molecule1.atoms() + ] + + after_roi_mapping = dict( + zip( + after_roi_atom_idx_molecule0, + after_roi_atom_idx_molecule1, + ) + ) + # Append the mappings to the pre_roi_mapping, which will then be + # used as the pre_roi_mapping for the next ROI in the list. + pre_roi_mapping = { + **pre_roi_mapping, + **absolute_roi_mapping, + **after_roi_mapping, + } + else: + # Get all of the remaining atoms after the last ROI + after_roi_molecule0 = molecule0.search(f"residue[{res_idx+1}:]") + after_roi_atom_idx_molecule0 = [ + a.index() for a in after_roi_molecule0.atoms() + ] + + after_roi_molecule1 = molecule1.search(f"residue[{res_idx+1}:]") + after_roi_atom_idx_molecule1 = [ + b.index() for b in after_roi_molecule1.atoms() + ] + + after_roi_mapping = dict( + zip( + after_roi_atom_idx_molecule0, + after_roi_atom_idx_molecule1, + ) + ) + + # Check that after_roi_atom_indices are not part of absolute_roi_mapping + if any(i in after_roi_atom_idx_molecule0 for i in res0_idx): + raise ValueError("Found atoms in after ROI region that are part of the ROI") + + if any(i in after_roi_atom_idx_molecule1 for i in res1_idx): + raise ValueError("Found atoms in after ROI region that are part of the ROI") + + # Print all 3 dictionaries + _logger.debug(f"Pre ROI region mapping: {pre_roi_mapping}") + _logger.debug(f"Absolute ROI mapping: {absolute_roi_mapping}") + _logger.debug(f"After ROI region mapping: {after_roi_mapping}") + + # Combine the dictionaries to get the full mapping + combined_dict = { + **pre_roi_mapping, + **absolute_roi_mapping, + **after_roi_mapping, + } + + # Print the matched atoms in the ROI + for idx0, idx1 in mapping.items(): + _logger.debug( + f"{res0_extracted.getAtoms()[idx0]} <--> {res1_extracted.getAtoms()[idx1]}" + ) + + _logger.debug(f"Full matched atoms: {combined_dict}") + for idx0, idx1 in mapping.items(): + _logger.debug(f"{molecule0.getAtoms()[idx0]} <--> {molecule1.getAtoms()[idx1]}") + + # TODO: Change the return type of roi_idx to a dictionary of lists + return combined_dict, roi_idx + + def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): """ Align atoms in molecule0 to those in molecule1 using the mapping @@ -1335,7 +1739,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, - roi=None, + roi_list=None, property_map0={}, property_map1={}, ): @@ -1372,7 +1776,7 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi : list + roi_list : list The region of interest to merge. Consist of two lists of atom indices. property_map0 : dict @@ -1432,11 +1836,11 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") - if roi is not None: - if not isinstance(roi, list): - raise TypeError("'roi' must be of type 'list'.") + if roi_list is not None: + if not isinstance(roi_list, list): + raise TypeError("'roi_list' must be of type 'list'.") else: - _validate_roi(molecule0, molecule1, roi) + _validate_roi(molecule0, molecule1, roi_list) # The user has passed an atom mapping. if mapping is not None: @@ -1467,7 +1871,7 @@ def merge( allow_ring_breaking=allow_ring_breaking, allow_ring_size_change=allow_ring_size_change, force=force, - roi=roi, + roi_list=roi_list, property_map0=property_map0, property_map1=property_map1, ) @@ -2116,7 +2520,7 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) -def _validate_roi(molecule0, molecule1, roi): +def _validate_roi(molecule0, molecule1, roi_list): """Internal function to validate that a mapping contains key:value pairs of the correct type. @@ -2129,21 +2533,24 @@ def _validate_roi(molecule0, molecule1, roi): molecule1 : :class:`Molecule ` The reference molecule. - roi : list + roi_list : list The region of interest to merge. """ - if len(roi) != 2: - raise ValueError("The length of roi list must be 2.") - if not isinstance(roi[0], list) or not isinstance(roi[1], list): - raise ValueError("The element of roi must be of type list") - for mol_idx, ele in enumerate(roi): - for atom_idx in ele: - if type(atom_idx) is not int: - raise ValueError(f"The element of roi[{mol_idx}] should be of type int") - if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): - raise IndexError( - f"The element of roi[{mol_idx}] should within range of number of atoms" - ) + for roi in roi_list: + if len(roi) != 2: + raise ValueError("The length of roi list must be 2.") + if not isinstance(roi[0], list) or not isinstance(roi[1], list): + raise ValueError("The element of roi must be of type list") + for mol_idx, ele in enumerate(roi): + for atom_idx in ele: + if type(atom_idx) is not int: + raise ValueError( + f"The element of roi[{mol_idx}] should be of type int" + ) + if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): + raise IndexError( + f"The element of roi[{mol_idx}] should within range of number of atoms" + ) def _to_sire_mapping(mapping): diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py index 859662a16..ae969fe0a 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py @@ -32,6 +32,9 @@ from sire.legacy import Mol as _SireMol from sire.legacy import Units as _SireUnits +# This logger is temporary. +from loguru import logger as _logger + from .._Exceptions import IncompatibleError as _IncompatibleError from .._SireWrappers import Molecule as _Molecule @@ -43,7 +46,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, - roi=None, + roi_list=None, property_map0={}, property_map1={}, ): @@ -75,8 +78,8 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi : list - The region of interest to merge. Consist of two lists of atom indices. + roi_list : list + The region of interest to merge. Consist of a multiple nested list lists of atom indices. property_map0 : dict A dictionary that maps "properties" in this molecule to their @@ -156,9 +159,13 @@ def merge( # Get the atom indices from the mapping. idx0 = mapping.keys() idx1 = mapping.values() + _logger.debug(f"Mapping: {mapping}") + _logger.debug(f"idx0: {idx0}") + _logger.debug(f"idx1: {idx1}") # Create the reverse mapping: molecule1 --> molecule0 inv_mapping = {v: k for k, v in mapping.items()} + _logger.debug(f"Inverse mapping: {inv_mapping}") # Generate the mappings from each molecule to the merged molecule mol0_merged_mapping = {} @@ -201,6 +208,7 @@ def merge( if atom.index() not in idx0: atoms0.append(atom) atoms0_idx.append(atom.index()) + _logger.debug(f"Unique atoms in molecule0: {atoms0_idx}") # molecule1 for atom in molecule1.atoms(): @@ -208,10 +216,12 @@ def merge( atoms1.append(atom) atoms1_idx.append(atom.index()) + _logger.debug(f"Unique atoms in molecule1: {atoms1_idx}") + # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") # Only part of the ligand is to be merged - if roi is not None: + if roi_list is not None: if molecule0.nResidues() != molecule1.nResidues(): raise ValueError( "The two molecules need to have the same number of residues" @@ -1013,7 +1023,7 @@ def merge( # Check that the merge hasn't modified the connectivity. # The checking was blocked when merging a protein - if roi is None: + if roi_list is None: # molecule0 for x in range(0, molecule0.nAtoms()): # Convert to an AtomIdx. @@ -1192,135 +1202,138 @@ def merge( # Copy the intrascale from molecule1 into clj_nb_pairs0. # Perform a triangular loop over atoms from molecule1. - if roi is None: + if roi_list is None: iterlen = molecule1.nAtoms() iterrange = list(range(molecule1.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - iterlen = len(roi[1]) - iterrange = roi[1] - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) - - # Map the index to its position in the merged molecule. - idx_map = mol1_merged_mapping[idx] - - for y in range(x + 1, iterlen): - idy = iterrange[y] + for roi in roi_list: + iterlen = len(roi[1]) + iterrange = roi[1] + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy_map = mol1_merged_mapping[idy] + idx_map = mol1_merged_mapping[idx] - conn_type = conn0.connectionType(idx_map, idy_map) - # The atoms aren't bonded. - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # Map the index to its position in the merged molecule. + idy_map = mol1_merged_mapping[idy] - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Now copy in all intrascale values from molecule0 into both # clj_nb_pairs matrices. - if roi is None: + if roi_list is None: iterlen = molecule0.nAtoms() iterrange = list(range(molecule0.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - iterlen = len(roi[0]) - iterrange = roi[0] - - # Perform a triangular loop over atoms from molecule0. - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) + for roi in roi_list: + iterlen = len(roi[0]) + iterrange = roi[0] - # Map the index to its position in the merged molecule. - idx_map = mol0_merged_mapping[idx] - - for y in range(x + 1, iterlen): - idy = iterrange[y] + # Perform a triangular loop over atoms from molecule0. + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy_map = mol0_merged_mapping[idy] + idx_map = mol0_merged_mapping[idx] - conn_type = conn0.connectionType(idx_map, idy_map) - # The atoms aren't bonded. - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Finally, copy the intrascale from molecule1 into clj_nb_pairs1. - if roi is None: + if roi_list is None: iterlen = molecule1.nAtoms() iterrange = list(range(molecule1.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - iterlen = len(roi[1]) - iterrange = roi[1] - - # Perform a triangular loop over atoms from molecule1. - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) - - # Map the index to its position in the merged molecule. - idx = mol1_merged_mapping[idx] + for roi in roi_list: + iterlen = len(roi[1]) + iterrange = roi[1] - for y in range(x + 1, iterlen): - idy = iterrange[y] + # Perform a triangular loop over atoms from molecule1. + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy = mol1_merged_mapping[idy] + idx = mol1_merged_mapping[idx] - conn_type = conn1.connectionType(idx, idy) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + # Map the index to its position in the merged molecule. + idy = mol1_merged_mapping[idy] - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + conn_type = conn1.connectionType(idx, idy) - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) # Store the two molecular components. edit_mol.setProperty("molecule0", molecule0) From 3570dcb5205ded2fadf33fc4425c8c66dc23241d Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Thu, 22 Feb 2024 13:43:06 +0000 Subject: [PATCH 02/32] Fix typos --- .../Sandpit/Exscientia/Align/_align.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index e915aa4cc..b2eca6267 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -56,15 +56,15 @@ from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS - # no idea why, but RD_logger cannot be imported from rdkit, only RDLogger can - from rdkit import RDLogger as _RD_logger + # No idea why, but RD_logger cannot be imported from rdkit, only RDLogger can + from rdkit import RDLogger as _RDLogger # Disable RDKit warnings. - _RD_logger.DisableLog("rdApp.*") + _RDLogger.DisableLog("rdApp.*") else: _Chem = _rdkit _rdFMCS = _rdkit - _RD_logger = _rdkit + _RDLogger = _rdkit from sire.legacy import Base as _SireBase from sire.legacy import Maths as _SireMaths @@ -1100,7 +1100,7 @@ def _get_backbone(molecule): atom.index() for atom in relative_backbone_atoms.getAtoms() ] atom_idx = dict(zip(absolute_backbone_atom_indices, relative_backbone_atom_indices)) - return (atom_idx, relative_backbone_atoms) + return atom_idx, relative_backbone_atoms # NOTE: This function is currently experimental and has not gone through @@ -1162,8 +1162,8 @@ def roiMatch( the absolute mapping could be {100: 103, 101: 102, 102: 105}. This way we can bypass the need to map the entire molecule and only focus on the ROI, which is significantly faster for large molecules. The rest of the mapping - is then composed of atoms before the ROI (pre-ROI) and after the ROI - (post-ROI). Every time we map the atoms in the ROI, we append the ROI + is then composed of atoms before the ROI (pre-ROI) and after the ROI. + Every time we map the atoms in the ROI, we append the ROI mapping to the pre-ROI mapping, which will then be used as the pre-ROI mapping for the next ROI in the list. @@ -1241,7 +1241,7 @@ def roiMatch( molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] if molecule0_atoms == molecule1_atoms: raise ValueError( - """ROI between the two input molecules is the identical, + """ROI between the two input molecules is identical, stopping atom matching""" ) @@ -1370,7 +1370,6 @@ def roiMatch( # molecule atoms after the last residue of interest. # In the case when we are not at the last residue of interest, # we need to map the atoms to the next ROI. - if res_idx != roi[-1]: # If the next ROI residue index in the ROI list is next to From 712758ed773e2809878d22daf5397ff5a061d842 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 5 Mar 2024 18:29:16 +0000 Subject: [PATCH 03/32] Updated align code to fix the bug with identical atom matching --- python/BioSimSpace/Sandpit/Exscientia/Align/_align.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index b2eca6267..a098bb447 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -1240,9 +1240,8 @@ def roiMatch( molecule0_atoms = [a.name() for a in molecule0_roi.getAtoms()] molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] if molecule0_atoms == molecule1_atoms: - raise ValueError( - """ROI between the two input molecules is identical, - stopping atom matching""" + _logger.warning( + f"Residue {res_idx} in molecule0 and molecule1 have identical atomtypes." ) res0_idx = [a.index() for a in molecule0_roi] From cd24aeaa3f18e8eecb2b883c810c5058fec3e79c Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Sun, 17 Mar 2024 12:40:36 +0000 Subject: [PATCH 04/32] Added basic support for mapping with kartograf. This functionality is needed in special cases where default rdKit MCS algorithm fails to provide suitable mappings. --- .../Sandpit/Exscientia/Align/_align.py | 101 +++++++++++++++++- 1 file changed, 99 insertions(+), 2 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index a098bb447..c7c783bd9 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -1103,6 +1103,81 @@ def _get_backbone(molecule): return atom_idx, relative_backbone_atoms +def _kartograf_map(molecule0, molecule1, kartograf_kwargs): + """ + A wrapper function for kartograf mapping algorithm. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + kartograf_kwargs : dict + A dictionary of keyword arguments to be passed to kartograf. + + Returns + ------- + + kartograf_mapping : gufe.mapping.ligandatommapping.LigandAtomMapping + The kartograf mapping object. + + """ + # try to import kartograf + try: + import rdkit.Chem as _Chem + from kartograf.atom_aligner import align_mol_shape as _align_mol_shape + from kartograf.atom_mapping_scorer import ( + MappingRMSDScorer as _MappingRMSDScorer, + ) + from kartograf import ( + KartografAtomMapper, + SmallMoleculeComponent as _SmallMoleculeComponent, + ) + except ImportError: + raise ImportError( + "Kartograf is not installed. Please install kartograf to use this function." + ) + + # Validate input + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + from ..Convert import toRDKit as _toRDKit + + rdkit_mol0 = _toRDKit(molecule0) + rdkit_mol1 = _toRDKit(molecule1) + rdkit_mols = [rdkit_mol0, rdkit_mol1] + + # build small molecule components + mol0, mol1 = [_SmallMoleculeComponent.from_rdkit(m) for m in rdkit_mols] + + # align molecules first + a_mol1 = _align_mol_shape(mol1, ref_mol=mol0) + + # build Kartograf Atom Mapper + mapper = KartografAtomMapper(**kartograf_kwargs) + + # get mapping + kartograf_mapping = next(mapper.suggest_mappings(mol0, a_mol1)) + + # score mapping + rmsd_scorer = _MappingRMSDScorer() + score = rmsd_scorer(mapping=kartograf_mapping) + _logger.debug(f"RMSD score: {score:.2f}") + + return kartograf_mapping + + # NOTE: This function is currently experimental and has not gone through # rigorous validation. Proceed with caution. def roiMatch( @@ -1110,6 +1185,8 @@ def roiMatch( molecule1, roi, force_backbone_match=False, + use_kartograf=False, + kartograf_kwargs={}, ): """ Matching of two molecules based on a region of interest (ROI). @@ -1135,6 +1212,13 @@ def roiMatch( is useful for ensuring a more stable match between the two molecules. This is set to False by default. + use_kartograf : bool + If set to True, will use the kartograf algorithm to match the + molecules. This is set to False by default. + + kartograf_kwargs : dict + A dictionary of keyword arguments to be passed to kartograf. + Returns ------- @@ -1308,10 +1392,23 @@ def roiMatch( _logger.debug(f"Absolute backbone mapping: {absolute_backbone_mapping}") mapping = matchAtoms( - res0_extracted, res1_extracted, prematch=absolute_backbone_mapping + res0_extracted, + res1_extracted, + prematch=absolute_backbone_mapping, + complete_rings_only=False, ) else: - mapping = matchAtoms(res0_extracted, res1_extracted) + if use_kartograf: + _logger.debug("Using kartograf to map the ROI.") + kartograf_mapping = _kartograf_map( + res0_extracted, res1_extracted, kartograf_kwargs + ) + mapping = kartograf_mapping.componentA_to_componentB + else: + _logger.debug("Using rdKit MCS to map the ROI.") + mapping = matchAtoms( + res0_extracted, res1_extracted, complete_rings_only=False + ) _logger.debug(f"Mapping: {mapping}") From b0e3b2145e6115878d905717c48b194d038d7326 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 18 Mar 2024 09:44:24 +0000 Subject: [PATCH 05/32] Added additional options to the roiMatch function which allows for more fine-grained matching for protein residues --- .../Sandpit/Exscientia/Align/_align.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index c7c783bd9..9142da4a6 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -725,7 +725,9 @@ def matchAtoms( return_scores=False, prematch={}, timeout=5 * _Units.Time.second, + atomCompare=_rdFMCS.AtomCompare.CompareAny, complete_rings_only=True, + ring_matches_ring_only=True, prune_perturbed_constraints=None, prune_crossing_constraints=None, max_scoring_matches=1000, @@ -939,10 +941,10 @@ def matchAtoms( # Generate the MCS match. mcs = _rdFMCS.FindMCS( mols, - atomCompare=_rdFMCS.AtomCompare.CompareAny, + atomCompare=atomCompare, bondCompare=_rdFMCS.BondCompare.CompareAny, completeRingsOnly=complete_rings_only, - ringMatchesRingOnly=True, + ringMatchesRingOnly=ring_matches_ring_only, matchChiralTag=False, matchValences=False, maximizeBonds=False, @@ -1185,6 +1187,9 @@ def roiMatch( molecule1, roi, force_backbone_match=False, + ring_matches_ring_only=True, + complete_rings_only=True, + atomCompare=_rdFMCS.AtomCompare.CompareAny, use_kartograf=False, kartograf_kwargs={}, ): @@ -1212,6 +1217,10 @@ def roiMatch( is useful for ensuring a more stable match between the two molecules. This is set to False by default. + ring_matches_ring_only : bool + Whether ring bonds can only match ring bonds. + This is set to True by default. + use_kartograf : bool If set to True, will use the kartograf algorithm to match the molecules. This is set to False by default. @@ -1361,7 +1370,7 @@ def roiMatch( _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") relative_backbone_mapping = matchAtoms( - backbone_res0_atoms, backbone_res1_atoms + backbone_res0_atoms, backbone_res1_atoms, scoring_function="rmsd" ) # Translate the relative mapping to the absolute indices. @@ -1395,7 +1404,9 @@ def roiMatch( res0_extracted, res1_extracted, prematch=absolute_backbone_mapping, - complete_rings_only=False, + complete_rings_only=complete_rings_only, + scoring_function="rmsd", + ring_matches_ring_only=ring_matches_ring_only, ) else: if use_kartograf: @@ -1407,7 +1418,11 @@ def roiMatch( else: _logger.debug("Using rdKit MCS to map the ROI.") mapping = matchAtoms( - res0_extracted, res1_extracted, complete_rings_only=False + res0_extracted, + res1_extracted, + complete_rings_only=complete_rings_only, + ring_matches_ring_only=ring_matches_ring_only, + atomCompare=atomCompare, ) _logger.debug(f"Mapping: {mapping}") From 616e21dbdd0344f7c11866561bf134cd1acfb86f Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 29 Apr 2024 13:12:53 +0100 Subject: [PATCH 06/32] Added code for performing alignment between two proteins. This code works by breaking the two proteins into per-residue-parts and aligning each residue individually. The coordinates of the aligned residues are then used to update the coordinates of the input protein to be aligned. --- .../Sandpit/Exscientia/Align/_align.py | 165 +++++++++++++++++- 1 file changed, 164 insertions(+), 1 deletion(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index 9142da4a6..a10abd7c2 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -30,6 +30,7 @@ "viewMapping", "rmsdAlign", "roiMatch", + "perResidueRmsdAlign", "flexAlign", "merge", ] @@ -1405,7 +1406,7 @@ def roiMatch( res1_extracted, prematch=absolute_backbone_mapping, complete_rings_only=complete_rings_only, - scoring_function="rmsd", + scoring_function="RMSD", ring_matches_ring_only=ring_matches_ring_only, ) else: @@ -1683,6 +1684,168 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map return _Molecule(mol0) +def perResidueRmsdAlign( + molecule0, molecule1, mapping=None, property_map0={}, property_map1={} +): + """ + Align atoms in molecule0 to those in molecule1 using the mapping + between matched atom indices. The molecule is aligned using rigid-body + translation and rotations, with a root mean squared displacement (RMSD) + fit to find the optimal translation vector (as opposed to merely taking + the difference of centroids). This function is specifically designed to + be used for aligning protein residues. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule to align. + + molecule1 : :class:`Molecule ` + The reference molecule. + + mapping : dict + A dictionary mapping atoms in molecule0 to those in molecule1. + + property_map0 : dict + A dictionary that maps "properties" in molecule0 to their user + defined values. This allows the user to refer to properties + with their own naming scheme, e.g. { "charge" : "my-charge" } + + property_map1 : dict + A dictionary that maps "properties" in molecule1 to their user + defined values. + + Returns + ------- + + molecule : :class:`Molecule ` + The aligned molecule. + + Examples + -------- + + Align molecule0 to molecule1 based on a precomputed mapping. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.perResideRmsdAlign(molecule0, molecule1, mapping) + + """ + + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(property_map0, dict): + raise TypeError("'property_map0' must be of type 'dict'") + + if not isinstance(property_map1, dict): + raise TypeError("'property_map1' must be of type 'dict'") + + # The user has passed an atom mapping. + if mapping is not None: + if not isinstance(mapping, dict): + raise TypeError("'mapping' must be of type 'dict'.") + else: + _validate_mapping(molecule0, molecule1, mapping, "mapping") + + absolute_residue_mapping = {} + + # invert the mapping + # NOTE: This is not ideal as we have no way of detecting whether the mapping + # was inverted before or not. This could lead to incorrect mappings. + mapping = {v: k for k, v in mapping.items()} + for res in molecule1.getResidues(): + + # Get the mapping for the current residue using its atom indices + # i.e. {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5} + _logger.debug(f"Residue {res.index()}") + _logger.debug(f"Residue atoms: {res.getAtoms()}") + residue_mapping = {a.index(): mapping[a.index()] for a in res.getAtoms()} + + # update the absolute mapping dictionary to contain the mapping for each residue + # i.e. {0: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}...} + absolute_residue_mapping[res.index()] = residue_mapping + + # Extract the Sire molecule from each BioSimSpace molecule. + mol0 = molecule0._getSireObject() + mol1 = molecule1._getSireObject() + + mol0_residues = mol0.residues() + mol1_residues = mol1.residues() + + # Update the mapping to contain the residue indices + # i.e. {100: 103, 101: 102, 102: 105} -> {10: {100: 103, 101: 102, 102: 105}} + + # Align each residue from molecule0 to molecule1 individually + mol0_updated_residue_coords = [] + for i, res in enumerate(mol0_residues): + _logger.debug( + f"Aligning residue {res.index()} to reference residue {mol1_residues[i].index()}" + ) + res0 = res.extract() + res1 = mol1_residues[i].extract() + + # Get the mapping for the current residue + residue_mapping = absolute_residue_mapping[i] + + # We will now offset the mapping with the first atom index of the residue + # ie. {100: 103, 101: 102, 102: 105} -> {0: 3, 1: 2, 2: 5} + relative_keys = list(residue_mapping.keys()) + relative_key = relative_keys[0] + relative_values = list(residue_mapping.values()) + relative_value = relative_values[0] + + relative_residue_mapping = { + k - relative_key: v - relative_value for k, v in residue_mapping.items() + } + + # Perform the alignment, res0 to res1. + # Convert the mapping to AtomIdx key:value pairs. + sire_mapping = _to_sire_mapping(mapping) + _logger.debug(f"Residue coordinates before alignment: {res0.coords()}") + try: + res0 = ( + res0.move() + .align(res1, _SireMol.AtomResultMatcher(sire_mapping)) + .molecule() + ) + _logger.debug(f"Residue coordinates after alignment: {res0.coords()}") + mol0_updated_residue_coords.append(res0.property("coordinates")) + + except Exception as e: + msg = f"Failed to align residues based on mapping: {mapping}" + if "Could not calculate the single value decomposition" in str(e): + msg += ". Try minimising your molecular coordinates prior to alignment." + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None + + # Retrieve the atoms coordiantes from the updated residues + mol0_updated_coords = [] + for res in mol0_updated_residue_coords: + for atom_coord in res: + mol0_updated_coords.append(atom_coord) + + # Create a cursor for updating the coordinates property + cursor = mol0.cursor() + for i, atom in enumerate(cursor.atoms()): + atom["coordinates"] = mol0_updated_coords[i] + + # Commit the update back to the original molecule + mol0 = cursor.commit() + + # Return the aligned molecule. + return _Molecule(mol0) + + def flexAlign( molecule0, molecule1, From e5740a53f32806027eaa025fa1362b7c2e96f444 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 29 Apr 2024 13:35:58 +0100 Subject: [PATCH 07/32] Updated the per-residue alignment code to work correctly with mapping --- .../Sandpit/Exscientia/Align/_align.py | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index a10abd7c2..4922f95d7 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -1757,17 +1757,20 @@ def perResidueRmsdAlign( absolute_residue_mapping = {} - # invert the mapping - # NOTE: This is not ideal as we have no way of detecting whether the mapping - # was inverted before or not. This could lead to incorrect mappings. - mapping = {v: k for k, v in mapping.items()} for res in molecule1.getResidues(): # Get the mapping for the current residue using its atom indices # i.e. {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5} - _logger.debug(f"Residue {res.index()}") - _logger.debug(f"Residue atoms: {res.getAtoms()}") - residue_mapping = {a.index(): mapping[a.index()] for a in res.getAtoms()} + # _logger.debug(f"Residue {res.index()}") + # _logger.debug(f"Residue atoms: {res.getAtoms()}") + + for atom in res.getAtoms(): + # try to get the mapping for the atom + value = mapping.get(atom.index()) + if value is not None: + residue_mapping = {atom.index(): value} + + # residue_mapping = {a.index(): mapping[a.index()] for a in res.getAtoms()} # update the absolute mapping dictionary to contain the mapping for each residue # i.e. {0: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}...} @@ -1786,9 +1789,7 @@ def perResidueRmsdAlign( # Align each residue from molecule0 to molecule1 individually mol0_updated_residue_coords = [] for i, res in enumerate(mol0_residues): - _logger.debug( - f"Aligning residue {res.index()} to reference residue {mol1_residues[i].index()}" - ) + # _logger.debug(f"Aligning residue {res.index()} to reference residue {mol1_residues[i].index()}") res0 = res.extract() res1 = mol1_residues[i].extract() @@ -1809,14 +1810,14 @@ def perResidueRmsdAlign( # Perform the alignment, res0 to res1. # Convert the mapping to AtomIdx key:value pairs. sire_mapping = _to_sire_mapping(mapping) - _logger.debug(f"Residue coordinates before alignment: {res0.coords()}") + # _logger.debug(f"Residue coordinates before alignment: {res0.coords()}") try: res0 = ( res0.move() .align(res1, _SireMol.AtomResultMatcher(sire_mapping)) .molecule() ) - _logger.debug(f"Residue coordinates after alignment: {res0.coords()}") + # _logger.debug(f"Residue coordinates after alignment: {res0.coords()}") mol0_updated_residue_coords.append(res0.property("coordinates")) except Exception as e: From 89220db6783cf6329cfc293badb3b2513f3ec8cb Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 29 Apr 2024 13:41:35 +0100 Subject: [PATCH 08/32] Reduced amount of logging calls in matching and align functions --- .../Sandpit/Exscientia/Align/_align.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index 4922f95d7..01854882e 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -1339,9 +1339,9 @@ def roiMatch( ) res0_idx = [a.index() for a in molecule0_roi] - _logger.debug(f"res0 indices: {res0_idx}") + # _logger.debug(f"res0 indices: {res0_idx}") res1_idx = [a.index() for a in molecule1_roi] - _logger.debug(f"res1 indices: {res1_idx}") + # _logger.debug(f"res1 indices: {res1_idx}") # Append the ROI indices to the list roi_idx.append([res0_idx, res1_idx]) @@ -1350,10 +1350,10 @@ def roiMatch( res0_extracted = molecule0.extract(res0_idx) res1_extracted = molecule1.extract(res1_idx) - for a in res0_extracted.getAtoms(): - _logger.debug(f"res0 atom: {a}") - for b in res1_extracted.getAtoms(): - _logger.debug(f"res1 atom: {b}") + # for a in res0_extracted.getAtoms(): + # _logger.debug(f"res0 atom: {a}") + # for b in res1_extracted.getAtoms(): + # _logger.debug(f"res1 atom: {b}") # If force_backbone_match is enabled, # we are going to use the backbone atoms as a prematch @@ -1367,8 +1367,8 @@ def roiMatch( molecule1_roi.toMolecule() ) - _logger.debug(f"Backbone res0 indices: {backbone_res0_idx}") - _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") + # _logger.debug(f"Backbone res0 indices: {backbone_res0_idx}") + # _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") relative_backbone_mapping = matchAtoms( backbone_res0_atoms, backbone_res1_atoms, scoring_function="rmsd" @@ -1563,14 +1563,14 @@ def roiMatch( } # Print the matched atoms in the ROI - for idx0, idx1 in mapping.items(): - _logger.debug( - f"{res0_extracted.getAtoms()[idx0]} <--> {res1_extracted.getAtoms()[idx1]}" - ) - - _logger.debug(f"Full matched atoms: {combined_dict}") - for idx0, idx1 in mapping.items(): - _logger.debug(f"{molecule0.getAtoms()[idx0]} <--> {molecule1.getAtoms()[idx1]}") + # for idx0, idx1 in mapping.items(): + # _logger.debug( + # f"{res0_extracted.getAtoms()[idx0]} <--> {res1_extracted.getAtoms()[idx1]}" + # ) + + # _logger.debug(f"Full matched atoms: {combined_dict}") + # for idx0, idx1 in mapping.items(): + # _logger.debug(f"{molecule0.getAtoms()[idx0]} <--> {molecule1.getAtoms()[idx1]}") # TODO: Change the return type of roi_idx to a dictionary of lists return combined_dict, roi_idx From b7b8513d0b9ad50458682563b8115e667090d3b4 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 29 Apr 2024 17:18:59 +0100 Subject: [PATCH 09/32] Added a function for flexibly aligning ROI residues between two proteins --- .../Sandpit/Exscientia/Align/_align.py | 135 +++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index 01854882e..8c41a8c9b 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -32,6 +32,7 @@ "roiMatch", "perResidueRmsdAlign", "flexAlign", + "roiFlexAlign", "merge", ] @@ -1326,7 +1327,9 @@ def roiMatch( _logger.debug("Molecule1 ROI:") _logger.debug(molecule1_roi) - # Don't allow for matching between the same residues + # Warn if matching between the same residues, in a case where we are + # transforming from one enantiomer to another, the atomtypes will + # be the same and trigger this warning. if ( molecule0_roi.name() == molecule1_roi.name() and molecule0_roi.nAtoms() == molecule1_roi.nAtoms() @@ -2006,6 +2009,136 @@ def flexAlign( return _Molecule(molecule0) +def roiFlexAlign( + molecule0, + molecule1, + roi=None, + mapping=None, + fkcombu_exe=None, + property_map0={}, + property_map1={}, +): + """ + Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 + using BioSimSpace.Align.flexAlign(). + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule to align. + + molecule1 : :class:`Molecule ` + The reference molecule. + + mapping : dict + A dictionary mapping atoms in molecule0 to those in molecule1. + + fkcombu_exe : str + Path to the fkcombu executable. If None is passed, then BioSimSpace + will attempt to find fkcombu by searching your PATH. + + property_map0 : dict + A dictionary that maps "properties" in molecule0 to their user + defined values. This allows the user to refer to properties + with their own naming scheme, e.g. { "charge" : "my-charge" } + + property_map1 : dict + A dictionary that maps "properties" in molecule1 to their user + defined values. + + Returns + ------- + + molecule : :class:`Molecule ` + The aligned molecule. + + Examples + -------- + + Align molecule0 to molecule1 based on a precomputed mapping. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping) + + Align molecule0 to molecule1. Since no mapping is passed one will be + autogenerated using :class:`matchAtoms ` + with default options. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) + """ + + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + if roi is None: + raise ValueError("No region of interest (ROI) has been provided.") + else: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + + # The user has passed an atom mapping. + if mapping is not None: + if not isinstance(mapping, dict): + raise TypeError("'mapping' must be of type 'dict'.") + else: + _validate_mapping(molecule0, molecule1, mapping, "mapping") + + for roi_idx in roi: + res0 = molecule0.getResidues()[roi_idx] + res1 = molecule1.getResidues()[roi_idx] + + if res0.name() == res1.name() and res0.nAtoms() == res1.nAtoms(): + res0_atoms = [a.name() for a in res0.getAtoms()] + res1_atoms = [a.name() for a in res0.getAtoms()] + if res0_atoms == res1_atoms: + _logger.warning( + f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." + ) + + res0_idx = [a.index() for a in res0.getAtoms()] + res1_idx = [a.index() for a in res1.getAtoms()] + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + res0_aligned = flexAlign(molecule0=res0_extracted, molecule1=res1_extracted) + + # Now update molecule0 with the aligned residue coordinates + mol0 = molecule0._getSireObject() + res0_aligned_coords = res0_aligned._getSireObject().property("coordinates") + + # A list to store the updated coordinates for molecule0 + mol0_coords = [] + for i, res in enumerate(mol0.residues()): + if i == roi_idx: + mol0_coords.append(res0_aligned_coords) + else: + mol0_coords.append(res.property("coordinates")) + # Flatten the list + mol0_coords = [item for sublist in mol0_coords for item in sublist] + + # Create a cursor for updating the coordinates property + c = mol0.cursor() + for i, atom in enumerate(c.atoms()): + atom["coordinates"] = mol0_coords[i] + mol0 = c.commit() + + # Convert the Sire molecule back to a BioSimSpace molecule so we can + # loop over it again if needed + molecule0 = _Molecule(mol0) + + return molecule0 + + def merge( molecule0, molecule1, From e196bc951aa527a2dbab336d22f8fb6ff507a181 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Fri, 17 May 2024 13:18:35 +0100 Subject: [PATCH 10/32] Move Exscientia sandpit merge and align code outside of the sandpit --- python/BioSimSpace/Align/_align.py | 1025 ++++++++++++++++- python/BioSimSpace/Align/_merge.py | 673 ++++++----- .../Sandpit/Exscientia/Align/_align.py | 860 +------------- .../Sandpit/Exscientia/Align/_merge.py | 205 ++-- 4 files changed, 1548 insertions(+), 1215 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index bf4551787..f022f88d4 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -29,7 +29,10 @@ "matchAtoms", "viewMapping", "rmsdAlign", + "roiMatch", + "perResidueRmsdAlign", "flexAlign", + "roiFlexAlign", "merge", ] @@ -38,6 +41,9 @@ import subprocess as _subprocess import sys as _sys +# This logger is temporary. +from loguru import logger as _logger + from .._Utils import _try_import, _have_imported, _assert_imported import warnings as _warnings @@ -51,6 +57,8 @@ if _have_imported(_rdkit): from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS + + # No idea why, but RD_logger cannot be imported from rdkit, only RDLogger can from rdkit import RDLogger as _RDLogger # Disable RDKit warnings. @@ -594,7 +602,7 @@ def generateNetwork( rdmols.append(rdmol) except Exception as e: - msg = "RDKit was unable to load molecule!" + msg = "Unable to load molecule into RDKit!" if _isVerbose(): msg += ": " + getattr(e, "message", repr(e)) raise _AlignmentError(msg) from e @@ -713,12 +721,17 @@ def generateNetwork( def matchAtoms( molecule0, molecule1, + engine="SOMD", scoring_function="rmsd_align", matches=1, return_scores=False, prematch={}, timeout=5 * _Units.Time.second, + atomCompare=_rdFMCS.AtomCompare.CompareAny, complete_rings_only=True, + ring_matches_ring_only=True, + prune_perturbed_constraints=None, + prune_crossing_constraints=None, max_scoring_matches=1000, property_map0={}, property_map1={}, @@ -740,6 +753,10 @@ def matchAtoms( molecule1 : :class:`Molecule ` The reference molecule. + engine : str + The engine with which the simulations will be run. This is used for + setting some reasonable defaults for the other settings. + scoring_function : str The scoring function used to match atoms. Available options are: - "rmsd" @@ -751,7 +768,7 @@ def matchAtoms( computing the above RMSD score. - "rmsd_flex_align" Flexibly align molecule0 to molecule1 based on the mapping - before computing the above RMSD score. (based on the + before computing the above RMSD score. (Requires the 'fkcombu'. package: https://pdbj.org/kcombu) matches : int @@ -771,6 +788,16 @@ def matchAtoms( option is only relevant to MCS performed using RDKit and will be ignored when falling back on Sire. + prune_perturbed_constraints : bool + Whether to remove hydrogen atoms that are perturbed to heavy atoms + from the mapping. This is True for AMBER by default and False for + all other engines. + + prune_crossing_constraints : bool + Whether to remove atoms from the mapping such that there are no + constraints between dummy and non-dummy atoms. This is True for + AMBER by default and False for all other engines. + max_scoring_matches : int The maximum number of matching MCS substructures to consider when computing mapping scores. Consider reducing this if you find the @@ -890,6 +917,13 @@ def matchAtoms( if not isinstance(property_map1, dict): raise TypeError("'property_map1' must be of type 'dict'") + # Set some defaults. + is_amber = engine.upper() == "AMBER" + if prune_perturbed_constraints is None: + prune_perturbed_constraints = is_amber + if prune_crossing_constraints is None: + prune_crossing_constraints = is_amber + # Extract the Sire molecule from each BioSimSpace molecule. mol0 = molecule0._getSireObject() mol1 = molecule1._getSireObject() @@ -897,25 +931,22 @@ def matchAtoms( # Convert the timeout to seconds and take the value as an integer. timeout = int(timeout.seconds().value()) - # Create the working directory. - work_dir = _Utils.WorkDir() - # Use RDKkit to find the maximum common substructure. try: # Convert the molecules to RDKit format. mols = [ - _Convert.toRDKit(mol0, property_map=property_map0), - _Convert.toRDKit(mol1, property_map=property_map1), + _Convert.toRDKit(molecule0, property_map=property_map0), + _Convert.toRDKit(molecule1, property_map=property_map1), ] # Generate the MCS match. mcs = _rdFMCS.FindMCS( mols, - atomCompare=_rdFMCS.AtomCompare.CompareAny, + atomCompare=atomCompare, bondCompare=_rdFMCS.BondCompare.CompareAny, completeRingsOnly=complete_rings_only, - ringMatchesRingOnly=True, + ringMatchesRingOnly=ring_matches_ring_only, matchChiralTag=False, matchValences=False, maximizeBonds=False, @@ -1030,6 +1061,514 @@ def matchAtoms( return mappings[0:matches] +# NOTE: This function is currently experimental and has not gone through +# rigorous validation. Proceed with caution. +def _get_backbone(molecule): + """ + Extract the backbone atoms from a molecule. + Backbone atoms are defined as N, CA, C atoms in a residue. + This function is not intended to be applied to ligands. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule from which to extract the backbone atoms. + + Returns + ------- + + atom_idx : dict + A dictionary of the absolute and relative indices of the backbone atoms. + + relative_backbone_atoms : list + A list of the extracted backbone atoms. + """ + absolute_backbone_atom_indices = [] + for atom in molecule.getAtoms(): + if atom.name() in ["N", "CA", "C"]: + absolute_backbone_atom_indices.append(atom.index()) + + relative_backbone_atoms = molecule.extract(absolute_backbone_atom_indices) + relative_backbone_atom_indices = [ + atom.index() for atom in relative_backbone_atoms.getAtoms() + ] + atom_idx = dict(zip(absolute_backbone_atom_indices, relative_backbone_atom_indices)) + return atom_idx, relative_backbone_atoms + + +def _kartograf_map(molecule0, molecule1, kartograf_kwargs): + """ + A wrapper function for kartograf mapping algorithm. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + kartograf_kwargs : dict + A dictionary of keyword arguments to be passed to kartograf. + + Returns + ------- + + kartograf_mapping : gufe.mapping.ligandatommapping.LigandAtomMapping + The kartograf mapping object. + + """ + # try to import kartograf + try: + import rdkit.Chem as _Chem + from kartograf.atom_aligner import align_mol_shape as _align_mol_shape + from kartograf.atom_mapping_scorer import ( + MappingRMSDScorer as _MappingRMSDScorer, + ) + from kartograf import ( + KartografAtomMapper, + SmallMoleculeComponent as _SmallMoleculeComponent, + ) + except ImportError: + raise ImportError( + "Kartograf is not installed. Please install kartograf to use this function." + ) + + # Validate input + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + from ..Convert import toRDKit as _toRDKit + + rdkit_mol0 = _toRDKit(molecule0) + rdkit_mol1 = _toRDKit(molecule1) + rdkit_mols = [rdkit_mol0, rdkit_mol1] + + # build small molecule components + mol0, mol1 = [_SmallMoleculeComponent.from_rdkit(m) for m in rdkit_mols] + + # align molecules first + a_mol1 = _align_mol_shape(mol1, ref_mol=mol0) + + # build Kartograf Atom Mapper + mapper = KartografAtomMapper(**kartograf_kwargs) + + # get mapping + kartograf_mapping = next(mapper.suggest_mappings(mol0, a_mol1)) + + # score mapping + rmsd_scorer = _MappingRMSDScorer() + score = rmsd_scorer(mapping=kartograf_mapping) + _logger.debug(f"RMSD score: {score:.2f}") + + return kartograf_mapping + + +# NOTE: This function is currently experimental and has not gone through +# rigorous validation. Proceed with caution. +def roiMatch( + molecule0, + molecule1, + roi, + force_backbone_match=False, + ring_matches_ring_only=True, + complete_rings_only=True, + atomCompare=_rdFMCS.AtomCompare.CompareAny, + use_kartograf=False, + kartograf_kwargs={}, +): + """ + Matching of two molecules based on a region of interest (ROI). + The ROI is defined as a list of residues in the molecule/protein. + The function will attempt to match the ROI in the two molecules and + return the mapping between the two molecules. Multiple ROIs can be + defined by providing a list of residues. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + roi : list + A list of regions/residues of interest in the molecule/protein. + + force_backbone_match : bool + If set to True, will force the backbone atoms to be matched which + is useful for ensuring a more stable match between the two molecules. + This is set to False by default. + + ring_matches_ring_only : bool + Whether ring bonds can only match ring bonds. + This is set to True by default. + + use_kartograf : bool + If set to True, will use the kartograf algorithm to match the + molecules. This is set to False by default. + + kartograf_kwargs : dict + A dictionary of keyword arguments to be passed to kartograf. + + Returns + ------- + + mapping : dict + A dictionary of the mapping between the two molecules. + + roi_idx : list + A list of the indices of the atoms in the ROI. This can be multiple + nested lists if the ROI is defined as a list of residues. + TODO: change the return type of roi_idx to a dictionary of lists + + Notes + ----- + + The key assumption of this function is that the two molecules are + structurally identical except for the region of interest (ROI). The ROI + could be a point mutation, or a residue that has been covalently modified. + The function will attempt to match the atoms in the ROI based on the + maximum common substructure (MCS) algorithm. First, the ROI is extracted + from the two molecules and then the atoms in the ROI are matched using + BioSimSpace.Align.matchAtoms function. The function will return the + mapping between the two molecules. This "relative" mapping will then be + used to map the atoms in the ROI to the "absolute" indices in the molecule. + So for example the relative mapping could be {0: 3, 1: 2, 2: 5} and + the absolute mapping could be {100: 103, 101: 102, 102: 105}. This way we + can bypass the need to map the entire molecule and only focus on the ROI, + which is significantly faster for large molecules. The rest of the mapping + is then composed of atoms before the ROI (pre-ROI) and after the ROI. + Every time we map the atoms in the ROI, we append the ROI + mapping to the pre-ROI mapping, which will then be used as the pre-ROI + mapping for the next ROI in the list. + + Examples + -------- + + Find the best maximum common substructure mapping between two molecules + with a region of interest defined as a list of residues. + + >>> import BioSimSpace as BSS + >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12]) + + Find the mapping between two molecules with multiple regions of interest + + >>> import BioSimSpace as BSS + >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) + + Find the best maximum common substructure mapping between two molecules, + while forcing the backbone atoms to be matched. + + >>> import BioSimSpace as BSS + >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], force_backbone_match=True) + """ + + # Validate input + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if roi is None: + raise ValueError("residue of interest list is not provided.") + + roi_idx = [] + + _logger.debug(f"Number of mol A atoms: {molecule0.nAtoms()}") + _logger.debug(f"Number of mol B atoms: {molecule1.nAtoms()}") + + # Get the atoms before the ROI. + # This is being done so that when we map the atoms in ROI, we can append + # the ROI mapping to this pre-ROI mapping which will then be used as + # the pre-ROI mapping for the next ROI in the list, i.e. + # pre_roi_mapping = pre_roi_mapping + roi mapping + mapping to next ROI + pre_roi_molecule0 = molecule0.search(f"residue[0:{roi[0]}]") + pre_roi_atom_idx_molecule0 = [a.index() for a in pre_roi_molecule0.atoms()] + + pre_roi_molecule1 = molecule1.search(f"residue[0:{roi[0]}]") + pre_roi_atom_idx_molecule1 = [a.index() for a in pre_roi_molecule1.atoms()] + + pre_roi_mapping = dict(zip(pre_roi_atom_idx_molecule0, pre_roi_atom_idx_molecule1)) + + # Loop over the residues of interest + for i, res_idx in enumerate(roi): + _logger.debug(f"Starting match for ROI id: {res_idx}") + + molecule0_roi = molecule0.getResidues()[res_idx] + _logger.debug("Molecule0 ROI:") + _logger.debug(molecule0_roi) + + molecule1_roi = molecule1.getResidues()[res_idx] + _logger.debug("Molecule1 ROI:") + _logger.debug(molecule1_roi) + + # Warn if matching between the same residues, in a case where we are + # transforming from one enantiomer to another, the atomtypes will + # be the same and trigger this warning. + if ( + molecule0_roi.name() == molecule1_roi.name() + and molecule0_roi.nAtoms() == molecule1_roi.nAtoms() + ): + molecule0_atoms = [a.name() for a in molecule0_roi.getAtoms()] + molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] + if molecule0_atoms == molecule1_atoms: + _logger.warning( + f"Residue {res_idx} in molecule0 and molecule1 have identical atomtypes." + ) + + res0_idx = [a.index() for a in molecule0_roi] + # _logger.debug(f"res0 indices: {res0_idx}") + res1_idx = [a.index() for a in molecule1_roi] + # _logger.debug(f"res1 indices: {res1_idx}") + + # Append the ROI indices to the list + roi_idx.append([res0_idx, res1_idx]) + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + # for a in res0_extracted.getAtoms(): + # _logger.debug(f"res0 atom: {a}") + # for b in res1_extracted.getAtoms(): + # _logger.debug(f"res1 atom: {b}") + + # If force_backbone_match is enabled, + # we are going to use the backbone atoms as a prematch + if force_backbone_match: + _logger.debug("Forcing backbone match") + + backbone_res0_idx, backbone_res0_atoms = _get_backbone( + molecule0_roi.toMolecule() + ) + backbone_res1_idx, backbone_res1_atoms = _get_backbone( + molecule1_roi.toMolecule() + ) + + # _logger.debug(f"Backbone res0 indices: {backbone_res0_idx}") + # _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") + + relative_backbone_mapping = matchAtoms( + backbone_res0_atoms, backbone_res1_atoms, scoring_function="rmsd" + ) + + # Translate the relative mapping to the absolute indices. + # The lookup table contains the relative indices of the atoms + # that have been mapped from one residue to the other, for example + # [0, 2, 4, 5]. We can use these as positional indices to get the + # absolute indices of the atoms from the original residue. i.e. + # if the original residue had atom indices such as: + # [100, 101, 102, 103, 104, 105], we can use the lookup table to + # get [100, 102, 104, 105]. Doing the same for the other residue + # will give us an absolute mapping between the two residues. + res0_lookup_table = list(relative_backbone_mapping.keys()) + absolute_backbone_mapped_atoms_res0 = [ + list(backbone_res0_idx.keys())[i] for i in res0_lookup_table + ] + + res1_lookup_table = list(relative_backbone_mapping.values()) + absolute_backbone_mapped_atoms_res1 = [ + list(backbone_res1_idx.keys())[i] for i in res1_lookup_table + ] + + absolute_backbone_mapping = dict( + zip( + absolute_backbone_mapped_atoms_res0, + absolute_backbone_mapped_atoms_res1, + ) + ) + _logger.debug(f"Absolute backbone mapping: {absolute_backbone_mapping}") + + mapping = matchAtoms( + res0_extracted, + res1_extracted, + prematch=absolute_backbone_mapping, + complete_rings_only=complete_rings_only, + scoring_function="RMSD", + ring_matches_ring_only=ring_matches_ring_only, + ) + else: + if use_kartograf: + _logger.debug("Using kartograf to map the ROI.") + kartograf_mapping = _kartograf_map( + res0_extracted, res1_extracted, kartograf_kwargs + ) + mapping = kartograf_mapping.componentA_to_componentB + else: + _logger.debug("Using rdKit MCS to map the ROI.") + mapping = matchAtoms( + res0_extracted, + res1_extracted, + complete_rings_only=complete_rings_only, + ring_matches_ring_only=ring_matches_ring_only, + atomCompare=atomCompare, + ) + + _logger.debug(f"Mapping: {mapping}") + + # Check how many atoms got mapped from one residue to the other + _logger.debug( + f"Mapped {len(mapping.keys())} out of {len(res0_idx)} atoms for molecule0" + ) + _logger.debug( + f"Mapped {len(mapping.values())} out of {len(res1_idx)} atoms for molecule1" + ) + + # If the number of mapped atoms is not the same as the number of atoms + # in the ROI we need to use that molecule as the reference + # for the lookup table. + # NOTE: We don't really need the conditional statement here that checks + # if all of the atoms in the ROI have been mapped, because + # we are going to translate atoms relative indices to absolute ones + # and that case would be captured by the lookup table. + if len(mapping.keys()) != len(res0_idx): + _logger.debug("Some atoms for molecule0 in the ROI have not been mapped.") + _logger.debug("Using molecule0 as the reference for the lookup table.") + residue_lookup_table = list(mapping.keys()) + absolute_mapped_atoms_res0 = [res0_idx[i] for i in residue_lookup_table] + + # i.e this bit is unnecessary + else: + absolute_mapped_atoms_res0 = res0_idx + + if len(mapping.values()) != len(res1_idx): + _logger.debug("Some atoms for molecule1 in the ROI have not been mapped.") + _logger.debug("Using molecule1 as the reference for the lookup table.") + residue_lookup_table = list(mapping.values()) + absolute_mapped_atoms_res1 = [res1_idx[i] for i in residue_lookup_table] + + else: + absolute_mapped_atoms_res1 = res1_idx + + absolute_roi_mapping = dict( + zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) + ) + + # Check that pre_roi_atom_indices are not part of molecule ROI indices + # NOTE: This could be a costly operation, if pre_roi_atom_indices is + # large. + if any(i in pre_roi_atom_idx_molecule0 for i in res0_idx) or any( + i in pre_roi_atom_idx_molecule1 for i in res1_idx + ): + raise ValueError("Found atoms in pre ROI region that are part of the ROI.") + + # Now we have to think about what to do with the atom indices + # after the mapping as these are not going to be the same + # between the two molecules. + # If we are at the last residue of interest, we don't need to worry + # too much about the after ROI region as this region will be all of the + # molecule atoms after the last residue of interest. + # In the case when we are not at the last residue of interest, + # we need to map the atoms to the next ROI. + if res_idx != roi[-1]: + + # If the next ROI residue index in the ROI list is next to + # the current ROI index, after_roi atom index list will be empty + # i.e. if we're currently at residue 10 and the next ROI is 11, + # we don't need to map the atoms. + # If we were at residue 10 and the next residue of interest is 12, + # we would need to map the atoms between residues 10 and 12. + if roi[i + 1] - roi[i] == 1: + _logger.debug( + "Next ROI is adjacent to the current ROI, no need to map atoms between the two." + ) + after_roi_atom_idx_molecule0 = [] + after_roi_atom_idx_molecule1 = [] + else: + after_roi_molecule0 = molecule0.search( + f"residue[{res_idx+1}:{roi[i+1]}]" + ) + after_roi_atom_idx_molecule0 = [ + a.index() for a in after_roi_molecule0.atoms() + ] + + after_roi_molecule1 = molecule1.search( + f"residue[{res_idx+1}:{roi[i+1]}]" + ) + after_roi_atom_idx_molecule1 = [ + b.index() for b in after_roi_molecule1.atoms() + ] + + after_roi_mapping = dict( + zip( + after_roi_atom_idx_molecule0, + after_roi_atom_idx_molecule1, + ) + ) + # Append the mappings to the pre_roi_mapping, which will then be + # used as the pre_roi_mapping for the next ROI in the list. + pre_roi_mapping = { + **pre_roi_mapping, + **absolute_roi_mapping, + **after_roi_mapping, + } + else: + # Get all of the remaining atoms after the last ROI + after_roi_molecule0 = molecule0.search(f"residue[{res_idx+1}:]") + after_roi_atom_idx_molecule0 = [ + a.index() for a in after_roi_molecule0.atoms() + ] + + after_roi_molecule1 = molecule1.search(f"residue[{res_idx+1}:]") + after_roi_atom_idx_molecule1 = [ + b.index() for b in after_roi_molecule1.atoms() + ] + + after_roi_mapping = dict( + zip( + after_roi_atom_idx_molecule0, + after_roi_atom_idx_molecule1, + ) + ) + + # Check that after_roi_atom_indices are not part of absolute_roi_mapping + if any(i in after_roi_atom_idx_molecule0 for i in res0_idx): + raise ValueError("Found atoms in after ROI region that are part of the ROI") + + if any(i in after_roi_atom_idx_molecule1 for i in res1_idx): + raise ValueError("Found atoms in after ROI region that are part of the ROI") + + # Print all 3 dictionaries + _logger.debug(f"Pre ROI region mapping: {pre_roi_mapping}") + _logger.debug(f"Absolute ROI mapping: {absolute_roi_mapping}") + _logger.debug(f"After ROI region mapping: {after_roi_mapping}") + + # Combine the dictionaries to get the full mapping + combined_dict = { + **pre_roi_mapping, + **absolute_roi_mapping, + **after_roi_mapping, + } + + # Print the matched atoms in the ROI + # for idx0, idx1 in mapping.items(): + # _logger.debug( + # f"{res0_extracted.getAtoms()[idx0]} <--> {res1_extracted.getAtoms()[idx1]}" + # ) + + # _logger.debug(f"Full matched atoms: {combined_dict}") + # for idx0, idx1 in mapping.items(): + # _logger.debug(f"{molecule0.getAtoms()[idx0]} <--> {molecule1.getAtoms()[idx1]}") + + # TODO: Change the return type of roi_idx to a dictionary of lists + return combined_dict, roi_idx + + def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): """ Align atoms in molecule0 to those in molecule1 using the mapping @@ -1138,6 +1677,169 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map return _Molecule(mol0) +def perResidueRmsdAlign( + molecule0, molecule1, mapping=None, property_map0={}, property_map1={} +): + """ + Align atoms in molecule0 to those in molecule1 using the mapping + between matched atom indices. The molecule is aligned using rigid-body + translation and rotations, with a root mean squared displacement (RMSD) + fit to find the optimal translation vector (as opposed to merely taking + the difference of centroids). This function is specifically designed to + be used for aligning protein residues. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule to align. + + molecule1 : :class:`Molecule ` + The reference molecule. + + mapping : dict + A dictionary mapping atoms in molecule0 to those in molecule1. + + property_map0 : dict + A dictionary that maps "properties" in molecule0 to their user + defined values. This allows the user to refer to properties + with their own naming scheme, e.g. { "charge" : "my-charge" } + + property_map1 : dict + A dictionary that maps "properties" in molecule1 to their user + defined values. + + Returns + ------- + + molecule : :class:`Molecule ` + The aligned molecule. + + Examples + -------- + + Align molecule0 to molecule1 based on a precomputed mapping. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.perResideRmsdAlign(molecule0, molecule1, mapping) + + """ + + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(property_map0, dict): + raise TypeError("'property_map0' must be of type 'dict'") + + if not isinstance(property_map1, dict): + raise TypeError("'property_map1' must be of type 'dict'") + + # The user has passed an atom mapping. + if mapping is not None: + if not isinstance(mapping, dict): + raise TypeError("'mapping' must be of type 'dict'.") + else: + _validate_mapping(molecule0, molecule1, mapping, "mapping") + + absolute_residue_mapping = {} + + for res in molecule1.getResidues(): + + # Get the mapping for the current residue using its atom indices + # i.e. {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5} + # _logger.debug(f"Residue {res.index()}") + # _logger.debug(f"Residue atoms: {res.getAtoms()}") + + for atom in res.getAtoms(): + # try to get the mapping for the atom + value = mapping.get(atom.index()) + if value is not None: + residue_mapping = {atom.index(): value} + + # residue_mapping = {a.index(): mapping[a.index()] for a in res.getAtoms()} + + # update the absolute mapping dictionary to contain the mapping for each residue + # i.e. {0: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}...} + absolute_residue_mapping[res.index()] = residue_mapping + + # Extract the Sire molecule from each BioSimSpace molecule. + mol0 = molecule0._getSireObject() + mol1 = molecule1._getSireObject() + + mol0_residues = mol0.residues() + mol1_residues = mol1.residues() + + # Update the mapping to contain the residue indices + # i.e. {100: 103, 101: 102, 102: 105} -> {10: {100: 103, 101: 102, 102: 105}} + + # Align each residue from molecule0 to molecule1 individually + mol0_updated_residue_coords = [] + for i, res in enumerate(mol0_residues): + # _logger.debug(f"Aligning residue {res.index()} to reference residue {mol1_residues[i].index()}") + res0 = res.extract() + res1 = mol1_residues[i].extract() + + # Get the mapping for the current residue + residue_mapping = absolute_residue_mapping[i] + + # We will now offset the mapping with the first atom index of the residue + # ie. {100: 103, 101: 102, 102: 105} -> {0: 3, 1: 2, 2: 5} + relative_keys = list(residue_mapping.keys()) + relative_key = relative_keys[0] + relative_values = list(residue_mapping.values()) + relative_value = relative_values[0] + + relative_residue_mapping = { + k - relative_key: v - relative_value for k, v in residue_mapping.items() + } + + # Perform the alignment, res0 to res1. + # Convert the mapping to AtomIdx key:value pairs. + sire_mapping = _to_sire_mapping(mapping) + # _logger.debug(f"Residue coordinates before alignment: {res0.coords()}") + try: + res0 = ( + res0.move() + .align(res1, _SireMol.AtomResultMatcher(sire_mapping)) + .molecule() + ) + # _logger.debug(f"Residue coordinates after alignment: {res0.coords()}") + mol0_updated_residue_coords.append(res0.property("coordinates")) + + except Exception as e: + msg = f"Failed to align residues based on mapping: {mapping}" + if "Could not calculate the single value decomposition" in str(e): + msg += ". Try minimising your molecular coordinates prior to alignment." + if _isVerbose(): + raise _AlignmentError(msg) from e + else: + raise _AlignmentError(msg) from None + + # Retrieve the atoms coordiantes from the updated residues + mol0_updated_coords = [] + for res in mol0_updated_residue_coords: + for atom_coord in res: + mol0_updated_coords.append(atom_coord) + + # Create a cursor for updating the coordinates property + cursor = mol0.cursor() + for i, atom in enumerate(cursor.atoms()): + atom["coordinates"] = mol0_updated_coords[i] + + # Commit the update back to the original molecule + mol0 = cursor.commit() + + # Return the aligned molecule. + return _Molecule(mol0) + + def flexAlign( molecule0, molecule1, @@ -1296,6 +1998,263 @@ def flexAlign( # Return the aligned molecule. return _Molecule(molecule0) +def roiAlign( + molecule0, + molecule1, + roi=None, + mapping=None, + fkcombu_exe=None, + property_map0={}, + property_map1={}, +): + """ + Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 + using BioSimSpace.Align.flexAlign(). + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule to align. + + molecule1 : :class:`Molecule ` + The reference molecule. + + mapping : dict + A dictionary mapping atoms in molecule0 to those in molecule1. + + property_map0 : dict + A dictionary that maps "properties" in molecule0 to their user + defined values. This allows the user to refer to properties + with their own naming scheme, e.g. { "charge" : "my-charge" } + + property_map1 : dict + A dictionary that maps "properties" in molecule1 to their user + defined values. + + Returns + ------- + + molecule : :class:`Molecule ` + The aligned molecule. + + Examples + -------- + + Align molecule0 to molecule1 based on a precomputed mapping. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping) + + Align molecule0 to molecule1. Since no mapping is passed one will be + autogenerated using :class:`matchAtoms ` + with default options. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) + """ + + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + if roi is None: + raise ValueError("No region of interest (ROI) has been provided.") + else: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + + # The user has passed an atom mapping. + if mapping is not None: + if not isinstance(mapping, dict): + raise TypeError("'mapping' must be of type 'dict'.") + else: + _validate_mapping(molecule0, molecule1, mapping, "mapping") + + for roi_idx in roi: + res0 = molecule0.getResidues()[roi_idx] + res1 = molecule1.getResidues()[roi_idx] + + if res0.name() == res1.name() and res0.nAtoms() == res1.nAtoms(): + res0_atoms = [a.name() for a in res0.getAtoms()] + res1_atoms = [a.name() for a in res0.getAtoms()] + if res0_atoms == res1_atoms: + _logger.warning( + f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." + ) + + res0_idx = [a.index() for a in res0.getAtoms()] + res1_idx = [a.index() for a in res1.getAtoms()] + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + res0_aligned = rmsdAlign(molecule0=res0_extracted, molecule1=res1_extracted) + + # Now update molecule0 with the aligned residue coordinates + mol0 = molecule0._getSireObject() + res0_aligned_coords = res0_aligned._getSireObject().property("coordinates") + + # A list to store the updated coordinates for molecule0 + mol0_coords = [] + for i, res in enumerate(mol0.residues()): + if i == roi_idx: + mol0_coords.append(res0_aligned_coords) + else: + mol0_coords.append(res.property("coordinates")) + # Flatten the list + mol0_coords = [item for sublist in mol0_coords for item in sublist] + + # Create a cursor for updating the coordinates property + c = mol0.cursor() + for i, atom in enumerate(c.atoms()): + atom["coordinates"] = mol0_coords[i] + mol0 = c.commit() + + # Convert the Sire molecule back to a BioSimSpace molecule so we can + # loop over it again if needed + molecule0 = _Molecule(mol0) + + return molecule0 + + + + +def roiFlexAlign( + molecule0, + molecule1, + roi=None, + mapping=None, + fkcombu_exe=None, + property_map0={}, + property_map1={}, +): + """ + Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 + using BioSimSpace.Align.flexAlign(). + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule to align. + + molecule1 : :class:`Molecule ` + The reference molecule. + + mapping : dict + A dictionary mapping atoms in molecule0 to those in molecule1. + + fkcombu_exe : str + Path to the fkcombu executable. If None is passed, then BioSimSpace + will attempt to find fkcombu by searching your PATH. + + property_map0 : dict + A dictionary that maps "properties" in molecule0 to their user + defined values. This allows the user to refer to properties + with their own naming scheme, e.g. { "charge" : "my-charge" } + + property_map1 : dict + A dictionary that maps "properties" in molecule1 to their user + defined values. + + Returns + ------- + + molecule : :class:`Molecule ` + The aligned molecule. + + Examples + -------- + + Align molecule0 to molecule1 based on a precomputed mapping. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping) + + Align molecule0 to molecule1. Since no mapping is passed one will be + autogenerated using :class:`matchAtoms ` + with default options. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) + """ + + if not isinstance(molecule0, _Molecule): + raise TypeError( + "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + + if not isinstance(molecule1, _Molecule): + raise TypeError( + "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" + ) + if roi is None: + raise ValueError("No region of interest (ROI) has been provided.") + else: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + + # The user has passed an atom mapping. + if mapping is not None: + if not isinstance(mapping, dict): + raise TypeError("'mapping' must be of type 'dict'.") + else: + _validate_mapping(molecule0, molecule1, mapping, "mapping") + + for roi_idx in roi: + res0 = molecule0.getResidues()[roi_idx] + res1 = molecule1.getResidues()[roi_idx] + + if res0.name() == res1.name() and res0.nAtoms() == res1.nAtoms(): + res0_atoms = [a.name() for a in res0.getAtoms()] + res1_atoms = [a.name() for a in res0.getAtoms()] + if res0_atoms == res1_atoms: + _logger.warning( + f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." + ) + + res0_idx = [a.index() for a in res0.getAtoms()] + res1_idx = [a.index() for a in res1.getAtoms()] + + # Extract the residues of interest from the molecules + res0_extracted = molecule0.extract(res0_idx) + res1_extracted = molecule1.extract(res1_idx) + + res0_aligned = flexAlign(molecule0=res0_extracted, molecule1=res1_extracted) + + # Now update molecule0 with the aligned residue coordinates + mol0 = molecule0._getSireObject() + res0_aligned_coords = res0_aligned._getSireObject().property("coordinates") + + # A list to store the updated coordinates for molecule0 + mol0_coords = [] + for i, res in enumerate(mol0.residues()): + if i == roi_idx: + mol0_coords.append(res0_aligned_coords) + else: + mol0_coords.append(res.property("coordinates")) + # Flatten the list + mol0_coords = [item for sublist in mol0_coords for item in sublist] + + # Create a cursor for updating the coordinates property + c = mol0.cursor() + for i, atom in enumerate(c.atoms()): + atom["coordinates"] = mol0_coords[i] + mol0 = c.commit() + + # Convert the Sire molecule back to a BioSimSpace molecule so we can + # loop over it again if needed + molecule0 = _Molecule(mol0) + + return molecule0 + def merge( molecule0, @@ -1304,6 +2263,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, + roi_list=None, property_map0={}, property_map1={}, ): @@ -1340,6 +2300,9 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. + roi_list : list + The region of interest to merge. Consist of two lists of atom indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1397,6 +2360,12 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") + if roi_list is not None: + if not isinstance(roi_list, list): + raise TypeError("'roi_list' must be of type 'list'.") + else: + _validate_roi(molecule0, molecule1, roi_list) + # The user has passed an atom mapping. if mapping is not None: if not isinstance(mapping, dict): @@ -1426,6 +2395,7 @@ def merge( allow_ring_breaking=allow_ring_breaking, allow_ring_size_change=allow_ring_size_change, force=force, + roi_list=roi_list, property_map0=property_map0, property_map1=property_map1, ) @@ -1552,12 +2522,12 @@ def viewMapping( ) molecule0 = rmsdAlign(molecule0, molecule1, mapping) + import py3Dmol as _py3Dmol + # Convert the molecules to RDKit format. rdmol0 = _Convert.toRDKit(molecule0, property_map=property_map0) rdmol1 = _Convert.toRDKit(molecule1, property_map=property_map1) - import py3Dmol as _py3Dmol - # Set grid view properties. viewer0 = (0, 0) if orientation == "horizontal": @@ -2074,6 +3044,39 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) +def _validate_roi(molecule0, molecule1, roi_list): + """Internal function to validate that a mapping contains key:value pairs + of the correct type. + + Parameters + ---------- + + molecule0 : :class:`Molecule ` + The molecule of interest. + + molecule1 : :class:`Molecule ` + The reference molecule. + + roi_list : list + The region of interest to merge. + """ + for roi in roi_list: + if len(roi) != 2: + raise ValueError("The length of roi list must be 2.") + if not isinstance(roi[0], list) or not isinstance(roi[1], list): + raise ValueError("The element of roi must be of type list") + for mol_idx, ele in enumerate(roi): + for atom_idx in ele: + if type(atom_idx) is not int: + raise ValueError( + f"The element of roi[{mol_idx}] should be of type int" + ) + if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): + raise IndexError( + f"The element of roi[{mol_idx}] should within range of number of atoms" + ) + + def _to_sire_mapping(mapping): """ Internal function to convert a regular mapping to Sire AtomIdx format. diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index f4693e83e..add219661 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -32,6 +32,9 @@ from sire.legacy import Mol as _SireMol from sire.legacy import Units as _SireUnits +# This logger is temporary. +from loguru import logger as _logger + from .._Exceptions import IncompatibleError as _IncompatibleError from .._SireWrappers import Molecule as _Molecule @@ -43,6 +46,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, + roi_list=None, property_map0={}, property_map1={}, ): @@ -74,6 +78,9 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. + roi_list : list + The region of interest to merge. Consist of a multiple nested list lists of atom indices. + property_map0 : dict A dictionary that maps "properties" in this molecule to their user defined values. This allows the user to refer to properties @@ -152,9 +159,17 @@ def merge( # Get the atom indices from the mapping. idx0 = mapping.keys() idx1 = mapping.values() + _logger.debug(f"Mapping: {mapping}") + _logger.debug(f"idx0: {idx0}") + _logger.debug(f"idx1: {idx1}") # Create the reverse mapping: molecule1 --> molecule0 inv_mapping = {v: k for k, v in mapping.items()} + _logger.debug(f"Inverse mapping: {inv_mapping}") + + # Generate the mappings from each molecule to the merged molecule + mol0_merged_mapping = {} + mol1_merged_mapping = {} # Invert the user property mappings. inv_property_map0 = {v: k for k, v in property_map0.items()} @@ -193,6 +208,7 @@ def merge( if atom.index() not in idx0: atoms0.append(atom) atoms0_idx.append(atom.index()) + _logger.debug(f"Unique atoms in molecule0: {atoms0_idx}") # molecule1 for atom in molecule1.atoms(): @@ -200,37 +216,83 @@ def merge( atoms1.append(atom) atoms1_idx.append(atom.index()) + _logger.debug(f"Unique atoms in molecule1: {atoms1_idx}") + # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") + # Only part of the ligand is to be merged + if roi_list is not None: + if molecule0.nResidues() != molecule1.nResidues(): + raise ValueError( + "The two molecules need to have the same number of residues" + ) - # Add a single residue called LIG. - res = molecule.edit().add(_SireMol.ResNum(1)) - res.rename(_SireMol.ResName("LIG")) - - # Create a single cut-group. - cg = res.molecule().add(_SireMol.CGName("1")) - - # Counter for the number of atoms. - num = 1 - - # First add all of the atoms from molecule0. - for atom in molecule0.atoms(): - # Add the atom. - added = cg.add(atom.name()) - added.renumber(_SireMol.AtomNum(num)) - added.reparent(_SireMol.ResIdx(0)) - num += 1 - - # Now add all of the atoms from molecule1 that aren't mapped from molecule0. - for atom in atoms1: - added = cg.add(atom.name()) - added.renumber(_SireMol.AtomNum(num)) - added.reparent(_SireMol.ResIdx(0)) - inv_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) - num += 1 + num = 1 + for idx, (mol0_res, mol1_res) in enumerate( + zip(molecule0.residues(), molecule1.residues()) + ): + res = molecule.edit().add(_SireMol.ResNum(idx + 1)) + if mol0_res.name() == mol1_res.name(): + res.rename(mol0_res.name()) + else: + resname = "MUT" if len(molecule0.residues()) > 1 else "LIG" + res.rename(_SireMol.ResName(resname)) + + cg = res.molecule().add(_SireMol.CGName(f"{idx}")) + for atom in mol0_res.atoms(): + mol0_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(idx)) + num += 1 + + for atom in mol1_res.atoms(): + if atom.index() in atoms1_idx: + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(idx)) + mol1_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + num += 1 + else: + mol1_merged_mapping[atom.index()] = mol0_merged_mapping[ + inv_mapping[atom.index()] + ] + molecule = cg.molecule().commit() + else: + # Add a single residue called LIG. + res = molecule.edit().add(_SireMol.ResNum(1)) + res.rename(_SireMol.ResName("LIG")) + + # Create a single cut-group. + cg = res.molecule().add(_SireMol.CGName("1")) + + # Counter for the number of atoms. + num = 1 + + # First add all of the atoms from molecule0. + for atom in molecule0.atoms(): + # Add the atom. + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(0)) + mol0_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + num += 1 + + # Now add all of the atoms from molecule1 that aren't mapped from molecule0. + for atom in molecule1.atoms(): + if atom in atoms1: + added = cg.add(atom.name()) + added.renumber(_SireMol.AtomNum(num)) + added.reparent(_SireMol.ResIdx(0)) + mol1_merged_mapping[atom.index()] = _SireMol.AtomIdx(num - 1) + num += 1 + else: + mol1_merged_mapping[atom.index()] = mol0_merged_mapping[ + inv_mapping[atom.index()] + ] - # Commit the changes to the molecule. - molecule = cg.molecule().commit() + # Commit the changes to the molecule. + molecule = cg.molecule().commit() # Make the molecule editable. edit_mol = molecule.edit() @@ -340,11 +402,12 @@ def merge( # Add the atom properties from molecule0. for atom in molecule0.atoms(): + # Get the atom index in the merged molecule. + idx = mol0_merged_mapping[atom.index()] + # Add an "name0" property. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty("name0", atom.name().value()) - .molecule() + edit_mol.atom(idx).setProperty("name0", atom.name().value()).molecule() ) # Loop over all atom properties. @@ -358,15 +421,13 @@ def merge( # Add the property to the atom in the merged molecule. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty(name, atom.property(prop)) - .molecule() + edit_mol.atom(idx).setProperty(name, atom.property(prop)).molecule() ) # Add the atom properties from molecule1. for atom in atoms1: # Get the atom index in the merged molecule. - idx = inv_mapping[atom.index()] + idx = mol1_merged_mapping[atom.index()] # Add an "name0" property. edit_mol = ( @@ -433,8 +494,8 @@ def merge( # Add all of the bonds from molecule0. for bond in bonds0.potentials(): - atom0 = info0.atomIdx(bond.atom0()) - atom1 = info0.atomIdx(bond.atom1()) + atom0 = mol0_merged_mapping[info0.atomIdx(bond.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(bond.atom1())] bonds.set(atom0, atom1, bond.function()) # Loop over all bonds in molecule1. @@ -450,8 +511,8 @@ def merge( exprn = bond.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] # Set the new bond. bonds.set(atom0, atom1, exprn) @@ -478,9 +539,9 @@ def merge( # Add all of the angles from molecule0. for angle in angles0.potentials(): - atom0 = info0.atomIdx(angle.atom0()) - atom1 = info0.atomIdx(angle.atom1()) - atom2 = info0.atomIdx(angle.atom2()) + atom0 = mol0_merged_mapping[info0.atomIdx(angle.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(angle.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(angle.atom2())] angles.set(atom0, atom1, atom2, angle.function()) # Loop over all angles in molecule1. @@ -498,9 +559,9 @@ def merge( exprn = angle.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] # Set the new angle. angles.set(atom0, atom1, atom2, exprn) @@ -527,10 +588,10 @@ def merge( # Add all of the dihedrals from molecule0. for dihedral in dihedrals0.potentials(): - atom0 = info0.atomIdx(dihedral.atom0()) - atom1 = info0.atomIdx(dihedral.atom1()) - atom2 = info0.atomIdx(dihedral.atom2()) - atom3 = info0.atomIdx(dihedral.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(dihedral.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(dihedral.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(dihedral.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(dihedral.atom3())] dihedrals.set(atom0, atom1, atom2, atom3, dihedral.function()) # Loop over all dihedrals in molecule1. @@ -550,10 +611,10 @@ def merge( exprn = dihedral.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new dihedral. dihedrals.set(atom0, atom1, atom2, atom3, exprn) @@ -580,10 +641,10 @@ def merge( # Add all of the impropers from molecule0. for improper in impropers0.potentials(): - atom0 = info0.atomIdx(improper.atom0()) - atom1 = info0.atomIdx(improper.atom1()) - atom2 = info0.atomIdx(improper.atom2()) - atom3 = info0.atomIdx(improper.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(improper.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(improper.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(improper.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(improper.atom3())] impropers.set(atom0, atom1, atom2, atom3, improper.function()) # Loop over all impropers in molecule1. @@ -603,10 +664,10 @@ def merge( exprn = improper.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new improper. impropers.set(atom0, atom1, atom2, atom3, exprn) @@ -621,7 +682,7 @@ def merge( # Add the atom properties from molecule1. for atom in molecule1.atoms(): # Get the atom index in the merged molecule. - idx = inv_mapping[atom.index()] + idx = mol1_merged_mapping[atom.index()] # Add an "name1" property. edit_mol = ( @@ -644,11 +705,12 @@ def merge( # Add the properties from atoms unique to molecule0. for atom in atoms0: + # Get the atom index in the merged molecule. + idx = mol0_merged_mapping[atom.index()] + # Add an "name1" property. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty("name1", atom.name().value()) - .molecule() + edit_mol.atom(idx).setProperty("name1", atom.name().value()).molecule() ) # Loop over all atom properties. @@ -659,25 +721,21 @@ def merge( # Zero the "charge" and "LJ" property for atoms that are unique to molecule0. if name == "charge": edit_mol = ( - edit_mol.atom(atom.index()) + edit_mol.atom(idx) .setProperty("charge1", 0 * _SireUnits.e_charge) .molecule() ) elif name == "LJ": edit_mol = ( - edit_mol.atom(atom.index()) + edit_mol.atom(idx) .setProperty("LJ1", _SireMM.LJParameter()) .molecule() ) elif name == "ambertype": - edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty("ambertype1", "du") - .molecule() - ) + edit_mol = edit_mol.atom(idx).setProperty("ambertype1", "du").molecule() elif name == "element": edit_mol = ( - edit_mol.atom(atom.index()) + edit_mol.atom(idx) .setProperty("element1", _SireMol.Element(0)) .molecule() ) @@ -688,9 +746,7 @@ def merge( # Add the property to the atom in the merged molecule. edit_mol = ( - edit_mol.atom(atom.index()) - .setProperty(name, atom.property(prop)) - .molecule() + edit_mol.atom(idx).setProperty(name, atom.property(prop)).molecule() ) # We now need to merge "bond", "angle", "dihedral", and "improper" parameters. @@ -723,8 +779,8 @@ def merge( exprn = bond.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] # Set the new bond. bonds.set(atom0, atom1, exprn) @@ -737,8 +793,8 @@ def merge( or info0.atomIdx(bond.atom1()) in atoms0_idx ): # Extract the bond information. - atom0 = info0.atomIdx(bond.atom0()) - atom1 = info0.atomIdx(bond.atom1()) + atom0 = mol0_merged_mapping[info0.atomIdx(bond.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(bond.atom1())] exprn = bond.function() # Set the new bond. @@ -773,9 +829,9 @@ def merge( exprn = angle.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] # Set the new angle. angles.set(atom0, atom1, atom2, exprn) @@ -789,9 +845,9 @@ def merge( or info0.atomIdx(angle.atom2()) in atoms0_idx ): # Extract the angle information. - atom0 = info0.atomIdx(angle.atom0()) - atom1 = info0.atomIdx(angle.atom1()) - atom2 = info0.atomIdx(angle.atom2()) + atom0 = mol0_merged_mapping[info0.atomIdx(angle.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(angle.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(angle.atom2())] exprn = angle.function() # Set the new angle. @@ -827,10 +883,10 @@ def merge( exprn = dihedral.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new dihedral. dihedrals.set(atom0, atom1, atom2, atom3, exprn) @@ -845,10 +901,10 @@ def merge( or info0.atomIdx(dihedral.atom3()) in atoms0_idx ): # Extract the dihedral information. - atom0 = info0.atomIdx(dihedral.atom0()) - atom1 = info0.atomIdx(dihedral.atom1()) - atom2 = info0.atomIdx(dihedral.atom2()) - atom3 = info0.atomIdx(dihedral.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(dihedral.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(dihedral.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(dihedral.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(dihedral.atom3())] exprn = dihedral.function() # Set the new dihedral. @@ -884,10 +940,10 @@ def merge( exprn = improper.function() # Map the atom indices to their position in the merged molecule. - atom0 = inv_mapping[atom0] - atom1 = inv_mapping[atom1] - atom2 = inv_mapping[atom2] - atom3 = inv_mapping[atom3] + atom0 = mol1_merged_mapping[atom0] + atom1 = mol1_merged_mapping[atom1] + atom2 = mol1_merged_mapping[atom2] + atom3 = mol1_merged_mapping[atom3] # Set the new improper. impropers.set(atom0, atom1, atom2, atom3, exprn) @@ -902,10 +958,10 @@ def merge( or info0.atomIdx(improper.atom3()) in atoms0_idx ): # Extract the improper information. - atom0 = info0.atomIdx(improper.atom0()) - atom1 = info0.atomIdx(improper.atom1()) - atom2 = info0.atomIdx(improper.atom2()) - atom3 = info0.atomIdx(improper.atom3()) + atom0 = mol0_merged_mapping[info0.atomIdx(improper.atom0())] + atom1 = mol0_merged_mapping[info0.atomIdx(improper.atom1())] + atom2 = mol0_merged_mapping[info0.atomIdx(improper.atom2())] + atom3 = mol0_merged_mapping[info0.atomIdx(improper.atom3())] exprn = improper.function() # Set the new improper. @@ -928,156 +984,183 @@ def merge( "or 'allow_ring_size_change' options." ) - # Create the connectivity objects. + # Create the connectivity object. + conn = _SireMol.Connectivity(edit_mol.info()).edit() + + # Connectivity in the merged molecule. conn0 = _SireMol.Connectivity(edit_mol.info()).edit() conn1 = _SireMol.Connectivity(edit_mol.info()).edit() - # Connect the bonded atoms in both end states. - for bond in edit_mol.property("bond0").potentials(): - conn0.connect(bond.atom0(), bond.atom1()) + bonds_atoms0 = { + frozenset([bond.atom0(), bond.atom1()]) + for bond in edit_mol.property("bond0").potentials() + } + bonds_atoms1 = { + frozenset([bond.atom0(), bond.atom1()]) + for bond in edit_mol.property("bond1").potentials() + } + + for atom0, atom1 in bonds_atoms0: + conn0.connect(atom0, atom1) + + for atom0, atom1 in bonds_atoms1: + conn1.connect(atom0, atom1) + + # We only add a bond to the total connectivity if it's defined in both states + # This results in a "broken" topology if one writes it in GROMACS + # But GROMACS can't handle bond breaks anyway + for atom0, atom1 in bonds_atoms0 & bonds_atoms1: + conn.connect(atom0, atom1) + + conn = conn.commit() conn0 = conn0.commit() - for bond in edit_mol.property("bond1").potentials(): - conn1.connect(bond.atom0(), bond.atom1()) conn1 = conn1.commit() - # Get the original connectivity of the two molecules. + # Get the connectivity of the two molecules. c0 = molecule0.property("connectivity") c1 = molecule1.property("connectivity") # Check that the merge hasn't modified the connectivity. - # molecule0 - for x in range(0, molecule0.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - - for y in range(x + 1, molecule0.nAtoms()): + # The checking was blocked when merging a protein + if roi_list is None: + # molecule0 + for x in range(0, molecule0.nAtoms()): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = _SireMol.AtomIdx(x) - # Was a ring opened/closed? - is_ring_broken = _is_ring_broken(c0, conn1, idx, idy, idx, idy) + # Map the index to its position in the merged molecule. + idx_map = mol0_merged_mapping[idx] - # A ring was broken and it is not allowed. - if is_ring_broken and not allow_ring_breaking: - raise _IncompatibleError( - "The merge has opened/closed a ring. To allow this " - "perturbation, set the 'allow_ring_breaking' option " - "to 'True'." - ) + for y in range(x + 1, molecule0.nAtoms()): + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(y) - # Did a ring change size? - is_ring_size_change = _is_ring_size_changed(c0, conn1, idx, idy, idx, idy) + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] - # A ring changed size and it is not allowed. - if ( - not is_ring_broken - and is_ring_size_change - and not allow_ring_size_change - ): - raise _IncompatibleError( - "The merge has changed the size of a ring. To allow this " - "perturbation, set the 'allow_ring_size_change' option " - "to 'True'. Be aware that this perturbation may not work " - "and a transition through an intermediate state may be " - "preferable." - ) + # Was a ring opened/closed? + is_ring_broken = _is_ring_broken(c0, conn, idx, idy, idx_map, idy_map) - # The connectivity has changed. - if c0.connectionType(idx, idy) != conn1.connectionType(idx, idy): - # The connectivity changed for an unknown reason. - if not (is_ring_broken or is_ring_size_change) and not force: + # A ring was broken and it is not allowed. + if is_ring_broken and not allow_ring_breaking: raise _IncompatibleError( - "The merge has changed the molecular connectivity " - "but a ring didn't open/close or change size. " - "If you want to proceed with this mapping pass " - "'force=True'. You are warned that the resulting " - "perturbation will likely be unstable." + "The merge has opened/closed a ring. To allow this " + "perturbation, set the 'allow_ring_breaking' option " + "to 'True'." ) - # molecule1 - for x in range(0, molecule1.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - # Map the index to its position in the merged molecule. - idx_map = inv_mapping[idx] + # Did a ring change size? + is_ring_size_change = _is_ring_size_changed( + c0, conn, idx, idy, idx_map, idy_map + ) + + # A ring changed size and it is not allowed. + if ( + not is_ring_broken + and is_ring_size_change + and not allow_ring_size_change + ): + raise _IncompatibleError( + "The merge has changed the size of a ring. To allow this " + "perturbation, set the 'allow_ring_size_change' option " + "to 'True'. Be aware that this perturbation may not work " + "and a transition through an intermediate state may be " + "preferable." + ) - for y in range(x + 1, molecule1.nAtoms()): + # The connectivity has changed. + if c0.connectionType(idx, idy) != conn.connectionType(idx_map, idy_map): + # The connectivity changed for an unknown reason. + if not (is_ring_broken or is_ring_size_change) and not force: + raise _IncompatibleError( + "The merge has changed the molecular connectivity " + "but a ring didn't open/close or change size. " + "If you want to proceed with this mapping pass " + "'force=True'. You are warned that the resulting " + "perturbation will likely be unstable." + ) + # molecule1 + for x in range(0, molecule1.nAtoms()): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = _SireMol.AtomIdx(x) # Map the index to its position in the merged molecule. - idy_map = inv_mapping[idy] + idx_map = mol1_merged_mapping[idx] - # Was a ring opened/closed? - is_ring_broken = _is_ring_broken(c1, conn0, idx, idy, idx_map, idy_map) + for y in range(x + 1, molecule1.nAtoms()): + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(y) - # A ring was broken and it is not allowed. - if is_ring_broken and not allow_ring_breaking: - raise _IncompatibleError( - "The merge has opened/closed a ring. To allow this " - "perturbation, set the 'allow_ring_breaking' option " - "to 'True'." - ) + # Map the index to its position in the merged molecule. + idy_map = mol1_merged_mapping[idy] - # Did a ring change size? - is_ring_size_change = _is_ring_size_changed( - c1, conn0, idx, idy, idx_map, idy_map - ) + # Was a ring opened/closed? + is_ring_broken = _is_ring_broken(c1, conn, idx, idy, idx_map, idy_map) - # A ring changed size and it is not allowed. - if ( - not is_ring_broken - and is_ring_size_change - and not allow_ring_size_change - ): - raise _IncompatibleError( - "The merge has changed the size of a ring. To allow this " - "perturbation, set the 'allow_ring_size_change' option " - "to 'True'. Be aware that this perturbation may not work " - "and a transition through an intermediate state may be " - "preferable." + # A ring was broken and it is not allowed. + if is_ring_broken and not allow_ring_breaking: + raise _IncompatibleError( + "The merge has opened/closed a ring. To allow this " + "perturbation, set the 'allow_ring_breaking' option " + "to 'True'." + ) + + # Did a ring change size? + is_ring_size_change = _is_ring_size_changed( + c1, conn, idx, idy, idx_map, idy_map ) - # The connectivity has changed. - if c1.connectionType(idx, idy) != conn0.connectionType(idx_map, idy_map): - # The connectivity changed for an unknown reason. - if not (is_ring_broken or is_ring_size_change) and not force: + # A ring changed size and it is not allowed. + if ( + not is_ring_broken + and is_ring_size_change + and not allow_ring_size_change + ): raise _IncompatibleError( - "The merge has changed the molecular connectivity " - "but a ring didn't open/close or change size. " - "If you want to proceed with this mapping pass " - "'force=True'. You are warned that the resulting " - "perturbation will likely be unstable." + "The merge has changed the size of a ring. To allow this " + "perturbation, set the 'allow_ring_size_change' option " + "to 'True'. Be aware that this perturbation may not work " + "and a transition through an intermediate state may be " + "preferable." ) - # Set the "connectivity" property. If the end state connectivity is the same, - # then we can just set the "connectivity" property. - if conn0 == conn1: - edit_mol.setProperty("connectivity", conn0) - else: - edit_mol.setProperty("connectivity0", conn0) - edit_mol.setProperty("connectivity1", conn1) + # The connectivity has changed. + if c1.connectionType(idx, idy) != conn.connectionType(idx_map, idy_map): + # The connectivity changed for an unknown reason. + if not (is_ring_broken or is_ring_size_change) and not force: + raise _IncompatibleError( + "The merge has changed the molecular connectivity " + "but a ring didn't open/close or change size. " + "If you want to proceed with this mapping pass " + "'force=True'. You are warned that the resulting " + "perturbation will likely be unstable." + ) + + # Set the "connectivity" property. + edit_mol.setProperty("connectivity", conn) # Create the CLJNBPairs matrices. ff = molecule0.property(ff0) - # Initialise the intrascale matrices for both end states. - clj_nb_pairs0 = _SireMM.CLJNBPairs(edit_mol.info(), _SireMM.CLJScaleFactor(0, 0)) - clj_nb_pairs1 = _SireMM.CLJNBPairs(edit_mol.info(), _SireMM.CLJScaleFactor(0, 0)) + scale_factor_14 = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0 = _SireMM.CLJNBPairs(conn0, scale_factor_14) + clj_nb_pairs1 = _SireMM.CLJNBPairs(conn1, scale_factor_14) # Loop over all atoms unique to molecule0. for idx0 in atoms0_idx: + # Map the index to its position in the merged molecule. + idx0 = mol0_merged_mapping[idx0] + # Loop over all atoms unique to molecule1. for idx1 in atoms1_idx: # Map the index to its position in the merged molecule. - idx1 = inv_mapping[idx1] + idx1 = mol1_merged_mapping[idx1] - # Work out the connection type between the atoms in both end states. + # Work out the connection type between the atoms, in molecule 0. conn_type0 = conn0.connectionType(idx0, idx1) - conn_type1 = conn1.connectionType(idx0, idx1) - - # Lambda = 0 # The atoms aren't bonded. if conn_type0 == 0: @@ -1091,7 +1174,13 @@ def merge( ) clj_nb_pairs0.set(idx0, idx1, clj_scale_factor) - # Lambda = 1 + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx0, idx1, clj_scale_factor) + + # Work out the connection type between the atoms, in molecule 1. + conn_type1 = conn1.connectionType(idx0, idx1) # The atoms aren't bonded. if conn_type1 == 0: @@ -1105,77 +1194,146 @@ def merge( ) clj_nb_pairs1.set(idx0, idx1, clj_scale_factor) - # Get the user defined "intrascale" property names. - prop0 = inv_property_map0.get("intrascale", "intrascale") - prop1 = inv_property_map1.get("intrascale", "intrascale") - - # Get the "intrascale" property from the two molecules. - intrascale0 = molecule0.property(prop0) - intrascale1 = molecule1.property(prop1) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx0, idx1, clj_scale_factor) # Copy the intrascale from molecule1 into clj_nb_pairs0. # Perform a triangular loop over atoms from molecule1. - for x in range(0, molecule1.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - - # Map the index to its position in the merged molecule. - idx = inv_mapping[idx] - - for y in range(x + 1, molecule1.nAtoms()): + if roi_list is None: + iterlen = molecule1.nAtoms() + iterrange = list(range(molecule1.nAtoms())) + # When region of interest is defined, perfrom loop from these indices + else: + for roi in roi_list: + iterlen = len(roi[1]) + iterrange = roi[1] + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy = inv_mapping[idy] - - # Get the intrascale value. - intra = intrascale1.get(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y)) + idx_map = mol1_merged_mapping[idx] + + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy_map = mol1_merged_mapping[idy] + + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # Only set if there is a non-zero value. - # Set using the re-mapped atom indices. - if not intra.coulomb() == 0: - clj_nb_pairs0.set(idx, idy, intra) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Now copy in all intrascale values from molecule0 into both # clj_nb_pairs matrices. + if roi_list is None: + iterlen = molecule0.nAtoms() + iterrange = list(range(molecule0.nAtoms())) + # When region of interest is defined, perfrom loop from these indices + else: + for roi in roi_list: + iterlen = len(roi[0]) + iterrange = roi[0] - # Perform a triangular loop over atoms from molecule0. - for x in range(0, molecule0.nAtoms()): - for y in range(x + 1, molecule0.nAtoms()): - # Get the intrascale value. - intra = intrascale0.get(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y)) + # Perform a triangular loop over atoms from molecule0. + for x in range(0, iterlen): + # Convert to an AtomIdx. + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) - # Set the value in the new matrix, overwriting existing value. - clj_nb_pairs0.set(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y), intra) + # Map the index to its position in the merged molecule. + idx_map = mol0_merged_mapping[idx] + + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] + + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # Only set if there is a non-zero value. - if not intra.coulomb() == 0: - clj_nb_pairs1.set(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y), intra) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Finally, copy the intrascale from molecule1 into clj_nb_pairs1. + if roi_list is None: + iterlen = molecule1.nAtoms() + iterrange = list(range(molecule1.nAtoms())) + # When region of interest is defined, perfrom loop from these indices + else: + for roi in roi_list: + iterlen = len(roi[1]) + iterrange = roi[1] - # Perform a triangular loop over atoms from molecule1. - for x in range(0, molecule1.nAtoms()): - # Convert to an AtomIdx. - idx = _SireMol.AtomIdx(x) - - # Map the index to its position in the merged molecule. - idx = inv_mapping[idx] - - for y in range(x + 1, molecule1.nAtoms()): + # Perform a triangular loop over atoms from molecule1. + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(y) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy = inv_mapping[idy] + idx = mol1_merged_mapping[idx] - # Get the intrascale value. - intra = intrascale1.get(_SireMol.AtomIdx(x), _SireMol.AtomIdx(y)) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy = mol1_merged_mapping[idy] + + conn_type = conn1.connectionType(idx, idy) + + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) - # Set the value in the new matrix, overwriting existing value. - clj_nb_pairs1.set(idx, idy, intra) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) # Store the two molecular components. edit_mol.setProperty("molecule0", molecule0) @@ -1394,8 +1552,7 @@ def _is_on_ring(idx, conn): def _removeDummies(molecule, is_lambda1): """ - Internal function which removes the dummy atoms from one of the endstates - of a merged molecule. + Internal function which removes the dummy atoms from one of the endstates of a merged molecule. Parameters ---------- diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py index 8c41a8c9b..27731e95d 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_align.py @@ -29,10 +29,7 @@ "matchAtoms", "viewMapping", "rmsdAlign", - "roiMatch", - "perResidueRmsdAlign", "flexAlign", - "roiFlexAlign", "merge", ] @@ -41,9 +38,6 @@ import subprocess as _subprocess import sys as _sys -# This logger is temporary. -from loguru import logger as _logger - from .._Utils import _try_import, _have_imported, _assert_imported import warnings as _warnings @@ -57,8 +51,6 @@ if _have_imported(_rdkit): from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS - - # No idea why, but RD_logger cannot be imported from rdkit, only RDLogger can from rdkit import RDLogger as _RDLogger # Disable RDKit warnings. @@ -727,9 +719,7 @@ def matchAtoms( return_scores=False, prematch={}, timeout=5 * _Units.Time.second, - atomCompare=_rdFMCS.AtomCompare.CompareAny, complete_rings_only=True, - ring_matches_ring_only=True, prune_perturbed_constraints=None, prune_crossing_constraints=None, max_scoring_matches=1000, @@ -943,10 +933,10 @@ def matchAtoms( # Generate the MCS match. mcs = _rdFMCS.FindMCS( mols, - atomCompare=atomCompare, + atomCompare=_rdFMCS.AtomCompare.CompareAny, bondCompare=_rdFMCS.BondCompare.CompareAny, completeRingsOnly=complete_rings_only, - ringMatchesRingOnly=ring_matches_ring_only, + ringMatchesRingOnly=True, matchChiralTag=False, matchValences=False, maximizeBonds=False, @@ -1071,514 +1061,6 @@ def matchAtoms( return mappings[0:matches] -# NOTE: This function is currently experimental and has not gone through -# rigorous validation. Proceed with caution. -def _get_backbone(molecule): - """ - Extract the backbone atoms from a molecule. - Backbone atoms are defined as N, CA, C atoms in a residue. - This function is not intended to be applied to ligands. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule from which to extract the backbone atoms. - - Returns - ------- - - atom_idx : dict - A dictionary of the absolute and relative indices of the backbone atoms. - - relative_backbone_atoms : list - A list of the extracted backbone atoms. - """ - absolute_backbone_atom_indices = [] - for atom in molecule.getAtoms(): - if atom.name() in ["N", "CA", "C"]: - absolute_backbone_atom_indices.append(atom.index()) - - relative_backbone_atoms = molecule.extract(absolute_backbone_atom_indices) - relative_backbone_atom_indices = [ - atom.index() for atom in relative_backbone_atoms.getAtoms() - ] - atom_idx = dict(zip(absolute_backbone_atom_indices, relative_backbone_atom_indices)) - return atom_idx, relative_backbone_atoms - - -def _kartograf_map(molecule0, molecule1, kartograf_kwargs): - """ - A wrapper function for kartograf mapping algorithm. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule of interest. - - molecule1 : :class:`Molecule ` - The reference molecule. - - kartograf_kwargs : dict - A dictionary of keyword arguments to be passed to kartograf. - - Returns - ------- - - kartograf_mapping : gufe.mapping.ligandatommapping.LigandAtomMapping - The kartograf mapping object. - - """ - # try to import kartograf - try: - import rdkit.Chem as _Chem - from kartograf.atom_aligner import align_mol_shape as _align_mol_shape - from kartograf.atom_mapping_scorer import ( - MappingRMSDScorer as _MappingRMSDScorer, - ) - from kartograf import ( - KartografAtomMapper, - SmallMoleculeComponent as _SmallMoleculeComponent, - ) - except ImportError: - raise ImportError( - "Kartograf is not installed. Please install kartograf to use this function." - ) - - # Validate input - if not isinstance(molecule0, _Molecule): - raise TypeError( - "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(molecule1, _Molecule): - raise TypeError( - "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - from ..Convert import toRDKit as _toRDKit - - rdkit_mol0 = _toRDKit(molecule0) - rdkit_mol1 = _toRDKit(molecule1) - rdkit_mols = [rdkit_mol0, rdkit_mol1] - - # build small molecule components - mol0, mol1 = [_SmallMoleculeComponent.from_rdkit(m) for m in rdkit_mols] - - # align molecules first - a_mol1 = _align_mol_shape(mol1, ref_mol=mol0) - - # build Kartograf Atom Mapper - mapper = KartografAtomMapper(**kartograf_kwargs) - - # get mapping - kartograf_mapping = next(mapper.suggest_mappings(mol0, a_mol1)) - - # score mapping - rmsd_scorer = _MappingRMSDScorer() - score = rmsd_scorer(mapping=kartograf_mapping) - _logger.debug(f"RMSD score: {score:.2f}") - - return kartograf_mapping - - -# NOTE: This function is currently experimental and has not gone through -# rigorous validation. Proceed with caution. -def roiMatch( - molecule0, - molecule1, - roi, - force_backbone_match=False, - ring_matches_ring_only=True, - complete_rings_only=True, - atomCompare=_rdFMCS.AtomCompare.CompareAny, - use_kartograf=False, - kartograf_kwargs={}, -): - """ - Matching of two molecules based on a region of interest (ROI). - The ROI is defined as a list of residues in the molecule/protein. - The function will attempt to match the ROI in the two molecules and - return the mapping between the two molecules. Multiple ROIs can be - defined by providing a list of residues. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule of interest. - - molecule1 : :class:`Molecule ` - The reference molecule. - - roi : list - A list of regions/residues of interest in the molecule/protein. - - force_backbone_match : bool - If set to True, will force the backbone atoms to be matched which - is useful for ensuring a more stable match between the two molecules. - This is set to False by default. - - ring_matches_ring_only : bool - Whether ring bonds can only match ring bonds. - This is set to True by default. - - use_kartograf : bool - If set to True, will use the kartograf algorithm to match the - molecules. This is set to False by default. - - kartograf_kwargs : dict - A dictionary of keyword arguments to be passed to kartograf. - - Returns - ------- - - mapping : dict - A dictionary of the mapping between the two molecules. - - roi_idx : list - A list of the indices of the atoms in the ROI. This can be multiple - nested lists if the ROI is defined as a list of residues. - TODO: change the return type of roi_idx to a dictionary of lists - - Notes - ----- - - The key assumption of this function is that the two molecules are - structurally identical except for the region of interest (ROI). The ROI - could be a point mutation, or a residue that has been covalently modified. - The function will attempt to match the atoms in the ROI based on the - maximum common substructure (MCS) algorithm. First, the ROI is extracted - from the two molecules and then the atoms in the ROI are matched using - BioSimSpace.Align.matchAtoms function. The function will return the - mapping between the two molecules. This "relative" mapping will then be - used to map the atoms in the ROI to the "absolute" indices in the molecule. - So for example the relative mapping could be {0: 3, 1: 2, 2: 5} and - the absolute mapping could be {100: 103, 101: 102, 102: 105}. This way we - can bypass the need to map the entire molecule and only focus on the ROI, - which is significantly faster for large molecules. The rest of the mapping - is then composed of atoms before the ROI (pre-ROI) and after the ROI. - Every time we map the atoms in the ROI, we append the ROI - mapping to the pre-ROI mapping, which will then be used as the pre-ROI - mapping for the next ROI in the list. - - Examples - -------- - - Find the best maximum common substructure mapping between two molecules - with a region of interest defined as a list of residues. - - >>> import BioSimSpace as BSS - >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12]) - - Find the mapping between two molecules with multiple regions of interest - - >>> import BioSimSpace as BSS - >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) - - Find the best maximum common substructure mapping between two molecules, - while forcing the backbone atoms to be matched. - - >>> import BioSimSpace as BSS - >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], force_backbone_match=True) - """ - - # Validate input - if not isinstance(molecule0, _Molecule): - raise TypeError( - "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(molecule1, _Molecule): - raise TypeError( - "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if roi is None: - raise ValueError("residue of interest list is not provided.") - - roi_idx = [] - - _logger.debug(f"Number of mol A atoms: {molecule0.nAtoms()}") - _logger.debug(f"Number of mol B atoms: {molecule1.nAtoms()}") - - # Get the atoms before the ROI. - # This is being done so that when we map the atoms in ROI, we can append - # the ROI mapping to this pre-ROI mapping which will then be used as - # the pre-ROI mapping for the next ROI in the list, i.e. - # pre_roi_mapping = pre_roi_mapping + roi mapping + mapping to next ROI - pre_roi_molecule0 = molecule0.search(f"residue[0:{roi[0]}]") - pre_roi_atom_idx_molecule0 = [a.index() for a in pre_roi_molecule0.atoms()] - - pre_roi_molecule1 = molecule1.search(f"residue[0:{roi[0]}]") - pre_roi_atom_idx_molecule1 = [a.index() for a in pre_roi_molecule1.atoms()] - - pre_roi_mapping = dict(zip(pre_roi_atom_idx_molecule0, pre_roi_atom_idx_molecule1)) - - # Loop over the residues of interest - for i, res_idx in enumerate(roi): - _logger.debug(f"Starting match for ROI id: {res_idx}") - - molecule0_roi = molecule0.getResidues()[res_idx] - _logger.debug("Molecule0 ROI:") - _logger.debug(molecule0_roi) - - molecule1_roi = molecule1.getResidues()[res_idx] - _logger.debug("Molecule1 ROI:") - _logger.debug(molecule1_roi) - - # Warn if matching between the same residues, in a case where we are - # transforming from one enantiomer to another, the atomtypes will - # be the same and trigger this warning. - if ( - molecule0_roi.name() == molecule1_roi.name() - and molecule0_roi.nAtoms() == molecule1_roi.nAtoms() - ): - molecule0_atoms = [a.name() for a in molecule0_roi.getAtoms()] - molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] - if molecule0_atoms == molecule1_atoms: - _logger.warning( - f"Residue {res_idx} in molecule0 and molecule1 have identical atomtypes." - ) - - res0_idx = [a.index() for a in molecule0_roi] - # _logger.debug(f"res0 indices: {res0_idx}") - res1_idx = [a.index() for a in molecule1_roi] - # _logger.debug(f"res1 indices: {res1_idx}") - - # Append the ROI indices to the list - roi_idx.append([res0_idx, res1_idx]) - - # Extract the residues of interest from the molecules - res0_extracted = molecule0.extract(res0_idx) - res1_extracted = molecule1.extract(res1_idx) - - # for a in res0_extracted.getAtoms(): - # _logger.debug(f"res0 atom: {a}") - # for b in res1_extracted.getAtoms(): - # _logger.debug(f"res1 atom: {b}") - - # If force_backbone_match is enabled, - # we are going to use the backbone atoms as a prematch - if force_backbone_match: - _logger.debug("Forcing backbone match") - - backbone_res0_idx, backbone_res0_atoms = _get_backbone( - molecule0_roi.toMolecule() - ) - backbone_res1_idx, backbone_res1_atoms = _get_backbone( - molecule1_roi.toMolecule() - ) - - # _logger.debug(f"Backbone res0 indices: {backbone_res0_idx}") - # _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") - - relative_backbone_mapping = matchAtoms( - backbone_res0_atoms, backbone_res1_atoms, scoring_function="rmsd" - ) - - # Translate the relative mapping to the absolute indices. - # The lookup table contains the relative indices of the atoms - # that have been mapped from one residue to the other, for example - # [0, 2, 4, 5]. We can use these as positional indices to get the - # absolute indices of the atoms from the original residue. i.e. - # if the original residue had atom indices such as: - # [100, 101, 102, 103, 104, 105], we can use the lookup table to - # get [100, 102, 104, 105]. Doing the same for the other residue - # will give us an absolute mapping between the two residues. - res0_lookup_table = list(relative_backbone_mapping.keys()) - absolute_backbone_mapped_atoms_res0 = [ - list(backbone_res0_idx.keys())[i] for i in res0_lookup_table - ] - - res1_lookup_table = list(relative_backbone_mapping.values()) - absolute_backbone_mapped_atoms_res1 = [ - list(backbone_res1_idx.keys())[i] for i in res1_lookup_table - ] - - absolute_backbone_mapping = dict( - zip( - absolute_backbone_mapped_atoms_res0, - absolute_backbone_mapped_atoms_res1, - ) - ) - _logger.debug(f"Absolute backbone mapping: {absolute_backbone_mapping}") - - mapping = matchAtoms( - res0_extracted, - res1_extracted, - prematch=absolute_backbone_mapping, - complete_rings_only=complete_rings_only, - scoring_function="RMSD", - ring_matches_ring_only=ring_matches_ring_only, - ) - else: - if use_kartograf: - _logger.debug("Using kartograf to map the ROI.") - kartograf_mapping = _kartograf_map( - res0_extracted, res1_extracted, kartograf_kwargs - ) - mapping = kartograf_mapping.componentA_to_componentB - else: - _logger.debug("Using rdKit MCS to map the ROI.") - mapping = matchAtoms( - res0_extracted, - res1_extracted, - complete_rings_only=complete_rings_only, - ring_matches_ring_only=ring_matches_ring_only, - atomCompare=atomCompare, - ) - - _logger.debug(f"Mapping: {mapping}") - - # Check how many atoms got mapped from one residue to the other - _logger.debug( - f"Mapped {len(mapping.keys())} out of {len(res0_idx)} atoms for molecule0" - ) - _logger.debug( - f"Mapped {len(mapping.values())} out of {len(res1_idx)} atoms for molecule1" - ) - - # If the number of mapped atoms is not the same as the number of atoms - # in the ROI we need to use that molecule as the reference - # for the lookup table. - # NOTE: We don't really need the conditional statement here that checks - # if all of the atoms in the ROI have been mapped, because - # we are going to translate atoms relative indices to absolute ones - # and that case would be captured by the lookup table. - if len(mapping.keys()) != len(res0_idx): - _logger.debug("Some atoms for molecule0 in the ROI have not been mapped.") - _logger.debug("Using molecule0 as the reference for the lookup table.") - residue_lookup_table = list(mapping.keys()) - absolute_mapped_atoms_res0 = [res0_idx[i] for i in residue_lookup_table] - - # i.e this bit is unnecessary - else: - absolute_mapped_atoms_res0 = res0_idx - - if len(mapping.values()) != len(res1_idx): - _logger.debug("Some atoms for molecule1 in the ROI have not been mapped.") - _logger.debug("Using molecule1 as the reference for the lookup table.") - residue_lookup_table = list(mapping.values()) - absolute_mapped_atoms_res1 = [res1_idx[i] for i in residue_lookup_table] - - else: - absolute_mapped_atoms_res1 = res1_idx - - absolute_roi_mapping = dict( - zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) - ) - - # Check that pre_roi_atom_indices are not part of molecule ROI indices - # NOTE: This could be a costly operation, if pre_roi_atom_indices is - # large. - if any(i in pre_roi_atom_idx_molecule0 for i in res0_idx) or any( - i in pre_roi_atom_idx_molecule1 for i in res1_idx - ): - raise ValueError("Found atoms in pre ROI region that are part of the ROI.") - - # Now we have to think about what to do with the atom indices - # after the mapping as these are not going to be the same - # between the two molecules. - # If we are at the last residue of interest, we don't need to worry - # too much about the after ROI region as this region will be all of the - # molecule atoms after the last residue of interest. - # In the case when we are not at the last residue of interest, - # we need to map the atoms to the next ROI. - if res_idx != roi[-1]: - - # If the next ROI residue index in the ROI list is next to - # the current ROI index, after_roi atom index list will be empty - # i.e. if we're currently at residue 10 and the next ROI is 11, - # we don't need to map the atoms. - # If we were at residue 10 and the next residue of interest is 12, - # we would need to map the atoms between residues 10 and 12. - if roi[i + 1] - roi[i] == 1: - _logger.debug( - "Next ROI is adjacent to the current ROI, no need to map atoms between the two." - ) - after_roi_atom_idx_molecule0 = [] - after_roi_atom_idx_molecule1 = [] - else: - after_roi_molecule0 = molecule0.search( - f"residue[{res_idx+1}:{roi[i+1]}]" - ) - after_roi_atom_idx_molecule0 = [ - a.index() for a in after_roi_molecule0.atoms() - ] - - after_roi_molecule1 = molecule1.search( - f"residue[{res_idx+1}:{roi[i+1]}]" - ) - after_roi_atom_idx_molecule1 = [ - b.index() for b in after_roi_molecule1.atoms() - ] - - after_roi_mapping = dict( - zip( - after_roi_atom_idx_molecule0, - after_roi_atom_idx_molecule1, - ) - ) - # Append the mappings to the pre_roi_mapping, which will then be - # used as the pre_roi_mapping for the next ROI in the list. - pre_roi_mapping = { - **pre_roi_mapping, - **absolute_roi_mapping, - **after_roi_mapping, - } - else: - # Get all of the remaining atoms after the last ROI - after_roi_molecule0 = molecule0.search(f"residue[{res_idx+1}:]") - after_roi_atom_idx_molecule0 = [ - a.index() for a in after_roi_molecule0.atoms() - ] - - after_roi_molecule1 = molecule1.search(f"residue[{res_idx+1}:]") - after_roi_atom_idx_molecule1 = [ - b.index() for b in after_roi_molecule1.atoms() - ] - - after_roi_mapping = dict( - zip( - after_roi_atom_idx_molecule0, - after_roi_atom_idx_molecule1, - ) - ) - - # Check that after_roi_atom_indices are not part of absolute_roi_mapping - if any(i in after_roi_atom_idx_molecule0 for i in res0_idx): - raise ValueError("Found atoms in after ROI region that are part of the ROI") - - if any(i in after_roi_atom_idx_molecule1 for i in res1_idx): - raise ValueError("Found atoms in after ROI region that are part of the ROI") - - # Print all 3 dictionaries - _logger.debug(f"Pre ROI region mapping: {pre_roi_mapping}") - _logger.debug(f"Absolute ROI mapping: {absolute_roi_mapping}") - _logger.debug(f"After ROI region mapping: {after_roi_mapping}") - - # Combine the dictionaries to get the full mapping - combined_dict = { - **pre_roi_mapping, - **absolute_roi_mapping, - **after_roi_mapping, - } - - # Print the matched atoms in the ROI - # for idx0, idx1 in mapping.items(): - # _logger.debug( - # f"{res0_extracted.getAtoms()[idx0]} <--> {res1_extracted.getAtoms()[idx1]}" - # ) - - # _logger.debug(f"Full matched atoms: {combined_dict}") - # for idx0, idx1 in mapping.items(): - # _logger.debug(f"{molecule0.getAtoms()[idx0]} <--> {molecule1.getAtoms()[idx1]}") - - # TODO: Change the return type of roi_idx to a dictionary of lists - return combined_dict, roi_idx - - def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): """ Align atoms in molecule0 to those in molecule1 using the mapping @@ -1687,169 +1169,6 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map return _Molecule(mol0) -def perResidueRmsdAlign( - molecule0, molecule1, mapping=None, property_map0={}, property_map1={} -): - """ - Align atoms in molecule0 to those in molecule1 using the mapping - between matched atom indices. The molecule is aligned using rigid-body - translation and rotations, with a root mean squared displacement (RMSD) - fit to find the optimal translation vector (as opposed to merely taking - the difference of centroids). This function is specifically designed to - be used for aligning protein residues. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule to align. - - molecule1 : :class:`Molecule ` - The reference molecule. - - mapping : dict - A dictionary mapping atoms in molecule0 to those in molecule1. - - property_map0 : dict - A dictionary that maps "properties" in molecule0 to their user - defined values. This allows the user to refer to properties - with their own naming scheme, e.g. { "charge" : "my-charge" } - - property_map1 : dict - A dictionary that maps "properties" in molecule1 to their user - defined values. - - Returns - ------- - - molecule : :class:`Molecule ` - The aligned molecule. - - Examples - -------- - - Align molecule0 to molecule1 based on a precomputed mapping. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.perResideRmsdAlign(molecule0, molecule1, mapping) - - """ - - if not isinstance(molecule0, _Molecule): - raise TypeError( - "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(molecule1, _Molecule): - raise TypeError( - "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(property_map0, dict): - raise TypeError("'property_map0' must be of type 'dict'") - - if not isinstance(property_map1, dict): - raise TypeError("'property_map1' must be of type 'dict'") - - # The user has passed an atom mapping. - if mapping is not None: - if not isinstance(mapping, dict): - raise TypeError("'mapping' must be of type 'dict'.") - else: - _validate_mapping(molecule0, molecule1, mapping, "mapping") - - absolute_residue_mapping = {} - - for res in molecule1.getResidues(): - - # Get the mapping for the current residue using its atom indices - # i.e. {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5} - # _logger.debug(f"Residue {res.index()}") - # _logger.debug(f"Residue atoms: {res.getAtoms()}") - - for atom in res.getAtoms(): - # try to get the mapping for the atom - value = mapping.get(atom.index()) - if value is not None: - residue_mapping = {atom.index(): value} - - # residue_mapping = {a.index(): mapping[a.index()] for a in res.getAtoms()} - - # update the absolute mapping dictionary to contain the mapping for each residue - # i.e. {0: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}...} - absolute_residue_mapping[res.index()] = residue_mapping - - # Extract the Sire molecule from each BioSimSpace molecule. - mol0 = molecule0._getSireObject() - mol1 = molecule1._getSireObject() - - mol0_residues = mol0.residues() - mol1_residues = mol1.residues() - - # Update the mapping to contain the residue indices - # i.e. {100: 103, 101: 102, 102: 105} -> {10: {100: 103, 101: 102, 102: 105}} - - # Align each residue from molecule0 to molecule1 individually - mol0_updated_residue_coords = [] - for i, res in enumerate(mol0_residues): - # _logger.debug(f"Aligning residue {res.index()} to reference residue {mol1_residues[i].index()}") - res0 = res.extract() - res1 = mol1_residues[i].extract() - - # Get the mapping for the current residue - residue_mapping = absolute_residue_mapping[i] - - # We will now offset the mapping with the first atom index of the residue - # ie. {100: 103, 101: 102, 102: 105} -> {0: 3, 1: 2, 2: 5} - relative_keys = list(residue_mapping.keys()) - relative_key = relative_keys[0] - relative_values = list(residue_mapping.values()) - relative_value = relative_values[0] - - relative_residue_mapping = { - k - relative_key: v - relative_value for k, v in residue_mapping.items() - } - - # Perform the alignment, res0 to res1. - # Convert the mapping to AtomIdx key:value pairs. - sire_mapping = _to_sire_mapping(mapping) - # _logger.debug(f"Residue coordinates before alignment: {res0.coords()}") - try: - res0 = ( - res0.move() - .align(res1, _SireMol.AtomResultMatcher(sire_mapping)) - .molecule() - ) - # _logger.debug(f"Residue coordinates after alignment: {res0.coords()}") - mol0_updated_residue_coords.append(res0.property("coordinates")) - - except Exception as e: - msg = f"Failed to align residues based on mapping: {mapping}" - if "Could not calculate the single value decomposition" in str(e): - msg += ". Try minimising your molecular coordinates prior to alignment." - if _isVerbose(): - raise _AlignmentError(msg) from e - else: - raise _AlignmentError(msg) from None - - # Retrieve the atoms coordiantes from the updated residues - mol0_updated_coords = [] - for res in mol0_updated_residue_coords: - for atom_coord in res: - mol0_updated_coords.append(atom_coord) - - # Create a cursor for updating the coordinates property - cursor = mol0.cursor() - for i, atom in enumerate(cursor.atoms()): - atom["coordinates"] = mol0_updated_coords[i] - - # Commit the update back to the original molecule - mol0 = cursor.commit() - - # Return the aligned molecule. - return _Molecule(mol0) - - def flexAlign( molecule0, molecule1, @@ -2009,136 +1328,6 @@ def flexAlign( return _Molecule(molecule0) -def roiFlexAlign( - molecule0, - molecule1, - roi=None, - mapping=None, - fkcombu_exe=None, - property_map0={}, - property_map1={}, -): - """ - Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 - using BioSimSpace.Align.flexAlign(). - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule to align. - - molecule1 : :class:`Molecule ` - The reference molecule. - - mapping : dict - A dictionary mapping atoms in molecule0 to those in molecule1. - - fkcombu_exe : str - Path to the fkcombu executable. If None is passed, then BioSimSpace - will attempt to find fkcombu by searching your PATH. - - property_map0 : dict - A dictionary that maps "properties" in molecule0 to their user - defined values. This allows the user to refer to properties - with their own naming scheme, e.g. { "charge" : "my-charge" } - - property_map1 : dict - A dictionary that maps "properties" in molecule1 to their user - defined values. - - Returns - ------- - - molecule : :class:`Molecule ` - The aligned molecule. - - Examples - -------- - - Align molecule0 to molecule1 based on a precomputed mapping. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping) - - Align molecule0 to molecule1. Since no mapping is passed one will be - autogenerated using :class:`matchAtoms ` - with default options. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) - """ - - if not isinstance(molecule0, _Molecule): - raise TypeError( - "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(molecule1, _Molecule): - raise TypeError( - "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - if roi is None: - raise ValueError("No region of interest (ROI) has been provided.") - else: - if not isinstance(roi, list): - raise TypeError("'roi' must be of type 'list'.") - - # The user has passed an atom mapping. - if mapping is not None: - if not isinstance(mapping, dict): - raise TypeError("'mapping' must be of type 'dict'.") - else: - _validate_mapping(molecule0, molecule1, mapping, "mapping") - - for roi_idx in roi: - res0 = molecule0.getResidues()[roi_idx] - res1 = molecule1.getResidues()[roi_idx] - - if res0.name() == res1.name() and res0.nAtoms() == res1.nAtoms(): - res0_atoms = [a.name() for a in res0.getAtoms()] - res1_atoms = [a.name() for a in res0.getAtoms()] - if res0_atoms == res1_atoms: - _logger.warning( - f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." - ) - - res0_idx = [a.index() for a in res0.getAtoms()] - res1_idx = [a.index() for a in res1.getAtoms()] - - # Extract the residues of interest from the molecules - res0_extracted = molecule0.extract(res0_idx) - res1_extracted = molecule1.extract(res1_idx) - - res0_aligned = flexAlign(molecule0=res0_extracted, molecule1=res1_extracted) - - # Now update molecule0 with the aligned residue coordinates - mol0 = molecule0._getSireObject() - res0_aligned_coords = res0_aligned._getSireObject().property("coordinates") - - # A list to store the updated coordinates for molecule0 - mol0_coords = [] - for i, res in enumerate(mol0.residues()): - if i == roi_idx: - mol0_coords.append(res0_aligned_coords) - else: - mol0_coords.append(res.property("coordinates")) - # Flatten the list - mol0_coords = [item for sublist in mol0_coords for item in sublist] - - # Create a cursor for updating the coordinates property - c = mol0.cursor() - for i, atom in enumerate(c.atoms()): - atom["coordinates"] = mol0_coords[i] - mol0 = c.commit() - - # Convert the Sire molecule back to a BioSimSpace molecule so we can - # loop over it again if needed - molecule0 = _Molecule(mol0) - - return molecule0 - - def merge( molecule0, molecule1, @@ -2146,7 +1335,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, - roi_list=None, + roi=None, property_map0={}, property_map1={}, ): @@ -2183,7 +1372,7 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi_list : list + roi : list The region of interest to merge. Consist of two lists of atom indices. property_map0 : dict @@ -2243,11 +1432,11 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") - if roi_list is not None: - if not isinstance(roi_list, list): - raise TypeError("'roi_list' must be of type 'list'.") + if roi is not None: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") else: - _validate_roi(molecule0, molecule1, roi_list) + _validate_roi(molecule0, molecule1, roi) # The user has passed an atom mapping. if mapping is not None: @@ -2278,7 +1467,7 @@ def merge( allow_ring_breaking=allow_ring_breaking, allow_ring_size_change=allow_ring_size_change, force=force, - roi_list=roi_list, + roi=roi, property_map0=property_map0, property_map1=property_map1, ) @@ -2927,7 +2116,7 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) -def _validate_roi(molecule0, molecule1, roi_list): +def _validate_roi(molecule0, molecule1, roi): """Internal function to validate that a mapping contains key:value pairs of the correct type. @@ -2940,24 +2129,21 @@ def _validate_roi(molecule0, molecule1, roi_list): molecule1 : :class:`Molecule ` The reference molecule. - roi_list : list + roi : list The region of interest to merge. """ - for roi in roi_list: - if len(roi) != 2: - raise ValueError("The length of roi list must be 2.") - if not isinstance(roi[0], list) or not isinstance(roi[1], list): - raise ValueError("The element of roi must be of type list") - for mol_idx, ele in enumerate(roi): - for atom_idx in ele: - if type(atom_idx) is not int: - raise ValueError( - f"The element of roi[{mol_idx}] should be of type int" - ) - if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): - raise IndexError( - f"The element of roi[{mol_idx}] should within range of number of atoms" - ) + if len(roi) != 2: + raise ValueError("The length of roi list must be 2.") + if not isinstance(roi[0], list) or not isinstance(roi[1], list): + raise ValueError("The element of roi must be of type list") + for mol_idx, ele in enumerate(roi): + for atom_idx in ele: + if type(atom_idx) is not int: + raise ValueError(f"The element of roi[{mol_idx}] should be of type int") + if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): + raise IndexError( + f"The element of roi[{mol_idx}] should within range of number of atoms" + ) def _to_sire_mapping(mapping): diff --git a/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py b/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py index f92fe90bb..29dbf731f 100644 --- a/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py +++ b/python/BioSimSpace/Sandpit/Exscientia/Align/_merge.py @@ -32,9 +32,6 @@ from sire.legacy import Mol as _SireMol from sire.legacy import Units as _SireUnits -# This logger is temporary. -from loguru import logger as _logger - from .._Exceptions import IncompatibleError as _IncompatibleError from .._SireWrappers import Molecule as _Molecule @@ -46,7 +43,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, - roi_list=None, + roi=None, property_map0={}, property_map1={}, ): @@ -78,8 +75,8 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi_list : list - The region of interest to merge. Consist of a multiple nested list lists of atom indices. + roi : list + The region of interest to merge. Consist of two lists of atom indices. property_map0 : dict A dictionary that maps "properties" in this molecule to their @@ -159,13 +156,9 @@ def merge( # Get the atom indices from the mapping. idx0 = mapping.keys() idx1 = mapping.values() - _logger.debug(f"Mapping: {mapping}") - _logger.debug(f"idx0: {idx0}") - _logger.debug(f"idx1: {idx1}") # Create the reverse mapping: molecule1 --> molecule0 inv_mapping = {v: k for k, v in mapping.items()} - _logger.debug(f"Inverse mapping: {inv_mapping}") # Generate the mappings from each molecule to the merged molecule mol0_merged_mapping = {} @@ -208,7 +201,6 @@ def merge( if atom.index() not in idx0: atoms0.append(atom) atoms0_idx.append(atom.index()) - _logger.debug(f"Unique atoms in molecule0: {atoms0_idx}") # molecule1 for atom in molecule1.atoms(): @@ -216,12 +208,10 @@ def merge( atoms1.append(atom) atoms1_idx.append(atom.index()) - _logger.debug(f"Unique atoms in molecule1: {atoms1_idx}") - # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") # Only part of the ligand is to be merged - if roi_list is not None: + if roi is not None: if molecule0.nResidues() != molecule1.nResidues(): raise ValueError( "The two molecules need to have the same number of residues" @@ -1023,7 +1013,7 @@ def merge( # Check that the merge hasn't modified the connectivity. # The checking was blocked when merging a protein - if roi_list is None: + if roi is None: # molecule0 for x in range(0, molecule0.nAtoms()): # Convert to an AtomIdx. @@ -1202,138 +1192,135 @@ def merge( # Copy the intrascale from molecule1 into clj_nb_pairs0. # Perform a triangular loop over atoms from molecule1. - if roi_list is None: + if roi is None: iterlen = molecule1.nAtoms() iterrange = list(range(molecule1.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - for roi in roi_list: - iterlen = len(roi[1]) - iterrange = roi[1] - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) + iterlen = len(roi[1]) + iterrange = roi[1] + for x in range(0, iterlen): + # Convert to an AtomIdx. + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) - # Map the index to its position in the merged molecule. - idx_map = mol1_merged_mapping[idx] + # Map the index to its position in the merged molecule. + idx_map = mol1_merged_mapping[idx] - for y in range(x + 1, iterlen): - idy = iterrange[y] - # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - # Map the index to its position in the merged molecule. - idy_map = mol1_merged_mapping[idy] + # Map the index to its position in the merged molecule. + idy_map = mol1_merged_mapping[idy] - conn_type = conn0.connectionType(idx_map, idy_map) - # The atoms aren't bonded. - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Now copy in all intrascale values from molecule0 into both # clj_nb_pairs matrices. - if roi_list is None: + if roi is None: iterlen = molecule0.nAtoms() iterrange = list(range(molecule0.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - for roi in roi_list: - iterlen = len(roi[0]) - iterrange = roi[0] + iterlen = len(roi[0]) + iterrange = roi[0] - # Perform a triangular loop over atoms from molecule0. - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) + # Perform a triangular loop over atoms from molecule0. + for x in range(0, iterlen): + # Convert to an AtomIdx. + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) - # Map the index to its position in the merged molecule. - idx_map = mol0_merged_mapping[idx] + # Map the index to its position in the merged molecule. + idx_map = mol0_merged_mapping[idx] - for y in range(x + 1, iterlen): - idy = iterrange[y] - # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - # Map the index to its position in the merged molecule. - idy_map = mol0_merged_mapping[idy] + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] - conn_type = conn0.connectionType(idx_map, idy_map) - # The atoms aren't bonded. - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Finally, copy the intrascale from molecule1 into clj_nb_pairs1. - if roi_list is None: + if roi is None: iterlen = molecule1.nAtoms() iterrange = list(range(molecule1.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - for roi in roi_list: - iterlen = len(roi[1]) - iterrange = roi[1] + iterlen = len(roi[1]) + iterrange = roi[1] - # Perform a triangular loop over atoms from molecule1. - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) + # Perform a triangular loop over atoms from molecule1. + for x in range(0, iterlen): + # Convert to an AtomIdx. + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) - # Map the index to its position in the merged molecule. - idx = mol1_merged_mapping[idx] + # Map the index to its position in the merged molecule. + idx = mol1_merged_mapping[idx] - for y in range(x + 1, iterlen): - idy = iterrange[y] - # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - # Map the index to its position in the merged molecule. - idy = mol1_merged_mapping[idy] + # Map the index to its position in the merged molecule. + idy = mol1_merged_mapping[idy] - conn_type = conn1.connectionType(idx, idy) + conn_type = conn1.connectionType(idx, idy) - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) # Store the two molecular components. edit_mol.setProperty("molecule0", molecule0) From 6cb63aee5e46331a5b2b1e16993874612d3e0d48 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Fri, 17 May 2024 13:42:21 +0100 Subject: [PATCH 11/32] Remove backbone mapping function as extensive testing shows it's unnecessary. --- python/BioSimSpace/Align/_align.py | 115 +++-------------------------- 1 file changed, 9 insertions(+), 106 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index f022f88d4..1d4153430 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -1061,42 +1061,6 @@ def matchAtoms( return mappings[0:matches] -# NOTE: This function is currently experimental and has not gone through -# rigorous validation. Proceed with caution. -def _get_backbone(molecule): - """ - Extract the backbone atoms from a molecule. - Backbone atoms are defined as N, CA, C atoms in a residue. - This function is not intended to be applied to ligands. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule from which to extract the backbone atoms. - - Returns - ------- - - atom_idx : dict - A dictionary of the absolute and relative indices of the backbone atoms. - - relative_backbone_atoms : list - A list of the extracted backbone atoms. - """ - absolute_backbone_atom_indices = [] - for atom in molecule.getAtoms(): - if atom.name() in ["N", "CA", "C"]: - absolute_backbone_atom_indices.append(atom.index()) - - relative_backbone_atoms = molecule.extract(absolute_backbone_atom_indices) - relative_backbone_atom_indices = [ - atom.index() for atom in relative_backbone_atoms.getAtoms() - ] - atom_idx = dict(zip(absolute_backbone_atom_indices, relative_backbone_atom_indices)) - return atom_idx, relative_backbone_atoms - - def _kartograf_map(molecule0, molecule1, kartograf_kwargs): """ A wrapper function for kartograf mapping algorithm. @@ -1204,11 +1168,6 @@ def roiMatch( roi : list A list of regions/residues of interest in the molecule/protein. - force_backbone_match : bool - If set to True, will force the backbone atoms to be matched which - is useful for ensuring a more stable match between the two molecules. - This is set to False by default. - ring_matches_ring_only : bool Whether ring bonds can only match ring bonds. This is set to True by default. @@ -1348,76 +1307,21 @@ def roiMatch( # for b in res1_extracted.getAtoms(): # _logger.debug(f"res1 atom: {b}") - # If force_backbone_match is enabled, - # we are going to use the backbone atoms as a prematch - if force_backbone_match: - _logger.debug("Forcing backbone match") - - backbone_res0_idx, backbone_res0_atoms = _get_backbone( - molecule0_roi.toMolecule() - ) - backbone_res1_idx, backbone_res1_atoms = _get_backbone( - molecule1_roi.toMolecule() - ) - - # _logger.debug(f"Backbone res0 indices: {backbone_res0_idx}") - # _logger.debug(f"Backbone res1 indices: {backbone_res1_idx}") - - relative_backbone_mapping = matchAtoms( - backbone_res0_atoms, backbone_res1_atoms, scoring_function="rmsd" - ) - - # Translate the relative mapping to the absolute indices. - # The lookup table contains the relative indices of the atoms - # that have been mapped from one residue to the other, for example - # [0, 2, 4, 5]. We can use these as positional indices to get the - # absolute indices of the atoms from the original residue. i.e. - # if the original residue had atom indices such as: - # [100, 101, 102, 103, 104, 105], we can use the lookup table to - # get [100, 102, 104, 105]. Doing the same for the other residue - # will give us an absolute mapping between the two residues. - res0_lookup_table = list(relative_backbone_mapping.keys()) - absolute_backbone_mapped_atoms_res0 = [ - list(backbone_res0_idx.keys())[i] for i in res0_lookup_table - ] - - res1_lookup_table = list(relative_backbone_mapping.values()) - absolute_backbone_mapped_atoms_res1 = [ - list(backbone_res1_idx.keys())[i] for i in res1_lookup_table - ] - - absolute_backbone_mapping = dict( - zip( - absolute_backbone_mapped_atoms_res0, - absolute_backbone_mapped_atoms_res1, - ) + if use_kartograf: + _logger.debug("Using kartograf to map the ROI.") + kartograf_mapping = _kartograf_map( + res0_extracted, res1_extracted, kartograf_kwargs ) - _logger.debug(f"Absolute backbone mapping: {absolute_backbone_mapping}") - + mapping = kartograf_mapping.componentA_to_componentB + else: + _logger.debug("Using rdKit MCS to map the ROI.") mapping = matchAtoms( res0_extracted, res1_extracted, - prematch=absolute_backbone_mapping, complete_rings_only=complete_rings_only, - scoring_function="RMSD", ring_matches_ring_only=ring_matches_ring_only, + atomCompare=atomCompare, ) - else: - if use_kartograf: - _logger.debug("Using kartograf to map the ROI.") - kartograf_mapping = _kartograf_map( - res0_extracted, res1_extracted, kartograf_kwargs - ) - mapping = kartograf_mapping.componentA_to_componentB - else: - _logger.debug("Using rdKit MCS to map the ROI.") - mapping = matchAtoms( - res0_extracted, - res1_extracted, - complete_rings_only=complete_rings_only, - ring_matches_ring_only=ring_matches_ring_only, - atomCompare=atomCompare, - ) _logger.debug(f"Mapping: {mapping}") @@ -1998,6 +1902,7 @@ def flexAlign( # Return the aligned molecule. return _Molecule(molecule0) + def roiAlign( molecule0, molecule1, @@ -2124,8 +2029,6 @@ def roiAlign( return molecule0 - - def roiFlexAlign( molecule0, molecule1, From 54a89e355855bfb253441a0411c11311a0cfca60 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Fri, 17 May 2024 14:21:11 +0100 Subject: [PATCH 12/32] Consolidate ROI alignment functions into a signle function. --- python/BioSimSpace/Align/_align.py | 333 +++-------------------------- 1 file changed, 33 insertions(+), 300 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 1d4153430..e65ab8c9a 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -30,9 +30,8 @@ "viewMapping", "rmsdAlign", "roiMatch", - "perResidueRmsdAlign", "flexAlign", - "roiFlexAlign", + "roiAlign", "merge", ] @@ -1581,169 +1580,6 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map return _Molecule(mol0) -def perResidueRmsdAlign( - molecule0, molecule1, mapping=None, property_map0={}, property_map1={} -): - """ - Align atoms in molecule0 to those in molecule1 using the mapping - between matched atom indices. The molecule is aligned using rigid-body - translation and rotations, with a root mean squared displacement (RMSD) - fit to find the optimal translation vector (as opposed to merely taking - the difference of centroids). This function is specifically designed to - be used for aligning protein residues. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule to align. - - molecule1 : :class:`Molecule ` - The reference molecule. - - mapping : dict - A dictionary mapping atoms in molecule0 to those in molecule1. - - property_map0 : dict - A dictionary that maps "properties" in molecule0 to their user - defined values. This allows the user to refer to properties - with their own naming scheme, e.g. { "charge" : "my-charge" } - - property_map1 : dict - A dictionary that maps "properties" in molecule1 to their user - defined values. - - Returns - ------- - - molecule : :class:`Molecule ` - The aligned molecule. - - Examples - -------- - - Align molecule0 to molecule1 based on a precomputed mapping. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.perResideRmsdAlign(molecule0, molecule1, mapping) - - """ - - if not isinstance(molecule0, _Molecule): - raise TypeError( - "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(molecule1, _Molecule): - raise TypeError( - "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(property_map0, dict): - raise TypeError("'property_map0' must be of type 'dict'") - - if not isinstance(property_map1, dict): - raise TypeError("'property_map1' must be of type 'dict'") - - # The user has passed an atom mapping. - if mapping is not None: - if not isinstance(mapping, dict): - raise TypeError("'mapping' must be of type 'dict'.") - else: - _validate_mapping(molecule0, molecule1, mapping, "mapping") - - absolute_residue_mapping = {} - - for res in molecule1.getResidues(): - - # Get the mapping for the current residue using its atom indices - # i.e. {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5} - # _logger.debug(f"Residue {res.index()}") - # _logger.debug(f"Residue atoms: {res.getAtoms()}") - - for atom in res.getAtoms(): - # try to get the mapping for the atom - value = mapping.get(atom.index()) - if value is not None: - residue_mapping = {atom.index(): value} - - # residue_mapping = {a.index(): mapping[a.index()] for a in res.getAtoms()} - - # update the absolute mapping dictionary to contain the mapping for each residue - # i.e. {0: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5}...} - absolute_residue_mapping[res.index()] = residue_mapping - - # Extract the Sire molecule from each BioSimSpace molecule. - mol0 = molecule0._getSireObject() - mol1 = molecule1._getSireObject() - - mol0_residues = mol0.residues() - mol1_residues = mol1.residues() - - # Update the mapping to contain the residue indices - # i.e. {100: 103, 101: 102, 102: 105} -> {10: {100: 103, 101: 102, 102: 105}} - - # Align each residue from molecule0 to molecule1 individually - mol0_updated_residue_coords = [] - for i, res in enumerate(mol0_residues): - # _logger.debug(f"Aligning residue {res.index()} to reference residue {mol1_residues[i].index()}") - res0 = res.extract() - res1 = mol1_residues[i].extract() - - # Get the mapping for the current residue - residue_mapping = absolute_residue_mapping[i] - - # We will now offset the mapping with the first atom index of the residue - # ie. {100: 103, 101: 102, 102: 105} -> {0: 3, 1: 2, 2: 5} - relative_keys = list(residue_mapping.keys()) - relative_key = relative_keys[0] - relative_values = list(residue_mapping.values()) - relative_value = relative_values[0] - - relative_residue_mapping = { - k - relative_key: v - relative_value for k, v in residue_mapping.items() - } - - # Perform the alignment, res0 to res1. - # Convert the mapping to AtomIdx key:value pairs. - sire_mapping = _to_sire_mapping(mapping) - # _logger.debug(f"Residue coordinates before alignment: {res0.coords()}") - try: - res0 = ( - res0.move() - .align(res1, _SireMol.AtomResultMatcher(sire_mapping)) - .molecule() - ) - # _logger.debug(f"Residue coordinates after alignment: {res0.coords()}") - mol0_updated_residue_coords.append(res0.property("coordinates")) - - except Exception as e: - msg = f"Failed to align residues based on mapping: {mapping}" - if "Could not calculate the single value decomposition" in str(e): - msg += ". Try minimising your molecular coordinates prior to alignment." - if _isVerbose(): - raise _AlignmentError(msg) from e - else: - raise _AlignmentError(msg) from None - - # Retrieve the atoms coordiantes from the updated residues - mol0_updated_coords = [] - for res in mol0_updated_residue_coords: - for atom_coord in res: - mol0_updated_coords.append(atom_coord) - - # Create a cursor for updating the coordinates property - cursor = mol0.cursor() - for i, atom in enumerate(cursor.atoms()): - atom["coordinates"] = mol0_updated_coords[i] - - # Commit the update back to the original molecule - mol0 = cursor.commit() - - # Return the aligned molecule. - return _Molecule(mol0) - - def flexAlign( molecule0, molecule1, @@ -1908,6 +1744,7 @@ def roiAlign( molecule1, roi=None, mapping=None, + align_function="rmsd", fkcombu_exe=None, property_map0={}, property_map1={}, @@ -1928,134 +1765,23 @@ def roiAlign( mapping : dict A dictionary mapping atoms in molecule0 to those in molecule1. - property_map0 : dict - A dictionary that maps "properties" in molecule0 to their user - defined values. This allows the user to refer to properties - with their own naming scheme, e.g. { "charge" : "my-charge" } - - property_map1 : dict - A dictionary that maps "properties" in molecule1 to their user - defined values. - - Returns - ------- - - molecule : :class:`Molecule ` - The aligned molecule. - - Examples - -------- - - Align molecule0 to molecule1 based on a precomputed mapping. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping) - - Align molecule0 to molecule1. Since no mapping is passed one will be - autogenerated using :class:`matchAtoms ` - with default options. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) - """ - - if not isinstance(molecule0, _Molecule): - raise TypeError( - "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - - if not isinstance(molecule1, _Molecule): - raise TypeError( - "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" - ) - if roi is None: - raise ValueError("No region of interest (ROI) has been provided.") - else: - if not isinstance(roi, list): - raise TypeError("'roi' must be of type 'list'.") - - # The user has passed an atom mapping. - if mapping is not None: - if not isinstance(mapping, dict): - raise TypeError("'mapping' must be of type 'dict'.") - else: - _validate_mapping(molecule0, molecule1, mapping, "mapping") - - for roi_idx in roi: - res0 = molecule0.getResidues()[roi_idx] - res1 = molecule1.getResidues()[roi_idx] - - if res0.name() == res1.name() and res0.nAtoms() == res1.nAtoms(): - res0_atoms = [a.name() for a in res0.getAtoms()] - res1_atoms = [a.name() for a in res0.getAtoms()] - if res0_atoms == res1_atoms: - _logger.warning( - f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." - ) - - res0_idx = [a.index() for a in res0.getAtoms()] - res1_idx = [a.index() for a in res1.getAtoms()] - - # Extract the residues of interest from the molecules - res0_extracted = molecule0.extract(res0_idx) - res1_extracted = molecule1.extract(res1_idx) - - res0_aligned = rmsdAlign(molecule0=res0_extracted, molecule1=res1_extracted) - - # Now update molecule0 with the aligned residue coordinates - mol0 = molecule0._getSireObject() - res0_aligned_coords = res0_aligned._getSireObject().property("coordinates") - - # A list to store the updated coordinates for molecule0 - mol0_coords = [] - for i, res in enumerate(mol0.residues()): - if i == roi_idx: - mol0_coords.append(res0_aligned_coords) - else: - mol0_coords.append(res.property("coordinates")) - # Flatten the list - mol0_coords = [item for sublist in mol0_coords for item in sublist] - - # Create a cursor for updating the coordinates property - c = mol0.cursor() - for i, atom in enumerate(c.atoms()): - atom["coordinates"] = mol0_coords[i] - mol0 = c.commit() - - # Convert the Sire molecule back to a BioSimSpace molecule so we can - # loop over it again if needed - molecule0 = _Molecule(mol0) - - return molecule0 - - -def roiFlexAlign( - molecule0, - molecule1, - roi=None, - mapping=None, - fkcombu_exe=None, - property_map0={}, - property_map1={}, -): - """ - Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 - using BioSimSpace.Align.flexAlign(). - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule to align. - - molecule1 : :class:`Molecule ` - The reference molecule. - - mapping : dict - A dictionary mapping atoms in molecule0 to those in molecule1. + roi : list + A list of residue indices that define the region of interest (ROI) in + the molecule. + . + align_function : str + The alignment function used to align atoms. Available options are: + - "rmsd" + Align atoms in molecule0 to those in molecule1 using the mapping + between matched atom indices. + Uses :class:`rmsdAlign ` to align the atoms in the ROI. + - "rmsd_flex_align" + Flexibly align roi from molecule0 to molecule1 based on the mapping. + Uses :class:`flexAlign ` to align the atoms in the ROI. fkcombu_exe : str - Path to the fkcombu executable. If None is passed, then BioSimSpace + Path to the fkcombu executable. Will only be used if aligning with + "rmsd_flex_align" function. If None is passed, then BioSimSpace will attempt to find fkcombu by searching your PATH. property_map0 : dict @@ -2079,14 +1805,7 @@ def roiFlexAlign( Align molecule0 to molecule1 based on a precomputed mapping. >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping) - - Align molecule0 to molecule1. Since no mapping is passed one will be - autogenerated using :class:`matchAtoms ` - with default options. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) + >>> molecule0 = BSS.Align.roiAlign(molecule0, molecule1, mapping, roi=[12]) """ if not isinstance(molecule0, _Molecule): @@ -2111,6 +1830,11 @@ def roiFlexAlign( else: _validate_mapping(molecule0, molecule1, mapping, "mapping") + if align_function not in ["rmsd", "rmsd_flex_align"]: + raise ValueError( + "Invalid alignment function. Available options are: 'rmsd', 'rmsd_flex_align'" + ) + for roi_idx in roi: res0 = molecule0.getResidues()[roi_idx] res1 = molecule1.getResidues()[roi_idx] @@ -2130,7 +1854,15 @@ def roiFlexAlign( res0_extracted = molecule0.extract(res0_idx) res1_extracted = molecule1.extract(res1_idx) - res0_aligned = flexAlign(molecule0=res0_extracted, molecule1=res1_extracted) + if align_function == "rmsd": + res0_aligned = rmsdAlign(molecule0=res0_extracted, molecule1=res1_extracted) + + elif align_function == "rmsd_flex_align": + res0_aligned = flexAlign( + molecule0=res0_extracted, + molecule1=res1_extracted, + fkcombu_exe=fkcombu_exe, + ) # Now update molecule0 with the aligned residue coordinates mol0 = molecule0._getSireObject() @@ -2143,6 +1875,7 @@ def roiFlexAlign( mol0_coords.append(res0_aligned_coords) else: mol0_coords.append(res.property("coordinates")) + # Flatten the list mol0_coords = [item for sublist in mol0_coords for item in sublist] From 4f92709ad35edaf02323e8e4d6ecd6ac29e25d77 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 21 May 2024 11:04:09 +0100 Subject: [PATCH 13/32] Refactored merge function to no longer require ROI atom indices as input Instead the function takes in the ROI residue indices as inputs now, which is consistent with roiMatch and roiAlign functions --- python/BioSimSpace/Align/_align.py | 124 +++++++++------------ python/BioSimSpace/Align/_merge.py | 166 +++++++++++++++-------------- 2 files changed, 138 insertions(+), 152 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index e65ab8c9a..68842cb26 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -56,8 +56,6 @@ if _have_imported(_rdkit): from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS - - # No idea why, but RD_logger cannot be imported from rdkit, only RDLogger can from rdkit import RDLogger as _RDLogger # Disable RDKit warnings. @@ -601,7 +599,7 @@ def generateNetwork( rdmols.append(rdmol) except Exception as e: - msg = "Unable to load molecule into RDKit!" + msg = "RDKit was unable to load molecule!" if _isVerbose(): msg += ": " + getattr(e, "message", repr(e)) raise _AlignmentError(msg) from e @@ -1124,10 +1122,10 @@ def _kartograf_map(molecule0, molecule1, kartograf_kwargs): # build Kartograf Atom Mapper mapper = KartografAtomMapper(**kartograf_kwargs) - # get mapping + # get the mapping kartograf_mapping = next(mapper.suggest_mappings(mol0, a_mol1)) - # score mapping + # score the mapping rmsd_scorer = _MappingRMSDScorer() score = rmsd_scorer(mapping=kartograf_mapping) _logger.debug(f"RMSD score: {score:.2f}") @@ -1141,7 +1139,6 @@ def roiMatch( molecule0, molecule1, roi, - force_backbone_match=False, ring_matches_ring_only=True, complete_rings_only=True, atomCompare=_rdFMCS.AtomCompare.CompareAny, @@ -1181,14 +1178,9 @@ def roiMatch( Returns ------- - mapping : dict + full_mapping : dict A dictionary of the mapping between the two molecules. - roi_idx : list - A list of the indices of the atoms in the ROI. This can be multiple - nested lists if the ROI is defined as a list of residues. - TODO: change the return type of roi_idx to a dictionary of lists - Notes ----- @@ -1217,18 +1209,18 @@ def roiMatch( with a region of interest defined as a list of residues. >>> import BioSimSpace as BSS - >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12]) + >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12]) - Find the mapping between two molecules with multiple regions of interest + Find the mapping between two molecules with multiple regions of interest. >>> import BioSimSpace as BSS - >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) + >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) Find the best maximum common substructure mapping between two molecules, - while forcing the backbone atoms to be matched. + using kartograf as the MCS algorithm. >>> import BioSimSpace as BSS - >>> mapping, roi_idx = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], force_backbone_match=True) + >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], use_kartograf=True) """ # Validate input @@ -1245,8 +1237,6 @@ def roiMatch( if roi is None: raise ValueError("residue of interest list is not provided.") - roi_idx = [] - _logger.debug(f"Number of mol A atoms: {molecule0.nAtoms()}") _logger.debug(f"Number of mol B atoms: {molecule1.nAtoms()}") @@ -1294,9 +1284,6 @@ def roiMatch( res1_idx = [a.index() for a in molecule1_roi] # _logger.debug(f"res1 indices: {res1_idx}") - # Append the ROI indices to the list - roi_idx.append([res0_idx, res1_idx]) - # Extract the residues of interest from the molecules res0_extracted = molecule0.extract(res0_idx) res1_extracted = molecule1.extract(res1_idx) @@ -1452,24 +1439,13 @@ def roiMatch( _logger.debug(f"After ROI region mapping: {after_roi_mapping}") # Combine the dictionaries to get the full mapping - combined_dict = { + full_mapping = { **pre_roi_mapping, **absolute_roi_mapping, **after_roi_mapping, } - # Print the matched atoms in the ROI - # for idx0, idx1 in mapping.items(): - # _logger.debug( - # f"{res0_extracted.getAtoms()[idx0]} <--> {res1_extracted.getAtoms()[idx1]}" - # ) - - # _logger.debug(f"Full matched atoms: {combined_dict}") - # for idx0, idx1 in mapping.items(): - # _logger.debug(f"{molecule0.getAtoms()[idx0]} <--> {molecule1.getAtoms()[idx1]}") - - # TODO: Change the return type of roi_idx to a dictionary of lists - return combined_dict, roi_idx + return full_mapping def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): @@ -1899,7 +1875,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, - roi_list=None, + roi=None, property_map0={}, property_map1={}, ): @@ -1936,7 +1912,7 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi_list : list + roi : list The region of interest to merge. Consist of two lists of atom indices. property_map0 : dict @@ -1996,11 +1972,11 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") - if roi_list is not None: - if not isinstance(roi_list, list): - raise TypeError("'roi_list' must be of type 'list'.") - else: - _validate_roi(molecule0, molecule1, roi_list) + if roi is not None: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + # else: + # _validate_roi(molecule0, molecule1, roi) # The user has passed an atom mapping. if mapping is not None: @@ -2031,7 +2007,7 @@ def merge( allow_ring_breaking=allow_ring_breaking, allow_ring_size_change=allow_ring_size_change, force=force, - roi_list=roi_list, + roi=roi, property_map0=property_map0, property_map1=property_map1, ) @@ -2680,37 +2656,37 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) -def _validate_roi(molecule0, molecule1, roi_list): - """Internal function to validate that a mapping contains key:value pairs - of the correct type. - - Parameters - ---------- - - molecule0 : :class:`Molecule ` - The molecule of interest. - - molecule1 : :class:`Molecule ` - The reference molecule. - - roi_list : list - The region of interest to merge. - """ - for roi in roi_list: - if len(roi) != 2: - raise ValueError("The length of roi list must be 2.") - if not isinstance(roi[0], list) or not isinstance(roi[1], list): - raise ValueError("The element of roi must be of type list") - for mol_idx, ele in enumerate(roi): - for atom_idx in ele: - if type(atom_idx) is not int: - raise ValueError( - f"The element of roi[{mol_idx}] should be of type int" - ) - if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): - raise IndexError( - f"The element of roi[{mol_idx}] should within range of number of atoms" - ) +# def _validate_roi(molecule0, molecule1, roi): +# """Internal function to validate that a mapping contains key:value pairs +# of the correct type. + +# Parameters +# ---------- + +# molecule0 : :class:`Molecule ` +# The molecule of interest. + +# molecule1 : :class:`Molecule ` +# The reference molecule. + +# roi : list +# The region of interest to merge. +# """ +# for roi_res in roi: +# if len(roi_res) != 2: +# raise ValueError("The length of roi list must be 2.") +# if not isinstance(roi[0], list) or not isinstance(roi[1], list): +# raise ValueError("The element of roi must be of type list") +# for mol_idx, ele in enumerate(roi): +# for atom_idx in ele: +# if type(atom_idx) is not int: +# raise ValueError( +# f"The element of roi[{mol_idx}] should be of type int" +# ) +# if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): +# raise IndexError( +# f"The element of roi[{mol_idx}] should within range of number of atoms" +# ) def _to_sire_mapping(mapping): diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index add219661..54dcc89eb 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -46,7 +46,7 @@ def merge( allow_ring_breaking=False, allow_ring_size_change=False, force=False, - roi_list=None, + roi=None, property_map0={}, property_map1={}, ): @@ -78,8 +78,9 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi_list : list - The region of interest to merge. Consist of a multiple nested list lists of atom indices. + roi : list + The region of interest to merge. + Consists of a list of ROI residue indices. property_map0 : dict A dictionary that maps "properties" in this molecule to their @@ -221,10 +222,10 @@ def merge( # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") # Only part of the ligand is to be merged - if roi_list is not None: + if roi is not None: if molecule0.nResidues() != molecule1.nResidues(): raise ValueError( - "The two molecules need to have the same number of residues" + "The two molecules need to have the same number of residues." ) num = 1 @@ -1023,7 +1024,7 @@ def merge( # Check that the merge hasn't modified the connectivity. # The checking was blocked when merging a protein - if roi_list is None: + if roi is None: # molecule0 for x in range(0, molecule0.nAtoms()): # Convert to an AtomIdx. @@ -1202,14 +1203,18 @@ def merge( # Copy the intrascale from molecule1 into clj_nb_pairs0. # Perform a triangular loop over atoms from molecule1. - if roi_list is None: + if roi is None: iterlen = molecule1.nAtoms() iterrange = list(range(molecule1.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - for roi in roi_list: - iterlen = len(roi[1]) - iterrange = roi[1] + for roi_res in roi: + # Get atom indices of ROI residue in molecule1 + molecule1_roi = molecule1.residues()[roi_res] + molecule1_roi_idx = [a.index() for a in molecule1_roi] + iterlen = len(molecule1_roi_idx) + iterrange = molecule1_roi_idx + for x in range(0, iterlen): # Convert to an AtomIdx. idx = iterrange[x] @@ -1246,94 +1251,99 @@ def merge( # Now copy in all intrascale values from molecule0 into both # clj_nb_pairs matrices. - if roi_list is None: + if roi is None: iterlen = molecule0.nAtoms() iterrange = list(range(molecule0.nAtoms())) # When region of interest is defined, perfrom loop from these indices else: - for roi in roi_list: - iterlen = len(roi[0]) - iterrange = roi[0] - - # Perform a triangular loop over atoms from molecule0. - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) - - # Map the index to its position in the merged molecule. - idx_map = mol0_merged_mapping[idx] - - for y in range(x + 1, iterlen): - idy = iterrange[y] + for roi_res in roi: + # Get atom indices of ROI residue in molecule0 + molecule0_roi = molecule0.residues()[roi_res] + molecule0_roi_idx = [a.index() for a in molecule0_roi] + iterlen = len(molecule0_roi_idx) + iterrange = molecule0_roi_idx + + # Perform a triangular loop over atoms from molecule0. + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy_map = mol0_merged_mapping[idy] - - conn_type = conn0.connectionType(idx_map, idy_map) - # The atoms aren't bonded. - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + idx_map = mol0_merged_mapping[idx] + + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) + + # Map the index to its position in the merged molecule. + idy_map = mol0_merged_mapping[idy] + + conn_type = conn0.connectionType(idx_map, idy_map) + # The atoms aren't bonded. + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs0.set(idx_map, idy_map, clj_scale_factor) # Finally, copy the intrascale from molecule1 into clj_nb_pairs1. - if roi_list is None: + if roi is None: iterlen = molecule1.nAtoms() iterrange = list(range(molecule1.nAtoms())) - # When region of interest is defined, perfrom loop from these indices - else: - for roi in roi_list: - iterlen = len(roi[1]) - iterrange = roi[1] - # Perform a triangular loop over atoms from molecule1. - for x in range(0, iterlen): - # Convert to an AtomIdx. - idx = iterrange[x] - idx = _SireMol.AtomIdx(idx) - - # Map the index to its position in the merged molecule. - idx = mol1_merged_mapping[idx] - - for y in range(x + 1, iterlen): - idy = iterrange[y] + else: + for roi_res in roi: + molecule1_roi = molecule1.residues()[roi_res] + molecule1_roi_idx = [a.index() for a in molecule1_roi] + iterlen = len(molecule1_roi_idx) + iterrange = molecule1_roi_idx + + # Perform a triangular loop over atoms from molecule1. + for x in range(0, iterlen): # Convert to an AtomIdx. - idy = _SireMol.AtomIdx(idy) + idx = iterrange[x] + idx = _SireMol.AtomIdx(idx) # Map the index to its position in the merged molecule. - idy = mol1_merged_mapping[idy] + idx = mol1_merged_mapping[idx] - conn_type = conn1.connectionType(idx, idy) + for y in range(x + 1, iterlen): + idy = iterrange[y] + # Convert to an AtomIdx. + idy = _SireMol.AtomIdx(idy) - if conn_type == 0: - clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + # Map the index to its position in the merged molecule. + idy = mol1_merged_mapping[idy] - # The atoms are part of a dihedral. - elif conn_type == 4: - clj_scale_factor = _SireMM.CLJScaleFactor( - ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() - ) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + conn_type = conn1.connectionType(idx, idy) - # The atoms are bonded - else: - clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) - clj_nb_pairs1.set(idx, idy, clj_scale_factor) + if conn_type == 0: + clj_scale_factor = _SireMM.CLJScaleFactor(1, 1) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are part of a dihedral. + elif conn_type == 4: + clj_scale_factor = _SireMM.CLJScaleFactor( + ff.electrostatic14ScaleFactor(), ff.vdw14ScaleFactor() + ) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) + + # The atoms are bonded + else: + clj_scale_factor = _SireMM.CLJScaleFactor(0, 0) + clj_nb_pairs1.set(idx, idy, clj_scale_factor) # Store the two molecular components. edit_mol.setProperty("molecule0", molecule0) From 00e4e5ad8fc670e8c4d9b7784a34a2979cbfd7a1 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 21 May 2024 13:42:10 +0100 Subject: [PATCH 14/32] Removed logging calls from align and merge functions that were used previously for debugging --- python/BioSimSpace/Align/_align.py | 158 ++++------------------------- python/BioSimSpace/Align/_merge.py | 9 -- 2 files changed, 21 insertions(+), 146 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 68842cb26..35c87edec 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -40,8 +40,6 @@ import subprocess as _subprocess import sys as _sys -# This logger is temporary. -from loguru import logger as _logger from .._Utils import _try_import, _have_imported, _assert_imported @@ -718,7 +716,6 @@ def generateNetwork( def matchAtoms( molecule0, molecule1, - engine="SOMD", scoring_function="rmsd_align", matches=1, return_scores=False, @@ -727,8 +724,6 @@ def matchAtoms( atomCompare=_rdFMCS.AtomCompare.CompareAny, complete_rings_only=True, ring_matches_ring_only=True, - prune_perturbed_constraints=None, - prune_crossing_constraints=None, max_scoring_matches=1000, property_map0={}, property_map1={}, @@ -750,10 +745,6 @@ def matchAtoms( molecule1 : :class:`Molecule ` The reference molecule. - engine : str - The engine with which the simulations will be run. This is used for - setting some reasonable defaults for the other settings. - scoring_function : str The scoring function used to match atoms. Available options are: - "rmsd" @@ -785,16 +776,6 @@ def matchAtoms( option is only relevant to MCS performed using RDKit and will be ignored when falling back on Sire. - prune_perturbed_constraints : bool - Whether to remove hydrogen atoms that are perturbed to heavy atoms - from the mapping. This is True for AMBER by default and False for - all other engines. - - prune_crossing_constraints : bool - Whether to remove atoms from the mapping such that there are no - constraints between dummy and non-dummy atoms. This is True for - AMBER by default and False for all other engines. - max_scoring_matches : int The maximum number of matching MCS substructures to consider when computing mapping scores. Consider reducing this if you find the @@ -914,13 +895,6 @@ def matchAtoms( if not isinstance(property_map1, dict): raise TypeError("'property_map1' must be of type 'dict'") - # Set some defaults. - is_amber = engine.upper() == "AMBER" - if prune_perturbed_constraints is None: - prune_perturbed_constraints = is_amber - if prune_crossing_constraints is None: - prune_crossing_constraints = is_amber - # Extract the Sire molecule from each BioSimSpace molecule. mol0 = molecule0._getSireObject() mol1 = molecule1._getSireObject() @@ -1083,7 +1057,6 @@ def _kartograf_map(molecule0, molecule1, kartograf_kwargs): """ # try to import kartograf try: - import rdkit.Chem as _Chem from kartograf.atom_aligner import align_mol_shape as _align_mol_shape from kartograf.atom_mapping_scorer import ( MappingRMSDScorer as _MappingRMSDScorer, @@ -1094,7 +1067,7 @@ def _kartograf_map(molecule0, molecule1, kartograf_kwargs): ) except ImportError: raise ImportError( - "Kartograf is not installed. Please install kartograf to use this function." + "Unable to import Kartograf. Make sure Kartograf is installed properly to use this function." ) # Validate input @@ -1128,7 +1101,7 @@ def _kartograf_map(molecule0, molecule1, kartograf_kwargs): # score the mapping rmsd_scorer = _MappingRMSDScorer() score = rmsd_scorer(mapping=kartograf_mapping) - _logger.debug(f"RMSD score: {score:.2f}") + print(f"RMSD score: {score:.2f}") return kartograf_mapping @@ -1217,7 +1190,7 @@ def roiMatch( >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) Find the best maximum common substructure mapping between two molecules, - using kartograf as the MCS algorithm. + using Kartograf as the MCS algorithm. >>> import BioSimSpace as BSS >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], use_kartograf=True) @@ -1237,9 +1210,6 @@ def roiMatch( if roi is None: raise ValueError("residue of interest list is not provided.") - _logger.debug(f"Number of mol A atoms: {molecule0.nAtoms()}") - _logger.debug(f"Number of mol B atoms: {molecule1.nAtoms()}") - # Get the atoms before the ROI. # This is being done so that when we map the atoms in ROI, we can append # the ROI mapping to this pre-ROI mapping which will then be used as @@ -1255,15 +1225,8 @@ def roiMatch( # Loop over the residues of interest for i, res_idx in enumerate(roi): - _logger.debug(f"Starting match for ROI id: {res_idx}") - molecule0_roi = molecule0.getResidues()[res_idx] - _logger.debug("Molecule0 ROI:") - _logger.debug(molecule0_roi) - molecule1_roi = molecule1.getResidues()[res_idx] - _logger.debug("Molecule1 ROI:") - _logger.debug(molecule1_roi) # Warn if matching between the same residues, in a case where we are # transforming from one enantiomer to another, the atomtypes will @@ -1275,32 +1238,23 @@ def roiMatch( molecule0_atoms = [a.name() for a in molecule0_roi.getAtoms()] molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] if molecule0_atoms == molecule1_atoms: - _logger.warning( - f"Residue {res_idx} in molecule0 and molecule1 have identical atomtypes." + _warnings.warn( + f"Residue {res_idx} between molecule0 and molecule1 have identical atomtypes." ) res0_idx = [a.index() for a in molecule0_roi] - # _logger.debug(f"res0 indices: {res0_idx}") res1_idx = [a.index() for a in molecule1_roi] - # _logger.debug(f"res1 indices: {res1_idx}") # Extract the residues of interest from the molecules res0_extracted = molecule0.extract(res0_idx) res1_extracted = molecule1.extract(res1_idx) - # for a in res0_extracted.getAtoms(): - # _logger.debug(f"res0 atom: {a}") - # for b in res1_extracted.getAtoms(): - # _logger.debug(f"res1 atom: {b}") - if use_kartograf: - _logger.debug("Using kartograf to map the ROI.") kartograf_mapping = _kartograf_map( res0_extracted, res1_extracted, kartograf_kwargs ) mapping = kartograf_mapping.componentA_to_componentB else: - _logger.debug("Using rdKit MCS to map the ROI.") mapping = matchAtoms( res0_extracted, res1_extracted, @@ -1309,41 +1263,12 @@ def roiMatch( atomCompare=atomCompare, ) - _logger.debug(f"Mapping: {mapping}") - - # Check how many atoms got mapped from one residue to the other - _logger.debug( - f"Mapped {len(mapping.keys())} out of {len(res0_idx)} atoms for molecule0" - ) - _logger.debug( - f"Mapped {len(mapping.values())} out of {len(res1_idx)} atoms for molecule1" - ) - - # If the number of mapped atoms is not the same as the number of atoms - # in the ROI we need to use that molecule as the reference - # for the lookup table. - # NOTE: We don't really need the conditional statement here that checks - # if all of the atoms in the ROI have been mapped, because - # we are going to translate atoms relative indices to absolute ones - # and that case would be captured by the lookup table. - if len(mapping.keys()) != len(res0_idx): - _logger.debug("Some atoms for molecule0 in the ROI have not been mapped.") - _logger.debug("Using molecule0 as the reference for the lookup table.") - residue_lookup_table = list(mapping.keys()) - absolute_mapped_atoms_res0 = [res0_idx[i] for i in residue_lookup_table] - - # i.e this bit is unnecessary - else: - absolute_mapped_atoms_res0 = res0_idx - - if len(mapping.values()) != len(res1_idx): - _logger.debug("Some atoms for molecule1 in the ROI have not been mapped.") - _logger.debug("Using molecule1 as the reference for the lookup table.") - residue_lookup_table = list(mapping.values()) - absolute_mapped_atoms_res1 = [res1_idx[i] for i in residue_lookup_table] + # Look up the absolute atom indices in the molecule + res0_lookup_table = list(mapping.keys()) + absolute_mapped_atoms_res0 = [res0_idx[i] for i in res0_lookup_table] - else: - absolute_mapped_atoms_res1 = res1_idx + res1_lookup_table = list(mapping.values()) + absolute_mapped_atoms_res1 = [res1_idx[i] for i in res1_lookup_table] absolute_roi_mapping = dict( zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) @@ -1352,10 +1277,11 @@ def roiMatch( # Check that pre_roi_atom_indices are not part of molecule ROI indices # NOTE: This could be a costly operation, if pre_roi_atom_indices is # large. - if any(i in pre_roi_atom_idx_molecule0 for i in res0_idx) or any( - i in pre_roi_atom_idx_molecule1 for i in res1_idx - ): - raise ValueError("Found atoms in pre ROI region that are part of the ROI.") + + # if any(i in pre_roi_atom_idx_molecule0 for i in res0_idx) or any( + # i in pre_roi_atom_idx_molecule1 for i in res1_idx + # ): + # raise ValueError("Found atoms in pre ROI region that are part of the ROI.") # Now we have to think about what to do with the atom indices # after the mapping as these are not going to be the same @@ -1374,9 +1300,6 @@ def roiMatch( # If we were at residue 10 and the next residue of interest is 12, # we would need to map the atoms between residues 10 and 12. if roi[i + 1] - roi[i] == 1: - _logger.debug( - "Next ROI is adjacent to the current ROI, no need to map atoms between the two." - ) after_roi_atom_idx_molecule0 = [] after_roi_atom_idx_molecule1 = [] else: @@ -1400,6 +1323,7 @@ def roiMatch( after_roi_atom_idx_molecule1, ) ) + # Append the mappings to the pre_roi_mapping, which will then be # used as the pre_roi_mapping for the next ROI in the list. pre_roi_mapping = { @@ -1427,16 +1351,11 @@ def roiMatch( ) # Check that after_roi_atom_indices are not part of absolute_roi_mapping - if any(i in after_roi_atom_idx_molecule0 for i in res0_idx): - raise ValueError("Found atoms in after ROI region that are part of the ROI") - - if any(i in after_roi_atom_idx_molecule1 for i in res1_idx): - raise ValueError("Found atoms in after ROI region that are part of the ROI") + # if any(i in after_roi_atom_idx_molecule0 for i in res0_idx): + # raise ValueError("Found atoms in after ROI region that are part of the ROI") - # Print all 3 dictionaries - _logger.debug(f"Pre ROI region mapping: {pre_roi_mapping}") - _logger.debug(f"Absolute ROI mapping: {absolute_roi_mapping}") - _logger.debug(f"After ROI region mapping: {after_roi_mapping}") + # if any(i in after_roi_atom_idx_molecule1 for i in res1_idx): + # raise ValueError("Found atoms in after ROI region that are part of the ROI") # Combine the dictionaries to get the full mapping full_mapping = { @@ -1819,7 +1738,7 @@ def roiAlign( res0_atoms = [a.name() for a in res0.getAtoms()] res1_atoms = [a.name() for a in res0.getAtoms()] if res0_atoms == res1_atoms: - _logger.warning( + _warnings.warn( f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." ) @@ -1975,8 +1894,6 @@ def merge( if roi is not None: if not isinstance(roi, list): raise TypeError("'roi' must be of type 'list'.") - # else: - # _validate_roi(molecule0, molecule1, roi) # The user has passed an atom mapping. if mapping is not None: @@ -2656,39 +2573,6 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) -# def _validate_roi(molecule0, molecule1, roi): -# """Internal function to validate that a mapping contains key:value pairs -# of the correct type. - -# Parameters -# ---------- - -# molecule0 : :class:`Molecule ` -# The molecule of interest. - -# molecule1 : :class:`Molecule ` -# The reference molecule. - -# roi : list -# The region of interest to merge. -# """ -# for roi_res in roi: -# if len(roi_res) != 2: -# raise ValueError("The length of roi list must be 2.") -# if not isinstance(roi[0], list) or not isinstance(roi[1], list): -# raise ValueError("The element of roi must be of type list") -# for mol_idx, ele in enumerate(roi): -# for atom_idx in ele: -# if type(atom_idx) is not int: -# raise ValueError( -# f"The element of roi[{mol_idx}] should be of type int" -# ) -# if atom_idx >= [molecule0, molecule1][mol_idx].nAtoms(): -# raise IndexError( -# f"The element of roi[{mol_idx}] should within range of number of atoms" -# ) - - def _to_sire_mapping(mapping): """ Internal function to convert a regular mapping to Sire AtomIdx format. diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index 54dcc89eb..713e364e3 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -32,9 +32,6 @@ from sire.legacy import Mol as _SireMol from sire.legacy import Units as _SireUnits -# This logger is temporary. -from loguru import logger as _logger - from .._Exceptions import IncompatibleError as _IncompatibleError from .._SireWrappers import Molecule as _Molecule @@ -160,13 +157,9 @@ def merge( # Get the atom indices from the mapping. idx0 = mapping.keys() idx1 = mapping.values() - _logger.debug(f"Mapping: {mapping}") - _logger.debug(f"idx0: {idx0}") - _logger.debug(f"idx1: {idx1}") # Create the reverse mapping: molecule1 --> molecule0 inv_mapping = {v: k for k, v in mapping.items()} - _logger.debug(f"Inverse mapping: {inv_mapping}") # Generate the mappings from each molecule to the merged molecule mol0_merged_mapping = {} @@ -209,7 +202,6 @@ def merge( if atom.index() not in idx0: atoms0.append(atom) atoms0_idx.append(atom.index()) - _logger.debug(f"Unique atoms in molecule0: {atoms0_idx}") # molecule1 for atom in molecule1.atoms(): @@ -217,7 +209,6 @@ def merge( atoms1.append(atom) atoms1_idx.append(atom.index()) - _logger.debug(f"Unique atoms in molecule1: {atoms1_idx}") # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") From 53ac5dcad7abfe159864e1d8dbcf12e2a742064a Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 21 May 2024 15:08:25 +0100 Subject: [PATCH 15/32] Minor refactoring of roi functions --- python/BioSimSpace/Align/_align.py | 53 +++++++++++++++--------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 35c87edec..37c86bc74 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -907,8 +907,8 @@ def matchAtoms( try: # Convert the molecules to RDKit format. mols = [ - _Convert.toRDKit(molecule0, property_map=property_map0), - _Convert.toRDKit(molecule1, property_map=property_map1), + _Convert.toRDKit(mol0, property_map=property_map0), + _Convert.toRDKit(mol1, property_map=property_map1), ] # Generate the MCS match. @@ -1106,8 +1106,6 @@ def _kartograf_map(molecule0, molecule1, kartograf_kwargs): return kartograf_mapping -# NOTE: This function is currently experimental and has not gone through -# rigorous validation. Proceed with caution. def roiMatch( molecule0, molecule1, @@ -1123,7 +1121,7 @@ def roiMatch( The ROI is defined as a list of residues in the molecule/protein. The function will attempt to match the ROI in the two molecules and return the mapping between the two molecules. Multiple ROIs can be - defined by providing a list of residues. + provided. Parameters ---------- @@ -1135,7 +1133,8 @@ def roiMatch( The reference molecule. roi : list - A list of regions/residues of interest in the molecule/protein. + The region of interest to merge. + Consists of a list of ROI residue indices. ring_matches_ring_only : bool Whether ring bonds can only match ring bonds. @@ -1163,9 +1162,10 @@ def roiMatch( The function will attempt to match the atoms in the ROI based on the maximum common substructure (MCS) algorithm. First, the ROI is extracted from the two molecules and then the atoms in the ROI are matched using - BioSimSpace.Align.matchAtoms function. The function will return the - mapping between the two molecules. This "relative" mapping will then be - used to map the atoms in the ROI to the "absolute" indices in the molecule. + a mapping function such as BioSimSpace.Align.matchAtoms for example. + The function will return the mapping between the two molecules. + This "relative" mapping will then be used to map the atoms in the ROI to + the "absolute" indices in the molecule. So for example the relative mapping could be {0: 3, 1: 2, 2: 5} and the absolute mapping could be {100: 103, 101: 102, 102: 105}. This way we can bypass the need to map the entire molecule and only focus on the ROI, @@ -1638,7 +1638,6 @@ def roiAlign( molecule0, molecule1, roi=None, - mapping=None, align_function="rmsd", fkcombu_exe=None, property_map0={}, @@ -1657,12 +1656,9 @@ def roiAlign( molecule1 : :class:`Molecule ` The reference molecule. - mapping : dict - A dictionary mapping atoms in molecule0 to those in molecule1. - roi : list - A list of residue indices that define the region of interest (ROI) in - the molecule. + The region of interest to merge. + Consists of a list of ROI residue indices. . align_function : str The alignment function used to align atoms. Available options are: @@ -1697,10 +1693,15 @@ def roiAlign( Examples -------- - Align molecule0 to molecule1 based on a precomputed mapping. + Align residue of interest from molecule0 to molecule1. >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.roiAlign(molecule0, molecule1, mapping, roi=[12]) + >>> molecule0 = BSS.Align.roiAlign(molecule0, molecule1, roi=[12]) + + Align multiple residues of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.roiAlign(molecule0, molecule1, roi=[12,13]) """ if not isinstance(molecule0, _Molecule): @@ -1718,13 +1719,6 @@ def roiAlign( if not isinstance(roi, list): raise TypeError("'roi' must be of type 'list'.") - # The user has passed an atom mapping. - if mapping is not None: - if not isinstance(mapping, dict): - raise TypeError("'mapping' must be of type 'dict'.") - else: - _validate_mapping(molecule0, molecule1, mapping, "mapping") - if align_function not in ["rmsd", "rmsd_flex_align"]: raise ValueError( "Invalid alignment function. Available options are: 'rmsd', 'rmsd_flex_align'" @@ -1831,8 +1825,9 @@ def merge( takes precedence over 'allow_ring_breaking' and 'allow_ring_size_change'. - roi : list - The region of interest to merge. Consist of two lists of atom indices. + roi : list + The region of interest to merge. + Consists of a list of ROI residue indices. property_map0 : dict A dictionary that maps "properties" in molecule0 to their user @@ -1857,6 +1852,12 @@ def merge( >>> import BioSimSpace as BSS >>> merged = BSS.Align.merge(molecule0, molecule1, mapping) + Merge molecule0 and molecule1 based on a precomputed mapping and a region + of interest. + + >>> import BioSimSpace as BSS + >>> merged = BSS.Align.merge(molecule0, molecule1, mapping, roi=[12]) + Merge molecule0 with molecule1. Since no mapping is passed one will be autogenerated using :class:`matchAtoms ` with default options, following which :class:`rmsdAlign ` From 6f5516d3f4c8aa0cffac49d3abac9bc68fccaa2c Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 21 May 2024 15:18:16 +0100 Subject: [PATCH 16/32] Removed warning from roiAlign function --- python/BioSimSpace/Align/_align.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 37c86bc74..642aedbaf 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -1728,14 +1728,6 @@ def roiAlign( res0 = molecule0.getResidues()[roi_idx] res1 = molecule1.getResidues()[roi_idx] - if res0.name() == res1.name() and res0.nAtoms() == res1.nAtoms(): - res0_atoms = [a.name() for a in res0.getAtoms()] - res1_atoms = [a.name() for a in res0.getAtoms()] - if res0_atoms == res1_atoms: - _warnings.warn( - f"Residue {roi_idx} in molecule0 and molecule1 have identical atomtypes." - ) - res0_idx = [a.index() for a in res0.getAtoms()] res1_idx = [a.index() for a in res1.getAtoms()] From b73fabfc5b68560353baa8b731d927df5feacb2c Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Thu, 30 May 2024 13:03:44 +0100 Subject: [PATCH 17/32] Remove commented out code --- python/BioSimSpace/Align/_align.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 642aedbaf..df730c611 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -1274,18 +1274,6 @@ def roiMatch( zip(absolute_mapped_atoms_res0, absolute_mapped_atoms_res1) ) - # Check that pre_roi_atom_indices are not part of molecule ROI indices - # NOTE: This could be a costly operation, if pre_roi_atom_indices is - # large. - - # if any(i in pre_roi_atom_idx_molecule0 for i in res0_idx) or any( - # i in pre_roi_atom_idx_molecule1 for i in res1_idx - # ): - # raise ValueError("Found atoms in pre ROI region that are part of the ROI.") - - # Now we have to think about what to do with the atom indices - # after the mapping as these are not going to be the same - # between the two molecules. # If we are at the last residue of interest, we don't need to worry # too much about the after ROI region as this region will be all of the # molecule atoms after the last residue of interest. @@ -1350,13 +1338,6 @@ def roiMatch( ) ) - # Check that after_roi_atom_indices are not part of absolute_roi_mapping - # if any(i in after_roi_atom_idx_molecule0 for i in res0_idx): - # raise ValueError("Found atoms in after ROI region that are part of the ROI") - - # if any(i in after_roi_atom_idx_molecule1 for i in res1_idx): - # raise ValueError("Found atoms in after ROI region that are part of the ROI") - # Combine the dictionaries to get the full mapping full_mapping = { **pre_roi_mapping, From 548c98937018bb0769e26279247e0410b1442b26 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Thu, 6 Jun 2024 16:50:17 +0100 Subject: [PATCH 18/32] Added tests for ROI functions They will need to be updated once the test input files are moved online. --- tests/Align/test_align.py | 97 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index ab022a8e4..2751228aa 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -470,3 +470,100 @@ def test_hydrogen_mass_repartitioning(): assert mass0 == masses1[idx] for idx, mass1 in dummy_masses1: assert mass1 == masses0[idx] + + +@pytest.fixture +def proteins(request): + p0 = BSS.IO.readMolecules(["ala_ser_size_5_capped.pdb"])[0] + p1 = BSS.IO.readMolecules(["ala_wt_size_5_capped.pdb"])[0] + return p0, p1 + + +@pytest.fixture( + params=[ + { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 12: 12, + 13: 13, + 14: 14, + 15: 15, + 16: 16, + 17: 17, + 18: 18, + 19: 19, + 20: 20, + 21: 23, + 22: 21, + 23: 22, + 25: 24, + 26: 25, + 27: 26, + 28: 27, + 29: 28, + 30: 29, + 31: 30, + 32: 31, + 33: 32, + 34: 33, + 35: 34, + 36: 35, + 37: 36, + 38: 37, + 39: 38, + 40: 39, + 41: 40, + 42: 41, + } + ] +) +def protein_mapping(request): + return request.param + + +def test_roi_match(proteins, protein_mapping): + p0, p1 = proteins + mapping = BSS.Align.roiMatch(p0, p1, roi=[2]) + assert mapping == protein_mapping + + +def test_roi_align(proteins, protein_mapping): + # p0 has been translated by 10 A in each direction. + p0, p1 = proteins + + aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=[2]) + + # Extract sire objects for the ROI and compare their coordinates + aligned_roi = aligned_p0.extract( + [a.index() for a in aligned_p0.getResidues()[2].getAtoms()] + ) + aligned_roi_coords = aligned_roi._sire_object.coordinates() + + p1_roi = p1.extract([a.index() for a in p1.getResidues()[2].getAtoms()]) + p1_roi_coords = p1_roi._sire_object.coordinates() + + for i, coord in enumerate(aligned_roi_coords): + # assume that the coordinates are the same if they are within 0.5 A + assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) + + +def test_roi_merge(proteins, protein_mapping): + p0, p1 = proteins + + p0 = BSS.Parameters.ff14SB(p0).getMolecule() + p1 = BSS.Parameters.ff14SB(p1).getMolecule() + + aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=[2]) + merged = BSS.Align.merge(aligned_p0, p1, protein_mapping) + merged_system = merged.toSystem() + assert merged_system.nPerturbableMolecules() == 1 From e8956b17a00c2af584c6410b194b3ecdd5f0a21b Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 11 Jun 2024 10:47:50 +0100 Subject: [PATCH 19/32] Added tests for multiple neighbouring mutations --- tests/Align/test_align.py | 207 ++++++++++++++++++++++++++------------ 1 file changed, 140 insertions(+), 67 deletions(-) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index 2751228aa..12895e611 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -481,89 +481,162 @@ def proteins(request): @pytest.fixture( params=[ - { - 0: 0, - 1: 1, - 2: 2, - 3: 3, - 4: 4, - 5: 5, - 6: 6, - 7: 7, - 8: 8, - 9: 9, - 10: 10, - 11: 11, - 12: 12, - 13: 13, - 14: 14, - 15: 15, - 16: 16, - 17: 17, - 18: 18, - 19: 19, - 20: 20, - 21: 23, - 22: 21, - 23: 22, - 25: 24, - 26: 25, - 27: 26, - 28: 27, - 29: 28, - 30: 29, - 31: 30, - 32: 31, - 33: 32, - 34: 33, - 35: 34, - 36: 35, - 37: 36, - 38: 37, - 39: 38, - 40: 39, - 41: 40, - 42: 41, - } - ] + ( + "single_mutant", + { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 12: 12, + 13: 13, + 14: 14, + 15: 15, + 16: 16, + 17: 17, + 18: 18, + 19: 19, + 20: 20, + 21: 23, + 22: 21, + 23: 22, + 25: 24, + 26: 25, + 27: 26, + 28: 27, + 29: 28, + 30: 29, + 31: 30, + 32: 31, + 33: 32, + 34: 33, + 35: 34, + 36: 35, + 37: 36, + 38: 37, + 39: 38, + 40: 39, + 41: 40, + 42: 41, + }, + [2], + ), + ( + "double_neighbour_mutant", + { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 12: 12, + 13: 13, + 14: 14, + 15: 15, + 16: 16, + 17: 17, + 18: 18, + 19: 19, + 20: 20, + 21: 24, + 22: 25, + 23: 26, + 24: 27, + 25: 28, + 26: 29, + 27: 30, + 28: 34, + 29: 35, + 30: 36, + 31: 37, + 32: 38, + 33: 39, + 34: 40, + 35: 41, + 36: 42, + 37: 43, + 38: 44, + 39: 45, + 40: 46, + 41: 47, + 42: 48, + 43: 49, + 44: 50, + 45: 51, + 46: 52, + 47: 53, + 48: 54, + 49: 55, + 50: 56, + 51: 57, + 52: 58, + 53: 59, + 54: 60, + 55: 61, + }, + [2, 3], + ), + ], + ids=["single_mutant", "double_neighbour_mutant"], ) -def protein_mapping(request): +def protein_inputs(request): return request.param -def test_roi_match(proteins, protein_mapping): - p0, p1 = proteins - mapping = BSS.Align.roiMatch(p0, p1, roi=[2]) +def test_roi_match(protein_inputs): + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] + p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + mapping = BSS.Align.roiMatch(p0, p1, roi=roi) assert mapping == protein_mapping -def test_roi_align(proteins, protein_mapping): +def test_roi_align(protein_inputs): # p0 has been translated by 10 A in each direction. - p0, p1 = proteins - - aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=[2]) - - # Extract sire objects for the ROI and compare their coordinates - aligned_roi = aligned_p0.extract( - [a.index() for a in aligned_p0.getResidues()[2].getAtoms()] - ) - aligned_roi_coords = aligned_roi._sire_object.coordinates() + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] + p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + + aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=roi) + for res in roi: + # Extract sire objects for the ROI and compare their coordinates + aligned_roi = aligned_p0.extract( + [a.index() for a in aligned_p0.getResidues()[res].getAtoms()] + ) + aligned_roi_coords = aligned_roi._sire_object.coordinates() - p1_roi = p1.extract([a.index() for a in p1.getResidues()[2].getAtoms()]) - p1_roi_coords = p1_roi._sire_object.coordinates() + p1_roi = p1.extract([a.index() for a in p1.getResidues()[res].getAtoms()]) + p1_roi_coords = p1_roi._sire_object.coordinates() - for i, coord in enumerate(aligned_roi_coords): - # assume that the coordinates are the same if they are within 0.5 A - assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) + for i, coord in enumerate(aligned_roi_coords): + # assume that the test passes if the coordinates are within 0.5 A + assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) -def test_roi_merge(proteins, protein_mapping): - p0, p1 = proteins +def test_roi_merge(protein_inputs): + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] + p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] p0 = BSS.Parameters.ff14SB(p0).getMolecule() p1 = BSS.Parameters.ff14SB(p1).getMolecule() - aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=[2]) + aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=roi) merged = BSS.Align.merge(aligned_p0, p1, protein_mapping) merged_system = merged.toSystem() assert merged_system.nPerturbableMolecules() == 1 From 1bdcb4da96c5891b9826eff04654d58f757cd144 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 11 Jun 2024 13:18:04 +0100 Subject: [PATCH 20/32] Refactored code to consolidate ROI functions into standard Align functions and updated ROI tests to call the new functions accordingly. --- python/BioSimSpace/Align/_align.py | 206 ++++++++++++++++++++++------- tests/Align/test_align.py | 28 +++- 2 files changed, 181 insertions(+), 53 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index df730c611..ab6c26cda 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -29,9 +29,7 @@ "matchAtoms", "viewMapping", "rmsdAlign", - "roiMatch", "flexAlign", - "roiAlign", "merge", ] @@ -721,10 +719,9 @@ def matchAtoms( return_scores=False, prematch={}, timeout=5 * _Units.Time.second, - atomCompare=_rdFMCS.AtomCompare.CompareAny, complete_rings_only=True, - ring_matches_ring_only=True, max_scoring_matches=1000, + roi=None, property_map0={}, property_map1={}, ): @@ -734,7 +731,7 @@ def matchAtoms( When requesting more than one match, the mappings will be sorted using a scoring function and returned in order of best to worst score. (Note that, depending on the scoring function the "best" score may have the - lowest value.). + lowest value). Parameters ---------- @@ -783,6 +780,10 @@ def matchAtoms( option is only relevant to MCS performed using RDKit and will be ignored when falling back on Sire. + roi : list + The region of interest to match. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -831,8 +832,56 @@ def matchAtoms( >>> import BioSimSpace as BSS >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, prematch={0 : 10, 3 : 7}) + + Find the best maximum common substructure mapping between two molecules + with a region of interest defined as a list of residues. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, roi=[12]) + + Find the mapping between two molecules with multiple regions of interest. + + >>> import BioSimSpace as BSS + >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, roi=[12, 13, 14]) """ + if roi is None: + return _matchAtoms( + molecule0=molecule0, + molecule1=molecule1, + scoring_function=scoring_function, + matches=matches, + return_scores=return_scores, + prematch=prematch, + timeout=timeout, + complete_rings_only=complete_rings_only, + max_scoring_matches=max_scoring_matches, + property_map0=property_map0, + property_map1=property_map1, + ) + else: + return _roiMatch( + molecule0=molecule0, + molecule1=molecule1, + roi=roi, + use_kartograf=False, + kartograf_kwargs={}, + ) + + +def _matchAtoms( + molecule0, + molecule1, + scoring_function, + matches, + return_scores, + prematch, + timeout, + complete_rings_only, + max_scoring_matches, + property_map0, + property_map1, +): # A list of supported scoring functions. scoring_functions = ["RMSD", "RMSDALIGN", "RMSDFLEXALIGN"] @@ -914,10 +963,10 @@ def matchAtoms( # Generate the MCS match. mcs = _rdFMCS.FindMCS( mols, - atomCompare=atomCompare, + atomCompare=_rdFMCS.AtomCompare.CompareAny, bondCompare=_rdFMCS.BondCompare.CompareAny, completeRingsOnly=complete_rings_only, - ringMatchesRingOnly=ring_matches_ring_only, + ringMatchesRingOnly=True, matchChiralTag=False, matchValences=False, maximizeBonds=False, @@ -1106,15 +1155,12 @@ def _kartograf_map(molecule0, molecule1, kartograf_kwargs): return kartograf_mapping -def roiMatch( +def _roiMatch( molecule0, molecule1, roi, - ring_matches_ring_only=True, - complete_rings_only=True, - atomCompare=_rdFMCS.AtomCompare.CompareAny, - use_kartograf=False, - kartograf_kwargs={}, + use_kartograf, + kartograf_kwargs, ): """ Matching of two molecules based on a region of interest (ROI). @@ -1133,13 +1179,9 @@ def roiMatch( The reference molecule. roi : list - The region of interest to merge. + The region of interest to match. Consists of a list of ROI residue indices. - ring_matches_ring_only : bool - Whether ring bonds can only match ring bonds. - This is set to True by default. - use_kartograf : bool If set to True, will use the kartograf algorithm to match the molecules. This is set to False by default. @@ -1182,18 +1224,18 @@ def roiMatch( with a region of interest defined as a list of residues. >>> import BioSimSpace as BSS - >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12]) + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12]) Find the mapping between two molecules with multiple regions of interest. >>> import BioSimSpace as BSS - >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12, 13, 14]) + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12, 13, 14]) Find the best maximum common substructure mapping between two molecules, using Kartograf as the MCS algorithm. >>> import BioSimSpace as BSS - >>> mapping = BSS.Align.roiMatch(molecule0, molecule1, roi=[12], use_kartograf=True) + >>> mapping = BSS.Align._align._roiMatch(molecule0, molecule1, roi=[12], use_kartograf=True) """ # Validate input @@ -1207,8 +1249,8 @@ def roiMatch( "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" ) - if roi is None: - raise ValueError("residue of interest list is not provided.") + if roi is type(list): + raise TypeError("'roi' must be of type 'list'") # Get the atoms before the ROI. # This is being done so that when we map the atoms in ROI, we can append @@ -1258,9 +1300,6 @@ def roiMatch( mapping = matchAtoms( res0_extracted, res1_extracted, - complete_rings_only=complete_rings_only, - ring_matches_ring_only=ring_matches_ring_only, - atomCompare=atomCompare, ) # Look up the absolute atom indices in the molecule @@ -1348,7 +1387,9 @@ def roiMatch( return full_mapping -def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): +def rmsdAlign( + molecule0, molecule1, mapping=None, roi=None, property_map0={}, property_map1={} +): """ Align atoms in molecule0 to those in molecule1 using the mapping between matched atom indices. The molecule is aligned using rigid-body @@ -1368,6 +1409,10 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map mapping : dict A dictionary mapping atoms in molecule0 to those in molecule1. + roi : list + The region of interest to align. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1397,8 +1442,38 @@ def rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map >>> import BioSimSpace as BSS >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1) + + Align residue of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1, roi=[12]) + + Align multiple residues of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.rmsdAlign(molecule0, molecule1, roi=[12,13]) """ + if roi is None: + return _rmsdAlign( + molecule0, + molecule1, + mapping=mapping, + property_map0=property_map0, + property_map1=property_map1, + ) + else: + return _roiAlign( + molecule0, + molecule1, + roi=roi, + align_function="rmsd", + property_map0=property_map0, + property_map1=property_map1, + ) + + +def _rmsdAlign(molecule0, molecule1, mapping=None, property_map0={}, property_map1={}): if not isinstance(molecule0, _Molecule): raise TypeError( "'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'" @@ -1461,6 +1536,7 @@ def flexAlign( molecule1, mapping=None, fkcombu_exe=None, + roi=None, property_map0={}, property_map1={}, ): @@ -1484,6 +1560,10 @@ def flexAlign( Path to the fkcombu executable. If None is passed, then BioSimSpace will attempt to find fkcombu by searching your PATH. + roi : list + The region of interest to align. + Consists of a list of ROI residue indices. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -1513,8 +1593,47 @@ def flexAlign( >>> import BioSimSpace as BSS >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1) + + Align residue of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, roi=[12]) + + Align multiple residues of interest from molecule0 to molecule1. + + >>> import BioSimSpace as BSS + >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, roi=[12,13]) """ + if roi is None: + return _flexAlign( + molecule0, + molecule1, + mapping=mapping, + fkcombu_exe=fkcombu_exe, + property_map0=property_map0, + property_map1=property_map1, + ) + else: + return _roiAlign( + molecule0, + molecule1, + roi=roi, + align_function="rmsd_flex_align", + fkcombu_exe=fkcombu_exe, + property_map0=property_map0, + property_map1=property_map1, + ) + + +def _flexAlign( + molecule0, + molecule1, + mapping, + fkcombu_exe, + property_map0, + property_map1, +): # Check that we found fkcombu in the PATH. if fkcombu_exe is None: if _fkcombu_exe is None: @@ -1615,18 +1734,18 @@ def flexAlign( return _Molecule(molecule0) -def roiAlign( +def _roiAlign( molecule0, molecule1, - roi=None, - align_function="rmsd", + roi, + align_function, + property_map0, + property_map1, fkcombu_exe=None, - property_map0={}, - property_map1={}, ): """ Flexibly align residue of interest (ROI) in molecule0 to that in molecule1 - using BioSimSpace.Align.flexAlign(). + using BioSimSpace.Align._flexAlign(). Parameters ---------- @@ -1638,18 +1757,18 @@ def roiAlign( The reference molecule. roi : list - The region of interest to merge. + The region of interest to align. Consists of a list of ROI residue indices. - . + align_function : str The alignment function used to align atoms. Available options are: - "rmsd" Align atoms in molecule0 to those in molecule1 using the mapping between matched atom indices. - Uses :class:`rmsdAlign ` to align the atoms in the ROI. + Uses :class:`rmsdAlign ` to align the atoms in the ROI. - "rmsd_flex_align" Flexibly align roi from molecule0 to molecule1 based on the mapping. - Uses :class:`flexAlign ` to align the atoms in the ROI. + Uses :class:`flexAlign ` to align the atoms in the ROI. fkcombu_exe : str Path to the fkcombu executable. Will only be used if aligning with @@ -1670,19 +1789,6 @@ def roiAlign( molecule : :class:`Molecule ` The aligned molecule. - - Examples - -------- - - Align residue of interest from molecule0 to molecule1. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.roiAlign(molecule0, molecule1, roi=[12]) - - Align multiple residues of interest from molecule0 to molecule1. - - >>> import BioSimSpace as BSS - >>> molecule0 = BSS.Align.roiAlign(molecule0, molecule1, roi=[12,13]) """ if not isinstance(molecule0, _Molecule): diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index 12895e611..2fc874e00 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -602,7 +602,7 @@ def test_roi_match(protein_inputs): proteins, protein_mapping, roi = protein_inputs p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] - mapping = BSS.Align.roiMatch(p0, p1, roi=roi) + mapping = BSS.Align.matchAtoms(p0, p1, roi=roi) assert mapping == protein_mapping @@ -612,7 +612,29 @@ def test_roi_align(protein_inputs): p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] - aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=roi) + aligned_p0 = BSS.Align.rmsdAlign(p0, p1, roi=roi) + for res in roi: + # Extract sire objects for the ROI and compare their coordinates + aligned_roi = aligned_p0.extract( + [a.index() for a in aligned_p0.getResidues()[res].getAtoms()] + ) + aligned_roi_coords = aligned_roi._sire_object.coordinates() + + p1_roi = p1.extract([a.index() for a in p1.getResidues()[res].getAtoms()]) + p1_roi_coords = p1_roi._sire_object.coordinates() + + for i, coord in enumerate(aligned_roi_coords): + # assume that the test passes if the coordinates are within 0.5 A + assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) + + +def test_roi_flex_align(protein_inputs): + # p0 has been translated by 10 A in each direction. + proteins, protein_mapping, roi = protein_inputs + p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] + p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + + aligned_p0 = BSS.Align.flexAlign(p0, p1, roi=roi) for res in roi: # Extract sire objects for the ROI and compare their coordinates aligned_roi = aligned_p0.extract( @@ -636,7 +658,7 @@ def test_roi_merge(protein_inputs): p0 = BSS.Parameters.ff14SB(p0).getMolecule() p1 = BSS.Parameters.ff14SB(p1).getMolecule() - aligned_p0 = BSS.Align.roiAlign(p0, p1, roi=roi) + aligned_p0 = BSS.Align.rmsdAlign(p0, p1, roi=roi) merged = BSS.Align.merge(aligned_p0, p1, protein_mapping) merged_system = merged.toSystem() assert merged_system.nPerturbableMolecules() == 1 From 6fd42491a3aabeff196406a35258ea76381eb8cc Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 11 Jun 2024 13:21:25 +0100 Subject: [PATCH 21/32] Remove old pytests fixture --- tests/Align/test_align.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index 2fc874e00..2205f468a 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -472,13 +472,6 @@ def test_hydrogen_mass_repartitioning(): assert mass1 == masses0[idx] -@pytest.fixture -def proteins(request): - p0 = BSS.IO.readMolecules(["ala_ser_size_5_capped.pdb"])[0] - p1 = BSS.IO.readMolecules(["ala_wt_size_5_capped.pdb"])[0] - return p0, p1 - - @pytest.fixture( params=[ ( From 5bf1f3e5138dc1f537022f548001ddd15a7e08dc Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Tue, 11 Jun 2024 16:50:06 +0100 Subject: [PATCH 22/32] Add support for viewing mappings for ROIs --- python/BioSimSpace/Align/_align.py | 36 ++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index ab6c26cda..f8726ba80 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -38,7 +38,6 @@ import subprocess as _subprocess import sys as _sys - from .._Utils import _try_import, _have_imported, _assert_imported import warnings as _warnings @@ -2014,6 +2013,7 @@ def viewMapping( molecule0, molecule1, mapping=None, + roi=None, property_map0={}, property_map1={}, style=None, @@ -2038,6 +2038,9 @@ def viewMapping( mapping : dict A dictionary mapping atoms in molecule0 to those in molecule1. + roi : int + The region of interest to highlight. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -2085,6 +2088,9 @@ def viewMapping( "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" ) + if not isinstance(roi, int): + raise TypeError("'roi' must be of type 'int'") + if not isinstance(property_map0, dict): raise TypeError("'property_map0' must be of type 'dict'") @@ -2163,9 +2169,21 @@ def viewMapping( view.addModel(_Chem.MolToMolBlock(rdmol0), "mol0", viewer=viewer0) view.addModel(_Chem.MolToMolBlock(rdmol1), "mol1", viewer=viewer1) - # Set the style. - view.setStyle({"model": 0}, style, viewer=viewer0) - view.setStyle({"model": 0}, style, viewer=viewer1) + if roi is not None: + roi0_idx = [a.index() for a in molecule0.getResidues()[roi].getAtoms()] + roi1_idx = [a.index() for a in molecule1.getResidues()[roi].getAtoms()] + + # find the key in the mapping that corresponds to the ROI atoms + mapping = {k: v for k, v in mapping.items() if k in roi0_idx} + + # Set the style for the ROI atoms. + view.setStyle({"model": 0}, style={"stick": {"hidden": True}}, viewer=viewer0) + view.setStyle({"model": 0}, style={"stick": {"hidden": True}}, viewer=viewer1) + view.setStyle({"index": roi0_idx}, style, viewer=viewer0) + view.setStyle({"index": roi1_idx}, style, viewer=viewer1) + else: + view.setStyle({"model": 0}, style, viewer=viewer0) + view.setStyle({"model": 0}, style, viewer=viewer1) # Highlight the atoms from the mapping. for atom0, atom1 in mapping.items(): @@ -2204,9 +2222,13 @@ def viewMapping( view.setBackgroundColor("white", viewer=viewer0) view.setBackgroundColor("white", viewer=viewer1) - # Zoom to molecule. - view.zoomTo(viewer=viewer0) - view.zoomTo(viewer=viewer1) + if roi is not None: + view.zoomTo({"index": roi0_idx}, viewer=viewer0) + view.zoomTo({"index": roi1_idx}, viewer=viewer1) + else: + # Zoom to molecule. + view.zoomTo(viewer=viewer0) + view.zoomTo(viewer=viewer1) return view From dc5cf412b4eff604b8e0a118a1f7598d37db905d Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Wed, 12 Jun 2024 09:43:36 +0100 Subject: [PATCH 23/32] Fix ROI validation code in Align.viewMapping --- python/BioSimSpace/Align/_align.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index f8726ba80..31080e1ba 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -2088,7 +2088,7 @@ def viewMapping( "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" ) - if not isinstance(roi, int): + if roi is not None and not isinstance(roi, int): raise TypeError("'roi' must be of type 'int'") if not isinstance(property_map0, dict): From 044769778c49c7c0198d9322d2221e8db9731374 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Wed, 12 Jun 2024 11:21:12 +0100 Subject: [PATCH 24/32] Update ROI code inputs to use tutorial link --- tests/Align/test_align.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index 2205f468a..ba262aff7 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -593,8 +593,12 @@ def protein_inputs(request): def test_roi_match(protein_inputs): proteins, protein_mapping, roi = protein_inputs - p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] - p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] mapping = BSS.Align.matchAtoms(p0, p1, roi=roi) assert mapping == protein_mapping @@ -602,8 +606,12 @@ def test_roi_match(protein_inputs): def test_roi_align(protein_inputs): # p0 has been translated by 10 A in each direction. proteins, protein_mapping, roi = protein_inputs - p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] - p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] aligned_p0 = BSS.Align.rmsdAlign(p0, p1, roi=roi) for res in roi: @@ -624,8 +632,12 @@ def test_roi_align(protein_inputs): def test_roi_flex_align(protein_inputs): # p0 has been translated by 10 A in each direction. proteins, protein_mapping, roi = protein_inputs - p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] - p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] aligned_p0 = BSS.Align.flexAlign(p0, p1, roi=roi) for res in roi: @@ -645,8 +657,12 @@ def test_roi_flex_align(protein_inputs): def test_roi_merge(protein_inputs): proteins, protein_mapping, roi = protein_inputs - p0 = BSS.IO.readMolecules([f"{proteins}_mut_peptide.pdb"])[0] - p1 = BSS.IO.readMolecules([f"{proteins}_wt_peptide.pdb"])[0] + p0 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_mut_peptide.pdb") + )[0] + p1 = BSS.IO.readMolecules( + BSS.IO.expand(BSS.tutorialUrl(), f"{proteins}_wt_peptide.pdb") + )[0] p0 = BSS.Parameters.ff14SB(p0).getMolecule() p1 = BSS.Parameters.ff14SB(p1).getMolecule() From a22ad80bca41e1731b11dcb5e87c69560338acb0 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Thu, 13 Jun 2024 15:35:24 +0100 Subject: [PATCH 25/32] Added input file validation for ROI atom matching. Now the function will raise an exception if the atom ordering in the input molecules is not consistent outside of the ROI region. --- python/BioSimSpace/Align/_align.py | 32 +++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 31080e1ba..32da1d18f 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -1251,6 +1251,34 @@ def _roiMatch( if roi is type(list): raise TypeError("'roi' must be of type 'list'") + # Make sure that the atoms in the pre-ROI region between two molecules are + # in the same order. While the residue sequences between two molecules + # might be the same, saving the molecules in different formats/editors + # might change the order of the atoms. The mapping function will not work + # if the atoms are not in the same order outside the ROI region. + # We will only test the first residue in the protein, as doing this for + # every residue would be computationally expensive. + if roi[0] != 0: + molecule0_res = molecule0.getResidues()[0] + molecule1_res = molecule1.getResidues()[0] + if [a.name() for a in molecule0_res.getAtoms()] != [ + b.name() for b in molecule1_res.getAtoms() + ]: + raise ValueError( + "The atoms outside the ROI region between the two molecules are not in the same order." + ) + # If the ROI is the first residue, then we will test the atoms in the last + # residue of the molecule. + else: + molecule0_res = molecule0.getResidues()[-1] + molecule1_res = molecule1.getResidues()[-1] + if [a.name() for a in molecule0_res.getAtoms()] != [ + b.name() for b in molecule1_res.getAtoms() + ]: + raise ValueError( + "The atoms outside the ROI region between the two molecules are not in the same order." + ) + # Get the atoms before the ROI. # This is being done so that when we map the atoms in ROI, we can append # the ROI mapping to this pre-ROI mapping which will then be used as @@ -1280,7 +1308,9 @@ def _roiMatch( molecule1_atoms = [a.name() for a in molecule1_roi.getAtoms()] if molecule0_atoms == molecule1_atoms: _warnings.warn( - f"Residue {res_idx} between molecule0 and molecule1 have identical atomtypes." + f"Residues {res_idx} between molecule0 and molecule1 have " + "identical atomtypes, which means you are likely attempting " + "to match two identical residues." ) res0_idx = [a.index() for a in molecule0_roi] From 886ebd5edd0a6422f11e07a42a314c8339652368 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Fri, 14 Jun 2024 13:09:49 +0100 Subject: [PATCH 26/32] Add ROI validation function and checks for each ROI function. Coordinates property name is also now retrieved properly too for the roiAlign function --- python/BioSimSpace/Align/_align.py | 56 +++++++++++++++++++++++++----- python/BioSimSpace/Align/_merge.py | 15 +++++++- 2 files changed, 61 insertions(+), 10 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 32da1d18f..9acf1351c 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -1158,8 +1158,7 @@ def _roiMatch( molecule0, molecule1, roi, - use_kartograf, - kartograf_kwargs, + **kwargs, ): """ Matching of two molecules based on a region of interest (ROI). @@ -1181,6 +1180,9 @@ def _roiMatch( The region of interest to match. Consists of a list of ROI residue indices. + Keyword Args + ------------ + use_kartograf : bool If set to True, will use the kartograf algorithm to match the molecules. This is set to False by default. @@ -1247,9 +1249,14 @@ def _roiMatch( raise TypeError( "'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'" ) + if roi is None: + raise ValueError("No region of interest (ROI) has been provided.") + else: + _validate_roi([molecule0, molecule1], roi) - if roi is type(list): - raise TypeError("'roi' must be of type 'list'") + # Check kwargs + use_kartograf = kwargs.get("use_kartograf", False) + kartograf_kwargs = kwargs.get("kartograf_kwargs", {}) # Make sure that the atoms in the pre-ROI region between two molecules are # in the same order. While the residue sequences between two molecules @@ -1832,14 +1839,16 @@ def _roiAlign( if roi is None: raise ValueError("No region of interest (ROI) has been provided.") else: - if not isinstance(roi, list): - raise TypeError("'roi' must be of type 'list'.") + _validate_roi([molecule0, molecule1], roi) if align_function not in ["rmsd", "rmsd_flex_align"]: raise ValueError( "Invalid alignment function. Available options are: 'rmsd', 'rmsd_flex_align'" ) + # Get the property name for the coordinates + prop0 = property_map0.get("coordinates", "coordinates") + for roi_idx in roi: res0 = molecule0.getResidues()[roi_idx] res1 = molecule1.getResidues()[roi_idx] @@ -1863,7 +1872,7 @@ def _roiAlign( # Now update molecule0 with the aligned residue coordinates mol0 = molecule0._getSireObject() - res0_aligned_coords = res0_aligned._getSireObject().property("coordinates") + res0_aligned_coords = res0_aligned._getSireObject().property(prop0) # A list to store the updated coordinates for molecule0 mol0_coords = [] @@ -1871,7 +1880,7 @@ def _roiAlign( if i == roi_idx: mol0_coords.append(res0_aligned_coords) else: - mol0_coords.append(res.property("coordinates")) + mol0_coords.append(res.property(prop0)) # Flatten the list mol0_coords = [item for sublist in mol0_coords for item in sublist] @@ -1879,7 +1888,7 @@ def _roiAlign( # Create a cursor for updating the coordinates property c = mol0.cursor() for i, atom in enumerate(c.atoms()): - atom["coordinates"] = mol0_coords[i] + atom[prop0] = mol0_coords[i] mol0 = c.commit() # Convert the Sire molecule back to a BioSimSpace molecule so we can @@ -2705,6 +2714,35 @@ def _validate_mapping(molecule0, molecule1, mapping, name): ) +def _validate_roi(molecules, roi): + """ + Internal function to validate that a region of interest (ROI) is a list + of integers that are within the range of the molecule. + + Parameters + ---------- + + molecules : list + Consits of a list of :class:`Molecule `. + + roi : list + The region of interest. + Consists of a list of ROI residue indices. + """ + + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + + for mol in molecules: + for idx in roi: + if not isinstance(idx, int): + raise TypeError("'roi' must be a list of integers.") + if idx < 0 or idx > (mol.nResidues() - 1): + raise ValueError( + f"Residue index {idx} is out of range! The molecule contains {mol.nResidues()} residues." + ) + + def _to_sire_mapping(mapping): """ Internal function to convert a regular mapping to Sire AtomIdx format. diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index 713e364e3..b51df0150 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -140,6 +140,20 @@ def merge( "key:value pairs in 'mapping' must be of type 'Sire.Mol.AtomIdx'" ) + # Validate the region of interest. + if roi is not None: + if not isinstance(roi, list): + raise TypeError("'roi' must be of type 'list'.") + + for mol in [molecule0, molecule1]: + for idx in roi: + if not isinstance(idx, int): + raise TypeError("'roi' must be a list of integers.") + if idx < 0 or idx > (mol.nResidues() - 1): + raise ValueError( + f"Residue index {idx} is out of range! The molecule contains {mol.nResidues()} residues." + ) + # Set 'allow_ring_breaking' and 'allow_ring_size_change' to true if the # user has requested to 'force' the merge, i.e. the 'force' argument # takes precedence. @@ -209,7 +223,6 @@ def merge( atoms1.append(atom) atoms1_idx.append(atom.index()) - # Create a new molecule to hold the merged molecule. molecule = _SireMol.Molecule("Merged_Molecule") # Only part of the ligand is to be merged From 22f1aa7ea6a71a0c70e6986f68f2da47313b27d5 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Fri, 14 Jun 2024 13:11:03 +0100 Subject: [PATCH 27/32] Skip roiMerge test if amber is not installed --- tests/Align/test_align.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index ba262aff7..a9a9d55c2 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -3,6 +3,7 @@ from sire.legacy.MM import InternalFF, IntraCLJFF, IntraFF from sire.legacy.Mol import AtomIdx, Element, PartialMolecule +from tests.conftest import has_amber import BioSimSpace as BSS @@ -655,6 +656,7 @@ def test_roi_flex_align(protein_inputs): assert coord.value() == pytest.approx(p1_roi_coords[i].value(), abs=0.5) +@pytest.mark.skipif(has_amber is False, reason="Requires AMBER and to be installed.") def test_roi_merge(protein_inputs): proteins, protein_mapping, roi = protein_inputs p0 = BSS.IO.readMolecules( From 2ce7f8650c3de42d46439ceee70516fe334a3416 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Fri, 14 Jun 2024 15:04:54 +0100 Subject: [PATCH 28/32] Replace viewMapping with gufe/visualization/mapping_visualization.py code. Lot's of refactoring needed. --- python/BioSimSpace/Align/_align.py | 401 ++++++++++++++++++++--------- python/BioSimSpace/Align/_merge.py | 2 +- 2 files changed, 277 insertions(+), 126 deletions(-) diff --git a/python/BioSimSpace/Align/_align.py b/python/BioSimSpace/Align/_align.py index 9acf1351c..0a93de7c7 100644 --- a/python/BioSimSpace/Align/_align.py +++ b/python/BioSimSpace/Align/_align.py @@ -37,6 +37,8 @@ import os as _os import subprocess as _subprocess import sys as _sys +from typing import Any, Collection, Optional +from itertools import chain from .._Utils import _try_import, _have_imported, _assert_imported @@ -52,6 +54,8 @@ from rdkit import Chem as _Chem from rdkit.Chem import rdFMCS as _rdFMCS from rdkit import RDLogger as _RDLogger + from rdkit.Chem import Draw + from rdkit.Chem import AllChem # Disable RDKit warnings. _RDLogger.DisableLog("rdApp.*") @@ -1178,16 +1182,13 @@ def _roiMatch( roi : list The region of interest to match. - Consists of a list of ROI residue indices. - - Keyword Args - ------------ + Consists of a list of ROI residue indices - use_kartograf : bool + use_kartograf : bool, optional, default=False If set to True, will use the kartograf algorithm to match the - molecules. This is set to False by default. + molecules. - kartograf_kwargs : dict + kartograf_kwargs : dict, optional, default={} A dictionary of keyword arguments to be passed to kartograf. Returns @@ -2053,14 +2054,13 @@ def viewMapping( molecule1, mapping=None, roi=None, + pixels=300, property_map0={}, property_map1={}, - style=None, - orientation="horizontal", - pixels=900, + **kwargs, ): """ - Visualise the mapping between molecule0 and molecule1. This draws a 3D + Visualise the mapping between molecule0 and molecule1. This draws a 2D depiction of both molecules with the mapped atoms highlighted in green. Labels specify the indices of the atoms, along with the indices of the atoms to which they map in the other molecule. @@ -2080,6 +2080,9 @@ def viewMapping( roi : int The region of interest to highlight. + pixels : int + The size in pixels of the 2D drawing. + property_map0 : dict A dictionary that maps "properties" in molecule0 to their user defined values. This allows the user to refer to properties @@ -2089,31 +2092,15 @@ def viewMapping( A dictionary that maps "properties" in molecule1 to their user defined values. - style : dict - Drawing style. See https://3dmol.csb.pitt.edu/doc/$3Dmol.GLViewer.html - for some examples. - - orientation : str - Whether to display the two molecules in a "horizontal" or "vertical" - arrangement. - - pixels : int - The size of the largest view dimension in pixel, i.e. either the - "horizontal" or "vertical" size. - - Returns - ------- - - view : py3Dmol.view - A view of the two molecules with the mapped atoms highlighted and - labelled. + show_adjacent_residues : bool, optional default=False + If set to True, will show neighouring residues to the ROI region. """ - # Adapted from: https://gist.github.com/cisert/d05664d4c98ac1cf86ee70b8700e56a9 - # Only draw within a notebook. if not _is_notebook: return None + else: + from IPython.display import display, Image _assert_imported(_rdkit) @@ -2136,21 +2123,6 @@ def viewMapping( if not isinstance(property_map1, dict): raise TypeError("'property_map1' must be of type 'dict'") - if style is not None: - if not isinstance(style, dict): - raise TypeError("'style' must be of type 'dict'") - - if not isinstance(orientation, str): - raise TypeError("'orientation' must be of type 'str'") - else: - # Convert to lower case and strip whitespace. - orientation = orientation.lower().replace(" ", "") - - if orientation not in ["horizontal", "vertical"]: - raise ValueError( - "'orientation' must be equal to 'horizontal' " "or 'vertical'." - ) - if isinstance(pixels, float): pixels = int(pixels) if not type(pixels) is int: @@ -2158,6 +2130,9 @@ def viewMapping( if pixels <= 0: raise ValueError("pixels' must be > 0!") + # Get kwargs for the view + show_adjacent_residues = kwargs.get("show_adjacent_residues", False) + # The user has passed an atom mapping. if mapping is not None: if not isinstance(mapping, dict): @@ -2176,100 +2151,276 @@ def viewMapping( ) molecule0 = rmsdAlign(molecule0, molecule1, mapping) - import py3Dmol as _py3Dmol + if roi is not None: + if show_adjacent_residues: + # Extract the region of interest from the molecules plus one residue on each side. + # residue[roi-1:roi+1] would only extract the ROI residue. + roi0_region = molecule0.search(f"residue[{roi - 2}:{roi + 2}]") + roi1_region = molecule1.search(f"residue[{roi - 2}:{roi + 2}]") + else: + roi0_region = molecule0.search(f"residue[{roi - 1}:{roi + 1}]") + roi1_region = molecule1.search(f"residue[{roi - 1}:{roi + 1}]") + + roi0_idx = [a.index() for a in roi0_region.atoms()] + roi1_idx = [a.index() for a in roi1_region.atoms()] + + molecule0 = molecule0.extract(roi0_idx) + molecule1 = molecule1.extract(roi1_idx) + + # find the key in the mapping that corresponds to the ROI atoms + mapping = {k: v for k, v in mapping.items() if k in roi0_idx} + + # now we need to update the mapping to reflect the new atom indices + mapping = {roi0_idx.index(k): roi1_idx.index(v) for k, v in mapping.items()} # Convert the molecules to RDKit format. rdmol0 = _Convert.toRDKit(molecule0, property_map=property_map0) rdmol1 = _Convert.toRDKit(molecule1, property_map=property_map1) + text = _draw_mapping(mapping, rdmol0, rdmol1, pixels=pixels) + img = Image(data=text) + display(img) - # Set grid view properties. - viewer0 = (0, 0) - if orientation == "horizontal": - viewergrid = (1, 2) - viewer1 = (0, 1) - width = pixels - height = int(pixels / 2) - else: - viewergrid = (2, 1) - viewer1 = (1, 0) - width = int(pixels / 2) - height = pixels - - # Create the view. - view = _py3Dmol.view( - linked=False, width=width, height=height, viewergrid=viewergrid - ) - # Set default drawing style. - if style is None: - style = {"stick": {"colorscheme": "grayCarbon", "linewidth": 0.1}} +# This code is adopted from OpenFE and is licensed under the MIT license. +# For details, see https://github.com/OpenFreeEnergy/gufe/visualization/mapping_visualization.py +def _match_elements(mol1: _Chem.Mol, idx1: int, mol2: _Chem.Mol, idx2: int) -> bool: + """ + Convenience method to check if elements between two molecules (molA + and molB) are the same. - # Add the molecules to the views. - view.addModel(_Chem.MolToMolBlock(rdmol0), "mol0", viewer=viewer0) - view.addModel(_Chem.MolToMolBlock(rdmol1), "mol1", viewer=viewer1) + Parameters + ---------- + mol1 : RDKit.Mol + RDKit representation of molecule 1. + idx1 : int + Index of atom to check in molecule 1. + mol2 : RDKit.Mol + RDKit representation of molecule 2. + idx2 : int + Index of atom to check in molecule 2. - if roi is not None: - roi0_idx = [a.index() for a in molecule0.getResidues()[roi].getAtoms()] - roi1_idx = [a.index() for a in molecule1.getResidues()[roi].getAtoms()] + Returns + ------- + bool + True if elements are the same, False otherwise. + """ + elem_mol1 = mol1.GetAtomWithIdx(idx1).GetAtomicNum() + elem_mol2 = mol2.GetAtomWithIdx(idx2).GetAtomicNum() + return elem_mol1 == elem_mol2 - # find the key in the mapping that corresponds to the ROI atoms - mapping = {k: v for k, v in mapping.items() if k in roi0_idx} - # Set the style for the ROI atoms. - view.setStyle({"model": 0}, style={"stick": {"hidden": True}}, viewer=viewer0) - view.setStyle({"model": 0}, style={"stick": {"hidden": True}}, viewer=viewer1) - view.setStyle({"index": roi0_idx}, style, viewer=viewer0) - view.setStyle({"index": roi1_idx}, style, viewer=viewer1) - else: - view.setStyle({"model": 0}, style, viewer=viewer0) - view.setStyle({"model": 0}, style, viewer=viewer1) - - # Highlight the atoms from the mapping. - for atom0, atom1 in mapping.items(): - p = rdmol0.GetConformer().GetAtomPosition(atom0) - view.addSphere( - { - "center": {"x": p.x, "y": p.y, "z": p.z}, - "radius": 0.5, - "color": "green", - "alpha": 0.8, - }, - viewer=viewer0, - ) - view.addLabel( - f"{atom0} \u2192 {atom1}", - {"position": {"x": p.x, "y": p.y, "z": p.z}}, - viewer=viewer0, - ) - p = rdmol1.GetConformer().GetAtomPosition(atom1) - view.addSphere( - { - "center": {"x": p.x, "y": p.y, "z": p.z}, - "radius": 0.5, - "color": "green", - "alpha": 0.8, - }, - viewer=viewer1, +def _get_unique_bonds_and_atoms( + mapping: dict[int, int], mol1: _Chem.Mol, mol2: _Chem.Mol +) -> dict: + """ + Given an input mapping, returns new atoms, element changes, and + involved bonds. + + Parameters + ---------- + mapping : dict of int:int + Dictionary describing the atom mapping between molecules 1 and 2. + mol1 : RDKit.Mol + RDKit representation of molecule 1. + mol2 : RDKit.Mol + RDKit representation of molecule 2. + + Returns + ------- + uniques : dict + Dictionary containing; unique atoms ("atoms"), new elements + ("elements"), deleted bonds ("bond_deletions) and altered bonds + ("bond_changes) for molecule 1. + """ + + uniques: dict[str, set] = { + "atoms": set(), # atoms which fully don't exist in molB + "elements": set(), # atoms which exist but change elements in molB + "bond_deletions": set(), # bonds which are removed + "bond_changes": set(), # bonds which change + } + + for at in mol1.GetAtoms(): + idx = at.GetIdx() + if idx not in mapping: + uniques["atoms"].add(idx) + elif not _match_elements(mol1, idx, mol2, mapping[idx]): + uniques["elements"].add(idx) + + for bond in mol1.GetBonds(): + bond_at_idxs = [bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()] + for at in chain(uniques["atoms"], uniques["elements"]): + if at in bond_at_idxs: + bond_idx = bond.GetIdx() + + if any(i in uniques["atoms"] for i in bond_at_idxs): + uniques["bond_deletions"].add(bond_idx) + else: + uniques["bond_changes"].add(bond_idx) + + return uniques + + +def _draw_molecules( + d2d, + mols: Collection[_Chem.Mol], + atoms_list: Collection[set[int]], + bonds_list: Collection[set[int]], + atom_colors: Collection[dict[Any, tuple[float, float, float, float]]], + bond_colors: Collection[dict[int, tuple[float, float, float, float]]], + highlight_color: tuple[float, float, float, float], + pixels: int, + atom_mapping: Optional[dict[tuple[int, int], dict[int, int]]] = None, +) -> str: + """ + Internal method to visualize a molecule, possibly with mapping info + + Parameters + ---------- + d2d : + renderer to draw the molecule; currently we only support + rdkit.rdMolDraw2D + mols : Collection[RDKitMol] + molecules to visualize + atoms_list: Collection[Set[int]] + iterable containing one set per molecule in ``mols``, with each set + containing the indices of the atoms to highlight + bonds_list: Collection[Set[int]] + iterable containing one set per molecule in ``mols``, with each set + containing the indices of the atoms involved in bonds to highlight + atom_colors: Collection[Dict[Any, Tuple[float, float, float, float]]] + iterable containing one dict per molecule in ``mols``, with each + dict containing a mapping of RDKit atom to color, expressed as an + RGBA tuple, for atoms that need special coloring (e.g., element + changes) + bond_colors: Collection[dict[int, tuple[float, float, float, float]]] + one dict for each molecule, each dict mapping + highlight_color: Tuple[float, float, float, float] + RGBA tuple for the default highlight color used in the mapping + visualization + pixels: int + size of the 2D image in pixels + atom_mapping: Dict[Tuple[int,int], Dict[int, int]], optional + used to align the molecules to each othter for clearer visualization. + The structure contains the indices of the molecules in mols as key + Tuple[molA, molB] and maps the atom indices via the value Dict[ + molA_atom_idx, molB_atom_idx] + default None + """ + # input standardization: + if atom_mapping is None: + atom_mapping = {} + + if d2d is None: + # select default layout based on number of molecules + grid_x, grid_y = { + 1: (1, 1), + 2: (2, 1), + }[len(mols)] + d2d = Draw.rdMolDraw2D.MolDraw2DCairo( + grid_x * pixels, grid_y * pixels, pixels, pixels ) - view.addLabel( - f"{atom1} \u2192 {atom0}", - {"position": {"x": p.x, "y": p.y, "z": p.z}}, - viewer=viewer1, + + # get molecule name labels + # labels = [m.GetProp("ofe-name") if(m.HasProp("ofe-name")) + # else "test" for m in mols] + + labels = ["molecule0", "molecule1"] + + # squash to 2D + copies = [_Chem.Mol(mol) for mol in mols] + for mol in copies: + AllChem.Compute2DCoords(mol) + + # mol alignments if atom_mapping present + for (i, j), atomMap in atom_mapping.items(): + AllChem.AlignMol( + copies[j], copies[i], atomMap=[(k, v) for v, k in atomMap.items()] ) - # Set background colour. - view.setBackgroundColor("white", viewer=viewer0) - view.setBackgroundColor("white", viewer=viewer1) + # standard settings for visualization + d2d.drawOptions().useBWAtomPalette() + d2d.drawOptions().continousHighlight = False + d2d.drawOptions().setHighlightColour(highlight_color) + d2d.drawOptions().addAtomIndices = True + d2d.DrawMolecules( + copies, + highlightAtoms=atoms_list, + highlightBonds=bonds_list, + highlightAtomColors=atom_colors, + highlightBondColors=bond_colors, + legends=labels, + ) + d2d.FinishDrawing() + return d2d.GetDrawingText() - if roi is not None: - view.zoomTo({"index": roi0_idx}, viewer=viewer0) - view.zoomTo({"index": roi1_idx}, viewer=viewer1) - else: - # Zoom to molecule. - view.zoomTo(viewer=viewer0) - view.zoomTo(viewer=viewer1) - return view +def _draw_mapping( + mol1_to_mol2: dict[int, int], mol1: _Chem.Mol, mol2: _Chem.Mol, d2d=None, pixels=300 +): + """ + Method to visualise the atom map correspondence between two rdkit + molecules given an input mapping. + + Legend: + * Red highlighted atoms: unique atoms, i.e. atoms which are not + mapped. + * Blue highlighted atoms: element changes, i.e. atoms which are + mapped but change elements. + * Red highlighted bonds: any bond which involves at least one + unique atom or one element change. + + Parameters + ---------- + mol1_to_mol2 : dict of int:int + Atom mapping between input molecules. + mol1 : RDKit.Mol + RDKit representation of molecule 1 + mol2 : RDKit.Mol + RDKit representation of molecule 2 + d2d : :class:`rdkit.Chem.Draw.rdMolDraw2D.MolDraw2D` + Optional MolDraw2D backend to use for visualisation. + pixels : int + Size of the 2D image in pixels. + """ + # highlight core element changes differently from unique atoms + # RGBA color value needs to be between 0 and 1, so divide by 255 + RED = (220 / 255, 50 / 255, 32 / 255, 1.0) + BLUE = (0.0, 90 / 255, 181 / 255, 1.0) + mol1_uniques = _get_unique_bonds_and_atoms(mol1_to_mol2, mol1, mol2) + + # invert map + mol2_to_mol1_map = {v: k for k, v in mol1_to_mol2.items()} + mol2_uniques = _get_unique_bonds_and_atoms(mol2_to_mol1_map, mol2, mol1) + + atoms_list = [ + mol1_uniques["atoms"] | mol1_uniques["elements"], + mol2_uniques["atoms"] | mol2_uniques["elements"], + ] + bonds_list = [ + mol1_uniques["bond_deletions"] | mol1_uniques["bond_changes"], + mol2_uniques["bond_deletions"] | mol2_uniques["bond_changes"], + ] + + at1_colors = {at: BLUE for at in mol1_uniques["elements"]} + at2_colors = {at: BLUE for at in mol2_uniques["elements"]} + bd1_colors = {bd: BLUE for bd in mol1_uniques["bond_changes"]} + bd2_colors = {bd: BLUE for bd in mol2_uniques["bond_changes"]} + + atom_colors = [at1_colors, at2_colors] + bond_colors = [bd1_colors, bd2_colors] + + return _draw_molecules( + d2d, + [mol1, mol2], + atoms_list=atoms_list, + bonds_list=bonds_list, + atom_colors=atom_colors, + bond_colors=bond_colors, + highlight_color=RED, + pixels=pixels, + atom_mapping={(0, 1): mol1_to_mol2}, + ) def _score_rdkit_mappings( diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index b51df0150..14bdba397 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -152,7 +152,7 @@ def merge( if idx < 0 or idx > (mol.nResidues() - 1): raise ValueError( f"Residue index {idx} is out of range! The molecule contains {mol.nResidues()} residues." - ) + ) # Set 'allow_ring_breaking' and 'allow_ring_size_change' to true if the # user has requested to 'force' the merge, i.e. the 'force' argument From e84df845b06b8191de157d533c8b9aeef27ba456 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 17 Jun 2024 13:51:51 +0100 Subject: [PATCH 29/32] Add proper ROI validation to merge function --- python/BioSimSpace/Align/_merge.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index d7492423d..936dc808b 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -142,17 +142,9 @@ def merge( # Validate the region of interest. if roi is not None: - if not isinstance(roi, list): - raise TypeError("'roi' must be of type 'list'.") - - for mol in [molecule0, molecule1]: - for idx in roi: - if not isinstance(idx, int): - raise TypeError("'roi' must be a list of integers.") - if idx < 0 or idx > (mol.nResidues() - 1): - raise ValueError( - f"Residue index {idx} is out of range! The molecule contains {mol.nResidues()} residues." - ) + from ..Align import _validate_roi + + _validate_roi(roi, molecule0, molecule1) # Set 'allow_ring_breaking' and 'allow_ring_size_change' to true if the # user has requested to 'force' the merge, i.e. the 'force' argument From b366f122b0bb8b63c2a419fa5408f6ad737c5d5d Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 17 Jun 2024 14:12:49 +0100 Subject: [PATCH 30/32] Add ROI keyword to the merge test code --- tests/Align/test_align.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Align/test_align.py b/tests/Align/test_align.py index a9a9d55c2..341e4bf4f 100644 --- a/tests/Align/test_align.py +++ b/tests/Align/test_align.py @@ -670,6 +670,6 @@ def test_roi_merge(protein_inputs): p1 = BSS.Parameters.ff14SB(p1).getMolecule() aligned_p0 = BSS.Align.rmsdAlign(p0, p1, roi=roi) - merged = BSS.Align.merge(aligned_p0, p1, protein_mapping) + merged = BSS.Align.merge(aligned_p0, p1, protein_mapping, roi=roi) merged_system = merged.toSystem() assert merged_system.nPerturbableMolecules() == 1 From 092662cc4423588b2752ee2c349825cf957fb5b2 Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 17 Jun 2024 14:27:51 +0100 Subject: [PATCH 31/32] Fix _validate_roi import in the merge function --- python/BioSimSpace/Align/_merge.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index 936dc808b..b9d391f8a 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -142,9 +142,8 @@ def merge( # Validate the region of interest. if roi is not None: - from ..Align import _validate_roi - - _validate_roi(roi, molecule0, molecule1) + from ._align import _validate_roi + _validate_roi([molecule0, molecule1], roi) # Set 'allow_ring_breaking' and 'allow_ring_size_change' to true if the # user has requested to 'force' the merge, i.e. the 'force' argument From 40bac777849caf9e854c4fba880addfe03a2238d Mon Sep 17 00:00:00 2001 From: Audrius Kalpokas Date: Mon, 17 Jun 2024 16:05:34 +0100 Subject: [PATCH 32/32] Blacken _merge.py --- python/BioSimSpace/Align/_merge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/BioSimSpace/Align/_merge.py b/python/BioSimSpace/Align/_merge.py index b9d391f8a..ba8b61295 100644 --- a/python/BioSimSpace/Align/_merge.py +++ b/python/BioSimSpace/Align/_merge.py @@ -143,6 +143,7 @@ def merge( # Validate the region of interest. if roi is not None: from ._align import _validate_roi + _validate_roi([molecule0, molecule1], roi) # Set 'allow_ring_breaking' and 'allow_ring_size_change' to true if the