# Lookup table used by ``sage_to_internal``: the mass shift exactly as Sage
# writes it inside square brackets (parsed as a float) -> internal UNIMOD tag.
MOD_MASSES_SAGE = {
    229.1629: "[UNIMOD:737]",  # TMT
    304.2071: "[UNIMOD:2016]",
    144.1020: "[UNIMOD:214]",
    304.2053: "[UNIMOD:730]",
    8.0141: "[UNIMOD:259]",
    10.0082: "[UNIMOD:267]",
    79.9663: "[UNIMOD:21]",
    -18.0105: "[UNIMOD:23]",
    57.0214: "[UNIMOD:4]",  # Carbamidomethylation
    15.9949: "[UNIMOD:35]",  # Oxidation
    42.0105: "[UNIMOD:1]",  # Acetylation
}


def sage_to_internal(sequences: List[str]) -> List[str]:
    """
    Convert mod string from sage to the internal format.

    This function converts sequences using the mass change of a modification in
    square brackets as done by Sage to the internal format by replacing the mass
    shift with the corresponding UNIMOD identifier of known and supported
    modifications defined in the constants.

    Only the bracketed mass shift itself is rewritten; the residue in front of
    it and a terminal ``-`` behind it are left untouched, so the conversion
    works for every residue and for terminal modifications alike. Mass shifts
    that have no entry in ``MOD_MASSES_SAGE`` are kept unchanged. The lookup is
    an exact float comparison against the table keys, so a shift written with a
    different numeric value (e.g. more decimals) is treated as unknown.

    :param sequences: A list of sequences with values inside square brackets.
    :return: A list of modified sequences with values converted to internal format.
    """
    # A signed decimal number inside square brackets, e.g. [+57.0214] or [-18.0105].
    # The surrounding residue / dash is deliberately NOT part of the match: the
    # substitution then never has to re-assemble (and possibly duplicate or drop) it.
    pattern = re.compile(r"\[([\+\-]\d+\.\d+)\]")

    def replace(match):
        # Fall back to the original bracket expression for unsupported masses.
        return MOD_MASSES_SAGE.get(float(match.group(1)), match.group(0))

    return [pattern.sub(replace, sequence) for sequence in sequences]