Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added sage reversed unimod dictionary #78

Merged
merged 9 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion spectrum_fundamentals/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,19 @@
"[UNIMOD:35]": 15.9949146, # Oxidation
"[UNIMOD:1]": 42.010565, # Acetylation
}

# Reverse lookup used when reading Sage results: maps the mass shift written inside
# square brackets in a Sage sequence (e.g. "C[+57.0214]") to the internal UNIMOD
# identifier. sage_to_internal in mod_string.py parses the bracket content with float()
# and looks it up here, so a key only matches if it is exactly the 4-decimal value
# that appears in the input string.
# NOTE(review): some keys look truncated rather than rounded -- the dictionary directly
# above lists 42.010565 for [UNIMOD:1], which rounds to 42.0106, not 42.0105. Confirm
# against real Sage output that these are the values Sage actually writes.
# NOTE(review): the modification names in the trailing comments are taken from the
# Unimod entries for these accessions -- TODO confirm against unimod.org.
MOD_MASSES_SAGE = {
    229.1629: "[UNIMOD:737]",  # TMT6plex
    304.2071: "[UNIMOD:2016]",  # TMTpro
    144.1020: "[UNIMOD:214]",  # iTRAQ4plex
    304.2053: "[UNIMOD:730]",  # iTRAQ8plex
    8.0141: "[UNIMOD:259]",  # Label:13C(6)15N(2)
    10.0082: "[UNIMOD:267]",  # Label:13C(6)15N(4)
    79.9663: "[UNIMOD:21]",  # Phospho
    -18.0105: "[UNIMOD:23]",  # Dehydrated
    57.0214: "[UNIMOD:4]",  # Carbamidomethyl
    15.9949: "[UNIMOD:35]",  # Oxidation
    42.0105: "[UNIMOD:1]",  # Acetylation
}
# These are only used for prosit_grpc; oktoberfest uses the masses from MOD_MASSES.
AA_MOD_MASSES = {
"K[UNIMOD:737]": AA_MASSES["K"] + MOD_MASSES["[UNIMOD:737]"],
Expand Down
50 changes: 49 additions & 1 deletion spectrum_fundamentals/mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,55 @@
from itertools import repeat
from typing import Dict, List, Optional, Tuple

from .constants import MAXQUANT_VAR_MODS, MOD_MASSES, MOD_NAMES, MSFRAGGER_VAR_MODS, SPECTRONAUT_MODS
from .constants import MAXQUANT_VAR_MODS, MOD_MASSES, MOD_MASSES_SAGE, MOD_NAMES, MSFRAGGER_VAR_MODS, SPECTRONAUT_MODS


def sage_to_internal(sequences: List[str]) -> List[str]:
    """
    Convert mod string from sage to the internal format.

    This function converts sequences using the mass change of a modification in
    square brackets as done by Sage (e.g. ``C[+57.0214]`` or ``[+229.1629]-``) to
    the internal format by replacing the mass shift with the corresponding UNIMOD
    identifier of known and supported modifications defined in the constants.

    Only the bracketed mass shift itself is rewritten. The residue in front of it
    and a terminal ``-`` behind it are left untouched, so a modification on any
    amino acid is handled. A mass shift that is not a key of MOD_MASSES_SAGE is
    kept unchanged in the output.

    :param sequences: A list of sequences with values inside square brackets.
    :return: A list of modified sequences with values converted to internal format.
    """
    # Match just the signed decimal inside square brackets, like [+1.0] or [-2.0].
    # The surrounding residue / terminal dash is deliberately not part of the match:
    # consuming it forces the replacement to re-emit it, which previously duplicated
    # the residue for unknown masses and failed for residues other than C, K and M.
    pattern = re.compile(r"\[([+-]\d+\.\d+)\]")

    def replace(match: "re.Match[str]") -> str:
        # float("+57.0214") parses to the same float as the literal 57.0214 used as a
        # dictionary key, so an exact lookup works; unknown masses fall back to the
        # original bracket text.
        return MOD_MASSES_SAGE.get(float(match.group(1)), match.group(0))

    return [pattern.sub(replace, sequence) for sequence in sequences]


def internal_to_spectronaut(sequences: List[str]) -> List[str]:
Expand Down
19 changes: 19 additions & 0 deletions tests/unit_tests/test_mod_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@ def test_internal_to_mod_names(self):
]


class TestSageToInternal(unittest.TestCase):
    """Class to test Sage to internal."""

    def test_sage_to_internal_carbamidomethylation(self):
        """Test sage_to_internal with a carbamidomethylated cysteine."""
        sage_sequences = ["ABC[+57.0214]DEFGH"]
        expected = ["ABC[UNIMOD:4]DEFGH"]
        self.assertEqual(mod.sage_to_internal(sage_sequences), expected)

    def test_sage_to_internal_variable_oxidation(self):
        """Test sage_to_internal with carbamidomethylation plus an oxidized methionine."""
        sage_sequences = ["ABC[+57.0214]DM[+15.9949]EFGH"]
        expected = ["ABC[UNIMOD:4]DM[UNIMOD:35]EFGH"]
        self.assertEqual(mod.sage_to_internal(sage_sequences), expected)

    def test_sage_to_internal_tmt(self):
        """Test sage_to_internal with a TMT label on the N-terminus and on a lysine."""
        sage_sequences = ["[+229.1629]-ABC[+57.0214]DEFGHK[+229.1629]"]
        expected = ["[UNIMOD:737]-ABC[UNIMOD:4]DEFGHK[UNIMOD:737]"]
        self.assertEqual(mod.sage_to_internal(sage_sequences), expected)


class TestMaxQuantToInternal(unittest.TestCase):
"""Class to test MaxQuant to internal."""

Expand Down
Loading