Skip to content

Commit

Permalink
docs, tests, and more!
Browse files Browse the repository at this point in the history
  • Loading branch information
pgarrett-scripps committed Apr 18, 2024
1 parent 8bbcbcd commit 7cc25b0
Show file tree
Hide file tree
Showing 9 changed files with 373 additions and 329 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
- Take valid mod values from the respective dbs for randomizer
- Fix readthedocs build
- Have Mod objects work for fragment loss?
- Improve fragment loss handling based on ion type and sequence
- Can improve the performance of mass calculations with isotopes and use_isotope_on_mods, by not calculating the composition of mods
- Add MultiProformaAnnot support to mass/chem/fragment/isotope....

Expand Down
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@

# Peptacular

A spectacularly simple package for working with peptide sequences.
A spectacularly simple package for working with peptide sequences. Now ProForma 2.0 compliant.

# Warnings

- The fragment ion mass calculations may not be accurate. I am fairly certain that the ay, by, and cy internal fragments are
correct, since the y fragment is really just a smaller parent ion.
- GNO and RESID mods are disabled for now. I will add them back in later.
- Project is still under development. I will be adding more features and fixing bugs as I find them.

## ReadTheDocs
https://peptacular.readthedocs.io/en/latest/index.html
Expand Down
15 changes: 11 additions & 4 deletions src/peptacular/mass_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ def mod_mass(mod: Union[str, Mod], monoisotopic: bool = True, precision: Optiona
>>> mod_mass('info:HelloWorld', precision=3)
Traceback (most recent call last):
...
peptacular.errors.InvalidModificationMassError: Cannot determine mass for modification: info:HelloWorld
peptacular.errors.InvalidModificationMassError: Cannot determine mass for modification: "info:HelloWorld"
"""

Expand Down Expand Up @@ -1064,6 +1064,7 @@ def _pop_delta_mass_mods(annotation: ProFormaAnnotation) -> float:

return delta_mass


def _parse_adduct_mass(adduct: str,
precision: Optional[int] = None,
monoisotopic: bool = True) -> float:
Expand Down Expand Up @@ -1091,16 +1092,22 @@ def _parse_adduct_mass(adduct: str,
>>> _parse_adduct_mass('+2Na+', precision=5)
45.97899
>>> _parse_adduct_mass('+2Na-', precision=5)
45.98009
>>> _parse_adduct_mass('H+', precision=5)
1.00728
>>> _parse_adduct_mass('H-', precision=5)
1.00837
"""

mass = 0.0
element_count, element_symbol, element_charge = parse_ion_elements(adduct)

if element_symbol == 'e':
mass += element_count*ELECTRON_MASS
mass += element_count * ELECTRON_MASS

else:

Expand Down Expand Up @@ -1135,7 +1142,7 @@ def _parse_charge_adducts_mass(adducts: ModValue,
:return: The mass of the charge adducts.
:rtype: float
.. code-block:: python
.. code-block:: python
# Parse the charge adducts and return their mass.
>>> _parse_charge_adducts_mass('+Na+,+H+', precision=5)
Expand All @@ -1162,4 +1169,4 @@ def _parse_charge_adducts_mass(adducts: ModValue,
if precision is not None:
mass = round(mass, precision)

return mass
return mass
46 changes: 2 additions & 44 deletions src/peptacular/mods/mod_db_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,46 +272,6 @@ def _get_parent_ids(term: Dict[str, Any]) -> List[str]:
return parent_ids


def _fix_unimod_entry(delta_formula: str) -> str:
    """
    Normalize a Unimod delta formula that may be a glycan based composition.

    The formula is first passed to ``chem_mass``. If that succeeds, the formula
    is returned unchanged. If ``chem_mass`` raises ``InvalidChemFormulaError``
    the formula is assumed to be a glycan composition and is rewritten:

    1. ``Sulf`` -> ``Sulpho`` and ``Phos`` -> ``Phospho``.
    2. A sulfate group written as ``O<a>S<b>`` -> ``Sulpho<b>``.
    3. A phosphate group written as ``H<a>O<b>P<c>`` -> ``Phospho<c>``.

    :param delta_formula: The delta composition string of a Unimod entry.
    :type delta_formula: str

    :raises AssertionError: If a matched sulfate/phosphate group does not have
        the expected 3:1 oxygen ratio.

    :return: The (possibly rewritten) delta formula.
    :rtype: str
    """

    try:
        chem_mass(delta_formula)
    except InvalidChemFormulaError:
        pass  # not a plain chemical formula -> could be a glycan composition
    else:
        return delta_formula

    # Expand the abbreviated names. The negative lookahead keeps an already
    # expanded 'Phospho' from being turned into 'Phosphopho'.
    delta_formula = delta_formula.replace('Sulf', 'Sulpho')
    delta_formula = re.sub(r'Phos(?!pho)', 'Phospho', delta_formula)

    # Sulphate looks like O(num_o)S(num_s), e.g. 'O3S1' -> 'Sulpho1'
    sulfate_match = re.search(r'O(\d+)S(\d+)', delta_formula)
    if sulfate_match:
        num_o = int(sulfate_match.group(1))
        num_s = int(sulfate_match.group(2))

        # Integer comparison instead of a float division: same 3:1 check,
        # but no ZeroDivisionError when a count is 0.
        assert num_s > 0 and num_o == 3 * num_s, \
            f'Unexpected sulfate composition: {sulfate_match.group()}'
        delta_formula = delta_formula.replace(sulfate_match.group(), f'Sulpho{num_s}')

    # Phosphate looks like H(num_h)O(num_o)P(num_p), e.g. 'H1O3P1' -> 'Phospho1'
    phosphate_match = re.search(r'H(\d+)O(\d+)P(\d+)', delta_formula)
    if phosphate_match:
        num_h = int(phosphate_match.group(1))
        num_o = int(phosphate_match.group(2))
        num_p = int(phosphate_match.group(3))

        assert num_p > 0 and num_o == 3 * num_p, \
            f'Unexpected phosphate composition: {phosphate_match.group()}'
        assert num_h > 0 and num_o == 3 * num_h, \
            f'Unexpected phosphate composition: {phosphate_match.group()}'
        delta_formula = delta_formula.replace(phosphate_match.group(), f'Phospho{num_p}')

    return delta_formula


def _get_unimod_entries(terms: List[Dict[str, Any]]) -> List[ModEntry]:
for term in terms:

Expand Down Expand Up @@ -1168,10 +1128,8 @@ def count_invalid_entries(entries: List[ModEntry]) -> (int, int, int):
UNIMOD_DB.reload_from_file(os.path.join(_obo_path, "unimod.obo"))
PSI_MOD_DB.reload_from_file(os.path.join(_obo_path, "psi-mod.obo"))
XLMOD_DB.reload_from_file(os.path.join(_obo_path, "xlmod.obo"))


# GNO_DB.reload_from_file(os.path.join(_obo_path, "gno.obo"))
# RESID_DB.reload_from_file(os.path.join(_obo_path, "psi-mod.obo"))
#GNO_DB.reload_from_file(os.path.join(_obo_path, "gno.obo"))
#RESID_DB.reload_from_file(os.path.join(_obo_path, "psi-mod.obo"))


def reload_all_databases_from_online() -> None:
Expand Down
74 changes: 37 additions & 37 deletions src/peptacular/proforma/randomizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,31 @@
from peptacular.proforma.proforma_parser import ProFormaAnnotation


UNIMOD_LEVEL_BASE_MOD_VALS = ['Oxidation', 'UNIMOD:10']
UNIMOD_LEVEL2_MOD_VALS = ['U:Oxidation', 'U:10', 'U:+1', 'U:-1', 'U:+3.1415', 'U:-3.1415']
PSI_LEVEL_BASE_MOD_VALS = ['O-phospho-L-serine', 'MOD:00046']
PSI_LEVEL2_MOD_VALS = ['M:O-phospho-L-serine', 'M:00046', 'M:+1', 'M:-1', 'M:+3.1415', 'M:-3.1415']
DELTA_MASS_MOD_VALS = ['+1', '-1', '+3.1415', '-3.1415']
MOD_INFO_VALS = ['INFO:Cool', 'INFO:Awesome', 'INFO:Radical', 'INFO:Amazing', 'INFO:Fantastic']
CHEM_FORMULA_MOD_VALS = ['Formula:C12H22O11', 'Formula:[13C6]H12O6[12C-4]', 'Formula:CHO', 'Formula:C2H-5O']
GLYCAN_MOD_VALS = ['Glycan:HexNAc2Hex3Neu1', 'Glycan:Hex', 'Glycan:6BAAE1B1']
GNO_MOD_VALS = ['GNO:G59626AS', 'GNO:G62765YT', 'G:G59626AS', 'G:G62765YT', 'G:+1', 'G:-1', 'G:+3.1415', 'G:-3.1415']
RESID_MOD_VALS = ['RESID:AA0581', 'RESID:AA0037', 'R:AA0581', 'R:AA0037', 'R:+1', 'R:-1', 'R:+3.1415', 'R:-3.1415']
ISOTOPE_MOD_VALS = ['13C', '15N', '18O', '2H', 'T', 'D']
STATIC_MOD_VALS = ['[Oxidation]@M', '[Oxidation]@M,C,D', '[+1]@C', '[-1]@C', '[+3.1415]@C', '[-3.1415]@C']
XLMOD_VALS = ['XLMOD:02001', 'XLMOD:02010', 'XLMOD:02000', 'X:02001', 'X:02010', 'X:02000']
CHARGE_ADDUCT_VALS = ['+H+', '+2Na+,-H+', '+2Na+,+H+', '2I-', '+e-']

TOP_DOWN_MODS = CHEM_FORMULA_MOD_VALS + RESID_MOD_VALS
CROSS_LINKING_MODS = XLMOD_VALS
GLYCAN_MODS = GLYCAN_MOD_VALS + GNO_MOD_VALS

BASE_AMINO_ACIDS = "VWPSDCYTAIMHGQENFLKR"
LEVEL2_AMINO_ACIDS = BASE_AMINO_ACIDS + 'OUBZXJ'
LEVEL2_AMINO_ACIDS_WITHOUT_AMBIGUITY = BASE_AMINO_ACIDS + 'OU'

BASE_MODS = UNIMOD_LEVEL_BASE_MOD_VALS + PSI_LEVEL_BASE_MOD_VALS + DELTA_MASS_MOD_VALS
LEVEL2_MODS = UNIMOD_LEVEL2_MOD_VALS + PSI_LEVEL2_MOD_VALS + BASE_MODS
_UNIMOD_LEVEL_BASE_MOD_VALS = ['Oxidation', 'UNIMOD:10']
_UNIMOD_LEVEL2_MOD_VALS = ['U:Oxidation', 'U:10', 'U:+1', 'U:-1', 'U:+3.1415', 'U:-3.1415']
_PSI_LEVEL_BASE_MOD_VALS = ['O-phospho-L-serine', 'MOD:00046']
_PSI_LEVEL2_MOD_VALS = ['M:O-phospho-L-serine', 'M:00046', 'M:+1', 'M:-1', 'M:+3.1415', 'M:-3.1415']
_DELTA_MASS_MOD_VALS = ['+1', '-1', '+3.1415', '-3.1415']
_MOD_INFO_VALS = ['INFO:Cool', 'INFO:Awesome', 'INFO:Radical', 'INFO:Amazing', 'INFO:Fantastic']
_CHEM_FORMULA_MOD_VALS = ['Formula:C12H22O11', 'Formula:[13C6]H12O6[12C-4]', 'Formula:CHO', 'Formula:C2H-5O']
_GLYCAN_MOD_VALS = ['Glycan:HexNAc2Hex3Neu1', 'Glycan:Hex', 'Glycan:6BAAE1B1']
_GNO_MOD_VALS = ['GNO:G59626AS', 'GNO:G62765YT', 'G:G59626AS', 'G:G62765YT', 'G:+1', 'G:-1', 'G:+3.1415', 'G:-3.1415']
_RESID_MOD_VALS = ['RESID:AA0581', 'RESID:AA0037', 'R:AA0581', 'R:AA0037', 'R:+1', 'R:-1', 'R:+3.1415', 'R:-3.1415']
_ISOTOPE_MOD_VALS = ['13C', '15N', '18O', '2H', 'T', 'D']
_STATIC_MOD_VALS = ['[Oxidation]@M', '[Oxidation]@M,C,D', '[+1]@C', '[-1]@C', '[+3.1415]@C', '[-3.1415]@C']
_XLMOD_VALS = ['XLMOD:02001', 'XLMOD:02010', 'XLMOD:02000', 'X:02001', 'X:02010', 'X:02000']
_CHARGE_ADDUCT_VALS = ['+H+', '+2Na+,-H+', '+2Na+,+H+', '2I-', '+e-']

_TOP_DOWN_MODS = _CHEM_FORMULA_MOD_VALS + _RESID_MOD_VALS
_CROSS_LINKING_MODS = _XLMOD_VALS
_GLYCAN_MODS = _GLYCAN_MOD_VALS + _GNO_MOD_VALS

_BASE_AMINO_ACIDS = "VWPSDCYTAIMHGQENFLKR"
_LEVEL2_AMINO_ACIDS = _BASE_AMINO_ACIDS + 'OUBZXJ'
_LEVEL2_AMINO_ACIDS_WITHOUT_AMBIGUITY = _BASE_AMINO_ACIDS + 'OU'

_BASE_MODS = _UNIMOD_LEVEL_BASE_MOD_VALS + _PSI_LEVEL_BASE_MOD_VALS + _DELTA_MASS_MOD_VALS
_LEVEL2_MODS = _UNIMOD_LEVEL2_MOD_VALS + _PSI_LEVEL2_MOD_VALS + _BASE_MODS


class ProformaComplianceLevel(Enum):
Expand All @@ -49,11 +49,11 @@ def _random_sequence(amino_acids: str, min_sequence_length: int, max_sequence_le
def random_sequence(level: ProformaComplianceLevel, min_sequence_length: int = 5, max_sequence_length: int = 50,
sequence_ambiguity: bool = True) -> str:
if level == ProformaComplianceLevel.BASE:
return _random_sequence(BASE_AMINO_ACIDS, min_sequence_length, max_sequence_length)
return _random_sequence(_BASE_AMINO_ACIDS, min_sequence_length, max_sequence_length)
elif level == ProformaComplianceLevel.LEVEL2:
if sequence_ambiguity:
return _random_sequence(LEVEL2_AMINO_ACIDS, min_sequence_length, max_sequence_length)
return _random_sequence(LEVEL2_AMINO_ACIDS_WITHOUT_AMBIGUITY, min_sequence_length, max_sequence_length)
return _random_sequence(_LEVEL2_AMINO_ACIDS, min_sequence_length, max_sequence_length)
return _random_sequence(_LEVEL2_AMINO_ACIDS_WITHOUT_AMBIGUITY, min_sequence_length, max_sequence_length)
else:
raise ValueError("Invalid level")

Expand All @@ -62,21 +62,21 @@ def _random_mod(mods: List[str], count: int, info: bool) -> Mod:
mod = choice(mods)
if info:
for _ in range(randint(1, 2)):
mod += f"|{choice(MOD_INFO_VALS)}"
mod += f"|{choice(_MOD_INFO_VALS)}"
return Mod(mod, count)


def random_mod(level: ProformaComplianceLevel, count: int = 1, info: bool = False) -> Mod:
if level == ProformaComplianceLevel.BASE:
return _random_mod(BASE_MODS, count, info)
return _random_mod(_BASE_MODS, count, info)
elif level == ProformaComplianceLevel.LEVEL2:
return _random_mod(LEVEL2_MODS, count, info)
return _random_mod(_LEVEL2_MODS, count, info)
elif level == ProformaComplianceLevel.TOP_DOWN:
return _random_mod(TOP_DOWN_MODS, count, info)
return _random_mod(_TOP_DOWN_MODS, count, info)
elif level == ProformaComplianceLevel.CROSS_LINKING:
return _random_mod(CROSS_LINKING_MODS, count, info)
return _random_mod(_CROSS_LINKING_MODS, count, info)
elif level == ProformaComplianceLevel.GLYCAN:
return _random_mod(GLYCAN_MODS, count, info)
return _random_mod(_GLYCAN_MODS, count, info)
elif level == ProformaComplianceLevel.SPECTRUM:
return _random_mod([], count, info)
else:
Expand Down Expand Up @@ -262,17 +262,17 @@ def spectrum_randomizer(annotation: ProFormaAnnotation):
annotation.add_charge(choice([1, 2, 3]))

for _ in range(randint(0, 3)):
mod = _random_mod(ISOTOPE_MOD_VALS, 1, False)
mod = _random_mod(_ISOTOPE_MOD_VALS, 1, False)
annotation.add_isotope_mods(mod)

for _ in range(randint(0, 3)):
mod = _random_mod(STATIC_MOD_VALS, 1, False)
mod = _random_mod(_STATIC_MOD_VALS, 1, False)
annotation.add_static_mods(mod)

if annotation.has_charge():

# Add adducts
if choice([True, False]):
annotation.add_charge_adducts(Mod(choice(CHARGE_ADDUCT_VALS), 1))
annotation.add_charge_adducts(Mod(choice(_CHARGE_ADDUCT_VALS), 1))


Loading

0 comments on commit 7cc25b0

Please sign in to comment.