'''
This module provides functionality for post-translational modifications.
Wraps modifications in a structured class and allows filtering of
modifications by amino acid and modification type.
'''
from collections import defaultdict
import copy
LABEL_NAME_TARGETS = (
'TMT', 'ITRAQ', 'plex',
)
'''
Substrings used to identify and import novel label names from .msf files.
'''
LABEL_NAMES = defaultdict(set)
LABEL_NAMES['TMT10'].add('K')
LABEL_NAMES['TMT10'].add('N-term')
LABEL_NAMES['TMT6'].add('K')
LABEL_NAMES['TMT6'].add('N-term')
'''
Names of modifications used for quantification of peptide abundances.
'''
MERGE_UNDERLABELED = True
'''
Merge peptides that have satured TMT labeling with peptides that are underlabeled.
'''
[docs]class Modifications:
'''
A list of modifications.
Wraps the Modification objects and provides several utility functions.
Attributes
----------
mods : list of :class:`.Modification`
'''
def __init__(self, mods=None):
'''
Initialize from a list of modifications.
Parameters
----------
mods : list of :class:`.Modification`
'''
self.mods = mods or ()
def __iter__(self):
return iter(self.mods)
def __len__(self):
return len(self.mods)
[docs] def copy(self):
'''
Creates a copy of a set of modifications. Does not copy the underlying
sequence object.
Returns
-------
mods : :class:`.Modifications`
'''
new = copy.copy(self)
new.mods = tuple(i.copy() for i in new.mods)
return new
[docs] def skip_labels(self):
'''
Get modifications, skipping over any that are peptide labels.
Returns
-------
mods : list of :class:`.Modification`
'''
return [
mod
for mod in self.mods
if not any(label in mod.mod_type for label in LABEL_NAMES)
]
[docs] def get_mods(self, letter_mod_types):
'''
Filter the list of modifications.
Only keeps modifications with a given letter, mod_type, or both.
Examples
--------
>>> from pyproteome.sequence import Sequence
>>> from pyproteome.modification import Modification, Modifications
>>> s = Sequence(pep_seq='SVYTEIK')
>>> m = Modifications(
... [
... Modification(mod_type='TMT', nterm=True, sequence=s),
... Modification(mod_type='Phospho', rel_pos=2, sequence=s),
... Modification(mod_type='TMT', rel_pos=6, sequence=s),
... ]
... )
>>> m.get_mods('TMT')
['TMT A0', 'TMT K6']
>>> m.get_mods('Phospho')
['pY2']
>>> m.get_mods('Y')
['pY2']
>>> m.get_mods('S')
[]
>>> m.get_mods([('Y', 'Phospho')])
['pY2']
>>> m.get_mods([('S', 'Phospho')])
[]
Parameters
----------
letter_mod_types : list of tuple of str, str
Returns
-------
mods : :class:`.Modifications`
'''
any_letter, any_mod, letter_mod = \
_extract_letter_mods(letter_mod_types)
return Modifications(
tuple(
mod
for mod in self.mods
if allowed_mod_type(
mod,
any_letter=any_letter,
any_mod=any_mod,
letter_mod=letter_mod,
)
)
)
def __hash__(self):
return hash(
tuple(
sorted(self.skip_labels(), key=lambda x: x.to_tuple())
# sorted(self.mods, key=lambda x: x.to_tuple())
),
)
def __eq__(self, other):
if not isinstance(other, Modifications):
raise TypeError()
if MERGE_UNDERLABELED:
self_mods = sorted(self.skip_labels(), key=lambda x: x.to_tuple())
o_mods = sorted(other.skip_labels(), key=lambda x: x.to_tuple())
else:
self_mods = sorted(self.mods, key=lambda x: x.to_tuple())
o_mods = sorted(other.mods, key=lambda x: x.to_tuple())
return tuple(self_mods) == tuple(o_mods)
def __lt__(self, other):
return self.skip_labels() < other.skip_labels()
def __repr__(self, absolute=True, skip_labels=True):
return self.__str__(absolute=absolute, skip_labels=skip_labels)
def __str__(
self,
absolute=True,
skip_labels=True,
prot_index=None,
show_mod_type=True,
):
if len(self.mods) == 0:
return ''
if skip_labels:
lst = list(self.skip_labels())
else:
lst = list(iter(self))
if not lst:
return ''
def _mod_prot(i):
return ', '.join(
'{}{}{}{}'.format(
mod.display_mod_type() if show_mod_type else '',
mod.letter,
1 + (mod.abs_pos[i] if absolute else mod.rel_pos),
'' if mod.exact[i] else '*'
)
for mod in lst
)
if prot_index is None:
return ' / '.join(
_mod_prot(i)
for i in range(len(lst[0].exact))
)
else:
return _mod_prot(prot_index)
[docs]class Modification:
'''
Contains information for a single peptide modification.
Attributes
----------
rel_pos : int
The relative position of a modification in a peptide sequence
(0-indexed).
mod_type : str
A short name for this type of modification (i.e. 'Phospho',
'Carbamidomethyl', 'Oxidation', 'TMT6', 'TMT10')
nterm : bool
Boolean indicator of whether this modification is applied to the
peptide N-terminus.
cterm : bool
Boolean indicator of whether this modification is applied to the
peptide C-terminus.
'''
def __init__(
self,
rel_pos=0,
mod_type='',
sequence=None,
nterm=False,
cterm=False,
):
self.rel_pos = rel_pos
self.mod_type = mod_type
self.nterm = nterm
self.cterm = cterm
self.sequence = sequence
[docs] def display_mod_type(self):
'''
Return the mod_type in an abbreviated form (i.e. 'p' for 'Phospho')
Returns
-------
abbrev : str
'''
if self.mod_type in ['Phospho']:
return 'p'
if self.mod_type in ['Carbamidomethyl']:
return 'cm'
if self.mod_type in ['Oxidation']:
return 'ox'
return self.mod_type
[docs] def to_tuple(self):
return (
self.rel_pos,
self.mod_type,
self.nterm,
self.cterm,
self.letter,
self.abs_pos,
self.exact,
)
def __hash__(self):
return hash(self.to_tuple())
def __lt__(self, other):
return self.to_tuple() < other.to_tuple()
def __eq__(self, other):
if not isinstance(other, Modification):
raise TypeError()
return self.to_tuple() == other.to_tuple()
[docs] def copy(self):
'''
Creates a copy of a modification. Does not copy the underlying sequence
object.
Returns
-------
mod : :class:`.Modification`
'''
new = copy.copy(self)
return new
@property
def letter(self):
'''
This modification's one-letter amino acid code (i.e. 'Y'), or 'N-term'
/ 'C-term' for terminal modifications.
Returns
-------
letter : str
'''
if self.sequence is None:
return ''
if self.nterm:
return 'N-term'
elif self.cterm:
return 'C-term'
return self.sequence.pep_seq[self.rel_pos].upper()
@property
def abs_pos(self):
'''
The absolute positions of this modification in the full sequence
of each mapped protein (0-indexed).
Returns
-------
tuple of int
'''
if self.sequence is None:
return ()
return tuple(
self.rel_pos + match.rel_pos
for match in self.sequence.protein_matches
)
@property
def exact(self):
'''
Indicates whether each peptide-protein mapping for this modification is
an exact or partial match.
Returns
-------
exact : tuple of bool
'''
if self.sequence is None:
return ()
return tuple(
match.exact
for match in self.sequence.protein_matches
)
def __repr__(self):
return (
'<Modification {}{}({})>'
).format(
self.letter,
(self.rel_pos + 1) if not self.cterm and not self.nterm else '',
self.mod_type,
)
[docs]def allowed_mod_type(mod, any_letter=None, any_mod=None, letter_mod=None):
'''
Check if a modification is of a given type.
Filters by letter, mod_type, or both.
Parameters
----------
mod : :class:`.Modification`
any_letter : set of str
any_mod : set of str
letter_mod : set of tuple of str, str
Returns
-------
is_type : bool
'''
return (
(
any_letter is None or
mod.letter in any_letter
) or (
any_mod is None or
mod.mod_type in any_mod
) or (
letter_mod is None or
(mod.letter, mod.mod_type) in letter_mod
)
)
def _extract_letter_mods(letter_mod_types=None):
if letter_mod_types is None:
return None, None, None
if isinstance(letter_mod_types, str):
letter_mod_types = (letter_mod_types,)
any_letter = set()
any_mod = set()
letter_mod = set()
for elem in letter_mod_types:
if not isinstance(elem, tuple):
if len(elem) == 1:
any_letter.add(elem.upper())
else:
any_mod.add(elem)
continue
letter, mod_type = elem
if letter is None and mod_type is None:
raise Exception('Need at least one letter or mod type not None')
elif letter is None and mod_type is not None:
any_mod.add(mod_type)
elif letter is not None and mod_type is None:
any_letter.add(letter.upper())
else:
letter_mod.add((letter.upper(), mod_type))
return any_letter, any_mod, letter_mod