Source code for pyproteome.data_sets.protein
'''
This module provides functionality for interfacing with protein data.
'''
import logging
import pyproteome as pyp
# Module-level logger, registered under the fixed name 'pyproteome.protein'
# (a literal string, not derived from __name__).
LOGGER = logging.getLogger('pyproteome.protein')
class Proteins:
    '''
    Wraps a list of proteins.

    The proteins are stored as a sorted tuple; iteration, length, hashing,
    equality and ordering are all based on that tuple.

    Parameters
    ----------
    proteins : iterable of :class:`.Protein`, optional
        Proteins to wrap. ``None`` (the default) yields an empty group.

    Attributes
    ----------
    proteins : tuple of :class:`.Protein`
        List of proteins to which a peptide sequence is mapped.
    '''

    def __init__(self, proteins=None):
        if proteins is None:
            proteins = ()

        # Sorting gives the group a canonical order for comparison / hashing.
        self.proteins = tuple(sorted(proteins))

    def __iter__(self):
        return iter(self.proteins)

    def __len__(self):
        return len(self.proteins)

    def __hash__(self):
        return hash(self.proteins)

    def __eq__(self, other):
        '''
        Compare against a string or another group of proteins.

        Parameters
        ----------
        other : str or :class:`.Proteins`
            A string matches if it equals any gene name or any accession in
            this group. Another group matches if it holds equal proteins in
            the same order.

        Returns
        -------
        bool or NotImplemented
            ``NotImplemented`` for any other operand type, so that Python
            falls back to its default handling (the objects compare unequal)
            instead of raising.
        '''
        if isinstance(other, str):
            return other in self.genes or other in self.accessions

        if not isinstance(other, Proteins):
            return NotImplemented

        return self.proteins == other.proteins

    def __lt__(self, other):
        '''
        Order groups by their sorted protein tuples.

        Returns ``NotImplemented`` for operands that are not
        :class:`.Proteins`, letting Python raise its standard ``TypeError``.
        '''
        if not isinstance(other, Proteins):
            return NotImplemented

        return self.proteins < other.proteins

    def __str__(self):
        return ' / '.join(str(i) for i in self.proteins)

    @property
    def accessions(self):
        '''
        List of UniProt accessions for a group of proteins.

        Returns
        -------
        tuple of str
        '''
        return tuple(i.accession for i in self.proteins)

    @property
    def descriptions(self):
        '''
        List of protein descriptions for a group of proteins.

        Returns
        -------
        tuple of str
        '''
        return tuple(i.description for i in self.proteins)

    @property
    def genes(self):
        '''
        List of UniProt gene names for a group of proteins.

        Returns
        -------
        tuple of str
        '''
        return tuple(i.gene for i in self.proteins)
class Protein:
    '''
    Contains information about a single protein.

    Any of ``gene``, ``description`` or ``full_sequence`` left as ``None`` is
    filled in from ``pyp.pypuniprot.get_uniprot_data(accession)``. Values
    that are passed explicitly are kept as given.

    Attributes
    ----------
    accession : str
        The UniProt accession (e.g. 'P40763').
    gene : str
        The UniProt gene name (e.g. 'STAT3').
    description : str
        A brief description of the protein (e.g. 'Signal transducer and
        activator of transcription 3').
    full_sequence : str
        The full sequence of the protein.
    '''

    def __init__(
        self,
        accession=None,
        gene=None,
        description=None,
        full_sequence=None,
    ):
        self.accession = accession
        self.gene = gene
        self.description = description
        self.full_sequence = full_sequence

        # Nothing to look up when the caller supplied every field.
        if all(i is not None for i in (gene, description, full_sequence)):
            return

        up_data = pyp.pypuniprot.get_uniprot_data(accession)

        if 'gene' in up_data:
            looked_up_gene = up_data['gene']
        elif 'id' in up_data:
            looked_up_gene = up_data['id']
        else:
            LOGGER.warning(
                'Unable to find {} in uniprot db'.format(accession)
            )
            # Fall back to the accession so the protein still has a label.
            looked_up_gene = accession

        # Only fill in what the caller left out; never clobber explicit
        # arguments with looked-up values.
        if gene is None:
            self.gene = looked_up_gene

        if description is None:
            # Guard against a present-but-empty 'descriptions' entry.
            descriptions = up_data.get('descriptions') or ['']
            self.description = descriptions[0]

        if full_sequence is None:
            self.full_sequence = up_data.get('sequence', None)

    def __hash__(self):
        return hash(self.accession)

    def __eq__(self, other):
        '''
        Two proteins are equal when their accessions are equal.

        Returns ``NotImplemented`` for operands that are not
        :class:`.Protein`, so such comparisons evaluate as unequal instead
        of raising.
        '''
        if not isinstance(other, Protein):
            return NotImplemented

        return self.accession == other.accession

    def __lt__(self, other):
        '''
        Order proteins by gene name, breaking ties on accession.

        The tie-break makes the sort order independent of input order when
        several proteins share a gene name. Returns ``NotImplemented`` for
        operands that are not :class:`.Protein`.
        '''
        if not isinstance(other, Protein):
            return NotImplemented

        if self.gene != other.gene:
            return self.gene < other.gene

        # A missing accession sorts as the empty string.
        return (self.accession or '') < (other.accession or '')

    def __str__(self):
        return '{} ({})'.format(
            self.description,
            self.gene,
        )