Source code for pyproteome.motifs.phosphosite

'''
This file includes functions for downloading kinase-substrate associations from
PhosphoSite Plus (https://www.phosphosite.org/).
'''

from io import BytesIO
import gzip
import os
import requests

import numpy as np
import pandas as pd

import pyproteome as pyp
from . import motif, logo


# Location of the gzipped, tab-separated Kinase-Substrate Dataset that
# get_data() downloads and parses.
DATA_URL = 'https://www.phosphosite.org/downloads/Kinase_Substrate_Dataset.gz'


@pyp.utils.memoize
def get_data():
    '''
    Download the Kinase-Substrate Dataset from Phosphosite Plus.

    The gzipped, tab-separated file at ``DATA_URL`` is fetched over HTTP,
    decompressed in memory and parsed, skipping its first two lines.

    Returns
    -------
    df : :class:`pandas.DataFrame`

    Raises
    ------
    :class:`requests.HTTPError`
        If the server responds with an error status code.
    '''
    # The whole body is read through ``.content``, so streaming the response
    # gains nothing. The timeout keeps a stalled server from blocking the
    # caller indefinitely.
    response = requests.get(DATA_URL, timeout=60)

    # Fail here with a clear HTTP error instead of handing an error page to
    # the gzip reader, which would only report a confusing decoding failure.
    response.raise_for_status()

    with gzip.GzipFile(fileobj=BytesIO(response.content)) as f:
        df = pd.read_csv(f, skiprows=range(2), sep='\t')

    return df
def generate_logos(
    species,
    kinases=None,
    min_foreground=10,
):
    '''
    Generate logos for all kinases documented on Phosphosite Plus.

    Only kinase-substrate pairs where both the kinase and the substrate
    belong to the selected species are used. The background for every logo
    is the full set of substrate sites remaining after that species filter.

    Parameters
    ----------
    species : str
        Species name (i.e. 'Human' or 'Homo sapiens')
    kinases : list of str, optional
        Kinases to generate logos for. By default, every kinase present in
        the species-filtered dataset is used, in sorted order.
    min_foreground : int, optional
        Minimum number of substrates needed for logo generation.

    Returns
    -------
    list of :class:`matplotlib.figure.Figure`
    '''
    species = pyp.species.ORGANISM_MAPPING.get(species, species).lower()

    df = get_data()

    # ``.str.lower()`` passes missing values through (they then compare
    # unequal to the species) instead of raising on non-string entries.
    same_species = (
        (df['KIN_ORGANISM'].str.lower() == species) &
        (df['SUB_ORGANISM'].str.lower() == species)
    )
    df = df[same_species]

    if kinases is None:
        kinases = sorted(set(df['KINASE']))

    # The background is identical for every kinase, so build it only once.
    background = list(df['SITE_+/-7_AA'])

    figs = []

    for kinase in kinases:
        fore = list(df.loc[df['KINASE'] == kinase, 'SITE_+/-7_AA'])

        # Skip kinases with too few known substrates.
        if len(fore) < min_foreground:
            continue

        # Only the first element of the value returned by logo.logo is kept.
        f = logo.logo(
            fore=fore,
            back=background,
            title=kinase,
        )[0]

        figs.append(f)

    return figs