Source code for pyproteome.pathways.ptmsigdb


import os
import requests

import pandas as pd

import pyproteome as pyp


# URL template for the PTMSigDB v1.9.0 UniProt-keyed GMT files hosted in the
# broadinstitute/ssGSEA2.0 GitHub repository. The ``{name}`` placeholder is
# filled in with the lower-cased species name by ``get_ptmsigdb``.
PTMSIGDB_URL = (
    'https://raw.githubusercontent.com/'
    'broadinstitute/ssGSEA2.0/master/db/ptmsigdb/'
    'ptm.sig.db.all.uniprot.{name}.v1.9.0.gmt'
)


@pyp.utils.memoize
def get_ptmsigdb(species):
    '''
    Download phospho sets for PTMSigDB.

    The GMT file for the requested species is fetched from
    ``PTMSIGDB_URL`` and parsed one signature set per non-blank line. Each
    line is tab-separated: set name, description, then the site entries.
    Site entries ending in ``u`` go into the up set and entries ending in
    ``d`` go into the down set, with that trailing direction field removed.

    Parameters
    ----------
    species : str
        Species identifier. It is looked up in
        ``pyp.species.ORGANISM_MAPPING`` (falling back to the value itself)
        and lower-cased; the result must be 'human', 'mouse' or 'rat'.

    Returns
    -------
    df : :class:`pandas.DataFrame`
        One row per signature set with the columns ``name``, ``up_set`` and
        ``down_set``; the two set columns hold Python sets of site strings
        in which ';' separators have been replaced by ','.

    Raises
    ------
    AssertionError
        If the resolved species is not supported (note that assertions are
        skipped when Python runs with ``-O``).
    requests.exceptions.HTTPError
        If the server answers with an error status.
    requests.exceptions.Timeout
        If the server does not respond within the timeout.
    '''
    species = pyp.species.ORGANISM_MAPPING.get(species, species).lower()
    assert species in ['human', 'mouse', 'rat'], (
        'unsupported species for PTMSigDB: {!r}'.format(species)
    )

    def _get_data(line):
        # GMT layout: <set name> \t <description> \t <site> \t <site> ...
        fields = line.split('\t')

        # Normalize the separators inside each site entry from ';' to ','.
        sites = {
            i.strip().replace(';', ',')
            for i in fields[2:]
        }
        # The last ','-separated field is the direction flag; drop it and
        # keep the remainder as the site identifier.
        up_sites = {
            i.rsplit(',', 1)[0]
            for i in sites
            if i.endswith('u')
        }
        down_sites = {
            i.rsplit(',', 1)[0]
            for i in sites
            if i.endswith('d')
        }

        title, description = fields[:2]

        return (
            title,
            description,
            up_sites,
            down_sites,
        )

    # `species` is already lower-cased above.
    url = PTMSIGDB_URL.format(name=species)

    # Always pass a timeout: without one, requests waits forever on a
    # stalled server. The body is read in full below, so streaming is not
    # requested.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    data = [
        _get_data(line)
        for line in response.content.decode('utf-8').split('\n')
        if line.strip()
    ]

    pathways_df = pd.DataFrame(
        data=data,
        columns=['name', 'Description', 'up_set', 'down_set'],
    )

    # The description column is only needed while parsing; callers get the
    # set name and the two direction-specific site sets.
    return pathways_df[['name', 'up_set', 'down_set']]