Source code for pyproteome.pathways.go


from goatools.base import download_go_basic_obo
from goatools.base import download_ncbi_associations
from goatools.go_search import GoSearch
from goatools.anno.genetogo_reader import Gene2GoReader
from collections import defaultdict

import importlib
import re

TAXA = {
    'Mus musculus': 10090,
    'Homo sapiens': 9606,
}

[docs]def get_go_ids(go_ids, species='Homo sapiens', add_children=True): ''' Fetch all gene symbols associated with a list of gene ontology term IDs. Parameters ---------- go_ids : str or list of str species : str, optional add_children : bool, optional Include all child terms of input GO IDs. Returns ------- list of str ''' assert species in TAXA if isinstance(go_ids, str): go_ids = [go_ids] download_go_basic_obo('db/go/go-basic.obo') download_ncbi_associations('db/go/gene2go') taxid = TAXA[species] fin_symbols = 'genes_NCBI_{TAXID}_All.py'.format(TAXID=taxid) module_name = ''.join(['goatools.test_data.', fin_symbols[:-3]]) module = importlib.import_module(module_name) GeneID2nt = module.GENEID2NT go2geneids = Gene2GoReader( 'db/go/gene2go', taxids=[taxid], ) go2items = defaultdict(list) for i in go2geneids.taxid2asscs[taxid]: go2items[i.GO_ID].append(i.DB_ID) srchhelp = GoSearch('db/go/go-basic.obo', go2items=go2items) # Add children GOs gos_all = srchhelp.add_children_gos(go_ids) # Get Entrez GeneIDs for cell cycle GOs gene_ids = set() for go_items in [ go_ids, gos_all if add_children else [], ]: gene_ids.update(srchhelp.get_items(go_items)) genes = [] for geneid in gene_ids: nt = GeneID2nt.get(geneid, None) if nt is not None: genes.append(nt.Symbol) return genes