source: orange-bioinformatics/obiKEGG2/__init__.py @ 1550:9226929348de

Revision 1550:9226929348de, 10.8 KB checked in by ales_erjavec, 2 years ago (diff)

Removed unneeded imports, fixed some errors.

Line 
1"""\
2==============================================
3KEGG - Kyoto Encyclopedia of Genes and Genomes
4==============================================
5
6This is a python module for access to `KEGG`_ using its web services. To use this module you need to have
7`SUDS`_ python library installed (other backends are planned).
8
9.. _`KEGG`: http://www.genome.jp/kegg/
10
11"""
12from __future__ import absolute_import
13
14
15import os, sys
16from collections import defaultdict
17
18from datetime import datetime
19
20from Orange.misc import lru_cache, serverfiles
21
22from . import databases
23from . import entry
24
25from .brite import BriteEntry, Brite
26
27from . import api
28from . import conf
29from . import pathway
30
# Backward-compatible ``KEGG*`` names for the database wrappers defined
# in the ``databases`` submodule.
KEGGGenome = databases.Genome
KEGGGenes = databases.Genes
KEGGEnzymes = databases.Enzymes
KEGGReaction = databases.Reactions
KEGGPathways = databases.Pathways

# BRITE hierarchy classes re-exported under ``KEGG*`` names.
KEGGBrite = Brite
KEGGBriteEntry = BriteEntry

# Single-pathway wrapper (distinct from the ``KEGGPathways`` database above).
KEGGPathway = pathway.Pathway

# Cache location read from the package configuration.
DEFAULT_CACHE_DIR = conf.params["cache.path"]
43
44
45import obiProb
46from Orange.misc import deprecated_keywords, deprecated_attribute
47
class Organism(object):
    """Access point for the KEGG data of a single organism.

    :param org: organism identifier or name; it is resolved to a KEGG
        organism code with :meth:`organism_name_search`.
    :param genematcher: optional gene matcher.  When it is ``None`` a
        default matcher is built lazily on first access of the
        ``genematcher`` property.
    """

    def __init__(self, org, genematcher=None):
        self.org_code = self.organism_name_search(org)
        self.genematcher = genematcher
        self.api = api.CachedKeggApi()

    @property
    def org(self):
        """The resolved KEGG organism code (alias of ``org_code``)."""
        return self.org_code

    @property
    def genes(self):
        """The ``KEGGGenes`` database for this organism (created on first use)."""
        if not hasattr(self, "_genes"):
            self._genes = KEGGGenes(self.org_code)
        return self._genes

    def gene_aliases(self):
        """Return the result of ``gene_aliases()`` of the genes database."""
        # ``genes`` is a property, so it must not be called.
        return self.genes.gene_aliases()

    def pathways(self, with_ids=None):
        """Return pathway ids for this organism.

        If `with_ids` is given the query is delegated to
        ``api.get_pathways_by_genes(with_ids)``; otherwise the ``entry_id``
        of every pathway listed for the organism is returned.
        """
        if with_ids is not None:
            return self.api.get_pathways_by_genes(with_ids)
        return [p.entry_id for p in self.api.list_pathways(self.org_code)]

    def list_pathways(self):
        """Return the ids of all pathways of this organism."""
        return self.pathways()

    def get_linked_pathways(self, pathway_id):
        """Return the pathways linked to `pathway_id` as reported by the API."""
        return self.api.get_linked_pathways(pathway_id)

    def enzymes(self, genes=None):
        """Not implemented; always raises :exc:`NotImplementedError`."""
        raise NotImplementedError()

    def get_enriched_pathways(self, genes, reference=None, prob=obiProb.Binomial(), callback=None):
        """Return a dictionary with enriched pathway ids as keys and
        ``(list_of_genes, p_value, num_of_reference_genes)`` tuples as items.

        :param genes: sequence of gene ids to test (``len(genes)`` is used).
        :param reference: reference gene ids; defaults to all keys of
            ``self.genes``.
        :param prob: object providing ``p_value(k, N, m, n)``.
        :param callback: optional progress callback; it is called with
            values in the 0-50 range while pathways are looked up and in
            the 50-100 range while genes are assigned to pathways.
        """
        import orngMisc

        # pathway id -> [matched genes, p-value, reference genes in pathway]
        enriched = defaultdict(lambda: [[], 1.0, []])
        milestones = orngMisc.progressBarMilestones(len(genes), 100)
        pathways_db = KEGGPathways()

        pathways_for_gene = []
        for i, gene in enumerate(genes):
            pathways_for_gene.append(self.pathways([gene]))
            if callback and i in milestones:
                callback(i * 50.0 / len(genes))

        # precache for speed
        pathways_db.pre_cache([pid for pfg in pathways_for_gene for pid in pfg])
        for i, (gene, gene_pathways) in enumerate(zip(genes, pathways_for_gene)):
            for pathway_id in gene_pathways:
                if pathways_db.get_entry(pathway_id).gene:
                    enriched[pathway_id][0].append(gene)
            if callback and i in milestones:
                callback(50.0 + i * 50.0 / len(genes))

        reference = set(reference if reference is not None else self.genes.keys())

        for pathway_id, record in enriched.items():
            pathway_entry = pathways_db.get_entry(pathway_id)
            record[2].extend(reference.intersection(pathway_entry.gene or []))
            record[1] = prob.p_value(len(record[0]), len(reference),
                                     len(record[2]), len(genes))

        return dict((pid, (matched, p_value, len(ref)))
                    for pid, (matched, p_value, ref) in enriched.items())

    def get_genes_by_enzyme(self, enzyme):
        """Return this organism's genes listed in the entry of `enzyme`."""
        # The enzymes database is exposed in this module as ``KEGGEnzymes``.
        enzyme_entry = KEGGEnzymes().get_entry(enzyme)
        if not enzyme_entry.genes:
            return []
        return enzyme_entry.genes.get(self.org_code, [])

    def get_genes_by_pathway(self, pathway_id):
        """Return ``KEGGPathway(pathway_id).genes()``."""
        return KEGGPathway(pathway_id).genes()

    def get_enzymes_by_pathway(self, pathway_id):
        """Return ``KEGGPathway(pathway_id).enzymes()``."""
        return KEGGPathway(pathway_id).enzymes()

    def get_compounds_by_pathway(self, pathway_id):
        """Return ``KEGGPathway(pathway_id).compounds()``."""
        return KEGGPathway(pathway_id).compounds()

    def get_pathways_by_genes(self, gene_ids):
        """Return the pathways for `gene_ids` as reported by the API."""
        return self.api.get_pathways_by_genes(gene_ids)

    def get_pathways_by_enzymes(self, enzyme_ids):
        """Return ids of pathways whose enzymes include all of `enzyme_ids`."""
        enzyme_ids = set(enzyme_ids)
        enzymes_db = KEGGEnzymes()
        # Candidates are the union of the pathways listed on each enzyme entry.
        candidates = set()
        for enzyme_id in enzyme_ids:
            candidates.update(enzymes_db[enzyme_id].pathway or ())
        return [pathway_id for pathway_id in candidates
                if enzyme_ids.issubset(KEGGPathway(pathway_id).enzymes())]

    def get_pathways_by_compounds(self, compound_ids):
        """Return ids of pathways whose compounds include all of `compound_ids`."""
        compound_ids = set(compound_ids)
        # NOTE(review): ``KEGGCompounds`` is not defined or imported anywhere
        # in this module, so this call raises NameError until an alias for the
        # compounds database is added - confirm the intended class.
        compounds_db = KEGGCompounds()
        candidates = set()
        for compound_id in compound_ids:
            candidates.update(compounds_db[compound_id].pathway or ())
        return [pathway_id for pathway_id in candidates
                if compound_ids.issubset(KEGGPathway(pathway_id).compounds())]

    def get_enzymes_by_compound(self, compound_id):
        """Return the ``enzyme`` field of the entry for `compound_id`."""
        # NOTE(review): ``KEGGCompound`` is not defined or imported anywhere
        # in this module (NameError at call time) - confirm the intended class.
        return KEGGCompound()[compound_id].enzyme

    def get_enzymes_by_gene(self, gene_id):
        """Return the ``enzymes`` attribute of the entry for `gene_id`."""
        return self.genes[gene_id].enzymes

    def get_compounds_by_enzyme(self, enzyme_id):
        """Look up `enzyme_id` in ``self._enzymes_to_compounds``."""
        # NOTE(review): ``_enzymes_to_compounds`` is never assigned in this
        # class, so this raises AttributeError unless it is set externally.
        return self._enzymes_to_compounds.get(enzyme_id)

    @deprecated_keywords({"caseSensitive": "case_sensitive"})
    def get_unique_gene_ids(self, genes, case_sensitive=True):
        """Return a tuple with three elements. The first is a dictionary
        mapping from unique gene ids to gene names in `genes`, the second is
        a list of conflicting gene names and the third is a list of unknown
        genes.

        `case_sensitive` is accepted for compatibility but is not used by
        this method.
        """
        unique, conflicting, unknown = {}, [], []
        for gene in genes:
            names = self.genematcher.match(gene)
            if len(names) == 1:
                unique[names[0]] = gene
            elif len(names) == 0:
                unknown.append(gene)
            else:
                conflicting.append(gene)
        return unique, conflicting, unknown

    def get_genes(self):
        """Return the genes database (same object as the ``genes`` property)."""
        return self.genes

    @classmethod
    def organism_name_search(cls, name):
        """Resolve `name` to the ``entry_key`` of a KEGG genome entry.

        If `name` is not a key of the genome database, the database is
        searched for it; when that yields nothing, ``obiTaxonomy.search`` is
        tried and only ids that the genome search recognises are kept.  The
        first hit (if any) replaces `name`.

        :raises ValueError: if the final name is not in the genome database.
        """
        genome = KEGGGenome()
        if name not in genome:
            ids = genome.search(name)
            if not ids:
                import obiTaxonomy
                ids = obiTaxonomy.search(name)
                ids = [tax_id for tax_id in ids if genome.search(tax_id)]
            if ids:
                name = ids[0]

        try:
            return genome[name].entry_key
        except KeyError:
            raise ValueError("Organism with name='%s' not found in KEGG." % name)

    @classmethod
    def organism_version(cls, name):
        """Return the ``release`` attribute of the API's ``binfo`` for `name`."""
        name = cls.organism_name_search(name)
        genome = KEGGGenome()
        info = genome.api.binfo(name)
        return info.release

    def _set_genematcher(self, genematcher):
        self._genematcher = genematcher

    def _get_genematcher(self):
        # Build the default matcher lazily the first time it is needed.
        if getattr(self, "_genematcher", None) is None:
            import obiGene
            if self.org_code == "ddi":
                self._genematcher = obiGene.matcher([obiGene.GMKEGG(self.org_code), obiGene.GMDicty(),
                                                     [obiGene.GMKEGG(self.org_code), obiGene.GMDicty()]])
            else:
                self._genematcher = obiGene.matcher([obiGene.GMKEGG(self.org_code)])
            self._genematcher.set_targets(self.genes.keys())
        return self._genematcher

    genematcher = property(_get_genematcher, _set_genematcher)


KEGGOrganism = Organism
221   
def organism_name_search(name):
    """Module-level shortcut for ``KEGGOrganism.organism_name_search(name)``."""
    org_code = KEGGOrganism.organism_name_search(name)
    return org_code
224
def pathways(org):
    """Return the result of ``KEGGPathway.list(org)``."""
    listing = KEGGPathway.list(org)
    return listing
227
def organisms():
    """Return the result of ``KEGGOrganism.organisms()``.

    NOTE(review): the ``Organism`` class in this file defines no
    ``organisms`` attribute, so as written this call raises
    AttributeError - confirm the intended source of the organism list.
    """
    result = KEGGOrganism.organisms()
    return result
230
def from_taxid(taxid):
    """Return the KEGG organism code for the taxonomy id `taxid`.

    The genome database is searched for `taxid`; the first hit whose
    ``taxid`` equals either `taxid` itself or its replacement in
    ``genome.TAXID_MAP`` is used.  Returns ``None`` when nothing matches.
    """
    genome = KEGGGenome()
    # Hoisted out of the loop: the accepted ids do not depend on the hit.
    accepted = [taxid, genome.TAXID_MAP.get(taxid, taxid)]
    for key in genome.search(taxid):
        genome_entry = genome[key]
        if genome_entry.taxid in accepted:
            return genome_entry.org_code()
    return None
242
def to_taxid(name):
    """Return the ``taxid`` of the first genome search hit for `name`.

    Returns ``None`` when the search yields no keys.
    """
    genome = KEGGGenome()
    hits = genome.search(name)
    if not hits:
        return None
    first_key = hits[0]
    return genome[first_key].taxid
250
def create_gene_sets():
    """Placeholder; currently does nothing and returns ``None``."""
    return None
253
254import obiGene
255from Orange.misc import serverfiles
256from Orange.misc import ConsoleProgressBar
257
class MatcherAliasesKEGG(obiGene.MatcherAliasesPickled):
    """Gene matcher backed by pickled KEGG gene-id alias sets.

    :param organism: organism identifier; it is used to build the alias
        file name and is passed to ``KEGGOrganism`` when aliases have to be
        computed.
    :param kwargs: forwarded to ``obiGene.MatcherAliasesPickled.__init__``.
    """
    DOMAIN = "KEGG"
    VERSION = "v3.0"

    def __init__(self, organism, **kwargs):
        self.organism = organism
        sf = serverfiles.ServerFiles()
        files = set(sf.listfiles(self.DOMAIN))
        ids_filename = self._ids_filename()
        if ids_filename in files:
            serverfiles.update(self.DOMAIN, ids_filename)

        obiGene.MatcherAliasesPickled.__init__(self, **kwargs)

    def _ids_filename(self):
        """Return the name of the alias pickle file for this organism."""
        return "kegg_gene_id_aliases_" + self.organism + ".pickle"

    def create_aliases(self):
        """Return a list of alias sets, one per gene.

        If the alias file is listed on the server it is downloaded and
        unpickled; otherwise the aliases are computed from the organism's
        genes database and pickled to the local path.
        """
        import cPickle
        files = set(serverfiles.ServerFiles().listfiles(self.DOMAIN))
        ids_filename = self._ids_filename()
        if ids_filename in files:
            filename = serverfiles.localpath_download(self.DOMAIN, ids_filename)
            # NOTE(review): this unpickles a file fetched from the server;
            # unpickling is only safe if that source is trusted.
            with open(filename, "rb") as alias_file:
                aliases = cPickle.load(alias_file)
        else:
            pb = ConsoleProgressBar("Retriving KEGG ids:")
            kegg_org = KEGGOrganism(self.organism)
            genes = kegg_org.genes
            genes.pre_cache(progress_callback=pb.set_state)
            aliases = [set([key]) | set(gene_entry.alt_names)
                       for key, gene_entry in genes.iteritems()]
            # NOTE(review): this branch runs when the file is NOT listed on
            # the server, yet ``localpath_download`` is still used to obtain
            # the path - confirm that plain ``localpath`` was not intended.
            filename = serverfiles.localpath_download(self.DOMAIN, ids_filename)
            with open(filename, "wb") as alias_file:
                cPickle.dump(aliases, alias_file)

        return aliases

    def filename(self):
        """Return the base name ``"kegg3_" + organism``."""
        return "kegg3_" + self.organism

    def aliases_path(self):
        """Return the local path of the alias pickle file."""
        return serverfiles.localpath(self.DOMAIN, self._ids_filename())

    def create_aliases_version(self):
        """Return a version marker for the aliases.

        This is the ``"datetime"`` info of the alias file when
        ``serverfiles.listfiles`` reports it, otherwise the ``release`` of
        the organism's genes database info.
        """
        files = set(serverfiles.listfiles(self.DOMAIN))
        ids_filename = self._ids_filename()
        if ids_filename in files:
            version = serverfiles.info(self.DOMAIN, ids_filename)["datetime"]
        else:
            kegg_org = KEGGOrganism(self.organism)
            genes = kegg_org.genes
            version = genes.info.release
        return version
309
def main():
    """Run this module's doctests with an API object available as ``api``."""
    import doctest

    KEGGGenome()
    # The bare name ``KeggApi`` is never imported in this module; reach the
    # class through the imported ``api`` submodule instead.
    # NOTE(review): assumes ``api`` exposes ``KeggApi`` next to the
    # ``CachedKeggApi`` used by ``Organism`` - confirm.
    extraglobs = {"api": api.KeggApi()}
    doctest.testmod(optionflags=doctest.ELLIPSIS, extraglobs=extraglobs)


if __name__ == "__main__":
    sys.exit(main())
Note: See TracBrowser for help on using the repository browser.