source: orange-bioinformatics/obiKEGG2/api.py @ 1539:4d0cb852d2cd

Revision 1539:4d0cb852d2cd, 21.0 KB checked in by ales_erjavec, 2 years ago (diff)

If the batch has invalid ids try bisection of the ids.

Line 
1"""
2KEGG api interface.
3
4"""
5from __future__ import absolute_import
6
7from contextlib import closing
8
9from .service import web_service
10from .types import *
11
class KeggApi(object):
    """Thin wrapper around the KEGG web service.

    Every public method forwards to the call of the same name on
    ``self.service``.  Some methods additionally convert the raw reply
    (to ``str``, or to ``Definition``, ``BInfo``, ``LinkDBRelation``,
    ``SSDBRelation`` or ``MotifResult`` objects from ``.types``).

    """

    def __init__(self):
        # Service proxy that all the methods below delegate to.
        self.service = web_service()

    #################
    # Private helpers
    #################

    @staticmethod
    def _join_ids(ids):
        """Return `ids` as one space separated query string.

        A single string is returned unchanged; any other iterable of
        strings is joined with single spaces.

        """
        if isinstance(ids, basestring):
            return ids
        return " ".join(ids)

    @staticmethod
    def _text_or_none(result):
        """Return ``str(result)``, or None if the service returned None."""
        if result is None:
            return None
        return str(result)

    ##################
    # Meta information
    ##################

    def list_databases(self):
        """ Returns a list of available databases.

        >>> api.list_databases()
        [Definition(entry_id='nt',...

        """
        return [Definition.from_items(item)
                for item in self.service.list_databases()]

    def list_organisms(self):
        """ Return a list of all available organisms

        >>> api.list_organisms()
        [Definition(entry_id='hsa',...

        """
        return [Definition.from_items(item)
                for item in self.service.list_organisms()]

    def list_pathways(self, organism):
        """ Return a list of all available pathways for `organism`

        >>> api.list_pathways("hsa")
        [Definition(entry_id=',...

        """
        return [Definition.from_items(item)
                for item in self.service.list_pathways(organism)]

    #######
    # DBGET
    #######

    def binfo(self, db):
        """ Return info for database `db`

        >>> print api.binfo("gb")
        genbank          GenBank nucleic acid sequence database
        gb               Release 186.0, Oct 11
                         National Center for Biotechnology Information
                         144,458,648 entries, 132,067,413,372 bases
                         Last update: 11/10/24
                         <dbget> <fasta> <blast>

        """
        return BInfo.from_text(str(self.service.binfo(db)))

    def bfind(self, db, keywords):
        """ Search database 'db' for keywords

        `db` and `keywords` are sent to the service as one space
        separated query string.  Returns the reply as a ``str``, or None
        if the service returned None.

        """
        query = " ".join([db, keywords])
        return self._text_or_none(self.service.bfind(query))

    def bget(self, ids):
        """ Return the service's ``bget`` reply for `ids`.

        `ids` is either a single string or a sequence of id strings
        (joined with spaces before the call).  Returns a ``str``, or None
        if the service returned None.

        """
        return self._text_or_none(self.service.bget(self._join_ids(ids)))

    def btit(self, ids):
        """ Return the service's ``btit`` reply for `ids`.

        `ids` is either a single string or a sequence of id strings
        (joined with spaces before the call).  Returns a ``str``, or None
        if the service returned None.

        """
        return self._text_or_none(self.service.btit(self._join_ids(ids)))

    def bconv(self, ids):
        """ Return the service's ``bconv`` reply for `ids`.

        `ids` is either a single string or a sequence of id strings
        (joined with spaces before the call).  Returns a ``str``, or None
        if the service returned None.

        """
        return self._text_or_none(self.service.bconv(self._join_ids(ids)))

    ########
    # LinkDB
    ########

    def get_linkdb_by_entry(self, entry_id, db, offset, limit):
        """ Return a list of `LinkDBRelation`, one per item in the reply. """
        links = self.service.get_linkdb_by_entry(entry_id, db, offset, limit)
        return [LinkDBRelation(**dict(link)) for link in links]

    def get_linkdb_between_databases(self, from_db, to_db, offset, limit):
        """ Return a list of `LinkDBRelation`, one per item in the reply. """
        links = self.service.get_linkdb_between_databases(
            from_db, to_db, offset, limit)
        return [LinkDBRelation(**dict(link)) for link in links]

    def get_genes_by_enzyme(self, enzyme_id, org):
        """ Forward to the service's ``get_genes_by_enzyme`` call. """
        return self.service.get_genes_by_enzyme(enzyme_id, org)

    def get_enzymes_by_gene(self, genes_id):
        """ Forward to the service's ``get_enzymes_by_gene`` call. """
        return self.service.get_enzymes_by_gene(genes_id)

    def get_enzymes_by_compound(self, compound_id):
        """ Forward to the service's ``get_enzymes_by_compound`` call. """
        return self.service.get_enzymes_by_compound(compound_id)

    def get_enzymes_by_glycan(self, glycan_id):
        """ Forward to the service's ``get_enzymes_by_glycan`` call. """
        return self.service.get_enzymes_by_glycan(glycan_id)

    def get_enzymes_by_reaction(self, reaction_id):
        """ Forward to the service's ``get_enzymes_by_reaction`` call. """
        return self.service.get_enzymes_by_reaction(reaction_id)

    def get_compounds_by_enzyme(self, enzyme_id):
        """ Forward to the service's ``get_compounds_by_enzyme`` call. """
        return self.service.get_compounds_by_enzyme(enzyme_id)

    def get_compounds_by_reaction(self, reaction_id):
        """ Forward to the service's ``get_compounds_by_reaction`` call. """
        return self.service.get_compounds_by_reaction(reaction_id)

    def get_glycans_by_enzyme(self, enzyme_id):
        """ Forward to the service's ``get_glycans_by_enzyme`` call. """
        return self.service.get_glycans_by_enzyme(enzyme_id)

    def get_glycans_by_reaction(self, reaction_id):
        """ Forward to the service's ``get_glycans_by_reaction`` call. """
        return self.service.get_glycans_by_reaction(reaction_id)

    def get_reactions_by_enzyme(self, enzyme_id):
        """ Forward to the service's ``get_reactions_by_enzyme`` call. """
        return self.service.get_reactions_by_enzyme(enzyme_id)

    def get_reactions_by_compound(self, compound_id):
        """ Forward to the service's ``get_reactions_by_compound`` call. """
        return self.service.get_reactions_by_compound(compound_id)

    def get_reactions_by_glycan(self, glycan_id):
        """ Forward to the service's ``get_reactions_by_glycan`` call. """
        return self.service.get_reactions_by_glycan(glycan_id)

    ######
    # SSDB
    ######

    def get_best_best_neighbors_by_gene(self, genes_id, offset, limit):
        """ Return a list of `SSDBRelation`, one per item in the reply. """
        ssr = self.service.get_best_best_neighbors_by_gene(
            genes_id, offset, limit)
        return [SSDBRelation(**dict(rel)) for rel in ssr]

    def get_best_neighbors_by_gene(self, genes_id, offset, limit):
        """ Return a list of `SSDBRelation`, one per item in the reply. """
        ssr = self.service.get_best_neighbors_by_gene(genes_id, offset, limit)
        return [SSDBRelation(**dict(rel)) for rel in ssr]

    def get_reverse_best_neighbors_by_gene(self, genes_id, offset, limit):
        """ Return a list of `SSDBRelation`, one per item in the reply. """
        ssr = self.service.get_reverse_best_neighbors_by_gene(
            genes_id, offset, limit)
        return [SSDBRelation(**dict(rel)) for rel in ssr]

    def get_paralogs_by_gene(self, genes_id, offset, limit):
        """ Return a list of `SSDBRelation`, one per item in the reply. """
        ssr = self.service.get_paralogs_by_gene(genes_id, offset, limit)
        return [SSDBRelation(**dict(rel)) for rel in ssr]

    #######
    # Motif
    #######

    def get_motifs_by_gene(self, genes_id, db):
        """ Return a list of `MotifResult`, one per item in the reply. """
        motifs = self.service.get_motifs_by_gene(genes_id, db)
        return [MotifResult(**dict(motif)) for motif in motifs]

    def get_genes_by_motifs(self, motif_id_list, offset, limit):
        """ Return a list of `Definition`, one per item in the reply. """
        genes = self.service.get_genes_by_motifs(motif_id_list, offset, limit)
        return [Definition(**dict(gene)) for gene in genes]

    ####
    # KO
    ####

    def get_ko_by_gene(self, genes_id):
        """ Forward to the service's ``get_ko_by_gene`` call. """
        return self.service.get_ko_by_gene(genes_id)

    def get_ko_by_ko_class(self, ko_class_id):
        """ Forward to the service's ``get_ko_by_ko_class`` call. """
        return self.service.get_ko_by_ko_class(ko_class_id)

    def get_genes_by_ko_class(self, ko_class_id, org, offset, limit):
        """ Forward to the service's ``get_genes_by_ko_class`` call. """
        return self.service.get_genes_by_ko_class(
            ko_class_id, org, offset, limit)

    def get_genes_by_ko(self, ko_id, org):
        """ Forward to the service's ``get_genes_by_ko`` call. """
        return self.service.get_genes_by_ko(ko_id, org)

    #########
    # Pathway
    #########

    def mark_pathway_by_objects(self, pathway_id, object_id_list):
        """ Forward to the service's ``mark_pathway_by_objects`` call. """
        return self.service.mark_pathway_by_objects(pathway_id, object_id_list)

    def color_pathway_by_objects(self, pathway_id, object_id_list,
                                 fg_color_list, bg_color_list):
        """ Forward to the service's ``color_pathway_by_objects`` call. """
        return self.service.color_pathway_by_objects(
            pathway_id, object_id_list, fg_color_list, bg_color_list)

    def color_pathway_by_elements(self, pathway_id, element_id_list,
                                  fg_color_list, bg_color_list):
        """ Forward to the service's ``color_pathway_by_elements`` call. """
        return self.service.color_pathway_by_elements(
            pathway_id, element_id_list, fg_color_list, bg_color_list)

    def get_html_of_marked_pathway_by_objects(self, pathway_id,
                                              object_id_list):
        """ Forward to the service call of the same name. """
        return self.service.get_html_of_marked_pathway_by_objects(
            pathway_id, object_id_list)

    def get_html_of_colored_pathway_by_objects(self, pathway_id,
                                               object_id_list,
                                               fg_color_list, bg_color_list):
        """ Forward to the service call of the same name. """
        return self.service.get_html_of_colored_pathway_by_objects(
            pathway_id, object_id_list, fg_color_list, bg_color_list)

    def get_html_of_colored_pathway_by_elements(self, pathway_id,
                                                element_id_list,
                                                fg_color_list, bg_color_list):
        """ Forward to the service call of the same name. """
        return self.service.get_html_of_colored_pathway_by_elements(
            pathway_id, element_id_list, fg_color_list, bg_color_list)

    def get_references_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_references_by_pathway`` call. """
        return self.service.get_references_by_pathway(pathway_id)

    def get_element_relations_by_pathway(self, pathway_id):
        """ Forward to the service call of the same name. """
        return self.service.get_element_relations_by_pathway(pathway_id)

    def get_genes_by_organism(self, organism, offset=None, limit=None):
        """ Forward to the service's ``get_genes_by_organism`` call.

        When both `offset` and `limit` are None the whole range is
        requested: offset 0 and a limit equal to
        ``get_number_of_genes_by_organism(organism)``.

        """
        if offset is None and limit is None:
            offset = 0
            limit = self.get_number_of_genes_by_organism(organism)

        return self.service.get_genes_by_organism(organism, offset, limit)

    def get_number_of_genes_by_organism(self, organism):
        """ Forward to the service call of the same name. """
        return self.service.get_number_of_genes_by_organism(organism)

    ####################
    # Objects by pathway
    ####################

    def get_elements_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_elements_by_pathway`` call. """
        return self.service.get_elements_by_pathway(pathway_id)

    def get_genes_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_genes_by_pathway`` call. """
        return self.service.get_genes_by_pathway(pathway_id)

    def get_enzymes_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_enzymes_by_pathway`` call. """
        return self.service.get_enzymes_by_pathway(pathway_id)

    def get_compounds_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_compounds_by_pathway`` call. """
        return self.service.get_compounds_by_pathway(pathway_id)

    def get_drugs_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_drugs_by_pathway`` call. """
        return self.service.get_drugs_by_pathway(pathway_id)

    def get_glycans_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_glycans_by_pathway`` call. """
        return self.service.get_glycans_by_pathway(pathway_id)

    def get_reactions_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_reactions_by_pathway`` call. """
        # Must go through ``self.service``; calling
        # ``self.get_reactions_by_pathway`` here would recurse forever.
        return self.service.get_reactions_by_pathway(pathway_id)

    def get_kos_by_pathway(self, pathway_id):
        """ Forward to the service's ``get_kos_by_pathway`` call. """
        return self.service.get_kos_by_pathway(pathway_id)

    #####################
    # Pathways by objects
    #####################

    def get_pathways_by_genes(self, gene_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p) for p in self.service.get_pathways_by_genes(gene_list)]

    def get_pathways_by_enzymes(self, enzyme_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p)
                for p in self.service.get_pathways_by_enzymes(enzyme_list)]

    def get_pathways_by_compounds(self, compound_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p)
                for p in self.service.get_pathways_by_compounds(compound_list)]

    def get_pathways_by_drugs(self, drug_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p) for p in self.service.get_pathways_by_drugs(drug_list)]

    def get_pathways_by_glycans(self, glycan_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p)
                for p in self.service.get_pathways_by_glycans(glycan_list)]

    def get_pathways_by_reactions(self, reaction_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p)
                for p in self.service.get_pathways_by_reactions(reaction_list)]

    def get_pathways_by_kos(self, ko_list):
        """ Return the service reply as a list of ``str``. """
        return [str(p) for p in self.service.get_pathways_by_kos(ko_list)]

    ##########################
    # Relations among pathways
    ##########################

    def get_linked_pathways(self, pathway_id):
        """ Return the service reply as a list of ``str``.

        `pathway_id` is prefixed with ``"path:"`` if it does not already
        start with it.

        """
        if not pathway_id.startswith("path:"):
            pathway_id = "path:" + pathway_id
        return [str(p) for p in self.service.get_linked_pathways(pathway_id)]
315   
316   
317"""
318KEGG api with caching
319"""
320
321import os
322
323from . import caching
324from .caching import cached_method, cache_entry, touch_dir
325
326try:
327    from functools import lru_cache
328except ImportError:
329    # TODO: move a copy of lru_cache in .caching if distributing this as a
330    # standalone package
331    from Orange.misc import lru_cache
332
333   
class CachedKeggApi(KeggApi):
    """KeggApi whose results are cached.

    Methods decorated with ``cached_method`` use the store returned by
    :meth:`cache_store` (an sqlite3 file under ``conf.params["cache.path"]``);
    methods decorated with ``lru_cache`` are only memoized in memory.
    Apart from ``bget`` (which batches sequences of ids) every override
    simply delegates to the ``KeggApi`` method of the same name.

    """

    def __init__(self, store=None):
        KeggApi.__init__(self)
        # Keep a caller supplied store instead of silently dropping it
        # (previously ``self.store`` was only set when `store` was None).
        self.store = {} if store is None else store

    # Needed API for cached decorator.
    def cache_store(self):
        """Return a new ``Sqlite3Store`` for ``kegg_api_cache.sqlite3``.

        The file lives in the directory named by
        ``conf.params["cache.path"]``; ``touch_dir`` is called on that
        path first.

        """
        from . import conf
        path = conf.params["cache.path"]
        touch_dir(path)
        return caching.Sqlite3Store(os.path.join(path,
                                                 "kegg_api_cache.sqlite3"))

    def last_modified(self, args, kwargs=None):
        """Return the release set by `set_default_release` ("" if unset).

        `args` and `kwargs` are accepted but not used.

        """
        return getattr(self, "default_release", "")

    def set_default_release(self, release):
        """Set the release string reported by `last_modified`."""
        self.default_release = release

    ##################
    # Meta information
    ##################

    @lru_cache() # not persistently cached
    def list_databases(self):
        return KeggApi.list_databases(self)

    @cached_method
    def list_organisms(self):
        return KeggApi.list_organisms(self)

    @cached_method
    def list_pathways(self, organism):
        return KeggApi.list_pathways(self, organism)

    #######
    # DBGET
    #######

    @lru_cache() # not persistently cached
    def binfo(self, db):
        return KeggApi.binfo(self, db)

    @cached_method
    def bfind(self, db, keywords):
        return KeggApi.bfind(self, db, keywords)

    # NOTE: an earlier, shadowed definition of `bget` (which never batched)
    # was removed; this is the definition that was effective all along.
    @cached_method
    def bget(self, ids):
        """Return the ``bget`` reply for `ids`.

        A single id string goes straight to ``KeggApi.bget``; any other
        sequence of ids is handled by `_batch_bget`.

        """
        if not isinstance(ids, basestring):
            return self._batch_bget(ids)
        else:
            return KeggApi.bget(self, ids)

    def _batch_bget(self, ids):
        """Return the concatenated ``bget`` entries for a sequence `ids`.

        Ids whose single-id ``bget`` key is missing from the cache store
        are retrieved with one ``KeggApi.bget`` call, the reply is split
        on ``"///\\n"`` and every entry is stored under its own id.  If
        the number of returned entries does not match the number of
        requested ids, the request is bisected (when fewer than 4 entries
        are missing) or a ``UserWarning`` is issued.

        Finally ``self.bget`` is called for every id in `ids` and all
        non-None results are joined into one string.

        Raises ValueError if `ids` has more than 100 items.

        """
        if len(ids) > 100:
            raise ValueError("Can batch at most 100 ids at a time.")

        # Imported locally so this method does not depend on `datetime`
        # being re-exported by one of the module's star imports.
        from datetime import datetime
        import warnings

        bget = self.bget
        with closing(bget.cache_store()) as store:
            # Which ids are not cached yet.
            # TODO: Invalidate entries by release string.
            uncached = [entry_id for entry_id in ids
                        if bget.key_from_args((entry_id,)) not in store]

        if uncached:
            # in case there are duplicate ids
            uncached = sorted(set(uncached))
            rval = KeggApi.bget(self, uncached)
            entries = rval.split("///\n") if rval is not None else []

            if entries and not entries[-1].strip():
                # Drop the empty remainder after the last separator
                del entries[-1]

            if len(entries) == len(uncached):
                # One entry per requested id: store each entry under its
                # id, with the separator that `split` removed restored.
                with closing(bget.cache_store()) as store:
                    for entry_id, entry in zip(uncached, entries):
                        key = bget.key_from_args((entry_id,))
                        store[key] = cache_entry(entry + "///\n",
                                                 mtime=datetime.now())
            elif len(uncached) > 1 and len(uncached) - len(entries) < 4:
                # Some ids were not returned; try to bisect the uncached
                # list.  `//` keeps the split index an integer.
                split = len(uncached) // 2
                self._batch_bget(uncached[:split])
                self._batch_bget(uncached[split:])
            else:
                warnings.warn("Batch contains invalid ids", UserWarning)

        # Finally join all the results, but drop all None objects
        entries = [entry for entry in map(bget, ids) if entry is not None]
        return "".join(entries)

    @cached_method
    def btit(self, ids):
        return KeggApi.btit(self, ids)

    @cached_method
    def bconv(self, ids):
        return KeggApi.bconv(self, ids)

    ########
    # LinkDB
    ########

    @cached_method
    def get_linkdb_by_entry(self, entry_id, db, offset, limit):
        return KeggApi.get_linkdb_by_entry(self, entry_id, db, offset, limit)

    @cached_method
    def get_linkdb_between_databases(self, from_db, to_db, offset, limit):
        return KeggApi.get_linkdb_between_databases(self, from_db, to_db, offset, limit)

    @cached_method
    def get_genes_by_enzyme(self, enzyme_id, org):
        return KeggApi.get_genes_by_enzyme(self, enzyme_id, org)

    @cached_method
    def get_enzymes_by_gene(self, genes_id):
        return KeggApi.get_enzymes_by_gene(self, genes_id)

    @cached_method
    def get_enzymes_by_compound(self, compound_id):
        return KeggApi.get_enzymes_by_compound(self, compound_id)

    @cached_method
    def get_enzymes_by_glycan(self, glycan_id):
        return KeggApi.get_enzymes_by_glycan(self, glycan_id)

    @cached_method
    def get_enzymes_by_reaction(self, reaction_id):
        return KeggApi.get_enzymes_by_reaction(self, reaction_id)

    @cached_method
    def get_compounds_by_enzyme(self, enzyme_id):
        return KeggApi.get_compounds_by_enzyme(self, enzyme_id)

    @cached_method
    def get_compounds_by_reaction(self, reaction_id):
        return KeggApi.get_compounds_by_reaction(self, reaction_id)

    @cached_method
    def get_glycans_by_enzyme(self, enzyme_id):
        return KeggApi.get_glycans_by_enzyme(self, enzyme_id)

    @cached_method
    def get_glycans_by_reaction(self, reaction_id):
        return KeggApi.get_glycans_by_reaction(self, reaction_id)

    @cached_method
    def get_reactions_by_enzyme(self, enzyme_id):
        return KeggApi.get_reactions_by_enzyme(self, enzyme_id)

    @cached_method
    def get_reactions_by_compound(self, compound_id):
        return KeggApi.get_reactions_by_compound(self, compound_id)

    @cached_method
    def get_reactions_by_glycan(self, glycan_id):
        return KeggApi.get_reactions_by_glycan(self, glycan_id)

    ######
    # SSDB
    ######

    @cached_method
    def get_best_best_neighbors_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_best_best_neighbors_by_gene(self, genes_id, offset, limit)

    @cached_method
    def get_best_neighbors_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_best_neighbors_by_gene(self, genes_id, offset, limit)

    @cached_method
    def get_reverse_best_neighbors_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_reverse_best_neighbors_by_gene(self, genes_id, offset, limit)

    @cached_method
    def get_paralogs_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_paralogs_by_gene(self, genes_id, offset, limit)

    #######
    # Motif
    #######

    @cached_method
    def get_motifs_by_gene(self, genes_id, db):
        return KeggApi.get_motifs_by_gene(self, genes_id, db)

    @cached_method
    def get_genes_by_motifs(self, motif_id_list, offset, limit):
        return KeggApi.get_genes_by_motifs(self, motif_id_list, offset, limit)

    ####
    # KO
    ####

    @cached_method
    def get_ko_by_gene(self, genes_id):
        return KeggApi.get_ko_by_gene(self, genes_id)

    @cached_method
    def get_ko_by_ko_class(self, ko_class_id):
        # Delegate to the base class method like every other override;
        # ``KeggApi.service`` does not exist (``service`` is an instance
        # attribute set in ``KeggApi.__init__``).
        return KeggApi.get_ko_by_ko_class(self, ko_class_id)

    @cached_method
    def get_genes_by_ko_class(self, ko_class_id, org, offset, limit):
        return KeggApi.get_genes_by_ko_class(self, ko_class_id, org, offset, limit)

    @cached_method
    def get_genes_by_ko(self, ko_id, org):
        return KeggApi.get_genes_by_ko(self, ko_id, org)

    #########
    # Pathway
    #########

    # TODO

    @cached_method
    def get_genes_by_organism(self, organism, offset=None, limit=None):
        return KeggApi.get_genes_by_organism(self, organism, offset=offset, limit=limit)

    @cached_method
    def get_number_of_genes_by_organism(self, organism):
        return KeggApi.get_number_of_genes_by_organism(self, organism)

    @cached_method
    def get_pathways_by_genes(self, gene_list):
        return KeggApi.get_pathways_by_genes(self, gene_list)

    @cached_method
    def get_pathways_by_enzymes(self, enzyme_list):
        return KeggApi.get_pathways_by_enzymes(self, enzyme_list)

    @cached_method
    def get_pathways_by_compounds(self, compound_list):
        return KeggApi.get_pathways_by_compounds(self, compound_list)

    @cached_method
    def get_pathways_by_drugs(self, drug_list):
        return KeggApi.get_pathways_by_drugs(self, drug_list)

    @cached_method
    def get_pathways_by_glycans(self, glycan_list):
        return KeggApi.get_pathways_by_glycans(self, glycan_list)

    @cached_method
    def get_pathways_by_reactions(self, reaction_list):
        return KeggApi.get_pathways_by_reactions(self, reaction_list)

    @cached_method
    def get_pathways_by_kos(self, ko_list):
        return KeggApi.get_pathways_by_kos(self, ko_list)

    @cached_method
    def get_elements_by_pathway(self, pathway_id):
        return KeggApi.get_elements_by_pathway(self, pathway_id)

    @cached_method
    def get_genes_by_pathway(self, pathway_id):
        return KeggApi.get_genes_by_pathway(self, pathway_id)

    @cached_method
    def get_enzymes_by_pathway(self, pathway_id):
        return KeggApi.get_enzymes_by_pathway(self, pathway_id)

    @cached_method
    def get_compounds_by_pathway(self, pathway_id):
        return KeggApi.get_compounds_by_pathway(self, pathway_id)

    @cached_method
    def get_drugs_by_pathway(self, pathway_id):
        return KeggApi.get_drugs_by_pathway(self, pathway_id)

    @cached_method
    def get_glycans_by_pathway(self, pathway_id):
        return KeggApi.get_glycans_by_pathway(self, pathway_id)

    @cached_method
    def get_reactions_by_pathway(self, pathway_id):
        return KeggApi.get_reactions_by_pathway(self, pathway_id)

    @cached_method
    def get_kos_by_pathway(self, pathway_id):
        return KeggApi.get_kos_by_pathway(self, pathway_id)
641   
Note: See TracBrowser for help on using the repository browser.