source: orange-bioinformatics/_bioinformatics/obiKEGG/api.py @ 1734:91d14dd2cf0e

Revision 1734:91d14dd2cf0e, 20.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 14 months ago (diff)

obiKEGG code style fixes.

Line 
1"""
2KEGG api interface.
3
4"""
5from __future__ import absolute_import
6
7from datetime import datetime
8from contextlib import closing
9from operator import itemgetter
10import warnings
11
12from .service import web_service
13from .types import OrganismSummary, Definition, BInfo, Link
14
15
# Table of known KEGG databases, one 4-tuple per database.
# NOTE(review): the fields look like (display title, database name,
# short database prefix, entry id letter prefix), with None where no
# value applies -- confirm against the code that consumes this table.
DATABASES = [
    ("KEGG Pathway", "pathway", "path", None),
    ("KEGG Brite", "brite", "br", None),
    ("KEGG Module", "module", "md", "M"),
    ("KEGG Disease", "disease", "ds", "H"),
    ("KEGG Drug", "drug", "dr", "D"),
    ("KEGG Orthology", "orthology", "ko", "K"),
    ("KEGG Genome", "genome", "genome", "T"),
    ("KEGG Genomes", "genomes", "gn", "T"),
    ("KEGG Genes", "genes", None, None),
    ("KEGG Ligand", "ligand", "ligand", None),
    ("KEGG Compound", "compound", "cpd", "C"),
    ("KEGG Glycan", "glycan", "gl", "G"),
    ("KEGG Reaction", "reaction", "rn", "R"),
    ("KEGG RPair", "rpair", "rp", "RP"),
    ("KEGG RClass", "rclass", "rc", "RC"),
    ("KEGG Enzyme", "enzyme", "ec", "E")
]
34
35
36def _link_targets(links):
37    return sorted(set(map(itemgetter(1), links)))
38
39
class KeggApi(object):
    """
    An abstraction of the KEGG api on top of the KEGG REST web service.

    All requests go through `self.service`. Several methods have no
    replacement in the KEGG REST api and raise `NotImplementedError`.
    """

    def __init__(self):
        self.service = web_service()

    def list_organisms(self):
        """
        Return a list of all available organisms.

        Each line of the service response is parsed with
        `OrganismSummary.from_str`.

        >>> api.list_organisms()  # doctest: +SKIP

        """
        lines = self.service.list.organism.get().splitlines()
        return [OrganismSummary.from_str(line) for line in lines]

    def list_pathways(self, organism):
        """
        Return a list of all available pathways for `organism`

        >>> api.list_pathways("hsa")
        [Definition(entry_id=',...

        """
        lines = self.service.list.pathway(organism).get().splitlines()
        return [Definition.from_str(line) for line in lines]

    def list(self, db):
        """
        Return a list of all available entries in database `db`.
        """
        lines = self.service.list(db).get().splitlines()
        return [Definition.from_str(line) for line in lines]

    #######
    # DBGET
    #######

    def info(self, db):
        """
        Return info for database `db`

        >>> print api.info("pathway")
        BInfo(entry_id='path', definition='KEGG Pathway Database', ...

        """
        result = self.service.info(db).get()
        return BInfo.from_text(str(result))

    def find(self, db, keywords):
        """
        Search database 'db' for keywords.

        `keywords` is a single keyword string or a sequence of keyword
        strings; multiple keywords are joined with '+'. The raw service
        response is returned.
        """
        if isinstance(keywords, basestring):
            keywords = [keywords]

        return self.service.find(db)("+".join(keywords)).get()

    def get(self, ids):
        """
        Retrieve database entries for `ids` list.

        `ids` is a single id string or a sequence of id strings (joined
        with '+'). The raw service response is returned.
        """
        if not isinstance(ids, basestring):
            # Sequence of ids
            ids = "+".join(ids)

        return self.service.get(ids).get()

    def conv(self, ids):
        """
        Not implemented; always raises `NotImplementedError`.
        """
        raise NotImplementedError()

    def link(self, target_db, source_db=None, ids=None):
        """
        Return links into `target_db` from `source_db` or from `ids`.

        Exactly one of `source_db` and `ids` must be supplied. `ids` is
        a single id string or a sequence of id strings.

        Return a list of `Link` records, one for each line of the
        service response.

        Raise `ValueError` if neither or both of `source_db` and `ids`
        are supplied.
        """
        if not (source_db or ids):
            raise ValueError("One of 'source_db' or 'ids' must be supplied")
        if source_db and ids:
            raise ValueError("Only one 'source_db' or 'ids' must be supplied")

        if source_db:
            query = source_db
        else:
            if isinstance(ids, basestring):
                # A single id: joining a string directly would put a '+'
                # between each of its characters.
                ids = [ids]
            query = "+".join(ids)

        result = self.service.link(target_db)(query).get()
        # Split with the line's own method so that both str and unicode
        # responses are handled.
        return [Link._make(line.split()) for line in result.splitlines()]

    def get_genes_by_enzyme(self, enzyme_id, org):
        return _link_targets(self.link(org, ids=[enzyme_id]))

    def get_enzymes_by_gene(self, gene_id):
        return _link_targets(self.link("ec", ids=[gene_id]))

    def get_enzymes_by_compound(self, compound_id):
        return _link_targets(self.link("ec", ids=[compound_id]))

    def get_enzymes_by_glycan(self, glycan_id):
        return _link_targets(self.link("ec", ids=[glycan_id]))

    def get_enzymes_by_reaction(self, reaction_id):
        return _link_targets(self.link("ec", ids=[reaction_id]))

    def get_compounds_by_enzyme(self, enzyme_id):
        return _link_targets(self.link("compound", ids=[enzyme_id]))

    def get_compounds_by_reaction(self, reaction_id):
        return _link_targets(self.link("compound", ids=[reaction_id]))

    def get_glycans_by_enzyme(self, enzyme_id):
        return _link_targets(self.link("gl", ids=[enzyme_id]))

    def get_glycans_by_reaction(self, reaction_id):
        return _link_targets(self.link("gl", ids=[reaction_id]))

    def get_reactions_by_enzyme(self, enzyme_id):
        return _link_targets(self.link("rn", ids=[enzyme_id]))

    def get_reactions_by_compound(self, compound_id):
        return _link_targets(self.link("rn", ids=[compound_id]))

    def get_reactions_by_glycan(self, glycan_id):
        return _link_targets(self.link("rn", ids=[glycan_id]))

    ######
    # SSDB
    ######

    # No replacement api in the KEGG REST api.
    def get_best_best_neighbors_by_gene(self, genes_id, offset, limit):
        raise NotImplementedError

    def get_best_neighbors_by_gene(self, genes_id, offset, limit):
        raise NotImplementedError

    def get_reverse_best_neighbors_by_gene(self, genes_id, offset, limit):
        raise NotImplementedError

    def get_paralogs_by_gene(self, genes_id, offset, limit):
        raise NotImplementedError

    #######
    # Motif
    #######

    # No replacement api in KEGG REST api
    def get_motifs_by_gene(self, genes_id, db):
        raise NotImplementedError

    def get_genes_by_motifs(self, motif_id_list, offset, limit):
        raise NotImplementedError

    ####
    # KO
    ####

    def get_ko_by_gene(self, genes_id):
        raise NotImplementedError

    def get_ko_by_ko_class(self, ko_class_id):
        raise NotImplementedError

    def get_genes_by_ko_class(self, ko_class_id, org, offset, limit):
        raise NotImplementedError

    def get_genes_by_ko(self, ko_id, org):
        raise NotImplementedError

    #########
    # Pathway
    #########

    def mark_pathway_by_objects(self, pathway_id, object_id_list):
        raise NotImplementedError

    def color_pathway_by_objects(self, pathway_id, object_id_list,
                                 fg_color_list, bg_color_list):
        raise NotImplementedError

    def color_pathway_by_elements(self, pathway_id, element_id_list,
                                  fg_color_list, bg_color_list):
        raise NotImplementedError

    def get_html_of_marked_pathway_by_objects(self, pathway_id,
                                              object_id_list):
        raise NotImplementedError

    def get_html_of_colored_pathway_by_objects(self, pathway_id,
                                               object_id_list, fg_color_list,
                                               bg_color_list):
        raise NotImplementedError

    def get_html_of_colored_pathway_by_elements(self, pathway_id,
                                                element_id_list, fg_color_list,
                                                bg_color_list):
        raise NotImplementedError

    # NOTE(review): the next two methods forward to a service attribute
    # named after the method itself, unlike the list/get/link operations
    # used elsewhere in this class -- verify the service supports them.
    def get_references_by_pathway(self, pathway_id):
        return self.service.get_references_by_pathway(pathway_id)

    def get_element_relations_by_pathway(self, pathway_id):
        return self.service.get_element_relations_by_pathway(pathway_id)

    def get_genes_by_organism(self, organism, offset=None, limit=None):
        """
        Return the entry ids listed by the service for `organism`.

        `offset` and `limit` are no longer supported; passing either
        raises `NotImplementedError`.
        """
        if offset is not None:
            raise NotImplementedError("offset is no longer supported")
        if limit is not None:
            raise NotImplementedError("limit is no longer supported.")

        res = self.service.list(organism).get().splitlines()
        # The id is the first whitespace delimited field of each line.
        return [r.split(None, 1)[0] for r in res]

    def get_number_of_genes_by_organism(self, organism):
        raise NotImplementedError

    ####################
    # Objects by pathway
    ####################

    def get_elements_by_pathway(self, pathway_id):
        raise NotImplementedError

    def get_genes_by_pathway(self, pathway_id):
        return _link_targets(self.link("genes", ids=[pathway_id]))

    def get_enzymes_by_pathway(self, pathway_id):
        return _link_targets(self.link("ec", ids=[pathway_id]))

    def get_compounds_by_pathway(self, pathway_id):
        return _link_targets(self.link("compound", ids=[pathway_id]))

    def get_drugs_by_pathway(self, pathway_id):
        return _link_targets(self.link("drug", ids=[pathway_id]))

    def get_glycans_by_pathway(self, pathway_id):
        return _link_targets(self.link("gl", ids=[pathway_id]))

    def get_reactions_by_pathway(self, pathway_id):
        return _link_targets(self.link("rn", ids=[pathway_id]))

    def get_kos_by_pathway(self, pathway_id):
        return _link_targets(self.link("ko", ids=[pathway_id]))

    #####################
    # Pathways by objects
    #####################

    # These functions returned results intersections.
    def get_pathways_by_genes(self, gene_list):
        raise NotImplementedError

    def get_pathways_by_enzymes(self, enzyme_list):
        raise NotImplementedError

    def get_pathways_by_compounds(self, compound_list):
        raise NotImplementedError

    def get_pathways_by_drugs(self, drug_list):
        raise NotImplementedError

    def get_pathways_by_glycans(self, glycan_list):
        raise NotImplementedError

    def get_pathways_by_reactions(self, reaction_list):
        raise NotImplementedError

    def get_pathways_by_kos(self, ko_list):
        raise NotImplementedError

    ##########################
    # Relations among pathways
    ##########################

    def get_linked_pathways(self, pathway_id):
        """
        Return the sorted pathway ids linked to `pathway_id`.

        A "path:" prefix is added to `pathway_id` if it is missing.
        """
        if not pathway_id.startswith("path:"):
            pathway_id = "path:" + pathway_id
        return _link_targets(self.link("pathway", ids=[pathway_id]))
316
317
318"""
319KEGG api with caching
320"""
321
322import os
323
324from . import caching
325from .caching import cached_method, cache_entry, touch_dir
326
327try:
328    from functools import lru_cache
329except ImportError:
330    # TODO: move a copy of lru_cache in .caching if distributing this as a
331    # standalone package
332    from Orange.utils import lru_cache
333
334
class CachedKeggApi(KeggApi):
    """
    A `KeggApi` whose query methods are wrapped with caching decorators.

    Most methods are decorated with `cached_method`, which uses
    `cache_store` and `last_modified` defined on this class; `info` is
    only cached in memory with `lru_cache`.
    """

    def __init__(self, store=None):
        """
        Initialize the api.

        `store` is kept on the instance as `self.store`; an empty dict
        is used when it is not supplied.
        """
        KeggApi.__init__(self)
        # Always define self.store, also when the caller passes one in.
        self.store = {} if store is None else store

    # Needed API for cached decorator.
    def cache_store(self):
        """
        Return an Sqlite3Store located in the configured cache directory.
        """
        from . import conf
        path = conf.params["cache.path"]
        # Make sure the cache directory exists before opening the store.
        touch_dir(path)
        return caching.Sqlite3Store(os.path.join(path,
                                                 "kegg_api_cache_1.sqlite3"))

    def last_modified(self, args, kwargs=None):
        """
        Return the release set by `set_default_release`, or "" if unset.
        """
        return getattr(self, "default_release", "")

    def set_default_release(self, release):
        self.default_release = release

    @cached_method
    def list_organisms(self):
        return KeggApi.list_organisms(self)

    @cached_method
    def list_pathways(self, organism):
        return KeggApi.list_pathways(self, organism)

    @cached_method
    def list(self, db):
        return KeggApi.list(self, db)

    @lru_cache()  # not persistently cached
    def info(self, db):
        return KeggApi.info(self, db)

    @cached_method
    def find(self, db, keywords):
        return KeggApi.find(self, db, keywords)

    @cached_method
    def get(self, ids):
        """
        Retrieve entries for `ids` (a single id string or a sequence).

        A sequence of ids is delegated to `_batch_get`.
        """
        if not isinstance(ids, basestring):
            return self._batch_get(ids)
        else:
            return KeggApi.get(self, ids)

    def _batch_get(self, ids):
        """
        Retrieve the entries for at most 10 `ids` as one joined string.

        Ids missing from the cache store are fetched with a single
        request and each returned entry is stored under its own id.
        Ids whose entry could not be identified in the response are
        reported with a warning.

        Raise `ValueError` if more than 10 ids are given.
        """
        if len(ids) > 10:
            raise ValueError("Can batch at most 10 ids at a time.")

        get = self.get
        uncached = []
        unmatched = set()

        with closing(get.cache_store()) as store:
            # Which ids are already cached
            # TODO: Invalidate entries by release string.
            for entry_id in ids:
                key = get.key_from_args((entry_id,))
                if key not in store:
                    uncached.append(entry_id)

        if uncached:
            # in case there are duplicate ids
            uncached = sorted(set(uncached))
            rval = KeggApi.get(self, uncached)

            if rval is not None:
                # Entries in the response are terminated by a '///' line.
                entries = rval.split("///\n")
            else:
                entries = []

            if entries and not entries[-1].strip():
                # Delete the last single newline entry if present
                del entries[-1]

            if len(entries) != len(uncached):
                # Some ids returned no entry; pair the entries that did
                # come back with their ids by inspecting the entry text.
                new_uncached, entries = match_by_ids(uncached, entries)
                unmatched = set(uncached) - set(new_uncached)
                uncached = new_uncached
                warnings.warn("Unable to match entries for keys: %s." %
                              ", ".join(map(repr, unmatched)))

            with closing(get.cache_store()) as store:
                for entry_id, entry in zip(uncached, entries):
                    key = get.key_from_args((entry_id,))
                    if entry is not None:
                        # Restore the terminator removed by the split.
                        entry = entry + "///\n"
                    store[key] = cache_entry(entry, mtime=datetime.now())

        # Finally join all the results, but drop all None objects
        entries = [entry for entry in map(get, ids) if entry is not None]

        rval = "".join(entries)
        return rval

    @cached_method
    def conv(self, ids):
        return KeggApi.conv(self, ids)

    ########
    # LinkDB
    ########

    @cached_method
    def get_genes_by_enzyme(self, enzyme_id, org):
        return KeggApi.get_genes_by_enzyme(self, enzyme_id, org)

    @cached_method
    def get_enzymes_by_gene(self, genes_id):
        return KeggApi.get_enzymes_by_gene(self, genes_id)

    @cached_method
    def get_enzymes_by_compound(self, compound_id):
        return KeggApi.get_enzymes_by_compound(self, compound_id)

    @cached_method
    def get_enzymes_by_glycan(self, glycan_id):
        return KeggApi.get_enzymes_by_glycan(self, glycan_id)

    @cached_method
    def get_enzymes_by_reaction(self, reaction_id):
        return KeggApi.get_enzymes_by_reaction(self, reaction_id)

    @cached_method
    def get_compounds_by_enzyme(self, enzyme_id):
        return KeggApi.get_compounds_by_enzyme(self, enzyme_id)

    @cached_method
    def get_compounds_by_reaction(self, reaction_id):
        return KeggApi.get_compounds_by_reaction(self, reaction_id)

    @cached_method
    def get_glycans_by_enzyme(self, enzyme_id):
        return KeggApi.get_glycans_by_enzyme(self, enzyme_id)

    @cached_method
    def get_glycans_by_reaction(self, reaction_id):
        return KeggApi.get_glycans_by_reaction(self, reaction_id)

    @cached_method
    def get_reactions_by_enzyme(self, enzyme_id):
        return KeggApi.get_reactions_by_enzyme(self, enzyme_id)

    @cached_method
    def get_reactions_by_compound(self, compound_id):
        return KeggApi.get_reactions_by_compound(self, compound_id)

    @cached_method
    def get_reactions_by_glycan(self, glycan_id):
        return KeggApi.get_reactions_by_glycan(self, glycan_id)

    ######
    # SSDB
    ######

    @cached_method
    def get_best_best_neighbors_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_best_best_neighbors_by_gene(self, genes_id, offset,
                                                       limit)

    @cached_method
    def get_best_neighbors_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_best_neighbors_by_gene(self, genes_id, offset,
                                                  limit)

    @cached_method
    def get_reverse_best_neighbors_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_reverse_best_neighbors_by_gene(self, genes_id,
                                                          offset, limit)

    @cached_method
    def get_paralogs_by_gene(self, genes_id, offset, limit):
        return KeggApi.get_paralogs_by_gene(self, genes_id, offset, limit)

    #######
    # Motif
    #######

    @cached_method
    def get_motifs_by_gene(self, genes_id, db):
        return KeggApi.get_motifs_by_gene(self, genes_id, db)

    @cached_method
    def get_genes_by_motifs(self, motif_id_list, offset, limit):
        return KeggApi.get_genes_by_motifs(self, motif_id_list, offset, limit)

    ####
    # KO
    ####

    @cached_method
    def get_ko_by_gene(self, genes_id):
        return KeggApi.get_ko_by_gene(self, genes_id)

    @cached_method
    def get_ko_by_ko_class(self, ko_class_id):
        # Delegate to the base class method like every sibling wrapper;
        # `service` is an instance attribute and does not exist on the
        # KeggApi class itself.
        return KeggApi.get_ko_by_ko_class(self, ko_class_id)

    @cached_method
    def get_genes_by_ko_class(self, ko_class_id, org, offset, limit):
        return KeggApi.get_genes_by_ko_class(self, ko_class_id, org, offset,
                                             limit)

    @cached_method
    def get_genes_by_ko(self, ko_id, org):
        return KeggApi.get_genes_by_ko(self, ko_id, org)

    #########
    # Pathway
    #########

    @cached_method
    def get_genes_by_organism(self, organism, offset=None, limit=None):
        return KeggApi.get_genes_by_organism(self, organism, offset=offset,
                                             limit=limit)

    @cached_method
    def get_number_of_genes_by_organism(self, organism):
        return KeggApi.get_number_of_genes_by_organism(self, organism)

    @cached_method
    def get_pathways_by_genes(self, gene_list):
        return KeggApi.get_pathways_by_genes(self, gene_list)

    @cached_method
    def get_pathways_by_enzymes(self, enzyme_list):
        return KeggApi.get_pathways_by_enzymes(self, enzyme_list)

    @cached_method
    def get_pathways_by_compounds(self, compound_list):
        return KeggApi.get_pathways_by_compounds(self, compound_list)

    @cached_method
    def get_pathways_by_drugs(self, drug_list):
        return KeggApi.get_pathways_by_drugs(self, drug_list)

    @cached_method
    def get_pathways_by_glycans(self, glycan_list):
        return KeggApi.get_pathways_by_glycans(self, glycan_list)

    @cached_method
    def get_pathways_by_reactions(self, reaction_list):
        return KeggApi.get_pathways_by_reactions(self, reaction_list)

    @cached_method
    def get_pathways_by_kos(self, ko_list):
        return KeggApi.get_pathways_by_kos(self, ko_list)

    @cached_method
    def get_elements_by_pathway(self, pathway_id):
        return KeggApi.get_elements_by_pathway(self, pathway_id)

    @cached_method
    def get_genes_by_pathway(self, pathway_id):
        return KeggApi.get_genes_by_pathway(self, pathway_id)

    @cached_method
    def get_enzymes_by_pathway(self, pathway_id):
        return KeggApi.get_enzymes_by_pathway(self, pathway_id)

    @cached_method
    def get_compounds_by_pathway(self, pathway_id):
        return KeggApi.get_compounds_by_pathway(self, pathway_id)

    @cached_method
    def get_drugs_by_pathway(self, pathway_id):
        return KeggApi.get_drugs_by_pathway(self, pathway_id)

    @cached_method
    def get_glycans_by_pathway(self, pathway_id):
        return KeggApi.get_glycans_by_pathway(self, pathway_id)

    @cached_method
    def get_reactions_by_pathway(self, pathway_id):
        return KeggApi.get_reactions_by_pathway(self, pathway_id)

    @cached_method
    def get_kos_by_pathway(self, pathway_id):
        return KeggApi.get_kos_by_pathway(self, pathway_id)
616
617
def match_by_ids(ids, entries):
    """
    Pair search `ids` with the entry texts they identify.

    The identifier of an entry is the second whitespace delimited field
    on its first line. A search id matches an entry if it equals that
    identifier, or, failing that, if the part after its first ':' (the
    search id without the database prefix) equals it. Full matches are
    resolved for all ids before any prefix-stripped match is tried, and
    every entry is matched at most once.

    Entries whose first line has no identifier field are ignored.

    Return a `(matched_ids, matched_entries)` pair of parallel lists, in
    the order the ids appear in `ids`, full matches first.
    """
    entries_by_id = {}
    for entry in entries:
        # Only the first line is inspected; tolerate single line and
        # short entries instead of failing the whole batch.
        fields = entry.split("\n", 1)[0].split(None, 2)
        if len(fields) >= 2:
            entries_by_id[fields[1]] = entry

    matched_ids = []
    matched_entries = []

    # First match full search ids. Iterate `ids` itself (skipping
    # duplicates) so the result order is deterministic.
    pending = []
    seen = set()
    for search_id in ids:
        if search_id in seen:
            continue
        seen.add(search_id)
        if search_id in entries_by_id:
            matched_ids.append(search_id)
            matched_entries.append(entries_by_id.pop(search_id))
        else:
            pending.append(search_id)

    # Second pass, split the search ids by ':' to db and identifier part,
    # match by identifier
    for search_id in pending:
        if ":" not in search_id:
            continue
        identifier = search_id.split(":", 1)[1]
        if identifier in entries_by_id:
            matched_ids.append(search_id)
            matched_entries.append(entries_by_id.pop(identifier))

    return matched_ids, matched_entries
Note: See TracBrowser for help on using the repository browser.