Changeset 1715:87468c6e6443 in orange-bioinformatics for _bioinformatics/obiKEGG2/__init__.py


Ignore:
Timestamp:
08/30/12 15:19:40 (20 months ago)
Author:
markotoplak
Branch:
default
Message:

Removed use of the old obiKEGG module. Hacked a REST version for gene aliases in obiKEGG2.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiKEGG2/__init__.py

    r1713 r1715  
    1414from __future__ import absolute_import 
    1515 
    16  
     16import urllib2 
    1717import os, sys 
    1818from collections import defaultdict 
     
    8585        raise NotImplementedError() 
    8686     
     87    def _gm_gene_aliases(self): 
     88        """ 
     89        Return a list of sets of equal genes. This is a hack for 
     90        gene matchers to work faster until the whole implementations 
     91        transitions to REST. Does not include links to DBs. 
     92        """ 
     93        s1 = urllib2.urlopen("http://rest.kegg.jp/list/%s" % self.org_code).read() 
     94        out = [] 
     95        for l in s1.split('\n'): 
     96            if l: 
     97                tabs = l.split("\t") 
     98                cset = set([tabs[0]]) 
     99                try: 
     100                    rest = tabs[1].split(";")[0] 
     101                    cset |= set(rest.split(", ")) 
     102                except: 
     103                    pass #do not crash if a line does not conform 
     104                out.append(cset) 
     105        return out 
     106 
    87107    def get_enriched_pathways(self, genes, reference=None, prob=obiProb.Binomial(), callback=None): 
    88108        """ Return a dictionary with enriched pathways ids as keys 
     
    258278    pass 
    259279 
    260 from .. import obiGene 
    261 from Orange.utils import ConsoleProgressBar 
    262  
    class MatcherAliasesKEGG(obiGene.MatcherAliasesPickled):
        """
        Gene matcher whose alias sets come from KEGG gene entries.

        When the server files listing for ``DOMAIN`` contains a pickle for
        the organism, that pickle is used; otherwise the alias sets are
        built from ``KEGGOrganism(organism).genes``.
        """
        DOMAIN = "KEGG"
        VERSION = "v3.0"

        def _ids_filename(self):
            """Return the name of the alias pickle for ``self.organism``."""
            return "kegg_gene_id_aliases_" + self.organism + ".pickle"

        def create_aliases(self):
            """
            Return the aliases for ``self.organism``.

            If the alias pickle is listed on the server it is downloaded and
            unpickled. Otherwise a list of sets is built, one per gene, each
            holding the gene key and the entry's ``alt_names``, and that list
            is pickled to the local path before being returned.
            """
            import cPickle
            files = set(serverfiles.ServerFiles().listfiles(self.DOMAIN))
            ids_filename = self._ids_filename()
            if ids_filename in files:
                filename = serverfiles.localpath_download(self.DOMAIN, ids_filename)

                # NOTE(review): unpickling executes whatever the downloaded
                # file encodes; this trusts the server files repository.
                pickle_file = open(filename, "rb")
                try:
                    aliases = cPickle.load(pickle_file)
                finally:
                    pickle_file.close()
            else:
                pb = ConsoleProgressBar("Retriving KEGG ids:")
                kegg_org = KEGGOrganism(self.organism)
                genes = kegg_org.genes
                genes.pre_cache(progress_callback=pb.set_state)
                aliases = []
                for key, entry in genes.iteritems():
                    aliases.append(set([key]) | set(entry.alt_names))
                # NOTE(review): this branch runs when the file is NOT listed
                # on the server, yet it asks for a download path; confirm
                # whether serverfiles.localpath was intended here.
                filename = serverfiles.localpath_download(self.DOMAIN, ids_filename)
                pickle_file = open(filename, "wb")
                try:
                    cPickle.dump(aliases, pickle_file)
                finally:
                    # Close explicitly so the dump is flushed to disk.
                    pickle_file.close()

            return aliases

        def filename(self):
            """Return the matcher's file name, ``"kegg3_"`` plus the organism."""
            return "kegg3_" + self.organism

        def aliases_path(self):
            """Return the local path of the alias pickle for the organism."""
            return serverfiles.localpath(self.DOMAIN, self._ids_filename())

        def create_aliases_version(self):
            """
            Return the version of the alias data.

            This is the server file's ``"datetime"`` info when the pickle is
            listed on the server, otherwise the ``info.release`` of the
            organism's genes.
            """
            files = set(serverfiles.listfiles(self.DOMAIN))
            ids_filename = self._ids_filename()
            if ids_filename in files:
                version = serverfiles.info(self.DOMAIN, ids_filename)["datetime"]
            else:
                kegg_org = KEGGOrganism(self.organism)
                genes = kegg_org.genes
                version = genes.info.release
            return version

        def __init__(self, organism, **kwargs):
            """
            Store ``organism``, update the local alias pickle when the server
            lists one, then initialise the base matcher with ``kwargs``.
            """
            self.organism = organism
            sf = serverfiles.ServerFiles()
            files = set(sf.listfiles(self.DOMAIN))
            ids_filename = self._ids_filename()
            if ids_filename in files:
                serverfiles.update(self.DOMAIN, ids_filename)

            obiGene.MatcherAliasesPickled.__init__(self, **kwargs)
    314  
    315280def main(): 
    316281    KEGGGenome() 
Note: See TracChangeset for help on using the changeset viewer.