Changeset 1715:87468c6e6443 in orange-bioinformatics


Ignore:
Timestamp:
08/30/12 15:19:40 (20 months ago)
Author:
markotoplak
Branch:
default
Message:

Removed use of the old obiKEGG module. Hacked a REST version for gene aliases in obiKEGG2.

Location:
_bioinformatics
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiGene.py

    r1703 r1715  
    643643        self.filename() # test if valid filename can be built 
    644644 
     645from Orange.utils import ConsoleProgressBar 
     646 
    645647class MatcherAliasesKEGG(MatcherAliasesPickled): 
    646648 
    647649    def _organism_name(self, organism): 
    648         from . import obiKEGG  
    649         return obiKEGG.organism_name_search(organism) 
     650        from . import obiKEGG2 
     651        return obiKEGG2.organism_name_search(organism) 
    650652 
    651653    def create_aliases(self): 
    652654        organism = self._organism_name(self.organism) 
    653         from . import obiKEGG 
    654         org = obiKEGG.KEGGOrganism(self.organism, genematcher=GMDirect()) 
    655         genes = org.genes 
    656         osets = [ set([name]) | set(b.alt_names) for  
    657                 name,b in genes.items() ] 
     655        from . import obiKEGG2 
     656        org = obiKEGG2.KEGGOrganism(self.organism, genematcher=GMDirect()) 
     657        osets = org._gm_gene_aliases() 
    658658        return osets 
    659659 
    660660    def create_aliases_version(self): 
    661         from . import obiKEGG 
    662         return obiKEGG.KEGGOrganism.organism_version(self.organism) + ".1" 
     661        from . import obiKEGG2 
     662        return obiKEGG2.KEGGOrganism.organism_version(self.organism) + ".1" 
    663663 
    664664    def filename(self): 
    665         return "kegg_" + self._organism_name(self.organism)  
     665        return "kegg_2_" + self._organism_name(self.organism)  
    666666 
    667667    def __init__(self, organism, ignore_case=True): 
  • _bioinformatics/obiKEGG2/__init__.py

    r1713 r1715  
    1414from __future__ import absolute_import 
    1515 
    16  
     16import urllib2 
    1717import os, sys 
    1818from collections import defaultdict 
     
    8585        raise NotImplementedError() 
    8686     
     87    def _gm_gene_aliases(self): 
     88        """ 
     89        Return a list of sets of equal genes. This is a hack for 
     90        gene matchers to work faster until the whole implementations 
     91        transitions to REST. Does not include links to DBs. 
     92        """ 
     93        s1 = urllib2.urlopen("http://rest.kegg.jp/list/%s" % self.org_code).read() 
     94        out = [] 
     95        for l in s1.split('\n'): 
     96            if l: 
     97                tabs = l.split("\t") 
     98                cset = set([tabs[0]]) 
     99                try: 
     100                    rest = tabs[1].split(";")[0] 
     101                    cset |= set(rest.split(", ")) 
     102                except: 
     103                    pass #do not crash if a line does not conform 
     104                out.append(cset) 
     105        return out 
     106 
    87107    def get_enriched_pathways(self, genes, reference=None, prob=obiProb.Binomial(), callback=None): 
    88108        """ Return a dictionary with enriched pathways ids as keys 
     
    258278    pass 
    259279 
    260 from .. import obiGene 
    261 from Orange.utils import ConsoleProgressBar 
    262  
    263 class MatcherAliasesKEGG(obiGene.MatcherAliasesPickled): 
    264     DOMAIN = "KEGG" 
    265     VERSION = "v3.0" 
    266     def create_aliases(self): 
    267         import cPickle 
    268         files = set(serverfiles.ServerFiles().listfiles(self.DOMAIN)) 
    269         ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle" 
    270         if ids_filename in files: 
    271             filename = serverfiles.localpath_download(self.DOMAIN, ids_filename) 
    272              
    273             aliases = cPickle.load(open(filename, "rb")) 
    274         else: 
    275             pb = ConsoleProgressBar("Retriving KEGG ids:") 
    276             kegg_org = KEGGOrganism(self.organism) 
    277             genes = kegg_org.genes 
    278             genes.pre_cache(progress_callback=pb.set_state) 
    279             aliases = [] 
    280             for key, entry in genes.iteritems(): 
    281                 aliases.append(set([key]) | set(entry.alt_names)) 
    282             filename = serverfiles.localpath_download(self.DOMAIN, ids_filename) 
    283             cPickle.dump(aliases, open(filename, "wb")) 
    284              
    285         return aliases 
    286      
    287     def filename(self): 
    288         return "kegg3_" + self.organism 
    289      
    290     def aliases_path(self): 
    291         ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle" 
    292         return serverfiles.localpath(self.DOMAIN, ids_filename) 
    293      
    294     def create_aliases_version(self): 
    295         files = set(serverfiles.listfiles(self.DOMAIN)) 
    296         ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle" 
    297         if ids_filename in files: 
    298             version = serverfiles.info(self.DOMAIN, ids_filename)["datetime"] 
    299         else: 
    300             kegg_org = KEGGOrganism(self.organism) 
    301             genes = kegg_org.genes 
    302             version = genes.info.release 
    303         return version 
    304          
    305     def __init__(self, organism, **kwargs): 
    306         self.organism = organism 
    307         sf = serverfiles.ServerFiles() 
    308         files = set(sf.listfiles(self.DOMAIN)) 
    309         ids_filename = "kegg_gene_id_aliases_" + self.organism + ".pickle" 
    310         if ids_filename in files: 
    311             serverfiles.update(self.DOMAIN, ids_filename) 
    312              
    313         obiGene.MatcherAliasesPickled.__init__(self, **kwargs) 
    314  
    315280def main(): 
    316281    KEGGGenome() 
  • _bioinformatics/obiTaxonomy.py

    r1693 r1715  
    333333    """ See if the code is a valid code in any database and return a set of its taxids. 
    334334    """ 
    335     from . import obiKEGG, obiGO 
     335    from . import obiKEGG2, obiGO 
    336336    results = set() 
    337     for test in [obiKEGG.to_taxid, obiGO.to_taxid]: 
     337    for test in [obiKEGG2.to_taxid, obiGO.to_taxid]: 
    338338        try: 
    339339            r = test(code) 
  • _bioinformatics/obimiRNA.py

    r1713 r1715  
    77import statc 
    88 
    9 from . import obiGene as ge, obiGO as go, obiKEGG as kg, obiProb as op, obiTaxonomy 
     9from . import obiGene as ge, obiGO as go, obiKEGG2 as kg, obiProb as op, obiTaxonomy 
    1010 
    1111mirnafile = osf.localpath_download('miRNA','miRNA.txt') 
     
    407407        kegg_genes = [keggNames[g] for g in get_info(m).targets.split(',') if g in keggNames] 
    408408        if enrichment: 
    409             mirnaPathways[m] = [path_id for path_id,(geneList,p,geneNum) in org.get_enriched_pathways_by_genes(kegg_genes).items() if p < pVal] 
     409            mirnaPathways[m] = [path_id for path_id,(geneList,p,geneNum) in org.get_enriched_pathways(kegg_genes).items() if p < pVal] 
    410410        else: 
    411411            paths = filter(None,[list(org.get_pathways_by_genes([k])) for k in kegg_genes])                    
  • _bioinformatics/widgets/OWKEGGPathwayBrowser.py

    r1712 r1715  
    2222from Orange.OrangeWidgets.OWWidget import * 
    2323 
    24 from .. import obiKEGG 
    2524from .. import obiTaxonomy 
    2625from .. import obiKEGG2 as obiKEGG 
Note: See TracChangeset for help on using the changeset viewer.