Changeset 1707:7d692228fea1 in orange-bioinformatics for _bioinformatics/obiGeneSetSig.py


Ignore:
Timestamp:
08/13/12 16:05:04 (21 months ago)
Author:
markotoplak
Branch:
default
Message:

Speed-up of SetSig (obiGeneSetSig).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiGeneSetSig.py

    r1705 r1707  
    99 
    1010import Orange, Orange.utils, statc 
     11 
     12if __name__ == "__main__": 
     13    __package__ = "Orange.bio" 
    1114 
    1215from .obiGsea import takeClasses 
     
    261264        return attributes 
    262265    
    263 def setSig_example_geneset(ex, data): 
     266def setSig_example_geneset(ex, data, no_unknowns): 
    264267    """ Gets learning data and example with the same domain, both 
    265268    containing only genes from the gene set. """ 
     
    267270    distances = [ [], [] ]     
    268271 
    269     def pearsonr(v1, v2): 
    270         return numpy.corrcoef([v1, v2])[0,1] 
    271  
    272272    def pearson(ex1, ex2): 
     273        vals1 = ex1.native(0)[:-1] 
     274        vals2 = ex2.native(0)[:-1] 
     275 
    273276        #leaves undefined elements out 
    274  
    275         attrs = range(len(ex1.domain.attributes)) 
    276         vals1 = [ ex1[i].value for i in attrs ] 
    277         vals2 = [ ex2[i].value for i in attrs ] 
    278  
    279         common = [ True if v1 != "?" and v2 != "?" else False \ 
    280             for v1,v2 in zip(vals1,vals2) ] 
    281         vals1 = [ v for v,c in zip(vals1, common) if c ] 
    282         vals2 = [ v for v,c in zip(vals2, common) if c ] 
     277        if not no_unknowns: 
     278            common = [ True if v1 != "?" and v2 != "?" else False \ 
     279                for v1,v2 in zip(vals1,vals2) ] 
     280            vals1 = [ v for v,c in zip(vals1, common) if c ] 
     281            vals2 = [ v for v,c in zip(vals2, common) if c ] 
    283282 
    284283        return numpy.corrcoef([vals1, vals2])[0,1] 
     
    327326class SetSig(GeneSetTrans): 
    328327 
     328    def __init__(self, **kwargs): 
     329        self.no_unknowns = kwargs.pop("no_unknowns", False) 
     330        super(SetSig, self).__init__(**kwargs) 
     331 
    329332    def build_feature(self, data, gs): 
    330333 
    331334        at = Orange.feature.Continuous(name=str(gs)) 
    332  
    333         def t(ex, w, gs=gs, data=data): #copy od the data 
    334             geneset = list(gs.genes) 
    335  
    336             nm, name_ind, genes, takegenes = self._match_data(data, geneset) 
     335        geneset = list(gs.genes) 
     336        nm, name_ind, genes, takegenes = self._match_data(data, geneset) 
     337        indices = [ name_ind[gene] for gene in genes ] 
     338 
     339        def t(ex, w, gs=gs, data=data, indices=indices, takegenes=takegenes, geneset=geneset): 
    337340            nm2, name_ind2, genes2 = self._match_instance(ex, geneset, takegenes) 
    338341 
    339             domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var) 
     342            domain = Orange.data.Domain([data.domain.attributes[i] for i in indices], data.domain.class_var) 
    340343            datao = Orange.data.Table(domain, data) 
    341344            
     
    344347            example = Orange.data.Instance(domain, exvalues) 
    345348 
    346             return setSig_example_geneset(example, datao) #only this one is setsig specific 
     349            return setSig_example_geneset(example, datao, self.no_unknowns) #only this one is setsig specific 
    347350      
    348351        at.get_value_from = t 
Note: See TracChangeset for help on using the changeset viewer.