Changeset 1598:fda2dc9e724e in orange-bioinformatics


Ignore:
Timestamp:
03/10/12 22:33:35 (2 years ago)
Author:
markotoplak
Branch:
default
Message:

Some refactoring of obiGeneSetSig.

File:
1 edited

Legend:

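Unmodified (row shows both the old r1597 and the new r1598 line number, in that order)
Added (row shows only the new r1598 line number, in the second column)
Removed (row shows only the old r1597 line number, in the first column)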
  • obiGeneSetSig.py

    r1597 r1598  
    80 80        return self._cache[data.domain] 
    81 81 
     82    def _match_instance(self, instance, geneset, takegenes=None): 
     83        nm, name_ind = self._mat_ni(instance) 
     84        genes = [ nm.umatch(gene) for gene in geneset ] 
     85        if takegenes: 
     86            genes = [ genes[i] for i in takegenes ] 
     87        return nm, name_ind, genes 
     88 
     89    def _match_data(self, data, geneset, odic=False): 
     90        nm, name_ind = self._mat_ni(data) 
     91        genes = [ nm.umatch(gene) for gene in geneset ] 
     92        if odic: 
     93            to_geneset = dict(zip(genes, geneset)) 
     94        takegenes = [ i for i,a in enumerate(genes) if a != None ] 
     95        genes = [ genes[i] for i in takegenes ] 
     96        if odic: 
     97            return nm, name_ind, genes, takegenes, to_geneset 
     98        else: 
     99            return nm, name_ind, genes, takegenes 
     100 
    82 101    def __init__(self, matcher=None, gene_sets=None, min_size=3, max_size=1000, min_part=0.1, class_values=None): 
    83 102        self.matcher = matcher 
     
    120 139            geneset = list(gs.genes) 
    121 140 
    122             nm, name_ind = self._mat_ni(data) 
    123             nm2, name_ind2 = self._mat_ni(ex) 
    124  
    125             genes = [ nm.umatch(gene) for gene in geneset ] 
    126             genes2 = [ nm2.umatch(gene) for gene in geneset ] 
    127  
    128             takegenes = [ i for i,a in enumerate(genes) if a != None ] 
    129  
    130             genes = [ genes[i] for i in takegenes ] 
    131             genes2 = [ genes2[i] for i in takegenes ] 
     141            nm, name_ind, genes, takegenes = self._match_data(data, geneset) 
     142            nm2, name_ind2, genes2 = self._match_instance(ex, geneset, takegenes) 
    132 143 
    133 144            domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var) 
     
    150 161 
    151 162        geneset = list(gs.genes) 
    152  
    153         nm, name_ind = self._mat_ni(data) 
    154         genes = [ nm.umatch(gene) for gene in geneset ] 
    155         takegenes = [ i for i,a in enumerate(genes) if a != None ] 
    156         genes = [ genes[i] for i in takegenes ] 
    157  
     163        nm, name_ind, genes, takegenes = self._match_data(data, geneset) 
    158 164        domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var) 
    159  
    160 165        datao = Orange.data.Table(domain, data) 
    161 166 
    162         xmean, W, P, T = PLSCall(datao, nc=1, y=[datao.domain.class_var]) 
    163         constructt = xmean, W, P 
     167        constructt = PLSCall(datao, nc=1, y=[datao.domain.class_var]) 
    164 168 
    165 169        def t(ex, w, geneset=geneset, constructt=constructt, takegenes=takegenes, domain=domain): 
    166  
    167             nm2, name_ind2 = self._mat_ni(ex) 
    168             genes2 = [ nm2.umatch(gene) for gene in geneset ] 
    169             genes2 = [ genes2[i] for i in takegenes ] 
     170            nm2, name_ind2, genes2 = self._match_instance(ex, geneset, takegenes) 
    170 171           
    171 172            #convert the example to the same domain 
    172 173            exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ] 
    173              
    174 174            ex = numpy.array(exvalues[:-1]) 
    175 175 
    176             xmean, W, P = constructt 
     176            xmean, W, P, _ = constructt 
    177 177            ex = ex - xmean # same input transformation 
    178 178 
     
    201 201 
    202 202        geneset = list(gs.genes) 
    203  
    204         nm, name_ind = self._mat_ni(data) 
    205         genes = [ nm.umatch(gene) for gene in geneset ] 
    206         takegenes = [ i for i,a in enumerate(genes) if a != None ] 
    207         genes = [ genes[i] for i in takegenes ] 
    208  
     203        nm, name_ind, genes, takegenes = self._match_data(data, geneset) 
    209 204        domain = Orange.data.Domain([data.domain.attributes[name_ind[gene]] for gene in genes], data.domain.class_var) 
    210  
    211 205        datao = Orange.data.Table(domain, data) 
    212 206 
    213         evals, evect, xmean = pca(datao) 
    214         constructt = evals, evect, xmean 
     207        constructt = pca(datao) 
    215 208 
    216 209        def t(ex, w, geneset=geneset, constructt=constructt, takegenes=takegenes, domain=domain): 
    217  
    218             nm2, name_ind2 = self._mat_ni(ex) 
    219             genes2 = [ nm2.umatch(gene) for gene in geneset ] 
    220             genes2 = [ genes2[i] for i in takegenes ] 
     210            nm2, name_ind2, genes2 = self._match_instance(ex, geneset, takegenes) 
    221 211           
    222 212            #convert the example to the same domain 
     
    244 234        def t(ex, w, gs=gs): 
    245 235            geneset = list(gs.genes) 
    246             nm2, name_ind2 = self._mat_ni(ex) 
    247             genes2 = [ nm2.umatch(gene) for gene in geneset ] 
     236            nm2, name_ind2, genes2 = self._match_instance(ex, geneset) 
    248 237            
    249 238            exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ] 
     
    284 273        zscores = map(to_z_score, tscores) 
    285 274 
    286         nm, name_ind = self._mat_ni(data) 
    287  
    288 275        for gs in gene_sets: 
    289 276 
     
    291 278 
    292 279            geneset = list(gs.genes) 
    293  
    294             genes = [ nm.umatch(gene) for gene in geneset ] 
    295  
    296             to_geneset = dict(zip(genes, geneset)) 
    297  
    298             takegenes = [ i for i,a in enumerate(genes) if a != None ] 
    299             genes = [ genes[i] for i in takegenes ] 
    300  
     280            nm, name_ind, genes, takegenes, to_geneset = self._match_data(data, geneset, odic=True) 
    301 281            #take each gene only once 
    302 282            genes = set(genes) 
     
    311 291 
    312 292            def t(ex, w, consider_genes=consider_genes): 
    313                 #consider_genes included genes from the gene set that 
    314                 #should be combined 
    315                 nm2, name_ind2 = self._mat_ni(ex) 
    316                 genes2 = [ nm2.umatch(gene) for gene in consider_genes ] 
     293                nm2, name_ind2, genes2 = self._match_instance(ex, consider_genes) 
    317 294               
    318 295                #convert the example to the same domain 
Note: See TracChangeset for help on using the changeset viewer.