Changeset 1691:d2aeb3889a90 in orange-bioinformatics for _bioinformatics/obiGeneSetSig.py


Ignore:
Timestamp:
06/28/12 13:29:34 (22 months ago)
Author:
markotoplak
Branch:
default
rebase_source:
fa4b253bc704b815c252303f65a5db4769aabd6d
Message:

CV feature build for obiGeneSetSig.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiGeneSetSig.py

    r1690 r1691  
    1313from .obiAssess import pca, PLSCall, corgs_activity_score 
    1414from . import obiExpression, obiGene, obiGeneSets, obiGsea, stats 
     15 
    1516 
    1617class GeneSetTrans(object): 
     
    4344            return nm, name_ind, genes, takegenes 
    4445 
    45     def __init__(self, matcher=None, gene_sets=None, min_size=3, max_size=1000, min_part=0.1, class_values=None): 
     46    def __init__(self, matcher=None, gene_sets=None, min_size=3, max_size=1000, min_part=0.1, class_values=None, cv=False): 
    4647        self.matcher = matcher 
    4748        self.gene_sets = gene_sets 
     
    5152        self.class_values = class_values 
    5253        self._cache = {} 
     54        self.cv = cv 
    5355 
    5456    def __call__(self, data, weight_id=None): 
     
    6264        newfeatures = self.build_features(data, gene_sets) 
    6365        newdomain = Orange.data.Domain(newfeatures, data.domain.class_var) 
    64         return Orange.data.Table(newdomain, data) 
     66 
     67        # Build a data set with cross-validation 
     68        if self.cv == False: 
     69            return Orange.data.Table(newdomain, data) 
     70        else: 
     71            # The domain has the transformer that is built on all samples, 
     72            # while the transformed data table uses cross-validation 
     73            # internally 
     74            folds = 5 
     75            cvi = Orange.data.sample.SubsetIndicesCV(data, folds) 
     76            data_cv = [ [] for _ in range(len(data)) ] 
     77            for f in range(folds): 
     78                learn = data.select(cvi, f, negate=True) 
     79                test = data.select(cvi, f) 
     80                lf = self.build_features(learn, gene_sets) 
     81                transd = Orange.data.Domain(lf, data.domain.class_var) 
     82                trans_test = Orange.data.Table(transd, test) 
     83                for ex, pos in \ 
     84                    zip(trans_test, [ i for i,n in enumerate(cvi) if n == f ]): 
     85                    data_cv[pos] = ex.native(0) 
     86            return Orange.data.Table(newdomain, data_cv) 
    6587 
    6688    def build_features(self, data, gene_sets): 
    6789        return [ self.build_feature(data, gs) for gs in gene_sets ] 
    68  
    6990 
    7091def normcdf(x, mi, st): 
Note: See TracChangeset for help on using the changeset viewer.