Changeset 1597:574fe4f8d1d3 in orange-bioinformatics


Ignore:
Timestamp:
03/09/12 21:06:10 (2 years ago)
Author:
markotoplak
Branch:
default
rebase_source:
ea5f635c5c3e412b91d382e6f24161e8775e0786
Message:

Added GSA to obiGeneSetSig.

Files:
2 edited

Legend:

Unmodified
Added
Removed
  • obiAssess.py

    r1595 r1597  
    660660    data = Orange.data.Table("iris") 
    661661    gsets = obiGeneSets.collections({ 
    662         "ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'], 
     662        #"ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'], 
    663663        "f3": ['sepal length', 'sepal width', 'petal length'], 
    664664        "l3": ['sepal width', 'petal length', 'petal width'], 
     
    673673    choosen_cv = ["Iris-setosa", "Iris-versicolor"] 
    674674    #ass = AssessLearner()(data, matcher, gsets, rankingf=AT_loessLearner()) 
     675    #ass = AssessLearner()(data, matcher, gsets, minPart=0.0) 
    675676    #ass = MeanLearner()(data, matcher, gsets, default=False) 
    676677    #ass = MedianLearner()(data, matcher, gsets) 
    677     ass = PLSLearner()(data, matcher, gsets, classValues=choosen_cv, minPart=0.0) 
     678    #ass = PLSLearner()(data, matcher, gsets, classValues=choosen_cv, minPart=0.0) 
    678679    #ass = SetSigLearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0) 
    679680    #ass = PCALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0) 
    680     #ass = GSALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0) 
     681    ass = GSALearner()(ldata, matcher, gsets, classValues=choosen_cv, minPart=0.0) 
    681682 
    682683    ar = defaultdict(list) 
  • obiGeneSetSig.py

    r1595 r1597  
    88from obiGsea import takeClasses 
    99from obiAssess import pca, PLSCall 
     10import obiExpression 
     11import scipy.stats 
     12 
     13#STILL MISSING: Assess, CORGs 
    1014 
    1115def setSig_example_geneset(ex, data): 
     
    263267       super(Median, self).__init__(**kwargs) 
    264268 
     269class GSA(GeneSetTrans): 
     270 
     271    def build_features(self, data, gene_sets): 
     272 
     273        attributes = [] 
     274 
     275        def tscorec(data, at, cache=None): 
     276            ma = obiExpression.MA_t_test()(at,data) 
     277            return ma 
     278 
     279        tscores = [ tscorec(data, at) for at in data.domain.attributes ] 
     280 
     281        def to_z_score(t): 
     282            return float(scipy.stats.norm.ppf(scipy.stats.t.cdf(t, len(data)-2))) 
     283 
     284        zscores = map(to_z_score, tscores) 
     285 
     286        nm, name_ind = self._mat_ni(data) 
     287 
     288        for gs in gene_sets: 
     289 
     290            at = Orange.feature.Continuous(name=str(gs)) 
     291 
     292            geneset = list(gs.genes) 
     293 
     294            genes = [ nm.umatch(gene) for gene in geneset ] 
     295 
     296            to_geneset = dict(zip(genes, geneset)) 
     297 
     298            takegenes = [ i for i,a in enumerate(genes) if a != None ] 
     299            genes = [ genes[i] for i in takegenes ] 
     300 
     301            #take each gene only once 
     302            genes = set(genes) 
     303 
     304            D = numpy.mean([max(zscores[name_ind[g]],0) for g in genes]) \ 
     305                + numpy.mean([min(zscores[name_ind[g]],0) for g in genes]) 
     306 
     307            if D >= 0: 
     308                consider_genes = [ to_geneset[g] for g in genes if zscores[name_ind[g]] > 0.0 ] 
     309            else: 
     310                consider_genes = [ to_geneset[g] for g in genes if zscores[name_ind[g]] < 0.0 ] 
     311 
     312            def t(ex, w, consider_genes=consider_genes): 
     313                #consider_genes includes genes from the gene set that 
     314                #should be combined 
     315                nm2, name_ind2 = self._mat_ni(ex) 
     316                genes2 = [ nm2.umatch(gene) for gene in consider_genes ] 
     317               
     318                #convert the example to the same domain 
     319                exvalues = [ vou(ex, gn, name_ind2) for gn in genes2 ] + [ "?" ] 
     320                exvalues = filter(lambda x: x != "?", exvalues) 
     321               
     322                return numpy.mean(exvalues) 
     323 
     324            at.get_value_from = t 
     325            attributes.append(at) 
     326 
     327        return attributes 
     328 
    265329if __name__ == "__main__": 
    266330 
    267331    data = Orange.data.Table("iris") 
    268332    gsets = obiGeneSets.collections({ 
    269         "ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'], 
     333        #"ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'], 
    270334        "f3": ['sepal length', 'sepal width', 'petal length'], 
    271335        "l3": ['sepal width', 'petal length', 'petal width'], 
     
    293357        print '\n'.join([ a + ": " +str(b) for a,b in ol]) 
    294358 
    295     ass = PLS(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0) 
     359    ass = GSA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0) 
    296360    ar = to_old_dic(ass.domain, data[:5]) 
    297361    pp2(ar) 
Note: See TracChangeset for help on using the changeset viewer.