Changeset 1785:b3a53ba8ada0 in orange-bioinformatics for _bioinformatics/obiGeneSetSig.py


Ignore:
Timestamp:
05/21/13 13:45:13 (11 months ago)
Author:
markotoplak
Branch:
default
Message:

obiGeneSetSig: added SPCA.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • _bioinformatics/obiGeneSetSig.py

    r1775 r1785  
    731731        return at 
    732732 
     733def estimate_linear_fit(data, i): 
     734    """ 
     735    Chen et 2008 write about t-score of the least square fit in the 
     736    original article, but here we can use just the t-test, because the 
     737    t-scores obtained are exactly the same (the authors used the 
     738    the more general version as they supported continous outcomes). 
     739    """ 
     740     
     741    """ 
     742    #implementation that could also support continous outcomes 
     743    x = numpy.array([ ex[i].value for ex in data ]) 
     744    y = numpy.array([ int(ex[-1]) for ex in data ]) #here classes are 0 and 1 
     745    ret = scipy.stats.linregress(x,y) 
     746    b = ret[0] 
     747    seb = ret[4] 
     748    return b/seb 
     749    """ 
     750    """ 
     751    #per mathword.wolfram.com - results are the same 
     752    c = numpy.cov(x,y) 
     753    n = len(x) 
     754    b = c[0,1]/c[0,0] #the same result as from li 
     755    s = math.sqrt((c[1,1]*n - b*c[0,1]*n)/(n-2)) 
     756    seb = s/math.sqrt(c[0,0]*n) 
     757    return b/seb 
     758    """ 
     759    return obiExpression.MA_t_test()(i, data) 
     760 
     761 
     762class SPCA(ParametrizedTransformation): 
     763    """ Per Chen et al. Supervised principal component analysis for 
     764    gene set enrichment of microarray data with continuous or survival 
     765    outcomes. Bioinformatics, 2008.  """ 
     766 
     767    def __init__(self, **kwargs): 
     768        self.threshold = kwargs.pop("threshold", None) 
     769        self.top = kwargs.pop("top", None) 
     770        self.atleast = kwargs.pop("atleast", 0) 
     771        super(SPCA, self).__init__(**kwargs) 
     772 
     773    def _get_par(self, datao): 
     774        scores = [ estimate_linear_fit(datao, a) for a in range(len(datao.domain.attributes)) ] 
     775        scores = list(enumerate(map(abs, scores))) 
     776        select = None 
     777        if self.threshold is not None: 
     778            select = [ i for i,s in scores if s > self.threshold ] 
     779        elif self.top is not None: 
     780            select = nth(sorted(scores, key=lambda x: -x[1])[:self.top], 0) 
     781 
     782        print sorted(scores, key=lambda x: -x[1]) 
     783 
     784        if len(select) < self.atleast: 
     785            select = nth(sorted(scores, key=lambda x: -x[1])[:self.atleast], 0) 
     786 
     787        if select == None: 
     788            somethingWrongWithSelection 
     789 
     790        doms = Orange.data.Domain([ datao.domain.attributes[i] for i in select ], datao.domain.class_var) 
     791        datas = Orange.data.Table(doms, datao) 
     792 
     793        return select, pca(datas) 
     794 
     795    def _use_par(self, arr, constructt): 
     796        select, constructt = constructt 
     797        select = set(select) 
     798 
     799        arr = [ arr[i].value for i in range(len(arr.domain.attributes))  
     800            if i in select ] 
     801 
     802        evals, evect, xmean = constructt 
     803 
     804        arr = arr - xmean # same input transformation - a row in a matrix 
     805        ev0 = evect[0] #this is a row in a matrix - do a dot product 
     806        a = numpy.dot(arr, ev0) 
     807 
     808        return a 
     809 
    733810if __name__ == "__main__": 
    734811 
    735812    data = Orange.data.Table("iris") 
     813 
    736814    gsets = obiGeneSets.collections({ 
    737815        #"ALL": ['sepal length', 'sepal width', 'petal length', 'petal width'], 
     
    761839        print '\n'.join([ a + ": " +str(b) for a,b in ol]) 
    762840 
    763     ass = Assess(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0) 
     841    ass = SPCA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, top=0) 
    764842    ar = to_old_dic(ass.domain, data[:5]) 
    765843    pp2(ar) 
Note: See TracChangeset for help on using the changeset viewer.