Changeset 1955:9b187b15f253 in orange-bioinformatics


Ignore:
Timestamp:
02/17/14 09:42:57 (2 months ago)
Author:
markotoplak
Branch:
default
Message:

obiGeneSetSig: pca refactor.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orangecontrib/bio/obiGeneSetSig.py

    r1952 r1955  
    1313 
    1414from . import obiExpression, obiGene, obiGeneSets, obiGsea, stats 
    15  
    1615 
    1716def corgs_activity_score(ex, corg): 
     
    7877 
    7978 
    80 def pca(data, snapshot=0): 
     79def pca(M, snapshot=None): 
    8180    "Perform PCA on M, return eigenvectors and eigenvalues, sorted." 
    82     M = data.toNumpy("a")[0] 
    8381    XMean = numpy.mean(M, axis = 0) 
    8482    M = M - XMean 
    85  
    86     T, N = numpy.shape(M) 
    87     # if there are less rows T than columns N, use snapshot method 
    88     if (T < N) or snapshot: 
    89         C = numpy.dot(M, numpy.transpose(M)) 
    90         evals, evecsC = numpy.linalg.eigh(C) #columns of evecsC are eigenvectors 
    91         evecs = numpy.dot(M.T, evecsC)/numpy.sqrt(numpy.abs(evals)) 
     83     
     84    if snapshot == None: 
     85        snapshot = M.shape[0] < M.shape[1] 
     86 
     87    if snapshot: #less columns than rows 
     88        evals, evecsC = numpy.linalg.eigh(M.dot(M.T)) #columns of evecsC are eigenvectors 
     89        evecs = M.T.dot(evecsC)/numpy.sqrt(numpy.abs(evals)) 
    9290    else: 
    93         K = numpy.dot(numpy.transpose(M), M) 
    94         evals, evecs = numpy.linalg.eigh(K) 
     91        evals, evecs = numpy.linalg.eigh(M.T.dot(M)) 
    9592     
    96     evecs = numpy.transpose(evecs) 
     93    evecs = evecs.T 
    9794 
    9895    # sort the eigenvalues and eigenvectors, decending order 
     
    10097    evecs = numpy.take(evecs, order, 0) 
    10198    evals = numpy.take(evals, order) 
     99 
    102100    return evals, evecs, XMean 
    103101 
    104  
     102def pca2(M): 
     103    """ Perform PCA on M, return eigenvectors and eigenvalues, sorted. 
     104    Mostly euivalent to pca(), slightly slower but more stable.""" 
     105    XMean = numpy.mean(M, axis = 0) 
     106    M = M - XMean 
     107    U,s,Vt = numpy.linalg.svd(M, full_matrices=False) 
     108    return s*s, Vt, XMean 
     109     
    105110class GeneSetTrans(object): 
    106111 
     
    440445  
    441446    #create distances to all learning data - save or other class 
     447 
    442448    for c in data: 
    443449        p = pearson(c, ex) 
     
    565571 
    566572        return TR[0][0] 
    567          
     573  
     574def eigvturn(A): 
     575    """ It multiplies rows (vectors of unit lengths) where  
     576        sum < 0 with -1. """ 
     577    turn = (numpy.sum(A, axis=1, keepdims=True) > 0)*2 - 1 
     578    return A*turn 
     579 
    568580class PCA(ParametrizedTransformation): 
    569581 
     582    def __init__(self, **kwargs): 
     583        self.turn = kwargs.pop("turn", False) #turn eigenvetors 
     584        if self.turn == True: 
     585            self.turn = eigvturn 
     586        super(PCA, self).__init__(**kwargs) 
     587 
    570588    def _get_par(self, datao): 
    571         return pca(datao) 
     589        M = datao.toNumpy("a")[0] 
     590        evals, evect, xmean = pca(M) 
     591        if self.turn: 
     592            evect = self.turn(evect) 
     593        return evals, evect, xmean 
    572594 
    573595    def _use_par(self, arr, constructt): 
     
    897919            return select, None 
    898920        else: 
    899             return set(select), pca(datas) 
     921            return set(select), pca(datas.toNumpy("a")[0]) 
    900922 
    901923    def _use_par(self, arr, constructt): 
     
    9941016    #ass = LLR(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, normalize=True) 
    9951017    #ass = LLR_slow(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0) 
    996     ass = CORGs(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, cv=True) 
     1018    ass = PCA(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, cv=True) 
    9971019    ar = to_old_dic(ass.domain, data[:5]) 
    9981020    pp2(ar) 
Note: See TracChangeset for help on using the changeset viewer.