Changeset 1952:db7cc518cd3d in orangebioinformatics
 Timestamp:
 01/29/14 12:22:49 (3 months ago)
 Branch:
 default
 Files:

 5 deleted
 1 edited
Legend:
 Unmodified
 Added
 Removed

orangecontrib/bio/obiGeneSetSig.py
r1933 r1952
    6     6
    7     7   import scipy.stats
    8       -
    9     8   import numpy
   10       -
   11     9   import Orange, Orange.utils, statc
   12    10
    …     …
   14    12   __package__ = "Orange.bio"
   15    13
   16       - from .obiGsea import takeClasses
   17       - from .obiAssess import pca, PLSCall, corgs_activity_score
   18    14   from . import obiExpression, obiGene, obiGeneSets, obiGsea, stats
         15 +
         16 +
         17 + def corgs_activity_score(ex, corg):
         18 +     """ activity score for a sample for pathway given by corgs """
         19 +     #print [ ex[i].value for i in corg ] #FIXME what to do with unknown values?
         20 +     return sum(ex[i].value if ex[i].value != '?' else 0.0 for i in corg)/len(corg)**0.5
         21 +
         22 +
         23 + def PLSCall(data, y=None, x=None, nc=None, weight=None, save_partial=False):
         24 +
         25 +     def normalize(vector):
         26 +         return vector / numpy.linalg.norm(vector)
         27 +
         28 +     if y == None:
         29 +         y = [ data.domain.classVar ]
         30 +     if x == None:
         31 +         x = [v for v in data.domain.variables if v not in y]
         32 +
         33 +     Ncomp = nc if nc is not None else len(x)
         34 +
         35 +     dataX = Orange.data.Table(Orange.data.Domain(x, False), data)
         36 +     dataY = Orange.data.Table(Orange.data.Domain(y, False), data)
         37 +
         38 +     # transformation to numpy arrays
         39 +     X = dataX.toNumpy()[0]
         40 +     Y = dataY.toNumpy()[0]
         41 +
         42 +     # data dimensions
         43 +     n, mx = numpy.shape(X)
         44 +     my = numpy.shape(Y)[1]
         45 +
         46 +     # Z-scores of original matrices
         47 +     YMean = numpy.mean(Y, axis = 0)
         48 +     XMean = numpy.mean(X, axis = 0)
         49 +
         50 +     X = (X-XMean)
         51 +     Y = (Y-YMean)
         52 +
         53 +     P = numpy.empty((mx,Ncomp))
         54 +     T = numpy.empty((n,Ncomp))
         55 +     W = numpy.empty((mx,Ncomp))
         56 +     E,F = X,Y
         57 +
         58 +     dot = numpy.dot
         59 +     norm = numpy.linalg.norm
         60 +
         61 +     #PLS1 - from Gutkin, shamir, Dror: SlimPLS
         62 +
         63 +     for i in range(Ncomp):
         64 +         w = dot(E.T,F)
         65 +         w = w/norm(w) #normalize w in Gutkin et al the do w*c, where c is 1/norm(w)
         66 +         t = dot(E, w) #t_i -> a row vector
         67 +         p = dot(E.T, t)/dot(t.T, t) #p_i t.T is a row vector - this is inner(t.T, t.T)
         68 +         q = dot(F.T, t)/dot(t.T, t) #q_i
         69 +
         70 +         E = E - dot(t, p.T)
         71 +         F = F - dot(t, q.T)
         72 +
         73 +         T[:,i] = t.T
         74 +         W[:,i] = w.T
         75 +         P[:,i] = p.T
         76 +
         77 +     return XMean, W, P, T
         78 +
         79 +
         80 + def pca(data, snapshot=0):
         81 +     "Perform PCA on M, return eigenvectors and eigenvalues, sorted."
         82 +     M = data.toNumpy("a")[0]
         83 +     XMean = numpy.mean(M, axis = 0)
         84 +     M = M - XMean
         85 +
         86 +     T, N = numpy.shape(M)
         87 +     # if there are less rows T than columns N, use snapshot method
         88 +     if (T < N) or snapshot:
         89 +         C = numpy.dot(M, numpy.transpose(M))
         90 +         evals, evecsC = numpy.linalg.eigh(C) #columns of evecsC are eigenvectors
         91 +         evecs = numpy.dot(M.T, evecsC)/numpy.sqrt(numpy.abs(evals))
         92 +     else:
         93 +         K = numpy.dot(numpy.transpose(M), M)
         94 +         evals, evecs = numpy.linalg.eigh(K)
         95 +
         96 +     evecs = numpy.transpose(evecs)
         97 +
         98 +     # sort the eigenvalues and eigenvectors, decending order
         99 +     order = (numpy.argsort(numpy.abs(evals))[::-1])
        100 +     evecs = numpy.take(evecs, order, 0)
        101 +     evals = numpy.take(evals, order)
        102 +     return evals, evecs, XMean
        103 +
   19   104
   20   105   class GeneSetTrans(object):
    …     …
   69   154
   70   155           #selection of classes and gene sets
   71       -         data = takeClasses(data, classValues=self.class_values)
        156 +         data = obiGsea.takeClasses(data, classValues=self.class_values)
   72   157           nm,_ = self._mat_ni(data)
   73   158           gene_sets = select_genesets(nm, self.gene_sets, self.min_size, self.max_size, self.min_part)
    …     …
  907   992       print '\n'.join([ a + ": " +str(b) for a,b in ol])
  908   993
  909       -     ass = LLR(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, normalize=True)
        994 +     #ass = LLR(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, normalize=True)
  910   995       #ass = LLR_slow(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0)
  911   996       ass = CORGs(data, matcher=matcher, gene_sets=gsets, class_values=choosen_cv, min_part=0.0, cv=True)
Note: See TracChangeset
for help on using the changeset viewer.