source: orange-bioinformatics/orangecontrib/bio/widgets/chipappx.py @ 1873:0810c5708cc5

Revision 1873:0810c5708cc5, 18.8 KB checked in by Ales Erjavec <ales.erjavec@…>, 6 months ago (diff)

Moved '_bioinformatics' into orangecontrib namespace.

Line 
1## Automatically adapted for numpy.oldnumeric Oct 04, 2007 by
2
3import math
4import numpy.oldnumeric as Numeric, numpy.oldnumeric.mlab as MLab
5import numpy.oldnumeric.linear_algebra as LinearAlgebra
6import scipy.stats
7
8class ApproxOrthPolyBasis:
9    """Approximation of expression profiles with orthogonal polynomials."""
10
11    # for filling missing values in F statistcs (0->p=0, 1e9->p=1)
12    _F_fillValue = 1e9
13
14    def __init__(self, points, k=None):
15        """sets the points for which the k polynomials of max. degree=k-1 are orthogonal
16        default k = number of points
17        """
18        self.points = points
19        if k == None:
20            self.k = len(points) #/ 2
21        else:
22            self.k = k
23        self.basis = OrthPolyBasis(self.points, self.k)
24
25
26    def getAppxCoef(self, arr2d, numCoef=None):
27        """returns approx. coeffients given the number of coefficients different from zero"""
28        assert len(arr2d.shape) == 2       
29        if numCoef == None:
30            numCoef = self.k
31        else:
32            assert 0 <= numCoef <= self.k, "number of coefficients not in range [0," + str(self.k) + "]"
33        coef = self.basis.getApproxCoeff(arr2d)
34        coef[:,numCoef:] = Numeric.zeros((coef.shape[0], coef.shape[1] - numCoef), Numeric.Float)
35        return coef
36
37
38    def coef_maxCut(self, appxCoef):
39        """returns the coefficients different from zero up to the abs. max. coefficient
40        where the first coefficient is excluded from finding the max.
41        accepts 2d matrix of coefficients where rows represent different curves
42        """
43        assert len(appxCoef.shape) == 2
44        k = Numeric.shape(appxCoef)[1]
45        maxInd = Numeric.argmax(Numeric.absolute(appxCoef[:,1:]),1) + 1
46        lowDiagOnes = Numeric.fromfunction(lambda i,j: i>=j, (k,k))
47        coefSelector = Numeric.take(lowDiagOnes, maxInd, 0)
48        return appxCoef*coefSelector
49
50
51    def getAppxCurve(self, appxCoef, points=None):
52        """returns polynomial curve approximation evaluated at given points and given the approx. coefficients
53        accepts 2d matrix of coefficients where rows represent different curves
54        """
55        assert len(appxCoef.shape) == 2
56        assert points == None or len(points) > 0, "None or list of points expected"
57        return self.basis.evalApproxPoly(appxCoef, points)
58
59
60    def getAppxCoef3d(self, arr3d, numCoef=None, maxCut=False):
61        """returns given number of approx. coefficeints cutted-off at the maximal value (excluding the first one)
62        """
63        assert len(arr3d.shape) == 3
64        if numCoef == None:
65            numCoef = self.k
66        else:
67            assert 0 < numCoef <= self.k, "number of coefficients not in range [1," + str(self.k) + "]"
68        coef3d = Numeric.zeros((arr3d.shape[0], self.k, arr3d.shape[2]), Numeric.Float)
69        if maxCut:
70            for idx2 in range(arr3d.shape[2]):
71                coef3d[:,:,idx2] = self.coef_maxCut(self.getAppxCoef(arr3d[:,:,idx2], numCoef=numCoef))
72        else:
73            for idx2 in range(arr3d.shape[2]):
74                coef3d[:,:,idx2] = self.getAppxCoef(arr3d[:,:,idx2], numCoef=numCoef)
75        return coef3d
76
77
78    def getAppxCurve3d(self, appxCoef3d, points=None):
79        """returns appx. curves given 3d appx. coefficients in axis 1
80        """
81        assert len(appxCoef3d.shape) == 3
82        assert points == None or len(points) > 0, "None or list of points expected"
83        if points == None:
84            points = self.points
85        curve3d = Numeric.zeros((appxCoef3d.shape[0], len(points), appxCoef3d.shape[2]), Numeric.Float)
86        for idx2 in range(appxCoef3d.shape[2]):
87            curve3d[:,:,idx2] = self.getAppxCurve(appxCoef3d[:,:,idx2], points)
88        return curve3d
89
90
91    def getAppxCoef2d_pVals(self, arr2d, maxNumCoef):
92        """Returns[:,j] the probability that coef. j and all subsequent coef. are equal to 0.
93        Reference: Ott, pp.606.
94        Use only the first k appx. coef. with pvals below some alpha.
95       
96        The significance of the coef. estimated by comparing the variance drop of the model2 with the variance of the model1, where:
97          model 1: complete model with maxNumCoef coef. different from 0
98          model 2: reduced model with coef. in range (0,k) different from 0
99        null hypothesis (H0): coef. k,k+1,...,maxNumCoef are equal to 0
100            if H0 rejected (pval below some alpha (e.g. 0.05) -> there exist an important coef. among coef. k,k+1,...,maxNumCoef
101        repeat the test for k=0,1,...maxNumCoef-1
102        """
103        assert len(arr2d.shape) == 2, "2d array expected"
104        assert 0 < maxNumCoef <= self.k
105        coefMax = self.getAppxCoef(arr2d, maxNumCoef)
106        curveMax = self.getAppxCurve(coefMax)
107        SSE1 = Numeric.add.reduce((arr2d-curveMax)**2,1)
108        MSE1 = SSE1 / (arr2d.shape[1]-maxNumCoef)
109        #print "SSE1[0], MSE1[0]",SSE1[0], MSE1[0]
110        pvals = Numeric.zeros((arr2d.shape[0], maxNumCoef), Numeric.Float)
111        for k in range(maxNumCoef):  # test cofInd: [maxNum-1, maxNum-2, ..., minNum]
112            #print "Keeping %i coeff" % (k)
113            shpk = list(coefMax.shape)
114            shpk[1] -= k
115            coefk = Numeric.concatenate((coefMax[:,:k], Numeric.zeros((shpk), Numeric.Float)),1)
116            curvek = self.getAppxCurve(coefk)
117            SSE2 = Numeric.add.reduce((arr2d-curvek)**2,1)
118            MSdrop =(SSE2-SSE1) / (maxNumCoef-k)
119            F = MSdrop / MSE1
120            #2007-10-11: F -> F.filled(???)
121            pvals[:,k] = scipy.stats.fprob((maxNumCoef-k), arr2d.shape[1]-maxNumCoef, F.filled(ApproxOrthPolyBasis._F_fillValue))
122        return pvals
123
124
125    def getAppxCoef2d_significant(self, arr2d, maxNumCoef, alpha):
126        """Returns[:,j] approx. coeffcients with p-value < alpha; subsequent coef. are equal to 0.
127        Reference: Ott, pp.606.
128       
129        The significance of the coef. estimated by comparing the variance drop of the model2 with the variance of the model1, where:
130          model 1: complete model with maxNumCoef coef. different from 0
131          model 2: reduced model with coef. in range (0,k) different from 0
132        null hypothesis (H0): coef. k,k+1,...,maxNumCoef are equal to 0
133            if H0 rejected (pval below some alpha (e.g. 0.05) -> there exist an important coef. among coef. k,k+1,...,maxNumCoef
134        repeat the test for k=0,1,...maxNumCoef-1
135        """
136        assert len(arr2d.shape) == 2, "2d array expected"
137        assert 0 < maxNumCoef <= self.k
138        coefMax = self.getAppxCoef(arr2d, maxNumCoef)
139        curveMax = self.getAppxCurve(coefMax)
140        SSE1 = Numeric.add.reduce((arr2d-curveMax)**2,1)
141        MSE1 = SSE1 / (arr2d.shape[1]-maxNumCoef)
142        #print "SSE1[0], MSE1[0]",SSE1[0], MSE1[0]
143        pvals = Numeric.zeros((arr2d.shape[0], maxNumCoef), Numeric.Float)
144        for k in range(maxNumCoef):  # test cofInd: [maxNum-1, maxNum-2, ..., minNum]
145            #print "Keeping %i coeff" % (k)
146            shpk = list(coefMax.shape)
147            shpk[1] -= k
148            coefk = Numeric.concatenate((coefMax[:,:k], Numeric.zeros((shpk), Numeric.Float)),1)
149            curvek = self.getAppxCurve(coefk)
150            SSE2 = Numeric.add.reduce((arr2d-curvek)**2,1)
151            MSdrop =(SSE2-SSE1) / (maxNumCoef-k)
152            F = MSdrop / MSE1
153            #2007-10-11: F -> F.filled(???)
154            pvals[:,k] = scipy.stats.fprob((maxNumCoef-k), arr2d.shape[1]-maxNumCoef, F.filled(ApproxOrthPolyBasis._F_fillValue))
155        pvals = Numeric.where(pvals > alpha, Numeric.resize(Numeric.arange(pvals.shape[1]),pvals.shape), pvals.shape[1])    # MAX where significant, idx where nonsignificant
156        firstNonSignIdx = MLab.min(pvals, 1)    # idx of the first non-significant coef.
157        coefSign = Numeric.zeros(coefMax.shape, Numeric.Float)
158        for idx in range(coefSign.shape[1]):
159            coefSign[:,idx] = Numeric.where(idx<firstNonSignIdx, coefMax[:,idx], 0)
160        return coefSign
161       
162   
163
164class OrthPolyBasis:
165    """Orthogonal polynomials on a set of distinct points of degree 0 to k-1.
166    - best satisfied for x=Numeric.arange(0,4,step) for arbitrary step
167    - fragile with increasing k and increasing range of x -> for large i T[i] tends to overflow
168    - orthonormal basis not fragile to increasing range of x
169    - points mapped to [-1,1]
170    TODO: plot() should be smoother !!!
171    DONE: go to interval [-1,1], test that k=100 works!!!
172    """
173    NORM_NONE = 0           # do not normalize the basis polynomials
174    NORM_NORM = 1           # generate orthonormal basis: divide base polynomial by sqrt sum squared values at approx. points
175    NORM_NORM_T0_1 = 2      # same as NORM_NORM + the first polynomial = 1
176    NORM_END1 = 3           # value of all polynomials at the last point equals 1
177
178    def __init__(self, points, k, normalization=NORM_NORM_T0_1, force=False):
179        """
180        calculate k polynomials of degree 0 to k-1 orthogonal on a set of distinct points
181        map points to interval [-1,1]
182        INPUT:  points: array of dictinct points where polynomials are orthogonal
183                k: number of polynomials of degree 0 to k-1
184                force=True creates basis even if orthogonality is not satisfied due to numerical error
185        USES:   x: array of points mapped to [-1,1]
186                T_: matrix of values of polynomials calculated at x, shape (k,len(x))
187                TT_ = T_ * Numeric.transpose(T_)
188                TTinv_ = inverse(TT_)
189                sc_: scaling factors
190                a, b: coefficients for calculating T (2k-4 different from 0, i.e. 6 for k=5)
191                n: number of points = len(points)
192                normalization = {0|1|2}
193        """
194        self.k = k  # number of basis polynomials of order 0 to k-1
195        self._force = force
196        self.points = Numeric.asarray(points, Numeric.Float)
197        self.pointsMin = min(points)
198        self.pointsMax = max(points)
199        # scaling x to [-1,1] results in smaller a and b, T is not affected; overflow is NOT a problem!
200        self.xMin = -1
201        self.xMax = 1
202        self.x = self._map(self.points, self.pointsMin, self.pointsMax, self.xMin, self.xMax)
203        # calculate basis polynomials
204        self.n = len(points) # the number of approximation points
205        t = Numeric.zeros((k,self.n),Numeric.Float)
206        a = Numeric.zeros((k,1),Numeric.Float)
207        b = Numeric.zeros((k,1),Numeric.Float)
208        t[0,:] = Numeric.ones(self.n,Numeric.Float)
209        if k > 1: t[1,:] = self.x - sum(self.x)/self.n
210        for i in range(1,k-1):
211            a[i+1] = Numeric.innerproduct(self.x, t[i,:] * t[i,:]) / Numeric.innerproduct(t[i,:],t[i,:])
212            b[i] = Numeric.innerproduct(t[i,:], t[i,:]) / Numeric.innerproduct(t[i-1,:],t[i-1,:])
213            t[i+1,:] = (self.x - a[i+1]) * t[i,:] - b[i] * t[i-1,:]
214        self.a = a
215        self.b = b
216        # prepare for approximation
217        self._T0 = t
218        # orthonormal
219        _TT0 = Numeric.matrixmultiply(self._T0, Numeric.transpose(self._T0))
220        self.sc1 = Numeric.sqrt(Numeric.reshape(Numeric.diagonal(_TT0),(self.k,1))) # scaling factors = sqrt sum squared self._T0
221        self._T1 = self._T0 / self.sc1
222        # orthonormal and T[0] == 1
223        self.sc2 = Numeric.sqrt(Numeric.reshape(Numeric.diagonal(_TT0),(self.k,1)) / self.n) # scaling factors = sqrt 1/n * sum squared self._T0
224        self._T2 = self._T0 / self.sc2
225        # T[:,-1] == 1
226        self.sc3 = Numeric.take(self._T0, (-1,), 1) # scaling factors = self._T0[:,-1]
227        self._T3 = self._T0 / self.sc3
228        # set the variables according to the chosen normalization
229        self.setNormalization(normalization)
230
231
232    def getNormalization(self):
233        return self._normalization
234
235
236    def setNormalization(self, normalization):
237        if normalization == OrthPolyBasis.NORM_NONE:
238            self.T = self._T0
239        elif normalization == OrthPolyBasis.NORM_NORM:
240            self.T = self._T1
241        elif normalization == OrthPolyBasis.NORM_NORM_T0_1:
242            self.T = self._T2
243        elif normalization == OrthPolyBasis.NORM_END1:
244            self.T = self._T3
245        else:
246            raise "Error: unknown normalization: " + str(normalization)
247        self.TT = Numeric.matrixmultiply(self.T, Numeric.transpose(self.T))
248        self.TTinv = LinearAlgebra.inverse(self.TT)
249        self.TTinvT = Numeric.matrixmultiply(self.TTinv, self.T)
250        self.basisCoef = self._getBasisCoef(self.x, self.T)
251        self._normalization = normalization
252        self._checkOrth(self.T, self.TT, output = self._force)
253##        print "self.T", self.T
254##        print "self.TT", self.TT
255##        print "self.TTinv", self.TTinv
256##        print "self.TTinvT", self.TTinvT
257       
258       
259
260    def _checkOrth(self, T, TT, eps=0.0001, output=False):
261        """check if the basis is orthogonal on a set of points x: TT == T*transpose(T) == c*Identity
262        INPUT:  T: matrix of values of polynomials calculated at common reference points (x)
263                TT = T * transpose(T)
264                eps: max numeric error
265        """
266        TTd0 = (-1.*Numeric.identity(Numeric.shape(TT)[0])+1) * TT  # TTd0 = TT with 0s on the main diagonal
267        s = Numeric.sum(Numeric.sum(Numeric.absolute(TTd0)))
268        minT = MLab.min(MLab.min(T))
269        maxT = MLab.max(MLab.max(T))
270        minTTd0 = MLab.min(MLab.min(TTd0))
271        maxTTd0 = MLab.max(MLab.max(TTd0))
272        if not s < eps:
273            out = "NOT ORTHOG, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
274            out += "  min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (minTTd0, maxTTd0, s)
275            if output:
276                print out
277                return False
278            else:
279                raise out
280        elif output:
281            out = "ORTHOGONAL, min(T), max(T):\t%f\t%f\n" % (minT, maxT)
282            out += "  min(TTd0), max(TTd0), sum-abs-el(TTd0):\t%f\t%f\t%f" % (minTTd0, maxTTd0, s)
283            print out
284        return True
285
286
287    def zipab(self):
288        return zip(Numeric.reshape(self.a,(len(self.a),)).tolist(),Numeric.reshape(self.b, (len(self.b),)).tolist())
289
290
291    def _map(self, x, a,b, m,M):
292        """maps x from the interval [a,b] to the interval [m,M]"""
293        x = Numeric.asarray(x, Numeric.Float)
294        return (M-m)*(x-a)/(b-a) + m
295
296
297    #--------APPROXIMATION-------------------------
298
299
300    def getApproxCoeff(self, curves):
301        """curves: 1d or 2d array where each curve in separate line, e.g. curves[curveIdx,timePoints]"""
302##        print "curves"
303##        print curves
304##        print
305##        print "Numeric.transpose(self.TTinvT)"
306##        print Numeric.transpose(self.TTinvT)
307##        print
308##        print "Numeric.matrixmultiply(curves, Numeric.transpose(self.TTinvT))"
309##        print Numeric.matrixmultiply(curves, Numeric.transpose(self.TTinvT))
310        return Numeric.matrixmultiply(curves, Numeric.transpose(self.TTinvT))
311
312
313    #--------EVALUATION OF POLYNOMIAL-------------------------
314
315    def _getBasisCoef(self, x, T):
316        """returns coefficients of basis polynomials given T, i.e. their values at x: (poly0, xi)
317        where polynomials in rows, coefficients [a0,a1,...ak] where k=len(x)-1
318        similar goes for T
319        """
320        numPoints = T.shape[1]  # number of points that polynomials are calculated == len(x)
321        assert len(x) == numPoints, "len(x)=" + str(x) + ", T.shape[1]=" + str(numPoints) + " do not match"
322        numLinSys = T.shape[0]  # number of polynomials
323        lin_sys_b = Numeric.transpose(T)
324        lin_sys_a = Numeric.ones((numPoints, numPoints), Numeric.Float)
325        lin_sys_a[:,1] = x
326        for idx in range(2,numPoints):
327            lin_sys_a[:,idx] = lin_sys_a[:,idx-1] * x
328        return Numeric.transpose(LinearAlgebra.solve_linear_equations(lin_sys_a, lin_sys_b))
329
330
331    def _evalPolyHorner(self, polyCoef, x):
332        """returns (#poly, #x) polynomials evaluated at x where:
333            - result: polynomials in rows, values at x in columns: [f(x0)...f(xn)]
334            - polyCoef: polynomials in rows, coefficients in columns, sorted by increasing power of x: [c0, c1, ...]
335        uses Horner's rule for polynomial evaluation
336        """
337        x = Numeric.asarray(x, Numeric.Float)
338        one_poly = len(polyCoef.shape) == 1
339        if one_poly:
340            polyCoef = polyCoef[Numeric.NewAxis,:] # [polyIdx, coefIdx]
341        val = Numeric.zeros((polyCoef.shape[0],len(x)), Numeric.Float)  # [polyIdx, xIdx]
342        for idx in range(polyCoef.shape[1]-1,0,-1):
343            val = (val + polyCoef[:,idx:idx+1]) * x
344        val += polyCoef[:,0:1]
345        if one_poly:
346            return val[0,:]
347        else:
348            return val
349
350
351    def evalApproxPoly(self, appxCoef, points=None):
352        """returns (#curve, #points) an approx. polynomials calculated at points given approx. coeff in rows:
353            - appxCoef: curves for approximation in rows, appxCoef in columns [B0, B1, ...]
354            - points relative to self.points
355        TODO: evaluate on a matrix of appxCoef
356        """
357        if points == None:
358            return Numeric.matrixmultiply(appxCoef, self.T)
359        appxCoef = Numeric.asarray(appxCoef, Numeric.Float)
360        one_curve = len(appxCoef.shape) == 1
361        if one_curve:
362            appxCoef = appxCoef[Numeric.NewAxis,:]  # [curveIdx, coefIdx]
363        mappedPoints = self._map(points, self.pointsMin, self.pointsMax, self.xMin, self.xMax)
364        # eval basis polynomials on mapped points
365        basisEval = self._evalPolyHorner(self.basisCoef, mappedPoints) #[basisIdx == coefIdx, pointIdx]
366        result = Numeric.matrixmultiply(appxCoef, basisEval)
367        if one_curve:
368            return result[0,:]
369        else:
370            return result
371
372
373class TrigonomerticBasis:
374    """Approximation of expression profiles with trigonometric functions orthogonal polynomials."""
375
376    def __init__(self, numPoints, k):
377        """numPoints: number of approximation points; k: number of basis functions [2,...,numPoints]"""
378        self.numPoints = numPoints
379        self.k = k
380##        assert k > 1, "Error TrigonomerticBasis: k <= 1"
381        assert k <= numPoints, "Error TrigonomerticBasis: k > numPoints"
382        # evaluate trigonometric basis functions on the given number of points from [-pi,pi]
383        self.x = Numeric.arange(-1*math.pi, math.pi+0.0000001, 2*math.pi/(numPoints-1))
384        self.y = Numeric.ones((k, numPoints), Numeric.Float)
385        for kk in range(1, k, 2):
386##            print "kk, cos %ix" % ((kk+1)/2.)
387            self.y[kk] = MLab.cos(self.x*(kk+1)/2) 
388        for kk in range(2, k, 2):
389##            print "kk, sin %ix" % (kk/2.)
390            self.y[kk] = MLab.sin(self.x*kk/2)
391        # approx. matrix
392        self.Ainv = LinearAlgebra.inverse(Numeric.matrixmultiply(self.y, Numeric.transpose(self.y)))
393        self.yyTinvy = Numeric.matrixmultiply(LinearAlgebra.inverse(Numeric.matrixmultiply(self.y, Numeric.transpose(self.y))), self.y)
394
395    def getAppxCoef(self, curves):
396        return Numeric.matrixmultiply(curves, Numeric.transpose(self.yyTinvy))
397
398    def getAppxCurve(self, appxCoef):
399        return Numeric.matrixmultiply(appxCoef, self.y)
400
Note: See TracBrowser for help on using the repository browser.