source: orange-bioinformatics/orangecontrib/bio/widgets/numpyExtn.py @ 1873:0810c5708cc5

Revision 1873:0810c5708cc5, 37.0 KB checked in by Ales Erjavec <ales.erjavec@…>, 6 months ago (diff)

Moved '_bioinformatics' into orangecontrib namespace.

Line 
1###################################################################################
2## numpy extension by PJ (peter.juvan@fri.uni-lj.si)
3##
4## TODO: fix default axis asignment (in several funcions 0 is the dedault):
5##      - for numpy default is None
6##      - for numpy.me default is still -1 (will this be fixed in future releases?)
7###################################################################################
8
9import numpy
10
11###################################################################################
12## pretty print
13###################################################################################
14
15def strSimMtrx(m, names, numPos=None, sort=False):
16    """ print similarity matrix with fixed number of positions,
17    appends names, optionally sorts names and m
18    """
19    assert len(names) == m.shape[0]
20    assert len(m.shape) == 2
21    m = numpy.asarray(m)
22    if sort == True:
23        nameIndList = zip(names, range(len(names)))
24        nameIndList.sort()
25        names = map(lambda x: x[0], nameIndList)
26        sortArgs = map(lambda x: x[1], nameIndList)
27        m = m[sortArgs][:,sortArgs]
28    out = ""
29    if numPos == None:
30        numPos = max(map(lambda n: len(n), names))
31    # number of decimal places
32    numWhole = len(str(int(numpy.max(m))))
33    numDec = numPos - numWhole - 1
34    if numDec < 0:
35        numDec = 0
36    # print header row
37    printStr = "%%s %%%is" % numPos
38    out += reduce(lambda a,b: printStr % (a, b[:numPos]), names, " "*numPos)
39    # print other rows
40    printStrName = "%%%is" % numPos
41    printStrMtrx2 = "%%.%if" % numDec
42    for rowIdx,row in enumerate(m):
43        rowStr = map(lambda x: printStrMtrx2 % x, row)
44        out += "\n%s" % reduce(lambda a,b: printStr % (a, b[:numPos]), rowStr, printStrName % names[rowIdx][:numPos])
45    return out
46
47
48###################################################################################
49## spearman r / pearson r between values which are present in both arrays
50## uses scipy.stats
51###################################################################################
52
53def spearmanrMA(m1, m2):
54    # calculate spearmanr between values which are present in both arrays
55    try:
56        import scipy.stats
57    except ImportError:
58        print "scipy.stats not found; you can download it from www.scipy.org"
59    nomask12 = numpy.logical_not(numpy.logical_or(numpy.ma.getmaskarray(m1), numpy.ma.getmaskarray(m2)))
60    return scipy.stats.spearmanr(m1.compress(nomask12), m2.compress(nomask12))
61
62def pearsonrMA(m1, m2):
63    # calculate pearson between values which are present in both arrays
64    try:
65        import scipy.stats
66    except ImportError:
67        print "scipy.stats not found; you can download it from www.scipy.org"
68    nomask12 = numpy.logical_not(numpy.logical_or(numpy.ma.getmaskarray(m1), numpy.ma.getmaskarray(m2)))
69    return scipy.stats.pearsonr(m1.compress(nomask12), m2.compress(nomask12))
70
71
72###################################################################################
73## argmaxMA(m,axis=-1)
74## replacement for numpy.ma.argmax which does not work with masked arrays:
75##  numpy.ma.argmax(numpy.ma.asarray([1,2])*numpy.ma.masked) => 0
76##  numpyextn.argmax(numpy.ma.asarray([1,2])*numpy.ma.masked) => numpy.ma.masked
77###################################################################################
78
79def argmaxMA(m,axis=-1):
80    return numpy.ma.where(numpy.all(numpy.ma.getmaskarray(m),axis), numpy.ma.masked, numpy.ma.argmax(m,axis))
81
82
83###################################################################################
84## write to tab delimited file, replace masked data by "?" by default
85###################################################################################
86
87def tofileTabMa(m, fileName, fill="?"):
88    assert numpy.rank(m) == 2
89    m = numpy.ma.asarray(m)
90    ms = numpy.asarray(numpy.ma.filled(m), "str")
91    mss = numpy.where(numpy.ma.getmaskarray(m), fill, ms)
92    fd = file(fileName, "w")
93    for row in mss:
94        fd.write("%s\n" % reduce(lambda a,b: "%s\t%s" % (a,b), row))
95    fd.close()
96
97
98###################################################################################
99## median
100###################################################################################
101
102def median(m,axis=0):
103    """Returns median of the given nonmasked m array along the given axis.
104    """
105    if numpy.ma.isMA(m):
106       raise TypeError, "wrong type for m (%s), try using medianMA instead" % type(m).__name__
107    m = numpy.asarray(m, numpy.float)
108    # do not use swapaxes: (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
109    m = numpy.transpose(m, [axis]+range(axis)+range(axis+1,numpy.rank(m)))
110    return numpy.median(m)
111
112
113def medianMA(m,axis=0):
114    """Returns median of the given masked array m along the given axis.
115    """
116    return percentilesMa(m,0.5,axis)
117
118
119def percentilesMa(m,perc,axis=0):
120    """Returns the percentiles of the given masked array m along the given axis.
121    """
122    assert 0 < perc < 1
123    m = numpy.ma.asarray(m, numpy.float)
124    # 2008-06-23: BUG: does not work with negative axis values: 0 axis is always taken
125    # assert -len(m.shape) <= axis < len(m.shape)
126    assert 0 <= axis < len(m.shape)
127    # 2008-06-23 BUG: at arbitrary axis, a single element is just returned
128    ##    if len(m.shape) == 0 or (len(m.shape)==1 and m.shape[0]==1):
129    if m.shape[axis] == 1:
130        return m[axis]
131    elif len(m.shape) == 1:
132        # returns (1,) array or ma.masked
133        mCount = numpy.ma.count(m)
134        if mCount == 0:
135            return numpy.ma.masked
136        elif mCount == 1:
137            return m.compressed()[0]
138        else:
139            _k = float(perc)*(mCount-1)
140            k = int(_k)
141            d = _k - k
142            mIndSort = numpy.ma.argsort(m,fill_value=1e20)
143            return m[mIndSort[k]] + d*(m[mIndSort[k+1]]-m[mIndSort[k]])
144    else:
145        # count the number of nonmasked indices along the given axis
146        _k = float(perc) * (numpy.ma.count(m, axis=axis)-1)
147        k = _k.astype(numpy.int)
148        d = _k - k
149        # prepare indices for other axis (except for the given)
150        cind = numpy.ma.indices(numpy.ma.shape(k))
151        # indices of m sorted by increasing value at axis 0; masked values placed at the end;
152        mtIndSort = numpy.ma.argsort(m,axis,fill_value=1e20)
153        # get indices to address median elements from m
154        # tuple(takeInd1): all lists in the tuple must be of the same shape,
155        #   the result mtIndSort[tuple(takeInd1)] is also of that shape
156        takeInd1 = cind.tolist()
157        takeInd1.insert(axis,k.tolist())
158        medInd1 = mtIndSort[tuple(takeInd1)]
159        takeInd2 = cind.tolist()
160        takeInd2.insert(axis,(k+1).tolist())
161        medInd2 = mtIndSort[tuple(takeInd2)]
162        # get both median elements; if c[i] odd: med1[i] == med2[i]
163        takeMed1 = cind.tolist()
164        takeMed1.insert(axis,medInd1.tolist())
165        med1 = m[tuple(takeMed1)]
166        takeMed2 = cind.tolist()
167        takeMed2.insert(axis,medInd2.tolist())
168        med2 = m[tuple(takeMed2)]
169##        if __name__=="__main__":
170##            print "m\n",numpy.ma.filled(m,-1)
171##            print "k", k
172##            print "[(k-1)/2,k/2]", [(k-1)/2,k/2]
173##            print "cind\n",cind
174##            print "mtIndSort\n",mtIndSort
175##            print "medInd1\n",medInd1
176##            print "medInd2\n",medInd2
177##            print "med1\n",med1
178##            print "med2\n",med2
179        return med1 + numpy.ma.filled(d*(med2-med1),0)
180
181if __name__ == "__main__":
182    # 2008-06-23: bug with negative indices (not fixed, added assertion
183##    print "first +", medianMA(numpy.asarray([[10,1],[2,11]]),0)
184##    print "first -", medianMA(numpy.asarray([[10,1],[2,11]]),-2)
185##    print "second +", medianMA(numpy.asarray([[10,1],[2,11]]),1)
186##    print "second -", medianMA(numpy.asarray([[10,1],[2,11]]),-1)
187    # 2008-06-23: bug with medianMA(numpy.asarray([[149.0, 87.0]]), 0)
188    print medianMA(numpy.asarray([149.0, 87.0]), 0)
189    print medianMA(numpy.asarray([[149.0, 87.0]]), 0)
190    print medianMA(numpy.asarray([[149.0, 87.0]]), 1)
191
192
193## Automatically adapted for numpy.oldnumeric Oct 04, 2007 by PJ
194
195import math
196import numpy.oldnumeric as Numeric
197import numpy.oldnumeric.ma as MA
198import numpy.oldnumeric.mlab as MLab, numpy.oldnumeric.linear_algebra as LinearAlgebra
199
200
201#####################################################################################
202## LOWESS (from Biopython, extended to interpolation/extrapolation)
203#####################################################################################
204
205def lowess2(x, y, xest, f=2./3., iter=3):
206    """Returns estimated values of y in data points xest (or None if estimation fails).
207    Lowess smoother: Robust locally weighted regression.
208    The lowess function fits a nonparametric regression curve to a scatterplot.
209    The arrays x and y contain an equal number of elements; each pair
210    (x[i], y[i]) defines a data point in the scatterplot. The function returns
211    the estimated (smooth) values of y.
212
213    The smoothing span is given by f. A larger value for f will result in a
214    smoother curve. The number of robustifying iterations is given by iter. The
215    function will run faster with a smaller number of iterations."""
216    x = Numeric.asarray(x, 'd')
217    y = Numeric.asarray(y, 'd')
218    xest = Numeric.asarray(xest, 'd')
219    n = len(x)
220    nest = len(xest)
221    r = min(int(Numeric.ceil(f*n)),n-1) # radius: num. of points to take into LR
222    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]    # distance of the r-th point from x[i]
223    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h),0.0,1.0)
224    w = 1-w*w*w
225    w = w*w*w
226    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]    # r-th min. distance from xest[i] to x
227    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest),0.0,1.0)  # shape: (len(x), len(xest)
228    west = 1-west*west*west
229    west = west*west*west
230    yest = Numeric.zeros(n,'d')
231    yest2 = Numeric.zeros(nest,'d')
232    delta = Numeric.ones(n,'d')
233    try:
234        for iteration in range(iter):
235            # fit xest
236            for i in range(nest):
237                weights = delta * west[:,i]
238                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
239                A = Numeric.array([[sum(weights), sum(weights*x)], [sum(weights*x), sum(weights*x*x)]])
240                beta = LinearAlgebra.solve_linear_equations(A,b)
241                yest2[i] = beta[0] + beta[1]*xest[i]
242            # fit x (to calculate residuals and delta)
243            for i in range(n):
244                weights = delta * w[:,i]
245                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
246                A = Numeric.array([[sum(weights), sum(weights*x)], [sum(weights*x), sum(weights*x*x)]])
247                beta = LinearAlgebra.solve_linear_equations(A,b)
248                yest[i] = beta[0] + beta[1]*x[i]
249            residuals = y-yest
250            s = MLab.median(abs(residuals))
251            delta = Numeric.clip(residuals/(6*s),-1,1)
252            delta = 1-delta*delta
253            delta = delta*delta
254    except LinearAlgebra.LinAlgError:
255        print "Warning: NumExtn.lowess2: LinearAlgebra.solve_linear_equations: Singular matrix"
256        yest2 = None
257    return yest2
258
259##x = Numeric.array([0,2.1,4.5, 6.], 'd')
260##xs = Numeric.array([0,1,2,3,4,5,6], 'd')
261##y = Numeric.array([0.8, 2.3, 2.9, 3.1], 'd')
262##f = 2./3.
263##iter=3
264##import Bio.Statistics.lowess
265##print Bio.Statistics.lowess.lowess(x, y, f, iter)
266##print lowess2(x, y, xs, f, iter)
267
268def lowessW(x, y, xest, f=2./3., iter=3, dWeights=None, callback=None):
269    """Returns estimated values of y in data points xest (or None if estimation fails).
270    Lowess smoother: Robust locally weighted regression.
271    The lowess function fits a nonparametric regression curve to a scatterplot.
272    The arrays x and y contain an equal number of elements; each pair
273    (x[i], y[i]) defines a data point in the scatterplot. The function returns
274    the estimated (smooth) values of y.
275
276    The smoothing span is given by f. A larger value for f will result in a
277    smoother curve. The number of robustifying iterations is given by iter. The
278    function will run faster with a smaller number of iterations.
279
280    Data points may be assigned weights; if None, all weights equal 1.
281    """
282    x = Numeric.asarray(x, 'd')
283    y = Numeric.asarray(y, 'd')
284    xest = Numeric.asarray(xest, 'd')
285    n = len(x)
286    if n <> len(y):
287        raise AttributeError, "Error: lowessW(x,y,xest,f,iter,dWeights): len(x)=%i not equal to len(y)=%i" % (len(x), len(y))
288    nest = len(xest)
289    # weights of data points (optional)
290    if dWeights <> None:
291        dWeights = Numeric.asarray(dWeights, 'd')
292        if len(dWeights) <> n:
293            raise AttributeError, "Error: lowessW(x,y,xest,f,iter,dWeights): len(dWeights)=%i not equal to len(x)=%i" % (len(dWeights), len(x))
294##        dWeights = dWeights.reshape((n,1))
295    else:
296##        dWeights = Numeric.ones((n,1))
297        dWeights = Numeric.ones((n,))
298    r = min(int(Numeric.ceil(f*n)),n-1) # radius: num. of points to take into LR
299    h = [Numeric.sort(abs(x-x[i]))[r] for i in range(n)]    # distance of the r-th point from x[i]
300    w = Numeric.clip(abs(([x]-Numeric.transpose([x]))/h),0.0,1.0)
301    w = 1-w*w*w
302    w = w*w*w
303    hest = [Numeric.sort(abs(x-xest[i]))[r] for i in range(nest)]    # r-th min. distance from xest[i] to x
304    west = Numeric.clip(abs(([xest]-Numeric.transpose([x]))/hest),0.0,1.0)  # shape: (len(x), len(xest))
305    west = 1-west*west*west
306    west = west*west*west
307    yest = Numeric.zeros(n,'d')
308    yest2 = Numeric.zeros(nest,'d')
309    delta = Numeric.ones(n,'d')
310    try:
311        for iteration in range(int(iter)):
312            # fit xest
313            for i in range(nest):
314##                print delta.shape, west[:,i].shape, dWeights.shape
315                weights = delta * west[:,i] * dWeights
316                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
317                A = Numeric.array([[sum(weights), sum(weights*x)], [sum(weights*x), sum(weights*x*x)]])
318                beta = LinearAlgebra.solve_linear_equations(A,b)
319                yest2[i] = beta[0] + beta[1]*xest[i]
320            # fit x (to calculate residuals and delta)
321            for i in range(n):
322                weights = delta * w[:,i] * dWeights
323                b = Numeric.array([sum(weights*y), sum(weights*y*x)])
324                A = Numeric.array([[sum(weights), sum(weights*x)], [sum(weights*x), sum(weights*x*x)]])
325                beta = LinearAlgebra.solve_linear_equations(A,b)
326                yest[i] = beta[0] + beta[1]*x[i]
327            residuals = y-yest
328            s = MLab.median(abs(residuals))
329            delta = Numeric.clip(residuals/(6*s),-1,1)
330            delta = 1-delta*delta
331            delta = delta*delta
332            if callback: callback()
333    except LinearAlgebra.LinAlgError:
334        print "Warning: NumExtn.lowessW: LinearAlgebra.solve_linear_equations: Singular matrix"
335        yest2 = None
336    return yest2
337
338
339##if __name__ == "__main__":
340##    x1 = numpy.asarray([0,1,2,3,4,5,6,7,8,9])
341##    xe1 = numpy.asarray([-1,0,1,2,3,4,5,6,7,8,9,10])
342##    xe2 = numpy.asarray([4,5,6])
343##    y1 = numpy.asarray([0.1, 0.9, 2.05, 3.11, 3.99, 4.95, 5.88, 6.5, 6.8, 7])
344##    w1 = numpy.asarray([1,   0.1,1,    1,    0.1, 0.1, 0.1, 0.1,0.1,0.1])
345##
346##    l2 = lowess2(x1, y1, xe1, f=0.3, iter=3)
347##    print l2
348##    l3 = lowessW(x1, y1, xe2, f=0.3, iter=3, dWeights=w1)
349##    print l3 #, "\n", w3, "\n", west3
350
351
352
353#####################################################################################
354#####################################################################################
355
356def denary2Binary(n):
357    '''convert denary integer n to binary string bStr'''
358    bStr = ''
359    if n < 0:  raise ValueError, "must be a positive integer"
360    if n == 0: return '0'
361    while n > 0:
362        bStr = str(n % 2) + bStr
363        n = n >> 1
364    return bStr
365
366
367#####################################################################################
368## position2index:
369##  given the position of an element in a flat array
370##  get the indices of that element in an array of given shape
371##
372## getIndices(m,val):
373##  Input: m=[[1,2],[2,3]], val=2
374##  Output: Numeric.array([(0,1),(1,0)])
375#####################################################################################
376
377def position2index(n, shape):
378    """Given a position in a flat array (n), returns indices in an array of given shape.
379    """
380    raise DeprecationWarning, "Depricated, use positions2indices(pos, shape) instead."
381    if n < 0:  raise ValueError, "must be a positive integer"
382    shapeL = list(shape)
383    shapeL.reverse()
384    ind = []
385    for shp in shapeL:
386        ind = [n % shp] + ind
387        n = int(n / shp)
388    return ind
389
390
391def positions2indices(pos, shape):
392    """Given postions and shape, returns indices shaped (numPositions, len(shape));
393    e.g.: pos=[0,2,8], shape=(3,3) ->  [[0,0],[2,0],[2,2]]
394    """
395    lenShape = len(shape)
396    pos = numpy.asarray(pos)
397    if pos.shape[0] == 0:
398        return numpy.empty([0] + list(shape[1:]))
399    posMax = numpy.max(pos)
400    assert posMax < numpy.multiply.reduce(shape), "Error: Position too large for a given shape."
401    assert numpy.min(pos) >= 0, "Error, position cannot be negative."
402    ind = numpy.ones((pos.shape[0], lenShape))
403    for dim in range(lenShape-1,-1,-1):
404        ind[:,dim] = numpy.mod(pos, shape[dim])
405        pos = numpy.divide(pos, shape[dim])
406    return numpy.asarray(ind, numpy.int)
407
408
409def indices2positions(ind, shape):
410    """Given indices shaped (numPositions, len(shape)), returns corresponding positions in a falt array; output shape = (ind.shape[0],);
411    e.g.: ind=[[0,0],[2,0],[2,2]] , shape=(3,3) -> [0,6,8]
412    """
413    ind = numpy.asarray(ind)
414    if ind.shape[0] == 0:
415        return numpy.empty((0,))
416    assert len(ind.shape) == 2 and ind.shape[1] == len(shape), "Error: ind should be 2D array, ind.shape[1] should match len(shape)."
417    assert numpy.less(numpy.max(ind,0),numpy.asarray(shape)).all(), "Error: indices do not fit the shape."
418    pos = numpy.zeros((ind.shape[0],))
419    for i,shp in enumerate(shape):
420        pos += ind[:,i]*numpy.multiply.reduce(shape[i+1:])
421    return numpy.asarray(pos,numpy.int)
422   
423
424def getPositions(m, val):
425    """Input: arbitrary (masked) array and a value from that array;
426    Output: array of positions of the given value in a flat m;
427    """
428    m = MA.asarray(m)
429    return Numeric.compress(MA.equal(MA.ravel(m),val), Numeric.arange(Numeric.multiply.reduce(m.shape)))
430
431def getIndices(m, val):
432    """Input: arbitrary (masked) array and a value from that array;
433    Output: array of indices corresponding to positions of the given value in array m;
434    Output shape: (numb. of val values in m, len(m.shape)).
435    """
436    posFlat = getPositions(m, val)
437    return positions2indices(posFlat, m.shape)
438
439
440#####################################################################################
441## Indices <-> Condition
442#####################################################################################
443
444def condition2indices(condition):
445    """Input: condition=[1,0,0,1]; output: indices=[0,3]
446    """
447    condition = Numeric.asarray(condition)
448    assert len(condition.shape) == 1
449    return Numeric.compress(condition, Numeric.arange(condition.shape[0]))
450
451
452def indices2condition(indices, length):
453    """Input: indices=[0,3]; output: condition=[1,0,0,1]
454    """
455    indices = Numeric.asarray(indices)
456    assert len(indices.shape) == 1
457    assert length >= indices.shape[0]
458    c = Numeric.zeros((length,), Numeric.Int)
459    Numeric.put(c, indices, 1)
460    return c
461
462#####################################################################################
463## Inverse permutation
464##  permutInverse([0,2,3,5,4,1]) -> [0,5,1,2,4,3,]
465##  permutInverse([0,5,1,2,4,3,]) -> [0,2,3,5,4,1]
466#####################################################################################
467
468#from Permutation import *
469
470def permutInverse(n):
471    """Returns inverse permutation given integers in range(len(n)),
472    such that permitInverse(permutInverse(range(4)))==range(4).
473    """
474    n = Numeric.asarray(n)
475    pInv = Numeric.argsort(n)
476    assert Numeric.all(Numeric.equal(n, Numeric.argsort(pInv))), "Inverse not successful; input should be permutation of range(len(input))."
477    return pInv
478
479###################################################################################################################################
480## Generator of random matrix of permutations
481## where each row and each column represents a permutation of n elements.
482###################################################################################################################################
483
484def getRandomMtrxPermut(n):
485    """Generator of random matrix of permutations;
486    input: number of elements;
487    returns a matrix where each row and each column represents a permutation of n elements.
488    """
489    mx = numpy.zeros((n,n), numpy.int)
490    pArr = numpy.random.permutation(n)
491    for i in range(n):
492        mx[i] = numpy.concatenate((pArr[i:], pArr[:i]))
493    return numpy.take(mx,numpy.random.permutation(n),0)   
494
495
496#####################################################################################
497## Ranks:
498##      - Numeric (instead of statc.rankdata), in range 1 ... shape[0]
499##      - MA: masked elements are not ranked; range: 1 ... #non-masked_values
500#####################################################################################
501
502def rankData(n, inverse=False):
503    """Returns ranks of 1D Numeric array in range 1...shape[0].
504    """
505    n = Numeric.asarray(n)
506    assert Numeric.rank(n) == 1
507    r = Numeric.zeros(n.shape[0], Numeric.Float)
508    Numeric.put(r, Numeric.argsort(n), Numeric.arange(n.shape[0]))
509    if inverse:
510        return -1*r+n.shape[0]
511    else:
512        return r+1
513
514def rankDataMA(m, inverse=False):
515    """Returns ranks of 1D masked array; masked values ignored, range 1...#non-masked_values.
516    """
517    m = MA.asarray(m)
518    assert MA.rank(m) == 1
519    fill_val = m.fill_value()
520    m.set_fill_value(MA.maximum(m) + 1)
521    r = MA.zeros(m.shape[0], Numeric.Float)
522    MA.put(r, MA.argsort(m), Numeric.arange(m.shape[0]))
523    m.set_fill_value(fill_val)
524    r = MA.array(r, mask=MA.getmaskarray(m))
525    if inverse:
526        return -1*r+MA.count(m)
527    else:
528        return r+1
529   
530
531
532#####################################################################################
533## Unary functions: a mask is set only in places where both a arrays are masked.
534##  e.g.:
535##    minus_unary(10,--)  -> 10
536##    minus_unary(--,--) -> --
537#####################################################################################
538
539def subtract_unary(a, b):
540    """Returns a-b with masked values only in places where both a and b are masked.
541    """
542    a = MA.asarray(a)
543    b = MA.asarray(b)
544    el = MA.subtract(a.filled(0), b.filled(0))
545    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
546    return MA.array(el, mask=mask)
547
548def add_unary(a, b):
549    """Returns a+b with masked values only in places where both a and b are masked.
550    """
551    a = MA.asarray(a)
552    b = MA.asarray(b)
553    el = MA.add(a.filled(0), b.filled(0))
554    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
555    return MA.array(el, mask=mask)
556
557def multiply_unary(a, b):
558    """Returns a*b with masked values only in places where both a and b are masked.
559    """
560    a = MA.asarray(a)
561    b = MA.asarray(b)
562    el = MA.multiply(a.filled(1), b.filled(1))
563    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
564    return MA.array(el, mask=mask)
565
566def divide_unary(a, b):
567    """Returns a*b with masked values only in places where both a and b are masked.
568    """
569    a = MA.asarray(a)
570    b = MA.asarray(b)
571    el = MA.divide(a.filled(1), b.filled(1))
572    mask = Numeric.logical_and(MA.getmaskarray(a), MA.getmaskarray(b))
573    return MA.array(el, mask=mask)
574
575
576#####################################################################################
577## Logical OR / AND of two masked arrays where:
578##  - the unary OR equals to the element: OR(0) == 0, OR(1) == 1
579##  - the mask equals to logical AND of the corresponding masks
580##
581## For example:
582##  m1 = MA.array([0,0,0,-,-,-,1,1,1], mask=[0,0,0,1,1,1,0,0,0])
583##  m2 = MA.array([0,-,1,0,-,1,0,-,1], mask=[0,1,0,0,1,0,0,1,0])
584##
585## logical_unary_or(m1,m2) =
586##       MA.array([0,0,1,0,-,1,1,1,1], mask=[0,0,0,0,1,0,0,0,0])
587## The built-in logical_or returns:
588##       MA.array([0,-,0,-,-,-,0,-,1], mask=[0,1,0,1,1,1,0,1,0])
589##
590## logical_unary_and(m1,m2) =
591##       MA.array([0,0,0,0,-,1,0,1,1], mask=[0,0,0,0,1,0,0,0,0])
592## The built-in logical_and returns:
593##       MA.array([0,-,0,-,-,-,0,-,1], mask=[0,1,0,1,1,1,0,1,0])
594#####################################################################################
595
596def logical_unary_or(m1, m2):
597    el = Numeric.logical_or(m1.filled(0), m2.filled(0))
598    mask = Numeric.logical_and(MA.getmaskarray(m1), MA.getmaskarray(m2))
599    return MA.array(el, mask=mask)   
600
601
602def logical_unary_and(m1, m2):
603    el = Numeric.logical_and(m1.filled(1), m2.filled(1))
604    mask = Numeric.logical_and(MA.getmaskarray(m1), MA.getmaskarray(m2))
605    return MA.array(el, mask=mask)   
606
607
608#####################################################################################
609## MA.dot fix:
610##  - MA.dot returns does not set the masked values (it places 0 instead)
611##  - MA.dot(x,y)[i,j] == MA.masked iff MA.add.reduce(x[i,:]*y[:,j]) == MA.masked
612##  - done faste enugh - it requires two matrix multiplications
613#####################################################################################
614
615def dotMA(a, b):
616    """Returns dot-product for MA arrays; fixed masked values.
617    """
618    a = MA.asarray(a)
619    b = MA.asarray(b)
620    ab = MA.dot(a,b)
621    # fix masked values in ab (MA.dot returns 0 instead of MA.masked)
622    nonMasked = Numeric.dot(1-MA.getmaskarray(a).astype(Numeric.Int), 1-MA.getmaskarray(b).astype(Numeric.Int))
623    return MA.where(Numeric.equal(nonMasked,0), MA.masked, ab)
624
625#####################################################################################
626## Apply a binary function on a sequence of arrays
627#####################################################################################
628
629def apply_binary(aFunction, *args):
630    assert len(args) > 1, "at least two arguments required"
631    return reduce(lambda x,y: aFunction(x,y), list(args[1:]), args[0])
632##    return reduce(lambda x,y: aFunction(x,y), list(args))
633       
634
635#####################################################################################
636## Strictly upper (lower) triangular matrix <-> 1D array
637#####################################################################################
638
639def triangularPut(m1d, upper=1, lower=0):
640    """Returns 2D masked array with elements of the given 1D array in the strictly upper (lower) triangle.
641    Elements of the 1D array should be ordered according to the upper triangular part of the 2D matrix.
642    The lower triangular part (if requested) equals to the transposed upper triangular part.
643    If upper == lower == 1 a symetric matrix is returned.
644    """
645    assert upper in [0,1] and lower in [0,1], "[0|1] expected for upper / lower"
646    m1d = MA.asarray(m1d)
647    assert MA.rank(m1d) == 1, "1D masked array expected"
648    m2dShape0 = math.ceil(math.sqrt(2*m1d.shape[0]))
649    assert m1d.shape[0] == m2dShape0*(m2dShape0-1)/2, "the length of m1d does not correspond to n(n-1)/2"
650    if upper:
651        if lower:
652            mask = Numeric.fromfunction(lambda i,j: i==j, (m2dShape0, m2dShape0))
653        else:
654            mask = Numeric.fromfunction(lambda i,j: i>=j, (m2dShape0, m2dShape0))
655    else:
656        if lower:
657            mask = Numeric.fromfunction(lambda i,j: i<=j, (m2dShape0, m2dShape0))
658        else:
659            mask = Numeric.ones((m2dShape0, m2dShape0))
660
661    m2d = MA.ravel(MA.zeros((m2dShape0, m2dShape0), m1d.dtype.char))
662    condUpperTriang = Numeric.fromfunction(lambda i,j: i<j, (m2dShape0, m2dShape0))
663    putIndices = Numeric.compress(Numeric.ravel(condUpperTriang), Numeric.arange(0, m2dShape0**2, typecode=Numeric.Int))
664    MA.put(m2d, putIndices, m1d)
665    m2d = MA.reshape(m2d, (m2dShape0, m2dShape0))
666    m2d = MA.where(condUpperTriang, m2d, MA.transpose(m2d))
667    return MA.array(m2d, mask=Numeric.logical_or(mask, MA.getmaskarray(m2d)))
668
669
670def triangularGet(m2d, upper=1):
671    """Returns 1D masked array with elements from the upper (lower) triangular part of the given matrix.
672    For a symetric matrix triangularGet(m2d, 0) and triangularGet(m2d, 1) return elements in different order.
673    """
674    assert upper in [0,1], "upper: [0|1] expected"
675    m2d = MA.asarray(m2d)
676    assert MA.rank(m2d) == 2, "2D (masked) array expected"
677    if upper:
678        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i<j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
679    else:
680        takeInd = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i>j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
681    return MA.ravel(m2d).take(takeInd)
682
683
684#####################################################################################
685## Put 1D array on the diagonal of the given 2D array and returns a copy.
686#####################################################################################
687
688def diagonalPut(m1d, m2d):
689    """Puts the given 1D masked array into the diagonal of the given 2D masked array and returns a new copy of the 2D array.
690    """
691    m1d = MA.asarray(m1d)
692    m2d = MA.asarray(m2d)
693    assert MA.rank(m1d) == 1 and MA.rank(m2d) == 2, "1D and 2D masked array expected"
694    assert m1d.shape[0] == m2d.shape[0] == m2d.shape[1], "the shape of the given arrays does not match"
695    putIndices = Numeric.compress(Numeric.ravel(Numeric.fromfunction(lambda i,j: i==j, m2d.shape)), Numeric.arange(0, Numeric.multiply.reduce(m2d.shape), typecode=Numeric.Int))
696    m2dShape = m2d.shape
697    m2d = MA.ravel(m2d)
698    MA.put(m2d, putIndices, m1d)
699    return MA.reshape(m2d, m2dShape)
700   
701
702#####################################################################################
703#### linear interpolation of missing values by the given axis
704#####################################################################################
705##
706##def fillLinear1(ma, axis=0):
707##    """BUG: does not handle successive missing values
708##    """
709##    maflat = MA.array(MA.ravel(ma))
710##    mod = ma.shape[axis]
711##    maskInd = Numeric.compress(MA.getmaskarray(maflat), Numeric.arange(maflat.shape[0]))
712##    for idx in maskInd:
713##        if (idx % mod) == 0:  # the first one
714##            maflat[idx] = maflat[idx+1]
715##        elif (idx % mod) == mod-1:
716##            maflat[idx] = maflat[idx-1]
717##        else:
718##            maflat[idx] = (maflat[idx-1] + maflat[idx+1]) / 2.
719##    return MA.reshape(maflat, ma.shape)
720##
721##
722##def fillLinear2(ma, axis=0):
723##    """BUG: does not handle successive missing values
724##    """
725##    maflat = MA.array(MA.ravel(ma))
726##    mod = ma.shape[axis]
727##    maskInd = Numeric.compress(MA.getmaskarray(maflat), Numeric.arange(maflat.shape[0]))
728##    for idx in maskInd:
729##        idxFirst = idx-(idx%mod)
730##        idxNext = idxFirst+mod
731##        if MA.count(maflat[idxFirst:idxNext]) == 0:                                 # all elements are masked
732##            print "Warning: cannot do linear interpolation, all values are masked"
733##        elif idx == idxFirst:                                                        # the first element is masked
734##            maflat[idx] = maflat[idx+1:idxNext].compressed()[0]
735##        elif idx == idxNext-1:                                                      # the last element is masked
736##            maflat[idx] = maflat[idxFirst:idx].compressed()[-1]
737##        else:
738##            maflat[idx] = MA.average([maflat.take(range(idx-1,idxFirst-1,-1)).compressed(), maflat[idx+1:idxNext].compressed(), MA.masked])
739####            maflat[idx] = (maflat[idxFirst:idx].compressed()[-1] + maflat[idx+1:idxNext].compressed()[0]) / 2.
740##    return MA.reshape(maflat, ma.shape)
741##
742##def fillLinear(ma, axis=0):
743##    """Linear interpolation of missing values;
744##    """
745##    pass
746##
747##
748##si = scipy.interpolate.interp1d(Numeric.array([[0,1,3,4],[0,1,3,4]]), Numeric.array([[0.1,1.1,3.2,4.4],[0,1,3,4]]))
749##si(2)
750
751###################################################################################
752## swapaxes for masked array
753###################################################################################
754
755def swapaxesMA(ma, axis1, axis2):
756    if axis1 < axis2:
757        m,M = axis1, axis2
758    elif axis1 > axis2:
759        m,M = axis2, axis1
760    elif 0 <= axis1 == axis2 < len(ma.shape):
761        return ma
762    else:
763        raise ValueError("Bad axis1 or axis2 argument to swapaxes.")
764    axList = range(m) + [M] + range(m+1,M) + [m] + range(M+1,len(ma.shape))
765    return MA.transpose(ma, axList)
766
767
768###################################################################################
769## compress MA to Numeric and return Numeric + indices of non-masked places
770###################################################################################
771
772def compressIndices(ma):
773    """Returns 1D compressed Numeric array and the indices of the non-masked places.
774    usage:  nu,ind = compressIndices(ma)
775            nu = Numeric.elementwise_function(nu)
776            ma = MA.put(ma, ind, nu)
777    """
778    ma = MA.asarray(ma)
779    nonMaskedInd = Numeric.compress(1-Numeric.ravel(MA.getmaskarray(ma)), Numeric.arange(Numeric.multiply.reduce(ma.shape)))
780    return MA.filled(ma.compressed()), nonMaskedInd
781
782
783###################################################################################
784## compact print of Numeric or MA
785###################################################################################
786
787def aprint(a,decimals=2):
788    """prints array / masked array of floats"""
789    if isinstance(a, Numeric.ArrayType):
790        print Numeric.around(a.astype(Numeric.Float0),decimals)
791    elif type(a) == MA.MaskedArray:
792        print MA.around(a.astype(Numeric.Float0),decimals)
793
794
795###################################################################################
796## min / max along the given axis
797## SLOW DUE TO SORT
798###################################################################################
799
800def minMA(m,axis=0):
801    """slow: remove sorting"""
802    m = MA.asarray(m, Numeric.Float)
803    transList = [axis] + range(0,axis) + range(axis+1,MA.rank(m))
804    m = MA.transpose(m, transList)    # do not use swapaxes: (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
805    return MA.sort(m, 0, fill_value=1e20)[0]
806   
807def maxMA(m,axis=0):
808    """slow: remove sorting"""
809    m = MA.asarray(m, Numeric.Float)
810    transList = [axis] + range(0,axis) + range(axis+1,MA.rank(m))
811    m = MA.transpose(m, transList)    # do not use swapaxes: (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
812    return MA.sort(m, 0, fill_value=-1e20)[-1]
813
814
815###################################################################################
816## median absolute deviation
817###################################################################################
818
819def mad(m,axis=0):
820    """Returns Median Absolute Deviation of the given array along the given axis.
821    """
822    m = Numeric.asarray(m)
823    mx = Numeric.asarray(median(m,axis),Numeric.Float)
824    xt = Numeric.transpose(m, [axis]+range(axis)+range(axis+1,Numeric.rank(m))) # do not use swapaxes: (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
825    return MLab.median(Numeric.absolute(xt-mx))
826
827def madMA(m,axis=0):
828    """Returns Median Absolute Deviation of the given masked array along the given axis.
829    """
830    m = MA.asarray(m)
831    mx = MA.asarray(medianMA(m,axis),Numeric.Float)
832    xt = MA.transpose(m, [axis]+range(axis)+range(axis+1,MA.rank(m)))   # do not use swapaxes: (0,1,2) -swap-> (2,1,0); (0,1,2) -transpose-> (2,0,1)
833    return medianMA(MA.absolute(xt-mx))
834
835
836
837if __name__ == "__main__":
838##    n2 = numpy.asarray([[1,2],[3,4],[5,6]], numpy.float)    #shape (3,2)
839##    n3 = numpy.asarray([n2, 2*n2, 3*n2, 4*n2])              #shape (4,3,2)
840##
841##    m2 = numpy.ma.asarray(n2)
842##    m2[0,0] = numpy.ma.masked
843##    m2[1,1] = numpy.ma.masked
844##
845##    m3 = numpy.ma.asarray(n3)
846##    m3[0,0,0] = 10000
847##    m3[1,1,1] = 10000
848##    m3[2,2,0] = 10000
849##    m3[3,0,1] = 10000
850##    m3[0,0,0] = numpy.ma.masked
851##    m3[1,1,1] = numpy.ma.masked
852##    m3[2,2,0] = numpy.ma.masked
853##    m3[3,0,1] = numpy.ma.masked
854##
855##    mm3 = numpy.ma.ones((4,3,2), numpy.float) * numpy.ma.masked
856##    mm3[0,0,0] = 33
857##    mm3[1,1,1] = 333
858##    mm3[2,2,0] = 3333
859##    mm3[3,0,1] = 33333
860##
861##    # compare to Numeric
862##    import Numeric, MA
863##    n2n = Numeric.asarray([[1,2],[3,4],[5,6]], Numeric.Float)    #shape (3,2)
864##    n3n = Numeric.asarray([n2n, 2*n2n, 3*n2n, 4*n2n])              #shape (4,3,2)
865##
866##    m2n = MA.asarray(n2n)
867##    m2n[0,0] = MA.masked
868##    m2n[1,1] = MA.masked
869##
870##    m3n = MA.asarray(n3n)
871##    m3n[0,0,0] = 10000
872##    m3n[1,1,1] = 10000
873##    m3n[2,2,0] = 10000
874##    m3n[3,0,1] = 10000
875##    m3n[0,0,0] = MA.masked
876##    m3n[1,1,1] = MA.masked
877##    m3n[2,2,0] = MA.masked
878##    m3n[3,0,1] = MA.masked
879##
880##    mm3n = MA.ones((4,3,2), Numeric.Float) * MA.masked
881##    mm3n[0,0,0] = 33
882##    mm3n[1,1,1] = 333
883##    mm3n[2,2,0] = 3333
884##    mm3n[3,0,1] = 33333
885
886    # argmaxMA   
887    m2 = numpy.ma.asarray([[1,2,3],[4,5,6]])*numpy.ma.masked
888    m2[1,2] = 10   
889##    print "orig, axis None\t", numpy.ma.argmax(m2, None)
890##    print "extn, axis None\t", argmaxMA(m2, None)
891##    print "orig, axis 0\t", numpy.ma.argmax(m2,0)
892##    print "extn, axis 0\t", argmaxMA(m2,0)
893##    print "orig, axis 1\t", numpy.ma.argmax(m2,1)
894##    print "extn, axis 1\t", argmaxMA(m2,1)
895    ma = numpy.ma.argmax(m2,1)
896    mb = argmaxMA(m2,1)
Note: See TracBrowser for help on using the repository browser.