source: orange-bioinformatics/orangecontrib/bio/pstat.py @ 1873:0810c5708cc5

Revision 1873:0810c5708cc5, 36.5 KB checked in by Ales Erjavec <ales.erjavec@…>, 6 months ago (diff)

Moved '_bioinformatics' into orangecontrib namespace.

Line 
1# Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
2#
3# This software is distributable under the terms of the GNU
4# General Public License (GPL) v2, the text of which can be found at
5# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
6# using this module constitutes acceptance of the terms of this License.
7#
8# Disclaimer
9#
10# This software is provided "as-is".  There are no expressed or implied
11# warranties of any kind, including, but not limited to, the warranties
12# of merchantability and fittness for a given application.  In no event
13# shall Gary Strangman be liable for any direct, indirect, incidental,
14# special, exemplary or consequential damages (including, but not limited
15# to, loss of use, data or profits, or business interruption) however
16# caused and on any theory of liability, whether in contract, strict
17# liability or tort (including negligence or otherwise) arising in any way
18# out of the use of this software, even if advised of the possibility of
19# such damage.
20#
21# Comments and/or additions are welcome (send e-mail to:
22# strang@nmr.mgh.harvard.edu).
23#
24"""
25pstat.py module
26
27#################################################
28#######  Written by:  Gary Strangman  ###########
29#######  Last modified:  Jun 29, 2001 ###########
30#################################################
31
32This module provides some useful list and array manipulation routines
33modeled after those found in the |Stat package by Gary Perlman, plus a
34number of other useful list/file manipulation functions.  The list-based
35functions include:
36
37      abut (source,*args)
38      simpleabut (source, addon)
39      colex (listoflists,cnums)
40      collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
41      dm (listoflists,criterion)
42      flat (l)
43      linexand (listoflists,columnlist,valuelist)
44      linexor (listoflists,columnlist,valuelist)
45      linedelimited (inlist,delimiter)
46      lineincols (inlist,colsize)
47      lineincustcols (inlist,colsizes)
48      list2string (inlist)
49      makelol(inlist)
50      makestr(x)
51      printcc (lst,extra=2)
52      printincols (listoflists,colsize)
53      pl (listoflists)
54      printl(listoflists)
55      replace (lst,oldval,newval)
56      recode (inlist,listmap,cols='all')
57      remap (listoflists,criterion)
58      roundlist (inlist,num_digits_to_round_floats_to)
59      sortby(listoflists,sortcols)
60      unique (inlist)
61      duplicates(inlist)
62      writedelimited (listoflists, delimiter, file, writetype='w')
63
64Some of these functions have alternate versions which are defined only if
65Numeric (NumPy) can be imported.  These functions are generally named as
66above, with an 'a' prefix.
67
68      aabut (source, *args)
69      acolex (a,indices,axis=1)
70      acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
71      adm (a,criterion)
72      alinexand (a,columnlist,valuelist)
73      alinexor (a,columnlist,valuelist)
74      areplace (a,oldval,newval)
75      arecode (a,listmap,col='all')
76      arowcompare (row1, row2)
77      arowsame (row1, row2)
78      asortrows(a,axis=0)
79      aunique(inarray)
80      aduplicates(inarray)
81
82Currently, the code is all but completely un-optimized.  In many cases, the
83array versions of functions amount simply to aliases to built-in array
84functions/methods.  Their inclusion here is for function name consistency.
85"""
86
87## CHANGE LOG:
88## ==========
89## 01-11-15 ... changed list2string() to accept a delimiter
90## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
91## 01-05-31 ... added duplicates() and aduplicates() functions
92## 00-12-28 ... license made GPL, docstring and import requirements
93## 99-11-01 ... changed version to 0.3
94## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
95## 03/27/99 ... added areplace function, made replace fcn recursive
96## 12/31/98 ... added writefc function for ouput to fixed column sizes
97## 12/07/98 ... fixed import problem (failed on collapse() fcn)
98##              added __version__ variable (now 0.2)
99## 12/05/98 ... updated doc-strings
100##              added features to collapse() function
101##              added flat() function for lists
102##              fixed a broken asortrows()
103## 11/16/98 ... fixed minor bug in aput for 1D arrays
104##
105## 11/08/98 ... fixed aput to output large arrays correctly
106
107from __future__ import absolute_import
108
109import string, copy
110from types import *
111
112__version__ = 0.4
113
114###===========================  LIST FUNCTIONS  ==========================
115###
116### Here are the list functions, DEFINED FOR ALL SYSTEMS.
117### Array functions (for NumPy-enabled computers) appear below.
118###
119
120def abut (source,*args):
121    """
122Like the |Stat abut command.  It concatenates two lists side-by-side
123and returns the result.  '2D' lists are also accomodated for either argument
124(source or addon).  CAUTION:  If one list is shorter, it will be repeated
125until it is as long as the longest list.  If this behavior is not desired,
126use pstat.simpleabut().
127
128Usage:   abut(source, args)   where args=any # of lists
129Returns: a list of lists as long as the LONGEST list past, source on the
130         'left', lists in <args> attached consecutively on the 'right'
131"""
132
133    if type(source) not in [ListType,TupleType]:
134        source = [source]
135    for addon in args:
136        if type(addon) not in [ListType,TupleType]:
137            addon = [addon]
138        if len(addon) < len(source):                # is source list longer?
139            if len(source) % len(addon) == 0:        # are they integer multiples?
140                repeats = len(source)/len(addon)    # repeat addon n times
141                origadd = copy.deepcopy(addon)
142                for i in range(repeats-1):
143                    addon = addon + origadd
144            else:
145                repeats = len(source)/len(addon)+1  # repeat addon x times,
146                origadd = copy.deepcopy(addon)      #    x is NOT an integer
147                for i in range(repeats-1):
148                    addon = addon + origadd
149                    addon = addon[0:len(source)]
150        elif len(source) < len(addon):                # is addon list longer?
151            if len(addon) % len(source) == 0:        # are they integer multiples?
152                repeats = len(addon)/len(source)    # repeat source n times
153                origsour = copy.deepcopy(source)
154                for i in range(repeats-1):
155                    source = source + origsour
156            else:
157                repeats = len(addon)/len(source)+1  # repeat source x times,
158                origsour = copy.deepcopy(source)    #   x is NOT an integer
159                for i in range(repeats-1):
160                    source = source + origsour
161                source = source[0:len(addon)]
162
163        source = simpleabut(source,addon)
164    return source
165
166
167def simpleabut (source, addon):
168    """
169Concatenates two lists as columns and returns the result.  '2D' lists
170are also accomodated for either argument (source or addon).  This DOES NOT
171repeat either list to make the 2 lists of equal length.  Beware of list pairs
172with different lengths ... the resulting list will be the length of the
173FIRST list passed.
174
175Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
176Returns: a list of lists as long as source, with source on the 'left' and
177                 addon on the 'right'
178"""
179    if type(source) not in [ListType,TupleType]:
180        source = [source]
181    if type(addon) not in [ListType,TupleType]:
182        addon = [addon]
183    minlen = min(len(source),len(addon))
184    list = copy.deepcopy(source)                # start abut process
185    if type(source[0]) not in [ListType,TupleType]:
186        if type(addon[0]) not in [ListType,TupleType]:
187            for i in range(minlen):
188                list[i] = [source[i]] + [addon[i]]        # source/addon = column
189        else:
190            for i in range(minlen):
191                list[i] = [source[i]] + addon[i]        # addon=list-of-lists
192    else:
193        if type(addon[0]) not in [ListType,TupleType]:
194            for i in range(minlen):
195                list[i] = source[i] + [addon[i]]        # source=list-of-lists
196        else:
197            for i in range(minlen):
198                list[i] = source[i] + addon[i]        # source/addon = list-of-lists
199    source = list
200    return source
201
202
203def colex (listoflists,cnums):
204    """
205Extracts from listoflists the columns specified in the list 'cnums'
206(cnums can be an integer, a sequence of integers, or a string-expression that
207corresponds to a slice operation on the variable x ... e.g., 'x[3:]' will colex
208columns 3 onward from the listoflists).
209
210Usage:   colex (listoflists,cnums)
211Returns: a list-of-lists corresponding to the columns from listoflists
212         specified by cnums, in the order the column numbers appear in cnums
213"""
214    global index
215    column = 0
216    if type(cnums) in [ListType,TupleType]:   # if multiple columns to get
217        index = cnums[0]
218        column = map(lambda x: x[index], listoflists)
219        for col in cnums[1:]:
220            index = col
221            column = abut(column,map(lambda x: x[index], listoflists))
222    elif type(cnums) == StringType:              # if an 'x[3:]' type expr.
223        evalstring = 'map(lambda x: x'+cnums+', listoflists)'
224        column = eval(evalstring)
225    else:                                     # else it's just 1 col to get
226        index = cnums
227        column = map(lambda x: x[index], listoflists)
228    return column
229
230
231def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
232     """
233Averages data in collapsecol, keeping all unique items in keepcols
234(using unique, which keeps unique LISTS of column numbers), retaining the
235unique sets of values in keepcols, the mean for each.  Setting fcn1
236and/or fcn2 to point to a function rather than None (e.g., stats.sterr, len)
237will append those results (e.g., the sterr, N) after each calculated mean.
238cfcn is the collapse function to apply (defaults to mean, defined here in the
239pstat module to avoid circular imports with stats.py, but harmonicmean or
240others could be passed).
241
242Usage:    collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
243Returns: a list of lists with all unique permutations of entries appearing in
244     columns ("conditions") specified by keepcols, abutted with the result of
245     cfcn (if cfcn=None, defaults to the mean) of each column specified by
246     collapsecols.
247"""
248     def collmean (inlist):
249         s = 0
250         for item in inlist:
251             s = s + item
252         return s/float(len(inlist))
253
254     if type(keepcols) not in [ListType,TupleType]:
255         keepcols = [keepcols]
256     if type(collapsecols) not in [ListType,TupleType]:
257         collapsecols = [collapsecols]
258     if cfcn == None:
259         cfcn = collmean
260     if keepcols == []:
261         means = [0]*len(collapsecols)
262         for i in range(len(collapsecols)):
263             avgcol = colex(listoflists,collapsecols[i])
264             means[i] = cfcn(avgcol)
265             if fcn1:
266                 try:
267                     test = fcn1(avgcol)
268                 except:
269                     test = 'N/A'
270                     means[i] = [means[i], test]
271             if fcn2:
272                 try:
273                     test = fcn2(avgcol)
274                 except:
275                     test = 'N/A'
276                 try:
277                     means[i] = means[i] + [len(avgcol)]
278                 except TypeError:
279                     means[i] = [means[i],len(avgcol)]
280         return means
281     else:
282         values = colex(listoflists,keepcols)
283         uniques = unique(values)
284         uniques.sort()
285         newlist = []
286         if type(keepcols) not in [ListType,TupleType]:  keepcols = [keepcols]
287         for item in uniques:
288             if type(item) not in [ListType,TupleType]:  item =[item]
289             tmprows = linexand(listoflists,keepcols,item)
290             for col in collapsecols:
291                 avgcol = colex(tmprows,col)
292                 item.append(cfcn(avgcol))
293                 if fcn1 <> None:
294                     try:
295                         test = fcn1(avgcol)
296                     except:
297                         test = 'N/A'
298                     item.append(test)
299                 if fcn2 <> None:
300                     try:
301                         test = fcn2(avgcol)
302                     except:
303                         test = 'N/A'
304                     item.append(test)
305                 newlist.append(item)
306         return newlist
307
308
309def dm (listoflists,criterion):
310    """
311Returns rows from the passed list of lists that meet the criteria in
312the passed criterion expression (a string as a function of x; e.g., 'x[3]>=9'
313will return all rows where the 4th column>=9 and "x[2]=='N'" will return rows
314with column 2 equal to the string 'N').
315
316Usage:   dm (listoflists, criterion)
317Returns: rows from listoflists that meet the specified criterion.
318"""
319    function = 'filter(lambda x: '+criterion+',listoflists)'
320    lines = eval(function)
321    return lines
322
323
324def flat(l):
325    """
326Returns the flattened version of a '2D' list.  List-correlate to the a.flat()
327method of NumPy arrays.
328
329Usage:    flat(l)
330"""
331    newl = []
332    for i in range(len(l)):
333        for j in range(len(l[i])):
334            newl.append(l[i][j])
335    return newl
336
337
338def linexand (listoflists,columnlist,valuelist):
339    """
340Returns the rows of a list of lists where col (from columnlist) = val
341(from valuelist) for EVERY pair of values (columnlist[i],valuelists[i]).
342len(columnlist) must equal len(valuelist).
343
344Usage:   linexand (listoflists,columnlist,valuelist)
345Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
346"""
347    if type(columnlist) not in [ListType,TupleType]:
348        columnlist = [columnlist]
349    if type(valuelist) not in [ListType,TupleType]:
350        valuelist = [valuelist]
351    criterion = ''
352    for i in range(len(columnlist)):
353        if type(valuelist[i])==StringType:
354            critval = '\'' + valuelist[i] + '\''
355        else:
356            critval = str(valuelist[i])
357        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
358    criterion = criterion[0:-3]         # remove the "and" after the last crit
359    function = 'filter(lambda x: '+criterion+',listoflists)'
360    lines = eval(function)
361    return lines
362
363
364def linexor (listoflists,columnlist,valuelist):
365    """
366Returns the rows of a list of lists where col (from columnlist) = val
367(from valuelist) for ANY pair of values (colunmlist[i],valuelist[i[).
368One value is required for each column in columnlist.  If only one value
369exists for columnlist but multiple values appear in valuelist, the
370valuelist values are all assumed to pertain to the same column.
371
372Usage:   linexor (listoflists,columnlist,valuelist)
373Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
374"""
375    if type(columnlist) not in [ListType,TupleType]:
376        columnlist = [columnlist]
377    if type(valuelist) not in [ListType,TupleType]:
378        valuelist = [valuelist]
379    criterion = ''
380    if len(columnlist) == 1 and len(valuelist) > 1:
381        columnlist = columnlist*len(valuelist)
382    for i in range(len(columnlist)):          # build an exec string
383        if type(valuelist[i])==StringType:
384            critval = '\'' + valuelist[i] + '\''
385        else:
386            critval = str(valuelist[i])
387        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
388    criterion = criterion[0:-2]         # remove the "or" after the last crit
389    function = 'filter(lambda x: '+criterion+',listoflists)'
390    lines = eval(function)
391    return lines
392
393
394def linedelimited (inlist,delimiter):
395    """
396Returns a string composed of elements in inlist, with each element
397separated by 'delimiter.'  Used by function writedelimited.  Use '\t'
398for tab-delimiting.
399
400Usage:   linedelimited (inlist,delimiter)
401"""
402    outstr = ''
403    for item in inlist:
404        if type(item) <> StringType:
405            item = str(item)
406        outstr = outstr + item + delimiter
407    outstr = outstr[0:-1]
408    return outstr
409
410
411def lineincols (inlist,colsize):
412    """
413Returns a string composed of elements in inlist, with each element
414right-aligned in columns of (fixed) colsize.
415
416Usage:   lineincols (inlist,colsize)   where colsize is an integer
417"""
418    outstr = ''
419    for item in inlist:
420        if type(item) <> StringType:
421            item = str(item)
422        size = len(item)
423        if size <= colsize:
424            for i in range(colsize-size):
425                outstr = outstr + ' '
426            outstr = outstr + item
427        else:
428            outstr = outstr + item[0:colsize+1]
429    return outstr
430
431
432def lineincustcols (inlist,colsizes):
433    """
434Returns a string composed of elements in inlist, with each element
435right-aligned in a column of width specified by a sequence colsizes.  The
436length of colsizes must be greater than or equal to the number of columns
437in inlist.
438
439Usage:   lineincustcols (inlist,colsizes)
440Returns: formatted string created from inlist
441"""
442    outstr = ''
443    for i in range(len(inlist)):
444        if type(inlist[i]) <> StringType:
445            item = str(inlist[i])
446        else:
447            item = inlist[i]
448        size = len(item)
449        if size <= colsizes[i]:
450            for j in range(colsizes[i]-size):
451                outstr = outstr + ' '
452            outstr = outstr + item
453        else:
454            outstr = outstr + item[0:colsizes[i]+1]
455    return outstr
456
457
458def list2string (inlist,delimit=' '):
459    """
460Converts a 1D list to a single long string for file output, using
461the string.join function.
462
463Usage:   list2string (inlist,delimit=' ')
464Returns: the string created from inlist
465"""
466    stringlist = map(makestr,inlist)
467    return string.join(stringlist,delimit)
468
469
470def makelol(inlist):
471    """
472Converts a 1D list to a 2D list (i.e., a list-of-lists).  Useful when you
473want to use put() to write a 1D list one item per line in the file.
474
475Usage:   makelol(inlist)
476Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
477"""
478    x = []
479    for item in inlist:
480        x.append([item])
481    return x
482
483
484def makestr (x):
485    if type(x) <> StringType:
486        x = str(x)
487    return x
488
489
490def printcc (lst,extra=2):
491    """
492Prints a list of lists in columns, customized by the max size of items
493within the columns (max size of items in col, plus 'extra' number of spaces).
494Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
495respectively.
496
497Usage:   printcc (lst,extra=2)
498Returns: None
499"""
500    if type(lst[0]) not in [ListType,TupleType]:
501        lst = [lst]
502    rowstokill = []
503    list2print = copy.deepcopy(lst)
504    for i in range(len(lst)):
505        if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']:
506            rowstokill = rowstokill + [i]
507    rowstokill.reverse()   # delete blank rows from the end
508    for row in rowstokill:
509        del list2print[row]
510    maxsize = [0]*len(list2print[0])
511    for col in range(len(list2print[0])):
512        items = colex(list2print,col)
513        items = map(makestr,items)
514        maxsize[col] = max(map(len,items)) + extra
515    for row in lst:
516        if row == ['\n'] or row == '\n' or row == '' or row == ['']:
517            print
518        elif row == ['dashes'] or row == 'dashes':
519            dashes = [0]*len(maxsize)
520            for j in range(len(maxsize)):
521                dashes[j] = '-'*(maxsize[j]-2)
522            print lineincustcols(dashes,maxsize)
523        else:
524            print lineincustcols(row,maxsize)
525    return None
526
527
528def printincols (listoflists,colsize):
529    """
530Prints a list of lists in columns of (fixed) colsize width, where
531colsize is an integer.
532
533Usage:   printincols (listoflists,colsize)
534Returns: None
535"""
536    for row in listoflists:
537        print lineincols(row,colsize)
538    return None
539
540
541def pl (listoflists):
542    """
543Prints a list of lists, 1 list (row) at a time.
544
545Usage:   pl(listoflists)
546Returns: None
547"""
548    for row in listoflists:
549        if row[-1] == '\n':
550            print row,
551        else:
552            print row
553    return None
554
555
556def printl(listoflists):
557    """Alias for pl."""
558    pl(listoflists)
559    return
560
561
562def replace (inlst,oldval,newval):
563    """
564Replaces all occurrences of 'oldval' with 'newval', recursively.
565
566Usage:   replace (inlst,oldval,newval)
567"""
568    lst = inlst*1
569    for i in range(len(lst)):
570        if type(lst[i]) not in [ListType,TupleType]:
571            if lst[i]==oldval: lst[i]=newval
572        else:
573            lst[i] = replace(lst[i],oldval,newval)
574    return lst
575
576
577def recode (inlist,listmap,cols=None):
578    """
579Changes the values in a list to a new set of values (useful when
580you need to recode data from (e.g.) strings to numbers.  cols defaults
581to None (meaning all columns are recoded).
582
583Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
584Returns: inlist with the appropriate values replaced with new ones
585"""
586    lst = copy.deepcopy(inlist)
587    if cols != None:
588        if type(cols) not in [ListType,TupleType]:
589            cols = [cols]
590        for col in cols:
591            for row in range(len(lst)):
592                try:
593                    idx = colex(listmap,0).index(lst[row][col])
594                    lst[row][col] = listmap[idx][1]
595                except ValueError:
596                    pass
597    else:
598        for row in range(len(lst)):
599            for col in range(len(lst)):
600                try:
601                    idx = colex(listmap,0).index(lst[row][col])
602                    lst[row][col] = listmap[idx][1]
603                except ValueError:
604                    pass
605    return lst
606
607
608def remap (listoflists,criterion):
609    """
610Remaps values in a given column of a 2D list (listoflists).  This requires
611a criterion as a function of 'x' so that the result of the following is
612returned ... map(lambda x: 'criterion',listoflists). 
613
614Usage:   remap(listoflists,criterion)    criterion=string
615Returns: remapped version of listoflists
616"""
617    function = 'map(lambda x: '+criterion+',listoflists)'
618    lines = eval(function)
619    return lines
620
621
622def roundlist (inlist,digits):
623    """
624Goes through each element in a 1D or 2D inlist, and applies the following
625function to all elements of FloatType ... round(element,digits).
626
627Usage:   roundlist(inlist,digits)
628Returns: list with rounded floats
629"""
630    if type(inlist[0]) in [IntType, FloatType]:
631        inlist = [inlist]
632    l = inlist*1
633    for i in range(len(l)):
634        for j in range(len(l[i])):
635            if type(l[i][j])==FloatType:
636                l[i][j] = round(l[i][j],digits)
637    return l
638
639
640def sortby(listoflists,sortcols):
641    """
642Sorts a list of lists on the column(s) specified in the sequence
643sortcols.
644
645Usage:   sortby(listoflists,sortcols)
646Returns: sorted list, unchanged column ordering
647"""
648    newlist = abut(colex(listoflists,sortcols),listoflists)
649    newlist.sort()
650    try:
651        numcols = len(sortcols)
652    except TypeError:
653        numcols = 1
654    crit = '[' + str(numcols) + ':]'
655    newlist = colex(newlist,crit)
656    return newlist
657
658
659def unique (inlist):
660    """
661Returns all unique items in the passed list.  If the a list-of-lists
662is passed, unique LISTS are found (i.e., items in the first dimension are
663compared).
664
665Usage:   unique (inlist)
666Returns: the unique elements (or rows) in inlist
667"""
668    uniques = []
669    for item in inlist:
670        if item not in uniques:
671            uniques.append(item)
672    return uniques
673
674def duplicates(inlist):
675    """
676Returns duplicate items in the FIRST dimension of the passed list.
677
678Usage:   duplicates (inlist)
679"""
680    dups = []
681    for i in range(len(inlist)):
682        if inlist[i] in inlist[i+1:]:
683            dups.append(inlist[i])
684    return dups
685
686
687def nonrepeats(inlist):
688    """
689Returns items that are NOT duplicated in the first dim of the passed list.
690
691Usage:   nonrepeats (inlist)
692"""
693    nonrepeats = []
694    for i in range(len(inlist)):
695        if inlist.count(inlist[i]) == 1:
696            nonrepeats.append(inlist[i])
697    return nonrepeats
698
699
700#===================   PSTAT ARRAY FUNCTIONS  =====================
701#===================   PSTAT ARRAY FUNCTIONS  =====================
702#===================   PSTAT ARRAY FUNCTIONS  =====================
703#===================   PSTAT ARRAY FUNCTIONS  =====================
704#===================   PSTAT ARRAY FUNCTIONS  =====================
705#===================   PSTAT ARRAY FUNCTIONS  =====================
706#===================   PSTAT ARRAY FUNCTIONS  =====================
707#===================   PSTAT ARRAY FUNCTIONS  =====================
708#===================   PSTAT ARRAY FUNCTIONS  =====================
709#===================   PSTAT ARRAY FUNCTIONS  =====================
710#===================   PSTAT ARRAY FUNCTIONS  =====================
711#===================   PSTAT ARRAY FUNCTIONS  =====================
712#===================   PSTAT ARRAY FUNCTIONS  =====================
713#===================   PSTAT ARRAY FUNCTIONS  =====================
714#===================   PSTAT ARRAY FUNCTIONS  =====================
715#===================   PSTAT ARRAY FUNCTIONS  =====================
716
717try:                         # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE
718 import Numeric
719 N = Numeric
720
721 def aabut (source, *args):
722    """
723Like the |Stat abut command.  It concatenates two arrays column-wise
724and returns the result.  CAUTION:  If one array is shorter, it will be
725repeated until it is as long as the other.
726
727Usage:   aabut (source, args)    where args=any # of arrays
728Returns: an array as long as the LONGEST array past, source appearing on the
729         'left', arrays in <args> attached on the 'right'.
730"""
731    if len(source.shape)==1:
732        width = 1
733        source = N.resize(source,[source.shape[0],width])
734    else:
735        width = source.shape[1]
736    for addon in args:
737        if len(addon.shape)==1:
738            width = 1
739            addon = N.resize(addon,[source.shape[0],width])
740        else:
741            width = source.shape[1]
742        if len(addon) < len(source):
743            addon = N.resize(addon,[source.shape[0],addon.shape[1]])
744        elif len(source) < len(addon):
745            source = N.resize(source,[addon.shape[0],source.shape[1]])
746        source = N.concatenate((source,addon),1)
747    return source
748
749
750 def acolex (a,indices,axis=1):
751    """
752Extracts specified indices (a list) from passed array, along passed
753axis (column extraction is default).  BEWARE: A 1D array is presumed to be a
754column-array (and that the whole array will be returned as a column).
755
756Usage:   acolex (a,indices,axis=1)
757Returns: the columns of a specified by indices
758"""
759    if type(indices) not in [ListType,TupleType,N.ArrayType]:
760        indices = [indices]
761    if len(N.shape(a)) == 1:
762        cols = N.resize(a,[a.shape[0],1])
763    else:
764        cols = N.take(a,indices,axis)
765    return cols
766
767
768 def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
769    """
770Averages data in collapsecol, keeping all unique items in keepcols
771(using unique, which keeps unique LISTS of column numbers), retaining
772the unique sets of values in keepcols, the mean for each.  If stderror or
773N of the mean are desired, set either or both parameters to 1.
774
775Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
776Returns: unique 'conditions' specified by the contents of columns specified
777         by keepcols, abutted with the mean(s) of column(s) specified by
778         collapsecols
779"""
780    def acollmean (inarray):
781        return N.sum(N.ravel(inarray))
782
783    if cfcn == None:
784        cfcn = acollmean
785    if keepcols == []:
786        avgcol = acolex(a,collapsecols)
787        means = N.sum(avgcol)/float(len(avgcol))
788        if fcn1<>None:
789            try:
790                test = fcn1(avgcol)
791            except:
792                test = N.array(['N/A']*len(means))
793            means = aabut(means,test)
794        if fcn2<>None:
795            try:
796                test = fcn2(avgcol)
797            except:
798                test = N.array(['N/A']*len(means))
799            means = aabut(means,test)
800        return means
801    else:
802        if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
803            keepcols = [keepcols]
804        values = colex(a,keepcols)   # so that "item" can be appended (below)
805        uniques = unique(values)  # get a LIST, so .sort keeps rows intact
806        uniques.sort()
807        newlist = []
808        for item in uniques:
809            if type(item) not in [ListType,TupleType,N.ArrayType]:
810                item =[item]
811            tmprows = alinexand(a,keepcols,item)
812            for col in collapsecols:
813                avgcol = acolex(tmprows,col)
814                item.append(acollmean(avgcol))
815                if fcn1<>None:
816                    try:
817                        test = fcn1(avgcol)
818                    except:
819                        test = 'N/A'
820                    item.append(test)
821                if fcn2<>None:
822                    try:
823                        test = fcn2(avgcol)
824                    except:
825                        test = 'N/A'
826                    item.append(test)
827                newlist.append(item)
828        try:
829            new_a = N.array(newlist)
830        except TypeError:
831            new_a = N.array(newlist,'O')
832        return new_a
833
834
835 def adm (a,criterion):
836    """
837Returns rows from the passed list of lists that meet the criteria in
838the passed criterion expression (a string as a function of x).
839
840Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
841"""
842    function = 'filter(lambda x: '+criterion+',a)'
843    lines = eval(function)
844    try:
845        lines = N.array(lines)
846    except:
847        lines = N.array(lines,'O')
848    return lines
849
850
851 def isstring(x):
852    if type(x)==StringType:
853        return 1
854    else:
855        return 0
856
857
858 def alinexand (a,columnlist,valuelist):
859    """
860Returns the rows of an array where col (from columnlist) = val
861(from valuelist).  One value is required for each column in columnlist.
862
863Usage:   alinexand (a,columnlist,valuelist)
864Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
865"""
866    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
867        columnlist = [columnlist]
868    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
869        valuelist = [valuelist]
870    criterion = ''
871    for i in range(len(columnlist)):
872        if type(valuelist[i])==StringType:
873            critval = '\'' + valuelist[i] + '\''
874        else:
875            critval = str(valuelist[i])
876        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' and'
877    criterion = criterion[0:-3]         # remove the "and" after the last crit
878    return adm(a,criterion)
879
880
881 def alinexor (a,columnlist,valuelist):
882    """
883Returns the rows of an array where col (from columnlist) = val (from
884valuelist).  One value is required for each column in columnlist.
885The exception is if either columnlist or valuelist has only 1 value,
886in which case that item will be expanded to match the length of the
887other list.
888
889Usage:   alinexor (a,columnlist,valuelist)
890Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
891"""
892    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
893        columnlist = [columnlist]
894    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
895        valuelist = [valuelist]
896    criterion = ''
897    if len(columnlist) == 1 and len(valuelist) > 1:
898        columnlist = columnlist*len(valuelist)
899    elif len(valuelist) == 1 and len(columnlist) > 1:
900        valuelist = valuelist*len(columnlist)
901    for i in range(len(columnlist)):
902        if type(valuelist[i])==StringType:
903            critval = '\'' + valuelist[i] + '\''
904        else:
905            critval = str(valuelist[i])
906        criterion = criterion + ' x['+str(columnlist[i])+']=='+critval+' or'
907    criterion = criterion[0:-2]         # remove the "or" after the last crit
908    return adm(a,criterion)
909
910
911 def areplace (a,oldval,newval):
912    """
913Replaces all occurrences of oldval with newval in array a.
914
915Usage:   areplace(a,oldval,newval)
916"""
917    newa = N.not_equal(a,oldval)*a
918    return newa+N.equal(a,oldval)*newval
919
920
921 def arecode (a,listmap,col='all'):
922    """
923Remaps the values in an array to a new set of values (useful when
924you need to recode data from (e.g.) strings to numbers as most stats
925packages require.  Can work on SINGLE columns, or 'all' columns at once.
926
927Usage:   arecode (a,listmap,col='all')
928Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
929"""
930    ashape = a.shape
931    if col == 'all':
932        work = a.flat
933    else:
934        work = acolex(a,col)
935        work = work.flat
936    for pair in listmap:
937        if type(pair[1]) == StringType or work.typecode()=='O' or a.typecode()=='O':
938            work = N.array(work,'O')
939            a = N.array(a,'O')
940            for i in range(len(work)):
941                if work[i]==pair[0]:
942                    work[i] = pair[1]
943            if col == 'all':
944                return N.reshape(work,ashape)
945            else:
946                return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
947        else:   # must be a non-Object type array and replacement
948            work = N.where(N.equal(work,pair[0]),pair[1],work)
949            return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
950
951
952 def arowcompare(row1, row2):
953    """
954Compares two rows from an array, regardless of whether it is an
955array of numbers or of python objects (which requires the cmp function).
956
957Usage:   arowcompare(row1,row2)
958Returns: an array of equal length containing 1s where the two rows had
959         identical elements and 0 otherwise
960"""
961    if row1.typecode()=='O' or row2.typecode=='O':
962        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2)))) # cmp fcn gives -1,0,1
963    else:
964        cmpvect = N.equal(row1,row2)
965    return cmpvect
966
967
968 def arowsame(row1, row2):
969    """
970Compares two rows from an array, regardless of whether it is an
971array of numbers or of python objects (which requires the cmp function).
972
973Usage:   arowsame(row1,row2)
974Returns: 1 if the two rows are identical, 0 otherwise.
975"""
976    cmpval = N.alltrue(arowcompare(row1,row2))
977    return cmpval
978
979
980 def asortrows(a,axis=0):
981    """
982Sorts an array "by rows".  This differs from the Numeric.sort() function,
983which sorts elements WITHIN the given axis.  Instead, this function keeps
984the elements along the given axis intact, but shifts them 'up or down'
985relative to one another.
986
987Usage:   asortrows(a,axis=0)
988Returns: sorted version of a
989"""
990    if axis != 0:
991        a = N.swapaxes(a, axis, 0)
992    l = a.tolist()
993    l.sort()           # or l.sort(_sort)
994    y = N.array(l)
995    if axis != 0:
996        y = N.swapaxes(y, axis, 0)
997    return y
998
999
1000 def aunique(inarray):
1001    """
1002Returns unique items in the FIRST dimension of the passed array. Only
1003works on arrays NOT including string items.
1004
1005Usage:   aunique (inarray)
1006"""
1007    uniques = N.array([inarray[0]])
1008    if len(uniques.shape) == 1:            # IF IT'S A 1D ARRAY
1009        for item in inarray[1:]:
1010            if N.add.reduce(N.equal(uniques,item).flat) == 0:
1011                try:
1012                    uniques = N.concatenate([uniques,N.array[N.NewAxis,:]])
1013                except TypeError:
1014                    uniques = N.concatenate([uniques,N.array([item])])
1015    else:                                  # IT MUST BE A 2+D ARRAY
1016        if inarray.typecode() != 'O':  # not an Object array
1017            for item in inarray[1:]:
1018                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
1019                    try:
1020                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
1021                    except TypeError:    # the item to add isn't a list
1022                        uniques = N.concatenate([uniques,N.array([item])])
1023                else:
1024                    pass  # this item is already in the uniques array
1025        else:   # must be an Object array, alltrue/equal functions don't work
1026            for item in inarray[1:]:
1027                newflag = 1
1028                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
1029                    test = N.sum(abs(N.array(map(cmp,item,unq))))
1030                    if test == 0:   # if item identical to any 1 row in uniques
1031                        newflag = 0 # then not a novel item to add
1032                        break
1033                if newflag == 1:
1034                    try:
1035                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
1036                    except TypeError:    # the item to add isn't a list
1037                        uniques = N.concatenate([uniques,N.array([item])])
1038    return uniques
1039
1040
1041 def aduplicates(inarray):
1042    """
1043Returns duplicate items in the FIRST dimension of the passed array. Only
1044works on arrays NOT including string items.
1045
1046Usage:   aunique (inarray)
1047"""
1048    inarray = N.array(inarray)
1049    if len(inarray.shape) == 1:            # IF IT'S A 1D ARRAY
1050        dups = []
1051        inarray = inarray.tolist()
1052        for i in range(len(inarray)):
1053            if inarray[i] in inarray[i+1:]:
1054                dups.append(inarray[i])
1055        dups = aunique(dups)
1056    else:                                  # IT MUST BE A 2+D ARRAY
1057        dups = []
1058        aslist = inarray.tolist()
1059        for i in range(len(aslist)):
1060            if aslist[i] in aslist[i+1:]:
1061                dups.append(aslist[i])
1062        dups = unique(dups)
1063        dups = N.array(dups)
1064    return dups
1065
1066except ImportError:    # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
1067 pass
Note: See TracBrowser for help on using the repository browser.