source: orange-bioinformatics/Orange/bioinformatics/pstat.py @ 1629:0d6de7936e04

Revision 1629:0d6de7936e04, 36.5 KB checked in by mitar, 2 years ago (diff)

Moving files around.

Line 
1# Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved.
2#
3# This software is distributable under the terms of the GNU
4# General Public License (GPL) v2, the text of which can be found at
5# http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise
6# using this module constitutes acceptance of the terms of this License.
7#
8# Disclaimer
9#
10# This software is provided "as-is".  There are no expressed or implied
11# warranties of any kind, including, but not limited to, the warranties
12# of merchantability and fittness for a given application.  In no event
13# shall Gary Strangman be liable for any direct, indirect, incidental,
14# special, exemplary or consequential damages (including, but not limited
15# to, loss of use, data or profits, or business interruption) however
16# caused and on any theory of liability, whether in contract, strict
17# liability or tort (including negligence or otherwise) arising in any way
18# out of the use of this software, even if advised of the possibility of
19# such damage.
20#
21# Comments and/or additions are welcome (send e-mail to:
22# strang@nmr.mgh.harvard.edu).
23#
24"""
25pstat.py module
26
27#################################################
28#######  Written by:  Gary Strangman  ###########
29#######  Last modified:  Jun 29, 2001 ###########
30#################################################
31
32This module provides some useful list and array manipulation routines
33modeled after those found in the |Stat package by Gary Perlman, plus a
34number of other useful list/file manipulation functions.  The list-based
35functions include:
36
37      abut (source,*args)
38      simpleabut (source, addon)
39      colex (listoflists,cnums)
40      collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
41      dm (listoflists,criterion)
42      flat (l)
43      linexand (listoflists,columnlist,valuelist)
44      linexor (listoflists,columnlist,valuelist)
45      linedelimited (inlist,delimiter)
46      lineincols (inlist,colsize)
47      lineincustcols (inlist,colsizes)
48      list2string (inlist)
49      makelol(inlist)
50      makestr(x)
51      printcc (lst,extra=2)
52      printincols (listoflists,colsize)
53      pl (listoflists)
54      printl(listoflists)
55      replace (lst,oldval,newval)
56      recode (inlist,listmap,cols=None)
57      remap (listoflists,criterion)
58      roundlist (inlist,num_digits_to_round_floats_to)
59      sortby(listoflists,sortcols)
60      unique (inlist)
61      duplicates(inlist)
62      writedelimited (listoflists, delimiter, file, writetype='w')
63
64Some of these functions have alternate versions which are defined only if
65Numeric (NumPy) can be imported.  These functions are generally named as
66above, with an 'a' prefix.
67
68      aabut (source, *args)
69      acolex (a,indices,axis=1)
70      acollapse (a,keepcols,collapsecols,sterr=0,ns=0)
71      adm (a,criterion)
72      alinexand (a,columnlist,valuelist)
73      alinexor (a,columnlist,valuelist)
74      areplace (a,oldval,newval)
75      arecode (a,listmap,col='all')
76      arowcompare (row1, row2)
77      arowsame (row1, row2)
78      asortrows(a,axis=0)
79      aunique(inarray)
80      aduplicates(inarray)
81
82Currently, the code is all but completely un-optimized.  In many cases, the
83array versions of functions amount simply to aliases to built-in array
84functions/methods.  Their inclusion here is for function name consistency.
85"""
86
87## CHANGE LOG:
88## ==========
89## 01-11-15 ... changed list2string() to accept a delimiter
90## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
91## 01-05-31 ... added duplicates() and aduplicates() functions
92## 00-12-28 ... license made GPL, docstring and import requirements
93## 99-11-01 ... changed version to 0.3
94## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
95## 03/27/99 ... added areplace function, made replace fcn recursive
96## 12/31/98 ... added writefc function for output to fixed column sizes
97## 12/07/98 ... fixed import problem (failed on collapse() fcn)
98##              added __version__ variable (now 0.2)
99## 12/05/98 ... updated doc-strings
100##              added features to collapse() function
101##              added flat() function for lists
102##              fixed a broken asortrows()
103## 11/16/98 ... fixed minor bug in aput for 1D arrays
104##
105## 11/08/98 ... fixed aput to output large arrays correctly
106
107import stats  # required 3rd party module
108import string, copy
109from types import *
110
111__version__ = 0.4
112
113###===========================  LIST FUNCTIONS  ==========================
114###
115### Here are the list functions, DEFINED FOR ALL SYSTEMS.
116### Array functions (for NumPy-enabled computers) appear below.
117###
118
def _repeat_to_length (seq, length):
    """
    Repeats seq (a non-empty list or tuple) cyclically until it holds exactly
    'length' items.  The first cycle keeps the original items; every further
    cycle reuses one deep copy of them.
    """
    cycles = -(-length // len(seq))             # ceiling division, always an int
    return (seq + copy.deepcopy(seq) * (cycles - 1))[0:length]


def abut (source,*args):
    """
    Like the |Stat abut command.  It concatenates lists side-by-side (as
    columns) and returns the result.  '2D' lists are also accommodated for
    either argument (source or addon).  CAUTION:  If one list is shorter, it
    will be repeated until it is as long as the longest list.  If this
    behavior is not desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    """
    if type(source) not in (list, tuple):
        source = [source]
    for addon in args:
        if type(addon) not in (list, tuple):
            addon = [addon]
        # Stretch whichever side is shorter so both have the same length.
        if len(addon) < len(source):
            addon = _repeat_to_length(addon, len(source))
        elif len(source) < len(addon):
            source = _repeat_to_length(source, len(addon))
        source = simpleabut(source,addon)
    return source
164
165
def simpleabut (source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    (lists or tuples of lists/tuples) are also accommodated for either
    argument.  This DOES NOT repeat either list to make the 2 lists of equal
    length.  Beware of list pairs with different lengths ... the result has
    the length of the FIRST list passed, and rows of source that have no
    partner in addon are returned as unchanged (deep) copies.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list as long as source whose paired rows are new lists, with
             source on the 'left' and addon on the 'right'
    """
    if type(source) not in (list, tuple):
        source = [source]
    if type(addon) not in (list, tuple):
        addon = [addon]
    # list() makes the result assignable even when source is a tuple.
    result = list(copy.deepcopy(source))
    if len(source) == 0 or len(addon) == 0:
        return result                           # nothing to pair up
    # As before, the FIRST element decides whether a side is treated as 2D.
    source_is_2d = type(source[0]) in (list, tuple)
    addon_is_2d = type(addon[0]) in (list, tuple)
    for i in range(min(len(source), len(addon))):
        # list(...) lets tuple rows be concatenated with list rows.
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        result[i] = left + right
    return result
200
201
def colex (listoflists,cnums):
    """
    Extracts from listoflists the columns specified by 'cnums' (cnums can be
    an integer, a sequence of integers, or a string-expression that
    corresponds to a slice operation on the variable x ... e.g., '[3:]' will
    colex columns 3 onward from the listoflists).

    Usage:   colex (listoflists,cnums)
    Returns: for an integer (or a 1-item sequence) a flat list holding that
             column; for a longer sequence a list-of-lists with the columns
             in the order they appear in cnums; for a string the result of
             applying the slice expression to every row
    """
    if type(cnums) in (list, tuple):          # multiple columns requested
        if len(cnums) == 1:
            # A 1-item sequence has always produced a flat column.
            only = cnums[0]
            return [row[only] for row in listoflists]
        # Rows are built directly, so a cell that is itself a list stays one
        # cell rather than being spliced into the row.
        return [[row[col] for col in cnums] for row in listoflists]
    if type(cnums) == str:                    # an '[3:]' type expression
        # NOTE(review): eval() executes arbitrary code; cnums must never come
        # from untrusted input.
        selector = eval('lambda x: x' + cnums)
        return [selector(row) for row in listoflists]
    return [row[cnums] for row in listoflists]    # just 1 column to get
228
229
def collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols for every
    unique combination of values found in keepcols.  Setting fcn1 and/or fcn2
    to a function rather than None (e.g., stats.sterr, len) appends those
    results after each collapsed value; a function that raises contributes
    the string 'N/A' instead.  cfcn is the collapse function to apply
    (defaults to the mean, defined locally to avoid a circular import with
    stats.py, but harmonicmean or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, one entry per collapse column: the cfcn
             result alone, or [cfcn result, fcn1 result, fcn2 result] (only
             the functions supplied) when fcn1/fcn2 are given.  Otherwise a
             list with ONE row per unique combination of keepcols values (in
             sorted order): the key values followed, for each collapse
             column, by the cfcn result and any fcn1/fcn2 results.
    """
    def collmean (inlist):
        total = 0
        for item in inlist:
            total = total + item
        return total/float(len(inlist))

    def safe_apply (fcn, column):
        # Optional statistics are best-effort: a failure becomes 'N/A'.
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if type(keepcols) not in (list, tuple):
        keepcols = [keepcols]
    if type(collapsecols) not in (list, tuple):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean
    extras = [fcn for fcn in (fcn1, fcn2) if fcn is not None]

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists,col)
            entry = cfcn(avgcol)
            if extras:
                entry = [entry] + [safe_apply(fcn, avgcol) for fcn in extras]
            means.append(entry)
        return means

    uniques = unique(colex(listoflists,keepcols))
    uniques.sort()
    newlist = []
    for item in uniques:
        if type(item) in (list, tuple):
            row = list(item)
        else:
            row = [item]
        # Select the matching rows BEFORE results are appended to 'row'.
        tmprows = linexand(listoflists,keepcols,row)
        for col in collapsecols:
            avgcol = colex(tmprows,col)
            row.append(cfcn(avgcol))
            for fcn in extras:
                row.append(safe_apply(fcn, avgcol))
        newlist.append(row)     # once per condition, not once per column
    return newlist
306
307
def dm (listoflists,criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a new list holding the rows of listoflists that meet the
             specified criterion
    """
    # NOTE(review): eval() executes arbitrary code; criterion must never come
    # from untrusted input.
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]
321
322
def flat(l):
    """
    Flattens a '2D' list by one level: the items of every row are strung
    together, in order, into a single new list.  List-correlate to the
    a.flat() method of NumPy arrays.

    Usage:    flat(l)
    Returns:  a new 1D list
    """
    flattened = []
    for row_number in range(len(l)):
        row = l[row_number]
        for col_number in range(len(row)):
            flattened.append(row[col_number])
    return flattened
335
336
def linexand (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).  Values are compared directly
    with ==, so strings containing quotes and floats are matched exactly.
    An empty columnlist imposes no condition and selects every row.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in (list, tuple):
        columnlist = [columnlist]
    if type(valuelist) not in (list, tuple):
        valuelist = [valuelist]
    # Indexing valuelist (rather than zip) keeps the IndexError when there
    # are fewer values than columns.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col, val in pairs)]
361
362
def linexor (listoflists,columnlist,valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but multiple values appear in valuelist, the values are all
    assumed to pertain to that same column.  Values are compared directly
    with ==, so strings containing quotes and floats are matched exactly.
    An empty columnlist selects no rows.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in (list, tuple):
        columnlist = [columnlist]
    if type(valuelist) not in (list, tuple):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    # Indexing valuelist (rather than zip) keeps the IndexError when there
    # are fewer values than columns.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col, val in pairs)]
391
392
def linedelimited (inlist,delimiter):
    """
    Returns a string composed of the elements in inlist (converted with
    str()), with consecutive elements separated by 'delimiter'.  The
    delimiter may be any string, including multi-character or empty ones.
    Used by function writedelimited.  Use '\\t' for tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string ('' for an empty inlist)
    """
    # join() places the delimiter only BETWEEN items, so nothing has to be
    # trimmed off the end afterwards.
    return delimiter.join([str(item) for item in inlist])
408
409
def lineincols (inlist,colsize):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Elements whose string form
    is longer than colsize are cut to exactly colsize characters so that
    later columns stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: formatted string created from inlist
    """
    cells = []
    for item in inlist:
        text = str(item)
        if len(text) > colsize:
            text = text[0:colsize]      # keep colsize chars, not colsize+1
        cells.append(text.rjust(colsize))
    return ''.join(cells)
429
430
def lineincustcols (inlist,colsizes):
    """
    Returns a string composed of elements in inlist, with each element
    right-aligned in a column of width specified by a sequence colsizes.  The
    length of colsizes must be greater than or equal to the number of columns
    in inlist.  Elements whose string form is longer than their column are
    cut to exactly the column width so that later columns stay aligned.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i in range(len(inlist)):
        width = colsizes[i]
        text = str(inlist[i])
        if len(text) > width:
            text = text[0:width]        # keep width chars, not width+1
        cells.append(text.rjust(width))
    return ''.join(cells)
455
456
def list2string (inlist,delimit=' '):
    """
    Converts a 1D list to a single long string for file output: every item
    is converted with str() and the pieces are joined with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    # str.join replaces the deprecated string.join() module function.
    return delimit.join([str(item) for item in inlist])
467
468
def makelol(inlist):
    """
    Turns a 1D list into a 2D list (a list-of-lists) by wrapping every item
    in its own 1-item list.  Useful when you want to use put() to write a 1D
    list one item per line in the file.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[element] for element in inlist]
481
482
def makestr (x):
    """
    Returns x unchanged when it is already a plain string, otherwise str(x).

    Usage:   makestr(x)
    """
    if type(x) == str:
        return x
    return str(x)
487
488
489def printcc (lst,extra=2):
490    """
491Prints a list of lists in columns, customized by the max size of items
492within the columns (max size of items in col, plus 'extra' number of spaces).
493Use 'dashes' or '\\n' in the list-of-lists to print dashes or blank lines,
494respectively.
495
496Usage:   printcc (lst,extra=2)
497Returns: None
498"""
499    if type(lst[0]) not in [ListType,TupleType]:
500        lst = [lst]
501    rowstokill = []
502    list2print = copy.deepcopy(lst)
503    for i in range(len(lst)):
504        if lst[i] == ['\n'] or lst[i]=='\n' or lst[i]=='dashes' or lst[i]=='' or lst[i]==['']:
505            rowstokill = rowstokill + [i]
506    rowstokill.reverse()   # delete blank rows from the end
507    for row in rowstokill:
508        del list2print[row]
509    maxsize = [0]*len(list2print[0])
510    for col in range(len(list2print[0])):
511        items = colex(list2print,col)
512        items = map(makestr,items)
513        maxsize[col] = max(map(len,items)) + extra
514    for row in lst:
515        if row == ['\n'] or row == '\n' or row == '' or row == ['']:
516            print
517        elif row == ['dashes'] or row == 'dashes':
518            dashes = [0]*len(maxsize)
519            for j in range(len(maxsize)):
520                dashes[j] = '-'*(maxsize[j]-2)
521            print lineincustcols(dashes,maxsize)
522        else:
523            print lineincustcols(row,maxsize)
524    return None
525
526
527def printincols (listoflists,colsize):
528    """
529Prints a list of lists in columns of (fixed) colsize width, where
530colsize is an integer.
531
532Usage:   printincols (listoflists,colsize)
533Returns: None
534"""
535    for row in listoflists:
536        print lineincols(row,colsize)
537    return None
538
539
540def pl (listoflists):
541    """
542Prints a list of lists, 1 list (row) at a time.
543
544Usage:   pl(listoflists)
545Returns: None
546"""
547    for row in listoflists:
548        if row[-1] == '\n':
549            print row,
550        else:
551            print row
552    return None
553
554
def printl(listoflists):
    """
    Alias for pl: prints a list of lists, 1 list (row) at a time.

    Usage:   printl(listoflists)
    Returns: None
    """
    pl(listoflists)
    return None
559
560
def replace (inlst,oldval,newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively descending
    into nested lists and tuples.  The input is never modified: a new
    container is built at every level (tuples stay tuples, everything else
    becomes a list).

    Usage:   replace (inlst,oldval,newval)
    Returns: a copy of inlst with the replacements made
    """
    replaced = []
    for item in inlst:
        if type(item) in (list, tuple):
            replaced.append(replace(item,oldval,newval))
        elif item == oldval:
            replaced.append(newval)
        else:
            replaced.append(item)
    # Tuples cannot be edited in place, so they are rebuilt from the new items.
    if type(inlst) == tuple:
        return tuple(replaced)
    return replaced
574
575
def recode (inlist,listmap,cols=None):
    """
    Changes the values in a list to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers).  cols defaults
    to None (meaning all columns of every row are recoded).  Values that do
    not appear in the first column of listmap are left untouched.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
             (each listmap row is [oldvalue, newvalue])
    Returns: a deep copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]     # lookup column, built once
    if cols is not None and type(cols) not in (list, tuple):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))           # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:
                continue                        # value has no mapping
            row[col] = listmap[idx][1]
    return lst
605
606
def remap (listoflists,criterion):
    """
    Remaps the rows of a 2D list (listoflists).  This requires a criterion
    as a function of 'x' (a string holding a Python expression); the
    expression is evaluated once per row with x bound to that row.

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: a new list with one expression result per row of listoflists
    """
    # NOTE(review): eval() executes arbitrary code; criterion must never come
    # from untrusted input.
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
619
620
def roundlist (inlist,digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the following
    function to all elements of float type ... round(element,digits).  The
    input list and its rows are left untouched; new lists are returned.

    Usage:   roundlist(inlist,digits)
    Returns: a 2D list with rounded floats (a 1D inlist of numbers is treated
             as a single row, so the result is [[...]])
    """
    if len(inlist) == 0:
        return []
    if type(inlist[0]) in (int, float):
        inlist = [inlist]                   # 1D input: treat as a single row
    rounded = []
    for row in inlist:
        newrow = []
        for item in row:
            if type(item) == float:
                item = round(item,digits)
            newrow.append(item)
        rounded.append(newrow)
    return rounded
637
638
def sortby(listoflists,sortcols):
    """
    Sorts a list of lists on the column(s) specified in the sequence
    sortcols.  The sort columns are temporarily abutted to the front of every
    row, the rows are sorted, and the extra leading columns are then sliced
    off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    keyed = abut(colex(listoflists,sortcols),listoflists)
    keyed.sort()
    # A single column number has no len(); it contributes one key column.
    try:
        nkeys = len(sortcols)
    except TypeError:
        nkeys = 1
    return colex(keyed,'[%s:]' % str(nkeys))
656
657
def unique (inlist):
    """
    Returns all unique items in the passed list, in order of first
    appearance.  If a list-of-lists is passed, unique LISTS are found (i.e.,
    items in the first dimension are compared).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
672
def duplicates(inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list: every
    item that occurs again somewhere later in the list is reported (so an
    item occurring n times is reported n-1 times).

    Usage:   duplicates (inlist)
    """
    return [inlist[pos] for pos in range(len(inlist))
            if inlist[pos] in inlist[pos+1:]]
684
685
def nonrepeats(inlist):
    """
    Returns the items that occur exactly once in the first dim of the passed
    list, in their original order.

    Usage:   nonrepeats (inlist)
    """
    singles = []
    for candidate in inlist:
        if inlist.count(candidate) == 1:
            singles.append(candidate)
    return singles
697
698
699#===================   PSTAT ARRAY FUNCTIONS  =====================
700#===================   PSTAT ARRAY FUNCTIONS  =====================
701#===================   PSTAT ARRAY FUNCTIONS  =====================
702#===================   PSTAT ARRAY FUNCTIONS  =====================
703#===================   PSTAT ARRAY FUNCTIONS  =====================
704#===================   PSTAT ARRAY FUNCTIONS  =====================
705#===================   PSTAT ARRAY FUNCTIONS  =====================
706#===================   PSTAT ARRAY FUNCTIONS  =====================
707#===================   PSTAT ARRAY FUNCTIONS  =====================
708#===================   PSTAT ARRAY FUNCTIONS  =====================
709#===================   PSTAT ARRAY FUNCTIONS  =====================
710#===================   PSTAT ARRAY FUNCTIONS  =====================
711#===================   PSTAT ARRAY FUNCTIONS  =====================
712#===================   PSTAT ARRAY FUNCTIONS  =====================
713#===================   PSTAT ARRAY FUNCTIONS  =====================
714#===================   PSTAT ARRAY FUNCTIONS  =====================
715
716try:                         # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE
717 import Numeric
718 N = Numeric
719
720 def aabut (source, *args):
721    """
722Like the |Stat abut command.  It concatenates two arrays column-wise
723and returns the result.  CAUTION:  If one array is shorter, it will be
724repeated until it is as long as the other.
725
726Usage:   aabut (source, args)    where args=any # of arrays
727Returns: an array as long as the LONGEST array past, source appearing on the
728         'left', arrays in <args> attached on the 'right'.
729"""
730    if len(source.shape)==1:
731        width = 1
732        source = N.resize(source,[source.shape[0],width])
733    else:
734        width = source.shape[1]
735    for addon in args:
736        if len(addon.shape)==1:
737            width = 1
738            addon = N.resize(addon,[source.shape[0],width])
739        else:
740            width = source.shape[1]
741        if len(addon) < len(source):
742            addon = N.resize(addon,[source.shape[0],addon.shape[1]])
743        elif len(source) < len(addon):
744            source = N.resize(source,[addon.shape[0],source.shape[1]])
745        source = N.concatenate((source,addon),1)
746    return source
747
748
749 def acolex (a,indices,axis=1):
750    """
751Extracts specified indices (a list) from passed array, along passed
752axis (column extraction is default).  BEWARE: A 1D array is presumed to be a
753column-array (and that the whole array will be returned as a column).
754
755Usage:   acolex (a,indices,axis=1)
756Returns: the columns of a specified by indices
757"""
758    if type(indices) not in [ListType,TupleType,N.ArrayType]:
759        indices = [indices]
760    if len(N.shape(a)) == 1:
761        cols = N.resize(a,[a.shape[0],1])
762    else:
763        cols = N.take(a,indices,axis)
764    return cols
765
766
767 def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
768    """
769Averages data in collapsecol, keeping all unique items in keepcols
770(using unique, which keeps unique LISTS of column numbers), retaining
771the unique sets of values in keepcols, the mean for each.  If stderror or
772N of the mean are desired, set either or both parameters to 1.
773
774Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
775Returns: unique 'conditions' specified by the contents of columns specified
776         by keepcols, abutted with the mean(s) of column(s) specified by
777         collapsecols
778"""
779    def acollmean (inarray):
780        return N.sum(N.ravel(inarray))
781
782    if cfcn == None:
783        cfcn = acollmean
784    if keepcols == []:
785        avgcol = acolex(a,collapsecols)
786        means = N.sum(avgcol)/float(len(avgcol))
787        if fcn1<>None:
788            try:
789                test = fcn1(avgcol)
790            except:
791                test = N.array(['N/A']*len(means))
792            means = aabut(means,test)
793        if fcn2<>None:
794            try:
795                test = fcn2(avgcol)
796            except:
797                test = N.array(['N/A']*len(means))
798            means = aabut(means,test)
799        return means
800    else:
801        if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
802            keepcols = [keepcols]
803        values = colex(a,keepcols)   # so that "item" can be appended (below)
804        uniques = unique(values)  # get a LIST, so .sort keeps rows intact
805        uniques.sort()
806        newlist = []
807        for item in uniques:
808            if type(item) not in [ListType,TupleType,N.ArrayType]:
809                item =[item]
810            tmprows = alinexand(a,keepcols,item)
811            for col in collapsecols:
812                avgcol = acolex(tmprows,col)
813                item.append(acollmean(avgcol))
814                if fcn1<>None:
815                    try:
816                        test = fcn1(avgcol)
817                    except:
818                        test = 'N/A'
819                    item.append(test)
820                if fcn2<>None:
821                    try:
822                        test = fcn2(avgcol)
823                    except:
824                        test = 'N/A'
825                    item.append(test)
826                newlist.append(item)
827        try:
828            new_a = N.array(newlist)
829        except TypeError:
830            new_a = N.array(newlist,'O')
831        return new_a
832
833
def adm (a,criterion):
    """
Returns rows from the passed list of lists that meet the criteria in
the passed criterion expression (a string as a function of x).

NOTE: criterion is compiled with eval(), so it must come from trusted
code only; never pass text supplied by a user or read from a file.

Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
Returns: an array holding the rows of a for which criterion is true (an
         Object array when the rows cannot be stored in a regular array)
"""
    # SECURITY: eval() executes arbitrary code contained in criterion.
    # Build the predicate once, then apply it to every row.
    keeprow = eval('lambda x: '+criterion)
    # A list comprehension always yields a real list (filter() may return a
    # lazy iterator, which the array constructor cannot turn into rows).
    lines = [row for row in a if keeprow(row)]
    try:
        lines = N.array(lines)
    except (TypeError, ValueError):     # rows don't fit a regular array
        lines = N.array(lines,'O')
    return lines
848
849
def isstring(x):
    """
Tells whether x is a string (instances of str subclasses count too).

Usage:   isstring(x)
Returns: 1 if x is a string, 0 otherwise
"""
    # isinstance (rather than an exact type comparison) accepts subclasses
    if isinstance(x, str):
        return 1
    return 0
855
856
def alinexand (a,columnlist,valuelist):
    """
Returns the rows of an array where col (from columnlist) = val
(from valuelist).  One value is required for each column in columnlist.
A bare (non-sequence) columnlist or valuelist is treated as a 1-item list.
With no columns at all there is nothing to test, so every row is returned.

Usage:   alinexand (a,columnlist,valuelist)
Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
"""
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if isinstance(value, str):
            # repr() yields a correctly escaped literal, so values holding
            # quotes or backslashes cannot break (or alter) the criterion
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x['+str(columnlist[i])+']=='+critval)
    # '1' (always true) keeps the criterion valid when there are no tests
    criterion = ' and '.join(tests) or '1'
    return adm(a,criterion)
878
879
def alinexor (a,columnlist,valuelist):
    """
Returns the rows of an array where col (from columnlist) = val (from
valuelist).  One value is required for each column in columnlist.
The exception is if either columnlist or valuelist has only 1 value,
in which case that item will be expanded to match the length of the
other list.  With no columns at all nothing can match, so no rows are
returned.

Usage:   alinexor (a,columnlist,valuelist)
Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
"""
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    # stretch a lone column (or lone value) to pair with every item of the
    # other list
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    tests = []
    for i in range(len(columnlist)):
        value = valuelist[i]
        if isinstance(value, str):
            # repr() yields a correctly escaped literal, so values holding
            # quotes or backslashes cannot break (or alter) the criterion
            critval = repr(value)
        else:
            critval = str(value)
        tests.append('x['+str(columnlist[i])+']=='+critval)
    # '0' (always false) keeps the criterion valid when there are no tests
    criterion = ' or '.join(tests) or '0'
    return adm(a,criterion)
908
909
def areplace (a,oldval,newval):
    """
Replaces all occurrences of oldval with newval in array a.  Elements are
selected rather than combined arithmetically, so infinite values (in a,
oldval or newval) do not turn into NaN.

Usage:   areplace(a,oldval,newval)
Returns: a new array; a itself is left unmodified
"""
    # take newval where a equals oldval, and the original element elsewhere
    return N.where(N.equal(a,oldval),newval,a)
918
919
def arecode (a,listmap,col='all'):
    """
Remaps the values in an array to a new set of values (useful when
you need to recode data from (e.g.) strings to numbers as most stats
packages require).  Can work on SINGLE columns, or 'all' columns at once.
EVERY pair in listmap is applied.  Each element is tested against the
ORIGINAL data, so a pair's new value is never recoded again by another
pair; if several pairs share the same old value, the last one wins.

Usage:   arecode (a,listmap,col='all')
Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
"""
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a,col)
        work = work.flat
    # Object storage is required if the data is already of Object type or
    # if any replacement value is a string.
    useobjects = work.typecode()=='O' or a.typecode()=='O'
    for pair in listmap:
        if type(pair[1]) == StringType:
            useobjects = 1
    if useobjects:
        original = N.array(work,'O')    # untouched copy used for the tests
        work = N.array(work,'O')
        a = N.array(a,'O')              # so the pieces can be re-joined below
        for i in range(len(work)):
            for pair in listmap:
                if original[i]==pair[0]:
                    work[i] = pair[1]
    else:   # must be a non-Object type array and replacements
        original = work
        for pair in listmap:
            work = N.where(N.equal(original,pair[0]),pair[1],work)
    if col == 'all':
        return N.reshape(work,ashape)
    else:
        # splice the recoded column back between its neighbours
        return N.concatenate([a[:,0:col],work[:,N.NewAxis],a[:,col+1:]],1)
949
950
def arowcompare(row1, row2):
    """
Compares two rows from an array, regardless of whether it is an
array of numbers or of python objects.  If EITHER row is of Object type
the elements are compared pairwise with ==; otherwise the comparison is
done by the array package.

Usage:   arowcompare(row1,row2)
Returns: an array of equal length containing 1s where the two rows had
         identical elements and 0 otherwise
"""
    # typecode is a method: it has to be CALLED on both rows
    if row1.typecode()=='O' or row2.typecode()=='O':
        cmpvect = N.array([int(x == y) for x, y in zip(row1,row2)])
    else:
        cmpvect = N.equal(row1,row2)
    return cmpvect
965
966
def arowsame(row1, row2):
    """
Tests two rows for equality, element by element, using arowcompare to do
the per-element comparison.

Usage:   arowsame(row1,row2)
Returns: 1 if the two rows are identical, 0 otherwise.
"""
    # the rows match only if every position compared equal
    return N.alltrue(arowcompare(row1,row2))
977
978
def asortrows(a,axis=0):
    """
Reorders the slices of an array taken along the given axis, leaving the
contents of each slice untouched (unlike a plain sort, which rearranges
the elements WITHIN the axis).  Slices are ordered by ordinary list
comparison.

Usage:   asortrows(a,axis=0)
Returns: sorted version of a
"""
    swapped = (axis != 0)
    if swapped:
        # bring the requested axis to the front so its slices become "rows"
        a = N.swapaxes(a, axis, 0)
    ordered = N.array(sorted(a.tolist()))
    if swapped:
        # restore the original axis layout
        ordered = N.swapaxes(ordered, axis, 0)
    return ordered
997
998
def aunique(inarray):
    """
Returns unique items in the FIRST dimension of the passed array, in order
of first appearance.  Only works on arrays NOT including string items.
An empty input gives an empty array.

Usage:   aunique (inarray)
Returns: an array of the distinct items (rows, for 2+D input) of inarray
"""
    if len(inarray) == 0:                  # nothing to look at
        return N.array([])
    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:            # IF IT'S A 1D ARRAY
        for item in inarray[1:]:
            # no element of uniques equals item ==> it is a new value
            if N.add.reduce(N.equal(uniques,item)) == 0:
                uniques = N.concatenate([uniques,N.array([item])])
    else:                                  # IT MUST BE A 2+D ARRAY
        if inarray.typecode() != 'O':  # not an Object array
            for item in inarray[1:]:
                # a row is already known if ALL its elements match some row
                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
                else:
                    pass  # this item is already in the uniques array
        else:   # must be an Object array, alltrue/equal functions don't work
            for item in inarray[1:]:
                newflag = 1
                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
                    test = N.sum(abs(N.array(map(cmp,item,unq))))
                    if test == 0:   # if item identical to any 1 row in uniques
                        newflag = 0 # then not a novel item to add
                        break
                if newflag == 1:
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
    return uniques
1038
1039
def aduplicates(inarray):
    """
Returns duplicate items in the FIRST dimension of the passed array (each
duplicated item is reported once).  Only works on arrays NOT including
string items.  When nothing is duplicated an empty array is returned.

Usage:   aduplicates (inarray)
Returns: an array of the items (rows, for 2+D input) occurring more than once
"""
    inarray = N.array(inarray)
    aslist = inarray.tolist()
    dups = []
    if len(inarray.shape) == 1:            # IF IT'S A 1D ARRAY
        for i, value in enumerate(aslist):
            # keep a value that shows up again later, but list it only once
            if value in aslist[i+1:] and value not in dups:
                dups.append(value)
    else:                                  # IT MUST BE A 2+D ARRAY
        for i, row in enumerate(aslist):
            if row in aslist[i+1:]:
                dups.append(row)
        dups = unique(dups)
    return N.array(dups)
1064
1065except ImportError:    # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs
1066 pass
Note: See TracBrowser for help on using the repository browser.