#
source:
orange-bioinformatics/_bioinformatics/pstat.py
@
1636:10d234fdadb9

Revision 1636:10d234fdadb9, 36.5 KB checked in by mitar, 2 years ago (diff) |
---|

Rev | Line | |
---|---|---|

[62] | 1 | # Copyright (c) 1999-2000 Gary Strangman; All Rights Reserved. |

2 | # | |

3 | # This software is distributable under the terms of the GNU | |

4 | # General Public License (GPL) v2, the text of which can be found at | |

5 | # http://www.gnu.org/copyleft/gpl.html. Installing, importing or otherwise | |

6 | # using this module constitutes acceptance of the terms of this License. | |

7 | # | |

8 | # Disclaimer | |

9 | # | |

10 | # This software is provided "as-is". There are no expressed or implied | |

11 | # warranties of any kind, including, but not limited to, the warranties | |

12 | # of merchantability and fittness for a given application. In no event | |

13 | # shall Gary Strangman be liable for any direct, indirect, incidental, | |

14 | # special, exemplary or consequential damages (including, but not limited | |

15 | # to, loss of use, data or profits, or business interruption) however | |

16 | # caused and on any theory of liability, whether in contract, strict | |

17 | # liability or tort (including negligence or otherwise) arising in any way | |

18 | # out of the use of this software, even if advised of the possibility of | |

19 | # such damage. | |

20 | # | |

21 | # Comments and/or additions are welcome (send e-mail to: | |

22 | # strang@nmr.mgh.harvard.edu). | |

23 | # | |

24 | """ | |

25 | pstat.py module | |

26 | ||

27 | ################################################# | |

28 | ####### Written by: Gary Strangman ########### | |

29 | ####### Last modified: Jun 29, 2001 ########### | |

30 | ################################################# | |

31 | ||

32 | This module provides some useful list and array manipulation routines | |

33 | modeled after those found in the |Stat package by Gary Perlman, plus a | |

34 | number of other useful list/file manipulation functions. The list-based | |

35 | functions include: | |

36 | ||

37 | abut (source,*args) | |

38 | simpleabut (source, addon) | |

39 | colex (listoflists,cnums) | |

40 | collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None) | |

41 | dm (listoflists,criterion) | |

42 | flat (l) | |

43 | linexand (listoflists,columnlist,valuelist) | |

44 | linexor (listoflists,columnlist,valuelist) | |

45 | linedelimited (inlist,delimiter) | |

46 | lineincols (inlist,colsize) | |

47 | lineincustcols (inlist,colsizes) | |

48 | list2string (inlist) | |

49 | makelol(inlist) | |

50 | makestr(x) | |

51 | printcc (lst,extra=2) | |

52 | printincols (listoflists,colsize) | |

53 | pl (listoflists) | |

54 | printl(listoflists) | |

55 | replace (lst,oldval,newval) | |

56 | recode (inlist,listmap,cols=None) | |

57 | remap (listoflists,criterion) | |

58 | roundlist (inlist,num_digits_to_round_floats_to) | |

59 | sortby(listoflists,sortcols) | |

60 | unique (inlist) | |

61 | duplicates(inlist) | |

62 | writedelimited (listoflists, delimiter, file, writetype='w') | |

63 | ||

64 | Some of these functions have alternate versions which are defined only if | |

65 | Numeric (NumPy) can be imported. These functions are generally named as | |

66 | above, with an 'a' prefix. | |

67 | ||

68 | aabut (source, *args) | |

69 | acolex (a,indices,axis=1) | |

70 | acollapse (a,keepcols,collapsecols,sterr=0,ns=0) | |

71 | adm (a,criterion) | |

72 | alinexand (a,columnlist,valuelist) | |

73 | alinexor (a,columnlist,valuelist) | |

74 | areplace (a,oldval,newval) | |

75 | arecode (a,listmap,col='all') | |

76 | arowcompare (row1, row2) | |

77 | arowsame (row1, row2) | |

78 | asortrows(a,axis=0) | |

79 | aunique(inarray) | |

80 | aduplicates(inarray) | |

81 | ||

82 | Currently, the code is all but completely un-optimized. In many cases, the | |

83 | array versions of functions amount simply to aliases to built-in array | |

84 | functions/methods. Their inclusion here is for function name consistency. | |

85 | """ | |

86 | ||

87 | ## CHANGE LOG: | |

88 | ## ========== | |

89 | ## 01-11-15 ... changed list2string() to accept a delimiter | |

90 | ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1 | |

91 | ## 01-05-31 ... added duplicates() and aduplicates() functions | |

92 | ## 00-12-28 ... license made GPL, docstring and import requirements | |

93 | ## 99-11-01 ... changed version to 0.3 | |

94 | ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py) | |

95 | ## 03/27/99 ... added areplace function, made replace fcn recursive | |

96 | ## 12/31/98 ... added writefc function for output to fixed column sizes | |

97 | ## 12/07/98 ... fixed import problem (failed on collapse() fcn) | |

98 | ## added __version__ variable (now 0.2) | |

99 | ## 12/05/98 ... updated doc-strings | |

100 | ## added features to collapse() function | |

101 | ## added flat() function for lists | |

102 | ## fixed a broken asortrows() | |

103 | ## 11/16/98 ... fixed minor bug in aput for 1D arrays | |

104 | ## | |

105 | ## 11/08/98 ... fixed aput to output large arrays correctly | |

106 | ||

[1632] | 107 | from __future__ import absolute_import |

108 | ||

[62] | 109 | import string, copy |

110 | from types import * | |

111 | ||

112 | __version__ = 0.4 | |

113 | ||

114 | ###=========================== LIST FUNCTIONS ========================== | |

115 | ### | |

116 | ### Here are the list functions, DEFINED FOR ALL SYSTEMS. | |

117 | ### Array functions (for NumPy-enabled computers) appear below. | |

118 | ### | |

119 | ||

def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates lists side-by-side and
    returns the result.  '2D' lists are also accommodated for either argument
    (source or addon).  CAUTION:  If one list is shorter, it will be repeated
    (cycled) until it is as long as the longest list.  If this behavior is not
    desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    Raises:  ZeroDivisionError if the shorter of two lists is empty (there is
             nothing to repeat)
    """
    def cycle_to_length(seq, length):
        # Extend seq with deep copies of its original contents until it holds
        # at least `length` items, then trim any overshoot.
        repeats = -(-length // len(seq))    # ceiling division; stays an int
        extra = copy.deepcopy(seq)
        for _ in range(repeats - 1):
            seq = seq + extra
        return seq[0:length]

    if not isinstance(source, (list, tuple)):
        source = [source]
    for addon in args:
        if not isinstance(addon, (list, tuple)):
            addon = [addon]
        if len(addon) < len(source):        # source is longer: stretch addon
            addon = cycle_to_length(addon, len(source))
        elif len(source) < len(addon):      # addon is longer: stretch source
            source = cycle_to_length(source, len(addon))
        source = simpleabut(source, addon)
    return source

165 | ||

166 | ||

def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed; rows of source that have no partner in
    addon are returned unchanged (as deep copies).

    Whether an argument is treated as '2D' is decided from its first element
    only.  An empty source gives [], an empty addon gives a copy of source.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a list of lists as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if not isinstance(source, (list, tuple)):
        source = [source]
    if not isinstance(addon, (list, tuple)):
        addon = [addon]
    # Work on a private, mutable copy so the caller's data is never touched
    # and tuple sources can be filled in row by row.
    result = list(copy.deepcopy(source))
    if not source or not addon:
        return result
    source_is_2d = isinstance(source[0], (list, tuple))
    addon_is_2d = isinstance(addon[0], (list, tuple))
    for i in range(min(len(source), len(addon))):
        left = source[i] if source_is_2d else [source[i]]
        right = addon[i] if addon_is_2d else [addon[i]]
        try:
            result[i] = left + right
        except TypeError:
            # mixed list/tuple rows cannot be added directly
            result[i] = list(left) + list(right)
    return result

201 | ||

202 | ||

def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified by 'cnums'.  cnums can be
    an integer, a sequence of integers, or a string holding a subscript that
    is appended to the row variable x ... e.g., '[3:]' is evaluated as x[3:]
    and extracts columns 3 onward from every row.

    Usage:   colex (listoflists,cnums)
    Returns: for a single integer (or a one-element sequence) a flat list of
             that column's values; for several integers a list-of-lists with
             the columns in the order they appear in cnums; for a string the
             list of per-row results of the subscript
    """
    if isinstance(cnums, (list, tuple)):        # multiple columns to get
        column = [row[cnums[0]] for row in listoflists]
        for col in cnums[1:]:
            column = abut(column, [row[col] for row in listoflists])
    elif isinstance(cnums, str):                # a '[3:]' type expression
        # NOTE: cnums is evaluated as Python code; never pass untrusted text.
        extract = eval('lambda x: x' + cnums)
        column = [extract(row) for row in listoflists]
    else:                                       # just 1 column to get
        column = [row[cnums] for row in listoflists]
    return column

229 | ||

230 | ||

def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Collapses (by default: averages) the data in collapsecols for every
    unique combination of values found in keepcols.  Setting fcn1 and/or fcn2
    to a function rather than None (e.g., stats.sterr, len) appends that
    function's result after each collapsed value; if the function raises,
    'N/A' is appended instead.  cfcn is the collapse function to apply
    (defaults to the mean, defined here in the pstat module to avoid circular
    imports with stats.py, but harmonicmean or others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: if keepcols is empty, one entry per column in collapsecols: the
             collapsed value, or [value, fcn1 result, fcn2 result] when extra
             functions were given.  Otherwise a sorted list of rows, each
             holding one unique combination of keepcols values followed by
             value (and fcn1/fcn2 results) for every column in collapsecols.
    """
    def collmean(inlist):
        return sum(inlist) / float(len(inlist))

    def summarize(column):
        # Collapsed value first, then the optional extra statistics in order.
        results = [cfcn(column)]
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                try:
                    results.append(fcn(column))
                except Exception:       # best effort: flag, do not abort
                    results.append('N/A')
        return results

    if not isinstance(keepcols, (list, tuple)):
        keepcols = [keepcols]
    if not isinstance(collapsecols, (list, tuple)):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if not keepcols:                    # nothing to group by: whole columns
        means = []
        for col in collapsecols:
            results = summarize(colex(listoflists, col))
            if len(results) == 1:
                means.append(results[0])
            else:
                means.append(results)
        return means

    uniques = unique(colex(listoflists, keepcols))
    uniques.sort()
    newlist = []
    for item in uniques:
        # A single keep-column yields scalar conditions; make each one a row.
        if isinstance(item, (list, tuple)):
            row = list(item)
        else:
            row = [item]
        tmprows = linexand(listoflists, keepcols, row)
        for col in collapsecols:
            row.extend(summarize(colex(tmprows, col)))
        newlist.append(row)
    return newlist

307 | ||

308 | ||

def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the criterion
    """
    # NOTE: criterion is evaluated as Python code; never pass untrusted text.
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]

322 | ||

323 | ||

def flat(l):
    """
    Flattens a '2D' list by one level: the elements of every row are
    collected, in order, into a single new list.  List-correlate to the
    a.flat() method of NumPy arrays.

    Usage:   flat(l)
    Returns: a new 1D list holding all elements of all rows of l
    """
    flattened = []
    for row in l:
        flattened.extend(row)
    return flattened

336 | ||

337 | ||

def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must equal len(valuelist).  Values are compared directly
    with ==, so strings containing quotes, floats and arbitrary objects all
    match exactly.  With no columns at all, every row qualifies.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: a list of the rows of listoflists where
             columnlist[i]=valuelist[i] for ALL i
    Raises:  IndexError if valuelist is shorter than columnlist
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    # Pair by index (not zip) so a too-short valuelist is reported, not hidden.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if all(row[col] == val for col, val in pairs)]

362 | ||

363 | ||

def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one value
    exists for columnlist but multiple values appear in valuelist, the
    valuelist values are all assumed to pertain to the same column.  Values
    are compared directly with ==, so strings containing quotes, floats and
    arbitrary objects all match exactly.  With no columns, no row qualifies.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: a list of the rows of listoflists where
             columnlist[i]=valuelist[i] for ANY i
    Raises:  IndexError if valuelist is shorter than columnlist
    """
    if not isinstance(columnlist, (list, tuple)):
        columnlist = [columnlist]
    if not isinstance(valuelist, (list, tuple)):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist * len(valuelist)    # same column, many values
    # Pair by index (not zip) so a too-short valuelist is reported, not hidden.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    return [row for row in listoflists
            if any(row[col] == val for col, val in pairs)]

392 | ||

393 | ||

def linedelimited(inlist, delimiter):
    """
    Returns a string composed of the elements in inlist, with consecutive
    elements separated by 'delimiter' (which may be any length, including
    empty).  Non-string elements are converted with str().  Used by function
    writedelimited.  Use '\\t' for tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    Returns: the delimited string ('' for an empty inlist)
    """
    fields = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        fields.append(item)
    return delimiter.join(fields)

409 | ||

410 | ||

def lineincols(inlist, colsize):
    """
    Returns a string composed of the elements in inlist, with each element
    right-aligned in a column of (fixed) width colsize.  Non-string elements
    are converted with str(); elements wider than colsize are truncated to
    exactly colsize characters so that later columns stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    Returns: the formatted string
    """
    width = max(colsize, 0)
    cells = []
    for item in inlist:
        if not isinstance(item, str):
            item = str(item)
        cells.append(item[:width].rjust(width))
    return ''.join(cells)

430 | ||

431 | ||

def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of the elements in inlist, with each element
    right-aligned in a column whose width is given by the matching entry of
    the sequence colsizes.  The length of colsizes must be greater than or
    equal to the number of columns in inlist.  Non-string elements are
    converted with str(); elements wider than their column are truncated to
    exactly the column width so that later columns stay aligned.

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    Raises:  IndexError if colsizes is shorter than inlist
    """
    cells = []
    for i, item in enumerate(inlist):
        if not isinstance(item, str):
            item = str(item)
        width = max(colsizes[i], 0)
        cells.append(item[:width].rjust(width))
    return ''.join(cells)

456 | ||

457 | ||

def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output: every item
    is passed through makestr() and the pieces are joined with 'delimit'.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    return delimit.join([makestr(item) for item in inlist])

468 | ||

469 | ||

def makelol(inlist):
    """
    Turns a 1D list into a 2D list (a list-of-lists) by wrapping every item
    in its own one-element list.  Useful when you want to use put() to write
    a 1D list one item per line in the file.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[entry] for entry in inlist]

482 | ||

483 | ||

def makestr(x):
    """
    Returns x unchanged if it is already a string, otherwise str(x).

    Usage:   makestr(x)
    Returns: a string
    """
    if not isinstance(x, str):
        x = str(x)
    return x

488 | ||

489 | ||

def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces).  Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively; such separator rows (also accepted as
    ['dashes'], ['\\n'], '' and ['']) are ignored when the column widths are
    computed.  A 1D list is printed as a single row.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    blank_rows = (['\n'], '\n', '', [''])
    dash_rows = (['dashes'], 'dashes')

    if not lst:
        return None
    # Treat the input as one row unless at least one element is itself a row
    # (looking only at lst[0] would misread a table that starts with 'dashes').
    if not any(isinstance(row, (list, tuple)) for row in lst):
        lst = [lst]

    # Column widths come from the real data rows only.
    data_rows = [row for row in lst
                 if row not in blank_rows and row not in dash_rows]
    maxsize = []
    if data_rows:
        for col in range(len(data_rows[0])):
            cells = [makestr(item) for item in colex(data_rows, col)]
            maxsize.append(max([len(cell) for cell in cells]) + extra)

    for row in lst:
        if row in blank_rows:
            print('')
        elif row in dash_rows:
            # dashes span the widest item of each column, not the padding
            dashes = ['-' * (width - extra) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None

526 | ||

527 | ||

def printincols(listoflists, colsize):
    """
    Writes each row of listoflists to standard output on its own line, laid
    out by lineincols() in columns of (fixed) width colsize, where colsize
    is an integer.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for record in listoflists:
        formatted = lineincols(record, colsize)
        print(formatted)

539 | ||

540 | ||

def pl (listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last
    element equals '\\n' is printed with a trailing comma on the print
    statement (so print does not add its own newline); every other row is
    printed normally.  Each row must be non-empty, since row[-1] is read.

    Usage:   pl(listoflists)
    Returns: None
    """
    for row in listoflists:
        if row[-1] == '\n':     # row already carries its own line ending
            print row,          # trailing comma: no newline added by print
        else:
            print row
    return None

554 | ||

555 | ||

def printl(listoflists):
    """Alias for pl: prints listoflists one row at a time; returns None."""
    return pl(listoflists)

560 | ||

561 | ||

def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively
    descending into nested lists and tuples.  The input is not modified;
    tuples (top-level or nested) come back as tuples.

    Usage:   replace (inlst,oldval,newval)
    Returns: a copy of inlst with every element equal to oldval replaced
    """
    if isinstance(inlst, tuple):
        # tuples are immutable: do the work on a list, then convert back
        return tuple(replace(list(inlst), oldval, newval))
    lst = inlst * 1                     # shallow copy of this level
    for i in range(len(lst)):
        if isinstance(lst[i], (list, tuple)):
            lst[i] = replace(lst[i], oldval, newval)
        elif lst[i] == oldval:
            lst[i] = newval
    return lst

575 | ||

576 | ||

def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful
    when you need to recode data from (e.g.) strings to numbers).  listmap
    is a 2D list of [oldvalue, newvalue] pairs; cells whose value is not
    among the old values are left alone.  cols defaults to None (meaning
    every column of every row is recoded); it may also be a single column
    index or a sequence of column indices.  inlist itself is not modified.

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a deep copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    old_values = [pair[0] for pair in listmap]      # built once, not per cell
    if cols is not None and not isinstance(cols, (list, tuple)):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))   # every column of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = old_values.index(row[col])
            except ValueError:          # value has no mapping: keep it
                continue
            row[col] = listmap[idx][1]
    return lst

606 | ||

607 | ||

def remap(listoflists, criterion):
    """
    Applies an expression to every row of a 2D list (listoflists).  The
    expression is given as a string written as a function of 'x' (the row),
    so that the result is that of map(lambda x: criterion, listoflists).

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists
    """
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]

620 | ||

621 | ||

def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies
    round(element,digits) to all elements that are floats; every other
    element is passed through untouched.  The input is never modified.  A 1D
    list (detected by its first element being an int or float) is treated as
    a single row, so the result is always a list of rows.

    Usage:   roundlist(inlist,digits)
    Returns: a new list of lists with rounded floats ([] for an empty inlist)
    """
    if not inlist:
        return []
    if isinstance(inlist[0], (int, float)):
        inlist = [inlist]
    # Build fresh rows: a shallow copy would round the caller's data in place.
    return [[round(value, digits) if isinstance(value, float) else value
             for value in row]
            for row in inlist]

638 | ||

639 | ||

def sortby(listoflists, sortcols):
    """
    Sorts a list of lists on the column(s) specified by sortcols (anything
    colex() accepts: a column index, a sequence of indices, or a subscript
    string such as '[1:]').  Rows whose sort columns compare equal are
    ordered by comparing the complete rows.  The input is not modified.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list (rows returned as new lists), unchanged column
             ordering; [] for an empty listoflists
    """
    if not listoflists:
        return []
    keys = colex(listoflists, sortcols)
    # Decorate each row with its key, sort, then drop the key again.
    decorated = [(key, list(row)) for key, row in zip(keys, listoflists)]
    decorated.sort()
    return [row for _key, row in decorated]

657 | ||

658 | ||

def unique(inlist):
    """
    Collects the distinct items of the passed list, keeping the order of
    first appearance.  If a list-of-lists is passed, distinct LISTS are
    found (i.e., items in the first dimension are compared).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen

673 | ||

def duplicates(inlist):
    """
    Finds repeated items in the FIRST dimension of the passed list: an item
    is reported once for every position at which the same value occurs
    again later in the list.

    Usage:   duplicates (inlist)
    Returns: list of the repeated items, in order of appearance
    """
    return [entry for position, entry in enumerate(inlist)
            if entry in inlist[position + 1:]]

685 | ||

686 | ||

def nonrepeats(inlist):
    """
    Picks out the items that occur exactly once in the first dimension of
    the passed list, in their original order.

    Usage:   nonrepeats (inlist)
    Returns: list of the items that are NOT duplicated
    """
    return [entry for entry in inlist if inlist.count(entry) == 1]

698 | ||

699 | ||

700 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

701 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

702 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

703 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

704 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

705 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

706 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

707 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

708 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

709 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

710 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

711 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

712 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

713 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

714 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

715 | #=================== PSTAT ARRAY FUNCTIONS ===================== | |

716 | ||

717 | try: # DEFINE THESE *ONLY* IF NUMERIC IS AVAILABLE | |

718 | import Numeric | |

719 | N = Numeric | |

720 | ||

def aabut (source, *args):
    """
    Like the |Stat abut command.  It concatenates two arrays column-wise
    and returns the result.  CAUTION:  If one array is shorter, it will be
    repeated until it is as long as the other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    # A 1D source is reshaped into a single column (shape [len, 1]).
    if len(source.shape)==1:
        width = 1
        source = N.resize(source,[source.shape[0],width])
    else:
        width = source.shape[1]     # NOTE(review): value is never read
    for addon in args:
        if len(addon.shape)==1:
            width = 1
            # NOTE(review): a 1D addon is resized to source's row count here,
            # before the length comparison below -- presumably a longer 1D
            # addon is cut short rather than source being extended; confirm
            # against the "LONGEST" promise in the docstring.
            addon = N.resize(addon,[source.shape[0],width])
        else:
            width = source.shape[1] # NOTE(review): value is never read
        # Bring the shorter of the two to the other's number of rows.
        if len(addon) < len(source):
            addon = N.resize(addon,[source.shape[0],addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source,[addon.shape[0],source.shape[1]])
        source = N.concatenate((source,addon),1)    # join along axis 1
    return source

748 | ||

749 | ||

def acolex (a,indices,axis=1):
    """
    Extracts specified indices (a list) from passed array, along passed
    axis (column extraction is default).  BEWARE: A 1D array is presumed to
    be a column-array (and the whole array will be returned as a column;
    indices and axis are not used in that case).

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    # A bare index is wrapped so N.take always receives a sequence.
    if type(indices) not in [ListType,TupleType,N.ArrayType]:
        indices = [indices]
    if len(N.shape(a)) == 1:
        cols = N.resize(a,[a.shape[0],1])   # 1D input: return it as a column
    else:
        cols = N.take(a,indices,axis)
    return cols

766 | ||

767 | ||

def acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None):
    """
    Collapses data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column numbers), retaining
    the unique sets of values in keepcols and one collapsed value for each.
    If fcn1 and/or fcn2 are given, their results for each collapsed column
    are appended as well ('N/A' if the function raises).

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: unique 'conditions' specified by the contents of columns specified
             by keepcols, abutted with the collapsed value(s) of column(s)
             specified by collapsecols
    """
    def acollmean (inarray):
        # NOTE(review): despite the name this returns the SUM of the
        # flattened array -- no division by the element count is visible.
        return N.sum(N.ravel(inarray))

    # NOTE(review): cfcn is defaulted here but never called below; the first
    # branch uses N.sum/len and the second calls acollmean directly.
    if cfcn == None:
        cfcn = acollmean
    if keepcols == []:
        # No grouping columns: collapse each requested column as a whole.
        avgcol = acolex(a,collapsecols)
        means = N.sum(avgcol)/float(len(avgcol))
        if fcn1<>None:
            try:
                test = fcn1(avgcol)
            except:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        if fcn2<>None:
            try:
                test = fcn2(avgcol)
            except:
                test = N.array(['N/A']*len(means))
            means = aabut(means,test)
        return means
    else:
        if type(keepcols) not in [ListType,TupleType,N.ArrayType]:
            keepcols = [keepcols]
        values = colex(a,keepcols)   # so that "item" can be appended (below)
        uniques = unique(values)  # get a LIST, so .sort keeps rows intact
        uniques.sort()
        newlist = []
        for item in uniques:
            if type(item) not in [ListType,TupleType,N.ArrayType]:
                item =[item]
            # rows of a matching this combination of keepcols values
            tmprows = alinexand(a,keepcols,item)
            for col in collapsecols:
                avgcol = acolex(tmprows,col)
                item.append(acollmean(avgcol))
                if fcn1<>None:
                    try:
                        test = fcn1(avgcol)
                    except:
                        test = 'N/A'
                    item.append(test)
                if fcn2<>None:
                    try:
                        test = fcn2(avgcol)
                    except:
                        test = 'N/A'
                    item.append(test)
            newlist.append(item)
        # Fall back to an object ('O') array when the rows cannot be
        # converted directly (e.g. they contain 'N/A' strings).
        try:
            new_a = N.array(newlist)
        except TypeError:
            new_a = N.array(newlist,'O')
        return new_a

833 | ||

834 | ||

def adm (a,criterion):
    """
    Returns rows from the passed list of lists (or array) that meet the
    criteria in the passed criterion expression (a string as a function
    of x, where x is one row).

    SECURITY NOTE: criterion is evaluated as Python source with eval(), so
    it must never be built from untrusted input.

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an Object array if the rows
             cannot be converted to a regular array)
    """
    keep = eval('lambda x: '+criterion)     # see SECURITY NOTE above
    lines = []
    for row in a:
        if keep(row):
            lines.append(row)
    try:
        lines = N.array(lines)
    except Exception:           # rows of mixed content: fall back to Objects
        lines = N.array(lines,'O')
    return lines

849 | ||

850 | ||

def isstring(x):
    """
    Tests whether the type of x is exactly StringType.

    Usage:   isstring(x)
    Returns: 1 if x is a string, 0 otherwise
    """
    if type(x)!=StringType:
        return 0
    return 1

856 | ||

857 | ||

def alinexand (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist).  One value is required for each column in columnlist.
    The test is built as a criterion string and handed to adm(), which
    evaluates it as Python source.  With no columns at all, every row
    matches.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    tests = []
    for i in range(len(columnlist)):
        if type(valuelist[i])==StringType:
            # repr() both quotes and escapes, so values containing quotes
            # or backslashes still yield a valid expression
            critval = repr(valuelist[i])
        else:
            critval = str(valuelist[i])
        tests.append('x['+str(columnlist[i])+']=='+critval)
    if tests:
        criterion = ' and '.join(tests)
    else:
        criterion = '1'         # no conditions: everything matches
    return adm(a,criterion)

879 | ||

880 | ||

def alinexor (a,columnlist,valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist).  One value is required for each column in columnlist.
    The exception is if either columnlist or valuelist has only 1 value,
    in which case that item will be expanded to match the length of the
    other list.  The test is built as a criterion string and handed to
    adm(), which evaluates it as Python source.  With no columns at all,
    no row matches.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in [ListType,TupleType,N.ArrayType]:
        columnlist = [columnlist]
    if type(valuelist) not in [ListType,TupleType,N.ArrayType]:
        valuelist = [valuelist]
    # stretch a single column (or a single value) to pair with the other list
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist*len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist*len(columnlist)
    tests = []
    for i in range(len(columnlist)):
        if type(valuelist[i])==StringType:
            # repr() both quotes and escapes, so values containing quotes
            # or backslashes still yield a valid expression
            critval = repr(valuelist[i])
        else:
            critval = str(valuelist[i])
        tests.append('x['+str(columnlist[i])+']=='+critval)
    if tests:
        criterion = ' or '.join(tests)
    else:
        criterion = '0'         # no conditions: nothing matches
    return adm(a,criterion)

909 | ||

910 | ||

def areplace (a,oldval,newval):
    """
    Replaces all occurrences of oldval with newval in array a.  The
    selection is done with where() rather than by multiplying with 0/1
    masks, so infinite values are replaced (or kept) correctly instead of
    turning into NaN.

    Usage:   areplace(a,oldval,newval)
    Returns: a new array; a itself is not modified
    """
    return N.where(N.equal(a,oldval),newval,a)

919 | ||

920 | ||

def arecode (a,listmap,col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on SINGLE columns, or 'all' columns at once.
    Every pair in listmap is applied, and each element is matched against
    its ORIGINAL value, so one replacement never feeds into another.  If
    the same old value is listed twice, the first pair wins.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1]
    """
    ashape = a.shape
    if col == 'all':
        work = a.flat
    else:
        work = acolex(a,col).flat
    # Object arrays are needed if the data already are Objects or if any
    # replacement value is a string
    useobjects = work.typecode()=='O' or a.typecode()=='O'
    for pair in listmap:
        if type(pair[1]) == StringType:
            useobjects = 1
    if useobjects:
        a = N.array(a,'O')
        original = N.array(work,'O')
        recoded = N.array(work,'O')
        for i in range(len(original)):
            for pair in listmap:
                if original[i]==pair[0]:
                    recoded[i] = pair[1]
                    break
    else:   # must be a non-Object type array and replacement
        recoded = N.array(work)
        pairs = list(listmap)
        pairs.reverse()         # apply last-to-first so the first pair wins
        for pair in pairs:
            recoded = N.where(N.equal(work,pair[0]),pair[1],recoded)
    if col == 'all':
        return N.reshape(recoded,ashape)
    # splice the recoded column back between its neighbours
    return N.concatenate([a[:,0:col],recoded[:,N.NewAxis],a[:,col+1:]],1)

950 | ||

951 | ||

def arowcompare(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects (which requires the cmp function).
    The cmp-based path is taken if EITHER row is an Object array.

    Usage:   arowcompare(row1,row2)
    Returns: an array of equal length containing 1s where the two rows had
             identical elements and 0 otherwise
    """
    if row1.typecode()=='O' or row2.typecode()=='O':
        # cmp fcn gives -1,0,1 ... so "not abs(cmp)" is 1 exactly where equal
        cmpvect = N.logical_not(abs(N.array(map(cmp,row1,row2))))
    else:
        cmpvect = N.equal(row1,row2)
    return cmpvect

966 | ||

967 | ||

def arowsame(row1, row2):
    """
    Checks whether two rows from an array match element for element.
    The element-wise comparison is delegated to arowcompare, so both
    numeric and python-object rows are handled.

    Usage:   arowsame(row1,row2)
    Returns: 1 if the two rows are identical, 0 otherwise.
    """
    return N.alltrue(arowcompare(row1,row2))

978 | ||

979 | ||

def asortrows(a,axis=0):
    """
    Sorts an array "by rows": the slices along the given axis are kept
    intact and are reordered relative to one another.  (Numeric.sort(),
    by contrast, sorts the elements WITHIN the given axis.)

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a
    """
    swapped = (axis != 0)
    if swapped:
        a = N.swapaxes(a, axis, 0)      # bring the requested axis to the front
    rows = a.tolist()
    rows.sort()                         # list sort compares whole rows
    result = N.array(rows)
    if swapped:
        result = N.swapaxes(result, axis, 0)    # put the axes back
    return result

998 | ||

999 | ||

def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  Only works on arrays NOT including string
    items.  An empty input gives an empty array.

    Usage:   aunique (inarray)
    Returns: an array holding each distinct item (1D) or row (2+D) once
    """
    if len(inarray) == 0:
        return N.array(inarray)
    uniques = N.array([inarray[0]])
    if len(uniques.shape) == 1:      # IF IT'S A 1D ARRAY
        for item in inarray[1:]:
            if N.add.reduce(N.ravel(N.equal(uniques,item))) == 0:
                uniques = N.concatenate([uniques,N.array([item])])
    else:                            # IT MUST BE A 2+D ARRAY
        if inarray.typecode() != 'O':  # not an Object array
            for item in inarray[1:]:
                if not N.sum(N.alltrue(N.equal(uniques,item),1)):
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
                # otherwise this item is already in the uniques array
        else:   # must be an Object array, alltrue/equal functions don't work
            for item in inarray[1:]:
                newflag = 1
                for unq in uniques:  # NOTE: cmp --> 0=same, -1=<, 1=>
                    test = N.sum(abs(N.array(map(cmp,item,unq))))
                    if test == 0:   # if item identical to any 1 row in uniques
                        newflag = 0 # then not a novel item to add
                        break
                if newflag == 1:
                    try:
                        uniques = N.concatenate( [uniques,item[N.NewAxis,:]] )
                    except TypeError:    # the item to add isn't a list
                        uniques = N.concatenate([uniques,N.array([item])])
    return uniques

1039 | ||

1040 | ||

def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array,
    each reported once, in order of first appearance.  Only works on
    arrays NOT including string items.  If nothing is duplicated an empty
    array is returned.

    Usage:   aduplicates (inarray)
    Returns: an array of the items (1D) or rows (2+D) that occur more
             than once
    """
    rows = N.array(inarray).tolist()
    dups = []
    for i in range(len(rows)):
        # a duplicate is an item that shows up again later on; record it
        # only the first time it is seen
        if rows[i] in rows[i+1:] and rows[i] not in dups:
            dups.append(rows[i])
    return N.array(dups)

1065 | ||

1066 | except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs | |

1067 | pass |

**Note:** See TracBrowser for help on using the repository browser.