# Source: orange-bioinformatics/orangecontrib/bio/widgets/Anova.py
# Revision 1873:0810c5708cc5 (80.5 KB), checked in by Ales Erjavec <ales.erjavec@…>, 6 months ago

[188] | 1 | ## Automatically adapted for numpy.oldnumeric Oct 03, 2007 by |

2 | ||

[1632] | 3 | from __future__ import absolute_import |

4 | ||

[188] | 5 | import math |

[1632] | 6 | |

[188] | 7 | import numpy.oldnumeric as Numeric, numpy.oldnumeric.ma as MA |

8 | import numpy.oldnumeric.linear_algebra as LinearAlgebra | |

[1632] | 9 | |

[188] | 10 | import scipy.stats |

11 | ||

[1632] | 12 | from . import numpyExtn |

[188] | 13 | |

14 | ####################################################################################### | |

15 | ## ANOVA based on Multivariate Linear Regression | |

16 | ####################################################################################### | |

17 | ||

class AnovaLRBase:
    """Base class for all ANOVA types based on multivariate linear regression:
    manipulation with dummy (indicator) variables.
    """

    def getDummyRC(self, numVars, numReplicas=1):
        """Return a 2D array of reference-cell encoded dummy variables.

        numVars: number of levels of the factor.
        numReplicas: int | list | array of length numVars: how many times each
            level's code (row) is repeated along axis 0.
        The first level is the reference cell (an all-zero row); the remaining
        numVars-1 levels each get one row of the identity matrix.
        NOTE(review): unlike getDummyEE, there is no special case for
        numVars == 1 here — presumably callers never pass it; verify.
        """
        if not isinstance(numReplicas, int):
            numReplicas = Numeric.asarray(numReplicas)
            assert len(numReplicas) == numVars, "numReplicas: int or argument of length numVars expected"
        # stack the zero reference row on top of the (numVars-1) identity,
        # then repeat each row numReplicas times along axis 0
        return Numeric.repeat(Numeric.concatenate((Numeric.zeros((1,numVars-1), Numeric.Float), Numeric.identity(numVars-1, Numeric.Float)), 0), numReplicas, 0)


    def getDummyEE(self, numVars, numReplicas=1):
        """Return a 2D array of effects-encoded dummy variables, shape (n, numVars-1)
        where n = numVars*numReplicas | sum(numReplicas).

        Each row represents a code for a single variable value, encoded by
        numVars-1 dummy variables: the first numVars-1 levels get identity rows,
        the last level gets a row of -1's (effects coding).
        numReplicas: int | list | array of shape (numVars,): the number (or list)
            of observations per cell.
        """
        assert numVars > 0, "numVars <= 0"
        if isinstance(numReplicas, int):
            numReplicas = Numeric.ones((numVars,)) * numReplicas
        else:
            numReplicas = Numeric.asarray(numReplicas)
            assert len(numReplicas) == numVars, "numReplicas: int or argument of length numVars expected"
        if numVars == 1:
            # a single level needs zero dummy columns
            return Numeric.zeros((numReplicas[0], 0))
        else:
            return Numeric.repeat(Numeric.concatenate((Numeric.identity(numVars-1, Numeric.Int), -1*Numeric.ones((1,numVars-1), Numeric.Int)), 0), numReplicas, 0)


    def getSubjectDummyEE(self, numSubjList, numReplicas=1):
        """Return a 2D array of effects-encoded subject dummy variables,
        shape (sum(ni), sum(ni-1)) where numSubjList = [n0, n1, ...].

        numSubjList: a list of the number of subjects within each level of the
            non-repeated factor.
        numReplicas: int | list | array of shape sum(ni): the number of times
            each subject code (row) is repeated.
        The result is block-diagonal: each group's subjects are effects-encoded
        independently of other groups.
        """
        numSubjList = Numeric.asarray(numSubjList)
        assert len(numSubjList.shape) == 1, "len(numSubjList.shape) != 1"
        if isinstance(numReplicas, int):
            numReplicas = Numeric.ones((numSubjList.shape[0],)) * numReplicas
        else:
            numReplicas = Numeric.asarray(numReplicas)
            assert len(numReplicas.shape) == 1, "len(numReplicas.shape) != 1"
            assert numReplicas.shape[0] == numSubjList.shape[0], "numReplicas: int or a list of the same length as numSubjList expected"
        si0 = Numeric.concatenate(([0], Numeric.add.accumulate(numSubjList)))   # block-start indices for axis 0 of dm
        si1 = Numeric.concatenate(([0], Numeric.add.accumulate(numSubjList-1))) # block-start indices for axis 1 of dm
        dm = Numeric.zeros((si0[-1], si1[-1]))  # subject dummy codes matrix (block diagonal)
        numRepeats = Numeric.ones((si0[-1],))   # per-row repetition counts, filled per group below
        for i in range(numSubjList.shape[0]):
            if numSubjList[i] == 1: print "Warning: a single subject in group %i" % (i)
            # place the effects-encoded block for group i on the diagonal
            dm[si0[i]:si0[i+1], si1[i]:si1[i+1]] = self.getDummyEE(numSubjList[i],1)
            numRepeats[si0[i]:si0[i+1]] = Numeric.ones((numSubjList[i],)) * numReplicas[i]
        return Numeric.repeat(dm, numRepeats, 0)


    def getDummiesJoin(self, dummyMtrx1, dummyMtrx2):
        """Return a tuple of resized dummy matrices containing the cartesian
        product of rows (every row of mtrx1 paired with every row of mtrx2).
        """
        dummyMtrx1 = Numeric.repeat(dummyMtrx1, dummyMtrx2.shape[0], 0)                         # repeat each row separately
        dummyMtrx2 = Numeric.resize(dummyMtrx2, (dummyMtrx1.shape[0], dummyMtrx2.shape[1]))     # repeat whole matrix, add new rows
        return (dummyMtrx1, dummyMtrx2)


    def getDummyInteraction(self, dummyMtrx1, dummyMtrx2):
        """Return a product of dummy matrices resized to the cartesian product
        of columns — the interaction dummies for two joined factors.
        """
        dmIntShp = (dummyMtrx1.shape[0], dummyMtrx1.shape[1] * dummyMtrx2.shape[1])
        dmInt1 = Numeric.repeat(dummyMtrx1, dummyMtrx2.shape[1], 1)     # repeat each column separately
        dmInt2 = Numeric.transpose(Numeric.resize(Numeric.transpose(dummyMtrx2, (1,0)), (dmIntShp[1], dmIntShp[0])), (1,0))     # repeat whole matrix, add new columns
        return dmInt1*dmInt2

87 | ||

88 | ||

class Anova2wayLRBase(AnovaLRBase):
    """Base class for all 2-way ANOVA types (repeated and non-repeated measures);
    includes various post-hoc tests.

    The following attributes must be defined by subclasses before the post-hoc
    methods are used:
        self._addInteraction = addInteraction
        self._arr2d = arr2d
        self._groupLens = groupLens
        self._MSpool = LR_treat.MSres
        self._DFpool = LR_treat.MSreg
    """

    def posthoc_tstat_B(self):
        """Fisher LSD: compares all levels of factor B and returns a (b,b) matrix
        of t-stat values and a matrix of corresponding p-values.
        Use when there is no significant interaction.
        Warning: Statistica computes it WRONG (it averages over raveled cells
        instead of averaging cell means over factor A)!
        """
        b = self._groupLens.shape[0]
        # precompute averages
        x_avrg = Numeric.zeros((b,), Numeric.Float)         # mean of cell means over factor A
        sum_count_x = Numeric.zeros((b,), Numeric.Float)    # sum over factor A of 1/(cell count)
        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
        for idx in range(b):
            groupInd = range(groupLensAcc0[idx], groupLensAcc0[idx+1])
            xi = self._arr2d.take(groupInd, 1)
            x_avrg[idx] = MA.average(MA.average(xi,1))                  # first average over replicas to obtain cell mean, then over factor A
            sum_count_x[idx] = Numeric.add.reduce(1./MA.count(xi,1))    # first count measurements within cells, then sum inverses over factor A
        # t-statistics: all pairwise differences of means, scaled by pooled MS
        x_avrg_2 = MA.resize(x_avrg, (x_avrg.shape[0], x_avrg.shape[0]))
        sum_count_x_2 = Numeric.resize(sum_count_x, (sum_count_x.shape[0],sum_count_x.shape[0]))
        tstat = (MA.transpose(x_avrg_2) - x_avrg_2) * self._arr2d.shape[0] / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))
        # two-sided p-values via the incomplete beta function of t**2
        tprob = numpyExtn.triangularPut(scipy.stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + numpyExtn.triangularGet(tstat**2))),1,1)
        return tstat, tprob


    def isSignif_holm_B(self, alpha):
        """Conduct Holm step-down sequential multiple comparison on factor B.
        Returns a (b,b) matrix indicating significant differences between levels
        of factor B and the direction of changes [-1|0|1].
        """
        tstat, pvals = self.posthoc_tstat_B()
        pvals = numpyExtn.triangularGet(pvals)
        sortInd = Numeric.argsort(pvals)
        k = pvals.shape[0]
        isSignif = -1*Numeric.ones((k,), Numeric.Int)
        for j in range(k):
            # Holm: compare j-th smallest p-value against alpha/(k-j)
            isSignif[sortInd[j]] = pvals[sortInd[j]] < float(alpha) / (k-j)
        return numpyExtn.triangularPut(isSignif, upper=1, lower=1) * (MA.greater(tstat,0) - MA.less(tstat,0))


    def posthoc_tstat_BbyA(self):
        """Fisher LSD: compares all levels of factor B by each level of factor A separately.
        Returns: (b,b,a) matrix of t-stat values
                 (b,b,a) matrix of corresponding p-values
                 (b,b) matrix of geometric means of p-values over all levels of factor A.
        Use when there is a significant interaction and factor A is of no interest.
        """
        a = self._arr2d.shape[0]
        b = self._groupLens.shape[0]
        # precompute averages
        x_avrg = Numeric.zeros((a,b), Numeric.Float)
        sum_count_x = Numeric.zeros((a,b), Numeric.Float)
        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
        for idx in range(b):
            groupInd = range(groupLensAcc0[idx], groupLensAcc0[idx+1])
            xi = self._arr2d.take(groupInd, 1)
            x_avrg[:,idx] = MA.average(xi, 1)
            sum_count_x[:,idx] = 1. / MA.count(xi, 1)
        # t-statistics, per level of factor A (middle axis)
        x_avrg_2 = MA.resize(x_avrg, (b,a,b))
        sum_count_x_2 = Numeric.resize(sum_count_x, (b,a,b))
        tstat = (MA.transpose(x_avrg_2, (2,1,0)) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2, (2,1,0)) + sum_count_x_2))
        # get p-values for each level of factor A separately (axis 1)
        tprob = MA.array(-1*Numeric.ones(tstat.shape, Numeric.Float), mask=Numeric.ones(tstat.shape))
        for idx1 in range(tprob.shape[1]):
            tprob[:,idx1,:] = numpyExtn.triangularPut(scipy.stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + numpyExtn.triangularGet(tstat[:,idx1,:]**2))),1,1)
        return tstat, tprob


    def posthoc_anova(self):
        """Conduct ANOVA for each pair of levels of factor B.
        Returns for each pair of factor B levels:
            (b,b) matrix of F-stat values for effect of factor A
            (b,b) matrix of p-values for effect of factor A
            (b,b) matrix of F-stat values for effect of factor B
            (b,b) matrix of p-values for effect of factor B
            (b,b) matrix of F-stat values for interaction effect
            (b,b) matrix of p-values for interaction effect
        Raises ValueError when the interaction effect was not included in the fit.
        """
        if self._addInteraction != 1:
            # BUGFIX: the original raised a plain string, which is a TypeError
            # on Python >= 2.6 (string exceptions were removed, PEP 352)
            raise ValueError("Error: posthoc_anova can be conducted only when the interaction effect has been tested")
        b = self._groupLens.shape[0]
        FA = MA.masked * MA.ones((b,b), Numeric.Float)
        FAprob = MA.masked * MA.ones((b,b), Numeric.Float)
        FB = MA.masked * MA.ones((b,b), Numeric.Float)
        FBprob = MA.masked * MA.ones((b,b), Numeric.Float)
        FAB = MA.masked * MA.ones((b,b), Numeric.Float)
        FABprob = MA.masked * MA.ones((b,b), Numeric.Float)
        groupLensAcc0 = Numeric.array([0] + Numeric.add.accumulate(self._groupLens).tolist())
        groupInd = map(lambda i,j: range(i,j), groupLensAcc0[:-1],groupLensAcc0[1:])
        for i in range(b):
            for j in range(i+1,b):
                # re-run the 2-way ANOVA restricted to factor B levels i and j
                takeInd = groupInd[i] + groupInd[j]
                an = self.__class__(self._arr2d.take(takeInd, 1), [self._groupLens[i],self._groupLens[j]], addInteraction=1)
                FA[i,j] = FA[j,i] = an.FA
                FAprob[i,j] = FAprob[j,i] = an.FAprob
                FB[i,j] = FB[j,i] = an.FB
                FBprob[i,j] = FBprob[j,i] = an.FBprob
                FAB[i,j] = FAB[j,i] = an.FAB
                FABprob[i,j] = FABprob[j,i] = an.FABprob
        return FA, FAprob, FB, FBprob, FAB, FABprob

200 | ||

201 | ||

class Anova1wayLR(AnovaLRBase):
    """One-way ANOVA for unbalanced designs using multivariate linear regression.

    Multiple observations given as a list of indices groups, e.g. [[0,1,2],[3,4]].
    BUG: the indices are not taken into account, but only the lenghts of the groups!
    Supports balanced and unbalanced designs and missing (masked) data.
    """

    def __init__(self, arr1d, replicaGroupLens):
        """arr1d: 1D masked array: [x1,x2,x3, y1,y2, z1,z2,z3,z4] where x,y,z
            correspond to factor levels with 3, 2 and 4 replicas, respectively.
        replicaGroupLens: the number of replicas in individual groups, i.e. [3,2,4].
        """
        data = MA.asarray(arr1d)
        assert len(data.shape) == 1, "len(arr1d.shape) != 1"

        # drop masked observations, remembering which positions survive
        self.y, keepInd = numpyExtn.compressIndices(MA.asarray(data))

        # effects-coded dummies for the factor, restricted to surviving rows
        allDummies = self.getDummyEE(len(replicaGroupLens), replicaGroupLens)
        self.dummyA = Numeric.take(allDummies, keepInd, 0)

        # fit the regression and derive the F statistic and its p-value
        regression = MultLinReg(self.dummyA, self.y)
        try:
            self.F = regression.MSreg / regression.MSres
            self.Fprob = scipy.stats.fprob(regression.DFreg, regression.DFres, self.F)
        except ZeroDivisionError:
            # no residual variability: report a non-significant result
            self.F = 0
            self.Fprob = 1

232 | ||

233 | ||

class Anova1wayLR_2D(AnovaLRBase):
    """One-way ANOVA for unbalanced designs using multivariate linear regression.

    Axis 0 corresponds to replicas and axis 1 corresponds to different factor
    levels. Supports balanced and unbalanced designs and missing (masked) data.
    Sets self.F and self.Fprob (0 and 1 respectively when the analysis cannot
    be carried out).
    """

    def __init__(self, arr2d):
        # arr2d: 2D masked array, replicas on axis 0, factor levels on axis 1
        arr2d = MA.asarray(arr2d)
        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"

        # remove missing observations and the corresponding dummy variable codes;
        # transpose first so observations are grouped by factor level
        self.y = MA.ravel(MA.transpose(arr2d))
        self.y, takeInd = numpyExtn.compressIndices(self.y)

        # adjust degrees of freedom for factor levels that contain no data
        numFactorLevels = int(MA.add.reduce(MA.greater(arr2d.count(0), 0)))
        zeroLengthFactorLevels = Numeric.compress(MA.equal(arr2d.count(0), 0), range(arr2d.shape[1]))
        if numFactorLevels > 1:

            # dummy variables
            # NOTE: an earlier variant built dummies from numFactorLevels directly,
            # which caused an index error when the number of levels was reduced;
            # instead, build dummies for all levels and drop empty-level columns.
            self.dummyA = self.getDummyEE(arr2d.shape[1], arr2d.shape[0])   # create all dummies
            for lev in zeroLengthFactorLevels:  # reduce length of dummies to adjust for actual number of factor A levels
                self.dummyA = Numeric.concatenate([self.dummyA[:,0:lev],self.dummyA[:,lev+1:]],1)
            if len(zeroLengthFactorLevels) > 0 and zeroLengthFactorLevels[-1] == arr2d.shape[1]-1:
                self.dummyA[self.dummyA.shape[0]-arr2d.shape[1]-1:] = -1    # if the last factor level is missing, manually set the last dummies to matrix of -1's
            self.dummyA = Numeric.take(self.dummyA, takeInd, 0)             # take dummies where data is not masked

            # run analysis
            LRA = MultLinReg(self.dummyA, self.y)
            try:
                self.F = LRA.MSreg / LRA.MSres
                self.Fprob = scipy.stats.fprob(LRA.DFreg, LRA.DFres, self.F)
            except ZeroDivisionError:
                # no residual variability: report a non-significant result
                self.F = 0
                self.Fprob = 1

        else:
            # fewer than two populated factor levels: ANOVA is meaningless
            print "Giving up ANOVA: a factor with a single level encountered."
            self.F = 0
            self.Fprob = 1

278 | ||

279 | ||

class _Anova2wayLR_bug_missing_factor_leves(AnovaLRBase):
    """2-way ANOVA with multiple observations per cell for unbalanced designs
    using multivariate linear regression.

    Multiple observations given at axis 1 together with the list of indices
    groups, e.g. [[0,1,2],[3,4]].
    Supports balanced and unbalanced designs, i.e. different number of
    observations per cell and missing (masked) data.
    NOTE: legacy class kept for reference — the name records a known bug with
    missing factor levels; see Anova2wayLR for the replacement.
    TODO: account for empty cells (this implementation results in singular design matrix)
    """

    def __init__(self, arr2d, replicaGroupInd, addInteraction=0):
        """arr2d: 2D masked array where replicas are given at axis 1 together with the indices groups.
        replicaGroupInd: indices of axis 1 that belong to a common factor (multiple observations per cell).
        addInteraction: include / exclude the interaction between factors.
        """
        arr2d = MA.asarray(arr2d)
        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"

        # check if data is balanced (all groups the same length)
        rgLens = Numeric.array(map(lambda x: len(x), replicaGroupInd), Numeric.Int)
        isBalanced = Numeric.add.reduce((1.*rgLens/rgLens[0]) == Numeric.ones((len(rgLens)))) / len(rgLens) == 1

        # check for empty cells, raise exception if empty cells exist and addInteraction=1
        if addInteraction:
            ax1Ind = Numeric.concatenate(([0], Numeric.add.accumulate(rgLens)))
            for idx in range(rgLens.shape[0]):
                if Numeric.add.reduce(MA.count(arr2d[:,ax1Ind[idx]:ax1Ind[idx+1]],1) == 0) > 0:
                    raise ValueError, "arr2d has empty cells, cannot evaluate the interaction between factors, try addInteraction=0"

        # remove missing observations and the corresponding dummy variable codes
        self.y = MA.ravel(arr2d)    # [a0b0, a0b1, .., a1b0, ...]
        noMissing = MA.count(self.y) == self.y.shape[0]
        self.y, takeInd = numpyExtn.compressIndices(self.y)

        # dummy variables (effects coding) for both factors, joined row-wise
        self.dummyA = self.getDummyEE(arr2d.shape[0], 1)
        self.dummyB = self.getDummyEE(len(replicaGroupInd), rgLens)
        self.dummyA, self.dummyB = self.getDummiesJoin(self.dummyA, self.dummyB)
        if addInteraction:
            self.dummyAB = self.getDummyInteraction(self.dummyA, self.dummyB)
            self.dummyAB = Numeric.take(self.dummyAB, takeInd, 0)
        self.dummyA = Numeric.take(self.dummyA, takeInd, 0)
        self.dummyB = Numeric.take(self.dummyB, takeInd, 0)

        # run analysis: full (treatment) model first
        if addInteraction:
            LR_treat = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB, self.dummyAB), 1), self.y)
        else:
            LR_treat = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB),1), self.y)

        if isBalanced and noMissing:
            # balanced, complete data: factor sums of squares are orthogonal,
            # so each factor can be fitted on its own
            LR_A = MultLinReg(self.dummyA, self.y)
            LR_B = MultLinReg(self.dummyB, self.y)
            self.FA = LR_A.MSreg / LR_treat.MSres
            self.FB = LR_B.MSreg / LR_treat.MSres
            if addInteraction:
                LR_AB = MultLinReg(self.dummyAB, self.y)
                self.FAB = LR_AB.MSreg / LR_treat.MSres
        else:
            # unbalanced / incomplete data: use the extra sum of squares of the
            # full model over the model lacking the tested term (Type III style)
            if addInteraction:
                LR_A_B = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB), 1), self.y)
                LR_A_AB = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyAB), 1), self.y)
                LR_B_AB = MultLinReg(Numeric.concatenate((self.dummyB, self.dummyAB), 1), self.y)
                self.FA = (LR_treat.SSreg - LR_B_AB.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
                self.FB = (LR_treat.SSreg - LR_A_AB.SSreg) / self.dummyB.shape[1] / LR_treat.MSres
                self.FAB = (LR_treat.SSreg - LR_A_B.SSreg) / self.dummyAB.shape[1] / LR_treat.MSres
            else:
                LR_A = MultLinReg(self.dummyA, self.y)
                LR_B = MultLinReg(self.dummyB, self.y)
                self.FA = (LR_treat.SSreg - LR_B.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
                self.FB = (LR_treat.SSreg - LR_A.SSreg) / self.dummyB.shape[1] / LR_treat.MSres

        # p-values from the F distribution
        self.FAprob = scipy.stats.fprob(self.dummyA.shape[1], LR_treat.DFres, self.FA)
        self.FBprob = scipy.stats.fprob(self.dummyB.shape[1], LR_treat.DFres, self.FB)
        if addInteraction:
            self.FABprob = scipy.stats.fprob(self.dummyAB.shape[1], LR_treat.DFres, self.FAB)

        # store variables needed for posthoc tests
        self._arr2d = arr2d
        self._replicaGroupInd = replicaGroupInd
        self._LR_treat_MSres = LR_treat.MSres
        self._LR_treat_DFres = LR_treat.DFres


    def posthoc_tstat_B(self):
        """Fisher LSD: compares all levels of factor B and returns a (b,b) matrix
        of t-stat values and a matrix of corresponding p-values.
        Use when there is no significant interaction.
        Warning: Statistica computes it WRONG!
        """
        b = len(self._replicaGroupInd)
        # precompute averages
        x_avrg = Numeric.zeros((b,), Numeric.Float)     # mean of cell means over factor A
        sum_count_x = Numeric.zeros((b,), Numeric.Float)
        for idx, replicaInd in enumerate(self._replicaGroupInd):
            xi = self._arr2d.take(replicaInd, 1)
            x_avrg[idx] = MA.average(MA.average(xi,1))                  # first average over replicas to obtain cell mean, then over factor A
            sum_count_x[idx] = Numeric.add.reduce(1./MA.count(xi,1))    # first count the number of measurements within cells, then sum inverses over factor A
            ##x_avrg[idx] = MA.average(MA.ravel(xi))    # this is how Statistica computes it, but it is WRONG!
            ##sum_count_x[idx] = 1./MA.count(xi)        # this is how Statistica computes it, but it is WRONG!
        # t-statistics
        x_avrg_2 = MA.resize(x_avrg, (x_avrg.shape[0], x_avrg.shape[0]))
        sum_count_x_2 = Numeric.resize(sum_count_x, (sum_count_x.shape[0],sum_count_x.shape[0]))
        tstat = (MA.transpose(x_avrg_2) - x_avrg_2) * self._arr2d.shape[0] / Numeric.sqrt(self._LR_treat_MSres * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))
        ##tstat = (MA.transpose(x_avrg_2) - x_avrg_2) / Numeric.sqrt(self._LR_treat_MSres * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))  # this is how Statistica computes it, but it is WRONG!
        tprob = numpyExtn.triangularPut(scipy.stats.abetai(0.5*self._LR_treat_DFres, 0.5, float(self._LR_treat_DFres) / (self._LR_treat_DFres + numpyExtn.triangularGet(tstat**2))),1,1)
        return tstat, tprob


    def isSignif_holm_B(self, alpha):
        """Conduct Holm step-down sequential multiple comparison on factor B.
        Returns a (b,b) matrix indicating significant differences between levels
        of factor B and the direction of changes [-1|0|1].
        """
        tstat, pvals = self.posthoc_tstat_B()
        pvals = numpyExtn.triangularGet(pvals)
        sortInd = Numeric.argsort(pvals)
        k = pvals.shape[0]
        isSignif = -1*Numeric.ones((k,), Numeric.Int)
        for j in range(k):
            # Holm: compare j-th smallest p-value against alpha/(k-j)
            isSignif[sortInd[j]] = pvals[sortInd[j]] < float(alpha) / (k-j)
        return numpyExtn.triangularPut(isSignif, upper=1, lower=1) * (MA.greater(tstat,0) - MA.less(tstat,0))


    def posthoc_tstat_BbyA(self):
        """Fisher LSD: compares all levels of factor B by each level of factor A separately.
        Returns: (b,b,a) matrix of t-stat values
                 (b,b,a) matrix of corresponding p-values
                 (b,b) matrix of geometric means of p-values over all levels of factor A.
        Use when there is a significant interaction and factor A is of no interest.
        """
        a = self._arr2d.shape[0]
        b = len(self._replicaGroupInd)
        # precompute averages
        x_avrg = Numeric.zeros((a,b), Numeric.Float)
        sum_count_x = Numeric.zeros((a,b), Numeric.Float)
        for idx, replicaInd in enumerate(self._replicaGroupInd):
            xi = self._arr2d.take(replicaInd, 1)
            x_avrg[:,idx] = MA.average(xi, 1)
            sum_count_x[:,idx] = 1. / MA.count(xi, 1)

        # t-statistics, per level of factor A (middle axis)
        x_avrg_2 = MA.resize(x_avrg, (b,a,b))
        sum_count_x_2 = Numeric.resize(sum_count_x, (b,a,b))

        tstat = (MA.transpose(x_avrg_2, (2,1,0)) - x_avrg_2) / Numeric.sqrt(self._LR_treat_MSres * (Numeric.transpose(sum_count_x_2, (2,1,0)) + sum_count_x_2))
        # get p-values for each level of factor a separately (axis 1)
        tprob = MA.array(-1*Numeric.ones(tstat.shape, Numeric.Float), mask=Numeric.ones(tstat.shape))
        for idx1 in range(tprob.shape[1]):
            tprob[:,idx1,:] = numpyExtn.triangularPut(scipy.stats.abetai(0.5*self._LR_treat_DFres, 0.5, float(self._LR_treat_DFres) / (self._LR_treat_DFres + numpyExtn.triangularGet(tstat[:,idx1,:]**2))),1,1)
        return tstat, tprob


    def posthoc_anova_B_AB(self):
        """Conduct ANOVA for each pair of levels of factor B to detect interaction effect.
        Returns for each pair of factor B levels:
            (b,b) matrix of F-stat values for effect of factor B
            (b,b) matrix of p-values for effect of factor B
            (b,b) matrix of F-stat values for interaction effect
            (b,b) matrix of p-values for interaction effect
        NOTE(review): this delegates to Anova2wayLR (the replacement class), not
        to self.__class__ — presumably intentional; verify.
        """
        b = len(self._replicaGroupInd)
        FB = MA.masked * MA.ones((b,b), Numeric.Float)
        FBprob = MA.masked * MA.ones((b,b), Numeric.Float)
        FAB = MA.masked * MA.ones((b,b), Numeric.Float)
        FABprob = MA.masked * MA.ones((b,b), Numeric.Float)
        for i in range(b):
            for j in range(i+1,b):
                # re-run the 2-way ANOVA restricted to factor B levels i and j
                rglSub = [self._replicaGroupInd[i], self._replicaGroupInd[j]]
                takeInd = self._replicaGroupInd[i] + self._replicaGroupInd[j]
                an = Anova2wayLR(self._arr2d.take(takeInd, 1), rglSub, addInteraction=1)
                FB[i,j] = FB[j,i] = an.FB
                FBprob[i,j] = FBprob[j,i] = an.FBprob
                FAB[i,j] = FAB[j,i] = an.FAB
                FABprob[i,j] = FABprob[j,i] = an.FABprob
        return FB, FBprob, FAB, FABprob

451 | ||

452 | ||

453 | class Anova2wayLR(Anova2wayLRBase): | |

454 | """2 way ANOVA with multiple observations per cell for unbalanced designs using multivariate linear regression. | |

455 | Multiple observations given at axis 1 together with the list of group lenghts, e.g. [3,2]. | |

456 | Supports balanced and unbalanced designs, i.e. different number of observations per cell and missing (masked) data. | |

457 | TODO: account for empty cells (this implementation results in singular design matrix) | |

458 | """ | |

459 | ||

460 | def __init__(self, arr2d, groupLens, addInteraction=0, allowReductA=True, allowReductB=False): | |

461 | """arr2d: 2D masked array where replicas are given at axis 1 together with lenghts of the groups (multiple observations per cell); | |

462 | addInteraction: include / exclude the interaction between factors; | |

463 | allowReduct[A|B]: whether to allow or not the reduction of levels of factor A (B). | |

464 | """ | |

465 | arr2d = MA.asarray(arr2d) | |

466 | groupLens = Numeric.array(groupLens) # make a copy for the case if subjects or levels of factor B are removed | |

467 | assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2" | |

468 | ||

469 | # check if there exist factor-levels with all values missing (for A and B) | |

470 | # if so, reduce the corresponding DF and fix the number of dummy variables | |

471 | missIndA = Numeric.compress(MA.count(arr2d, 1) == 0, Numeric.arange(arr2d.shape[0])) | |

472 | if missIndA.shape[0] > 0: | |

473 | if allowReductA: | |

474 | print "Warning: removig factor A level(s) %s" % str(missIndA.tolist()) | |

475 | takeIndA = Numeric.compress(MA.count(arr2d, 1) != 0, Numeric.arange(arr2d.shape[0])) | |

476 | arr2d = arr2d.take(takeIndA, 0) | |

477 | else: | |

478 | self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: the following level(s) of factor A have no values: %s" % str(missIndA.tolist())) | |

479 | return | |

480 | missIndSubj = Numeric.compress(MA.count(arr2d, 0) == 0, Numeric.arange(arr2d.shape[1])) | |

481 | if missIndSubj.shape[0] > 0: | |

482 | takeIndSubj = Numeric.compress(MA.count(arr2d, 0) != 0, Numeric.arange(arr2d.shape[1])) | |

483 | # fix groupLens | |

484 | ## mapSubj2Group = -1*Numeric.ones(arr2d.shape[1]) | |

485 | ## groupLensAcc0 = [0] + Numeric.add.accumulate(groupLens).tolist() | |

486 | ## for i in range(len(groupLensAcc0) - 1): | |

487 | ## mapSubj2Group[groupLensAcc0[i]:groupLensAcc0[i+1]] = i | |

488 | mapSubj2Group = Numeric.repeat(range(len(groupLens)), groupLens) | |

489 | for subjIdx in missIndSubj: | |

490 | groupLens[mapSubj2Group[subjIdx]] -= 1 | |

491 | # remove data columns that are missing all the values | |

492 | arr2d = arr2d.take(takeIndSubj, 1) | |

493 | # fix number of factor B levels (if the length of any group became 0 due to removed subjects) | |

494 | missIndB = Numeric.compress(groupLens <= 0, Numeric.arange(groupLens.shape[0])) | |

495 | if missIndB.shape[0] > 0: | |

496 | if allowReductB: | |

497 | print "Warning: removig factor B level(s) %s" % str(missIndB) | |

498 | takeIndB = Numeric.compress(groupLens > 0, Numeric.arange(groupLens.shape[0])) | |

499 | groupLens = Numeric.take(groupLens, takeIndB) | |

500 | # arr2d has already been taken care of by removing the subjects without observations | |

501 | else: | |

502 | self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: the following level(s) of factor B have no values: %s" % str(missIndB)) | |

503 | return | |

504 | # remove levels of factor B with single observation per cell | |

505 | grpIndSingle = Numeric.compress(Numeric.equal(groupLens,1), Numeric.arange(groupLens.shape[0])) | |

506 | if grpIndSingle.shape[0] > 0: | |

507 | if allowReductB: | |

508 | # fix arr2d | |

509 | print "Warning: removing factor B level(s) with single observation: %s" % str(grpIndSingle) | |

510 | groupLensAcc = [0] + list(Numeric.add.accumulate(groupLens)) | |

511 | arr2dRemInd = map(lambda i: groupLensAcc[i], grpIndSingle) | |

512 | arr2dTakeInd = Numeric.ones(arr2d.shape[1]) | |

513 | Numeric.put(arr2dTakeInd, arr2dRemInd, 0) | |

514 | arr2d = MA.compress(arr2dTakeInd, arr2d, 1) | |

515 | # fix groupLens | |

516 | groupLens = Numeric.compress(Numeric.not_equal(groupLens,1), groupLens) | |

517 | else: | |

518 | self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: the following level(s) of factor B have single observations: %s" % str(grpIndSingle)) | |

519 | return | |

520 | # check that there exist at least 2 levels of factors A and B | |

521 | # and that there is at least single duplicated observation | |

522 | if arr2d.shape[0] < 2 or len(groupLens) < 2: ##or MA.count(arr2d) <= arr2d.shape[0] * len(groupLens): | |

523 | self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: not enough levels of factor A (%i) or factor B (%i)" % (arr2d.shape[0],len(groupLens))) | |

524 | return | |

525 | ||

526 | # check if data is balanced | |

527 | isBalanced = (1. * Numeric.add.reduce((1.*groupLens/groupLens[0]) == Numeric.ones(groupLens.shape[0])) / groupLens.shape[0]) == 1 | |

528 | ||

529 | # check for empty cells; if exist and addInteraction=1, switch to no interaction and continue | |

530 | if addInteraction: | |

531 | ax1Ind = Numeric.concatenate(([0], Numeric.add.accumulate(groupLens))) | |

532 | for idx in range(groupLens.shape[0]): | |

533 | if Numeric.add.reduce(MA.count(arr2d[:,ax1Ind[idx]:ax1Ind[idx+1]],1) == 0) > 0: | |

534 | ## raise ValueError, "arr2d has empty cells, cannot evaluate the interaction between factors, try addInteraction=0" | |

535 | print ValueError, "Warning: data has empty cells, cannot evaluate the interaction between factors, continuing without interaction" | |

536 | addInteraction=0 | |

537 | # store member varibales that are stored only if addInteraction==1 | |

538 | self.FAB = 0 | |

539 | self.FABprob = 1 | |

540 | self.addInteractionFailed = True | |

541 | ||

542 | # remove missing observations and the corresponding dummy variable codes | |

543 | self.y = MA.ravel(arr2d) # [a0b0, a0b1, .., a1b0, ...] | |

544 | noMissing = MA.count(self.y) == self.y.shape[0] | |

545 | self.y, takeInd = numpyExtn.compressIndices(self.y) | |

546 | ||

547 | # dummy variables | |

548 | self.dummyA = self.getDummyEE(arr2d.shape[0], 1) | |

549 | self.dummyB = self.getDummyEE(len(groupLens), groupLens) | |

550 | self.dummyA, self.dummyB = self.getDummiesJoin(self.dummyA, self.dummyB) | |

551 | if addInteraction: | |

552 | self.dummyAB = self.getDummyInteraction(self.dummyA, self.dummyB) | |

553 | else: | |

554 | self.dummyAB = Numeric.zeros((self.dummyA.shape[0],0)) | |

555 | self.dummyA = Numeric.take(self.dummyA, takeInd, 0) | |

556 | self.dummyB = Numeric.take(self.dummyB, takeInd, 0) | |

557 | self.dummyAB = Numeric.take(self.dummyAB, takeInd, 0) | |

558 | ||

559 | # check that there is enough observations to allow for variability (DFres > 0) | |

560 | if self.dummyA.shape[0] - sum([self.dummyA.shape[1], self.dummyB.shape[1], self.dummyAB.shape[1]]) - 1 <= 0: | |

561 | self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: not values (dummy condition)") | |

562 | return | |

563 | ||

564 | # run analysis | |

565 | try: | |

566 | ## if addInteraction: | |

567 | LR_treat = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB, self.dummyAB), 1), self.y) | |

568 | ## else: | |

569 | ## LR_treat = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB),1), self.y) | |

570 | ||

571 | if isBalanced and noMissing: | |

572 | LR_A = MultLinReg(self.dummyA, self.y) | |

573 | LR_B = MultLinReg(self.dummyB, self.y) | |

574 | self.FA = LR_A.MSreg / LR_treat.MSres | |

575 | self.FB = LR_B.MSreg / LR_treat.MSres | |

576 | if addInteraction: | |

577 | LR_AB = MultLinReg(self.dummyAB, self.y) | |

578 | self.FAB = LR_AB.MSreg / LR_treat.MSres | |

579 | else: | |

580 | if addInteraction: | |

581 | LR_A_B = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB), 1), self.y) | |

582 | LR_A_AB = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyAB), 1), self.y) | |

583 | LR_B_AB = MultLinReg(Numeric.concatenate((self.dummyB, self.dummyAB), 1), self.y) | |

584 | self.FA = (LR_treat.SSreg - LR_B_AB.SSreg) / self.dummyA.shape[1] / LR_treat.MSres | |

585 | self.FB = (LR_treat.SSreg - LR_A_AB.SSreg) / self.dummyB.shape[1] / LR_treat.MSres | |

586 | self.FAB = (LR_treat.SSreg - LR_A_B.SSreg) / self.dummyAB.shape[1] / LR_treat.MSres | |

587 | else: | |

588 | LR_A = MultLinReg(self.dummyA, self.y) | |

589 | LR_B = MultLinReg(self.dummyB, self.y) | |

590 | self.FA = (LR_treat.SSreg - LR_B.SSreg) / self.dummyA.shape[1] / LR_treat.MSres | |

591 | self.FB = (LR_treat.SSreg - LR_A.SSreg) / self.dummyB.shape[1] / LR_treat.MSres | |

592 | except: | |

593 | self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: miltivariate linear regression failed due to lack of variability") | |

594 | return | |

595 | ||

596 | try: | |

597 | self.FAprob = scipy.stats.fprob(self.dummyA.shape[1], LR_treat.DFres, self.FA) | |

598 | except ValueError: | |

599 | self.FAprob = 1 | |

600 | try: | |

601 | self.FBprob = scipy.stats.fprob(self.dummyB.shape[1], LR_treat.DFres, self.FB) | |

602 | except ValueError: | |

603 | self.FBprob = 1 | |

604 | self.ps = [self.FAprob, self.FBprob] | |

605 | if addInteraction: | |

606 | try: | |

607 | self.FABprob = scipy.stats.fprob(self.dummyAB.shape[1], LR_treat.DFres, self.FAB) | |

608 | except ValueError: | |

609 | self.FABprob = 1 | |

610 | self.ps.append(self.FABprob) | |

611 | ||

612 | # store variables needed for posthoc tests | |

613 | self._MSpool = LR_treat.MSres | |

614 | self._DFpool = LR_treat.MSreg | |

615 | self._addInteraction = addInteraction | |

616 | self._arr2d = arr2d | |

617 | self._groupLens = groupLens | |

618 | ||

619 | ||

620 | def _giveUp(self, arr2d, groupLens, addInteraction, output=None): | |

621 | if output != None: | |

622 | print output | |

623 | self.FA = 0 | |

624 | self.FAprob = 1 | |

625 | self.FB = 0 | |

626 | self.FBprob = 1 | |

627 | self.ps = [self.FAprob, self.FBprob] | |

628 | if addInteraction: | |

629 | self.FAB = 0 | |

630 | self.FABprob = 1 | |

631 | self.ps.append(self.FABprob) | |

632 | self._addInteraction = addInteraction | |

633 | self._arr2d = arr2d | |

634 | self._groupLens = groupLens | |

635 | ||

636 | ||

637 | ||

class AnovaRM11LR(AnovaLRBase):
    """One-way REPEATED MEASURES ANOVA computed via multivariate linear regression.

    Axis 0 of the input corresponds to subjects, axis 1 to the levels of the
    (within-subject) factor.  Missing (masked) data are not properly supported:
    a warning is printed and an approximate F is computed instead.

    Results are stored as attributes:
        FA     -- F statistic for the factor effect
        FAprob -- the corresponding p-value
    """

    def __init__(self, arr2d):
        """arr2d: 2D masked array: [[x1,x2], [y1,y2], [z1,z2]]
        where xi, yi, zi are measurements of three different subjects (x, y, z)
        under the i-th factor level.
        """
        arr2d = MA.asarray(arr2d)
        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"

        # flatten the observations and remember which positions carry actual
        # values, so the matching rows of the dummy matrices can be kept
        self.y = MA.ravel(arr2d)
        noMissing = MA.count(self.y) == self.y.shape[0]
        self.y, takeInd = numpyExtn.compressIndices(self.y)

        # dummy variables (effects coding) for subjects and factor levels
        self.dummySubj = self.getDummyEE(arr2d.shape[0], 1)
        self.dummyA = self.getDummyEE(arr2d.shape[1], 1)
        self.dummySubj, self.dummyA = self.getDummiesJoin(self.dummySubj, self.dummyA)
        # drop the dummy rows that correspond to missing observations
        self.dummySubj = Numeric.take(self.dummySubj, takeInd, 0)
        self.dummyA = Numeric.take(self.dummyA, takeInd, 0)

        # run analysis (could be reduced to 2 regressions instead of 3)
        try:
            # full model: subjects + factor
            LR_treat = MultLinReg(Numeric.concatenate((self.dummySubj, self.dummyA), 1), self.y)
            if noMissing:
                # complete data: F = MSreg(A) / MSres(full model)
                LR_A = MultLinReg(self.dummyA, self.y)
                self.FA = LR_A.MSreg / LR_treat.MSres
            else:
                # with missing data fall back to comparing the full model with
                # the subjects-only model (extra sum-of-squares approach)
                print "WARNING: missing data with 1-way repeated measures ANOVA"
                LR_S = MultLinReg(self.dummySubj, self.y)
                self.FA = (LR_treat.SSreg - LR_S.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
            self.FAprob = scipy.stats.fprob(self.dummyA.shape[1], LR_treat.DFres, self.FA)
        except ZeroDivisionError:
            # no residual variability at all; report a neutral result
            self.FA = 0
            self.FAprob = 1

677 | ||

678 | ||

class _AnovaRM12LR_bug_missing_factor_leves(AnovaLRBase):
    """2 way ANOVA with REPEATED MEASURES on factor A using multivariate linear regression.

    Axis 0 corresponds to different levels of factor A; subjects are given at axis 1
    and are nested inside factor B according to the given group indices.
    Factor A is a within-subject effect, factor B is a between-subject effect.
    Example:
        arr2d = [[a0b0 a0b0 a0b1 a0b1 a0b1], [a1b0 a1b0 a1b1 a1b1 a1b1]]: 2 levels of factor A, 2 levels of factor B, 5 subjects
        groupInd = [[0,1],[2,3,4]]
    Supports balanced and unbalanced designs, i.e. different numbers of observations
    per cell and missing (masked) data.
    NOTE(review): judging by the leading-underscore name, this is the superseded
    version of AnovaRM12LR below, kept despite a known bug with missing factor
    levels -- confirm before reusing.
    TODO: fix the F and DF for factor B when there are missing values (see Glantz, pp. 462-5)
    """

    def __init__(self, arr2d, groupInd, addInteraction=0):
        """arr2d: 2D masked array: [[a0b0 a0b0 a0b1 a0b1 a0b1], [a1b0 a1b0 a1b1 a1b1 a1b1]]
        groupInd: axis 1 indices that belong to the same level of factor B: [[0,1],[2,3,4]]
        addInteraction: include / exclude the interaction between factors A and B
        """
        arr2d = MA.asarray(arr2d)
        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"

        # remove missing observations and the corresponding dummy variable codes
        self.y = MA.ravel(arr2d) # [a0b0, a0b1, .., a1b0, ...]
        noMissing = MA.count(self.y) == self.y.shape[0]
        self.y, takeInd = numpyExtn.compressIndices(self.y)

        # check if data is balanced (all factor B groups of equal size)
        rgLens = Numeric.array(map(lambda x: len(x), groupInd), Numeric.Int)
        assert Numeric.add.reduce(rgLens) == arr2d.shape[1], "arr2d.shape[1] != sum_of_len(groupInd)"
        isBalanced = Numeric.add.reduce((1.*rgLens/rgLens[0]) == Numeric.ones((len(rgLens)))) / len(rgLens) == 1

        # dummy variables (effects coding); subjects are nested within factor B
        self.dummyA = self.getDummyEE(arr2d.shape[0], 1)
        self.dummyB = self.getDummyEE(len(groupInd), rgLens)
        self.dummySubjInB = self.getSubjectDummyEE(rgLens, 1)
        dummyB_Subj = Numeric.concatenate((self.dummyB, self.dummySubjInB), 1)
        self.dummyA, dummyB_Subj = self.getDummiesJoin(self.dummyA, dummyB_Subj)
        self.dummyB = dummyB_Subj[:, 0:self.dummyB.shape[1]]
        self.dummySubjInB = dummyB_Subj[:, self.dummyB.shape[1]:]
        if addInteraction:
            self.dummyAB = self.getDummyInteraction(self.dummyA, self.dummyB)
            self.dummyAB = Numeric.take(self.dummyAB, takeInd, 0)
        # keep only the dummy rows of the non-missing observations
        self.dummyA = Numeric.take(self.dummyA, takeInd, 0)
        self.dummyB = Numeric.take(self.dummyB, takeInd, 0)
        self.dummySubjInB = Numeric.take(self.dummySubjInB, takeInd, 0)

        # run analysis: fit the full model first
        if addInteraction:
            LR_treat = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyB, self.dummyAB),1), self.y)
        else:
            LR_treat = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyB),1), self.y)

        # NOTE: branch deliberately disabled by the trailing "and False";
        # the extra-sum-of-squares path below is always taken
        if isBalanced and noMissing and False:
            # non-repeated-measures factor (B), tested against subjects-within-B
            LR_B = MultLinReg(self.dummyB, self.y)
            LR_SubjInB = MultLinReg(self.dummySubjInB, self.y)
            self.FB = LR_B.MSreg / LR_SubjInB.MSreg
            # repeated measures factor (A)
            LR_A = MultLinReg(self.dummyA, self.y)
            self.FA = LR_A.MSreg / LR_treat.MSres
            # interaction (AB)
            if addInteraction:
                LR_AB = MultLinReg(self.dummyAB, self.y)
                self.FAB = LR_AB.MSreg / LR_treat.MSres
            # store variables needed for posthoc tests
            MS_SubjInB = LR_SubjInB.MSreg
        else:
            ######################################################################################################################################
            ## print "Warning: missing data with 2 way repeated measures ANOVA: F and DF should be adjusted due to random effect factor (subjects)"
            ######################################################################################################################################
            if addInteraction:
                # non-repeated-measures factor (B): extra SS of B, tested against
                # the subjects-within-B term rather than the residual
                LR_S_A_AB = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyAB),1), self.y)
                LR_A_B_AB = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB, self.dummyAB),1), self.y)
                self.FB = (LR_treat.SSreg - LR_S_A_AB.SSreg) / self.dummyB.shape[1] / (LR_treat.SSreg - LR_A_B_AB.SSreg) * self.dummySubjInB.shape[1]
                # repeated measures factor (A)
                LR_S_B_AB = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyB, self.dummyAB),1), self.y)
                self.FA = (LR_treat.SSreg - LR_S_B_AB.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
                # interaction (AB)
                LR_S_A_B = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyB),1), self.y)
                self.FAB = (LR_treat.SSreg - LR_S_A_B.SSreg) / self.dummyAB.shape[1] / LR_treat.MSres
                # store variables needed for posthoc tests
                MS_SubjInB = (LR_treat.SSreg - LR_A_B_AB.SSreg) / self.dummySubjInB.shape[1]
            else:
                # non-repeated-measures factor (B)
                LR_S_A = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA),1), self.y)
                LR_A_B = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB),1), self.y)
                self.FB = (LR_treat.SSreg - LR_S_A.SSreg) / self.dummyB.shape[1] / (LR_treat.SSreg - LR_A_B.SSreg) * self.dummySubjInB.shape[1]
                # repeated measures factor (A)
                LR_S_B = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyB),1), self.y)
                self.FA = (LR_treat.SSreg - LR_S_B.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
                # store variables needed for posthoc tests
                MS_SubjInB = (LR_treat.SSreg - LR_A_B.SSreg) / self.dummySubjInB.shape[1]

        # p-values from the F distribution
        self.FBprob = scipy.stats.fprob(self.dummyB.shape[1], self.dummySubjInB.shape[1], self.FB)
        self.FAprob = scipy.stats.fprob(self.dummyA.shape[1], LR_treat.DFres, self.FA)
        if addInteraction:
            self.FABprob = scipy.stats.fprob(self.dummyAB.shape[1], LR_treat.DFres, self.FAB)

        # store variables needed for posthoc tests
        self._arr2d = arr2d
        self._groupInd = groupInd
        self._LR_treat_MSres = LR_treat.MSres
        self._LR_treat_DFres = LR_treat.DFres
        # estimate the variance by pooling residual variance for factors A and B
        MS_res = LR_treat.MSres
        a = self.dummyA.shape[1] + 1 # number of levels of factor A
        DF_SubjInB = self.dummySubjInB.shape[1] # = LR_SubjInB.DFreg
        DF_res = LR_treat.DFres
        self._MSpool = (MS_SubjInB + (a-1)*MS_res) / a
        self._DFpool = ((MS_SubjInB + (a-1)*MS_res)**2) / ((MS_SubjInB**2 / DF_SubjInB) + (((a-1)*MS_res)**2 / DF_res))


    def posthoc_tstat_B(self):
        """Fisher LSD: compares all levels of factor B; returns a (b,b) matrix of
        t-statistic values and a matrix of the corresponding p-values.
        Use when there is no significant interaction.
        Warning: Statistica computes it WRONG!
        """
        b = len(self._groupInd)
        # precompute per-level means and summed inverse cell counts
        x_avrg = Numeric.zeros((b,), Numeric.Float)
        sum_count_x = Numeric.zeros((b,), Numeric.Float)
        for idx, replicaInd in enumerate(self._groupInd):
            xi = self._arr2d.take(replicaInd, 1)
            x_avrg[idx] = MA.average(MA.average(xi,1)) # first average over replicas to obtain cell mean, then over factor A
            sum_count_x[idx] = Numeric.add.reduce(1./MA.count(xi,1)) # first count the number of measurements within cells, then sum inverses over factor A
            ##x_avrg[idx] = MA.average(MA.ravel(xi)) # this is how Statistica computes it, but it is WRONG!
            ##sum_count_x[idx] = 1./MA.count(xi) # this is how Statistica computes it, but it is WRONG!
        # t-statistics: pairwise differences of the factor-B means
        x_avrg_2 = MA.resize(x_avrg, (x_avrg.shape[0], x_avrg.shape[0]))
        sum_count_x_2 = Numeric.resize(sum_count_x, (sum_count_x.shape[0],sum_count_x.shape[0]))
        tstat = (MA.transpose(x_avrg_2) - x_avrg_2) * self._arr2d.shape[0] / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2))
        ##tstat = (MA.transpose(x_avrg_2) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2) + sum_count_x_2)) # this is how Statistica computes it, but it is WRONG!
        # p-values via the incomplete beta function with the pooled DF
        tprob = numpyExtn.triangularPut(scipy.stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + numpyExtn.triangularGet(tstat**2))),1,1)
        return tstat, tprob


    def posthoc_tstat_BbyA(self):
        """Fisher LSD: compares all levels of factor B for each level of factor A separately.
        Returns: (b,b,a) matrix of t-statistic values
                 (b,b,a) matrix of corresponding p-values
                 (b,b) matrix of geometric means of p-values over all levels of factor A.
        Use when there is a significant interaction and factor A is of no interest.
        NOTE(review): only (tstat, tprob) are actually returned below; the
        geometric-mean matrix described above is not computed -- docstring stale?
        """
        a = self._arr2d.shape[0]
        b = len(self._groupInd)
        # precompute per-(A,B)-cell averages and inverse counts
        x_avrg = Numeric.zeros((a,b), Numeric.Float)
        sum_count_x = Numeric.zeros((a,b), Numeric.Float)
        for idx, replicaInd in enumerate(self._groupInd):
            xi = self._arr2d.take(replicaInd, 1)
            x_avrg[:,idx] = MA.average(xi, 1)
            sum_count_x[:,idx] = 1. / MA.count(xi, 1)

        # t-statistics: pairwise over factor B within each level of factor A
        x_avrg_2 = MA.resize(x_avrg, (b,a,b))
        sum_count_x_2 = Numeric.resize(sum_count_x, (b,a,b))

        tstat = (MA.transpose(x_avrg_2, (2,1,0)) - x_avrg_2) / Numeric.sqrt(self._MSpool * (Numeric.transpose(sum_count_x_2, (2,1,0)) + sum_count_x_2))
        # get p-values for each level of factor a separately (axis 1)
        tprob = MA.array(-1*Numeric.ones(tstat.shape, Numeric.Float), mask=Numeric.ones(tstat.shape))
        for idx1 in range(tprob.shape[1]):
            tprob[:,idx1,:] = numpyExtn.triangularPut(scipy.stats.abetai(0.5*self._DFpool, 0.5, float(self._DFpool) / (self._DFpool + numpyExtn.triangularGet(tstat[:,idx1,:]**2))),1,1)
        return tstat, tprob

    def posthoc_anova_B_AB(self):
        """Conducts ANOVA for each pair of levels of factor B to detect an interaction effect.
        Returns for each pair of factor B levels:
            (b,b) matrix of F-stat values for the effect of factor B
            (b,b) matrix of p-values for the effect of factor B
            (b,b) matrix of F-stat values for the interaction effect
            (b,b) matrix of p-values for the interaction effect
        """
        b = len(self._groupInd)
        # symmetric result matrices, fully masked until filled in
        FB = MA.masked * MA.ones((b,b), Numeric.Float)
        FBprob = MA.masked * MA.ones((b,b), Numeric.Float)
        FAB = MA.masked * MA.ones((b,b), Numeric.Float)
        FABprob = MA.masked * MA.ones((b,b), Numeric.Float)
        for i in range(b):
            for j in range(i+1,b):
                # rerun the 2-way RM ANOVA restricted to factor B levels i and j
                # NOTE(review): AnovaRM12LR documents groupLens as group *lengths*,
                # while rglSub here holds index lists as used by this legacy
                # class -- verify this call against AnovaRM12LR.__init__.
                rglSub = [self._groupInd[i], self._groupInd[j]]
                takeInd = self._groupInd[i] + self._groupInd[j]
                an = AnovaRM12LR(self._arr2d.take(takeInd, 1), rglSub, addInteraction=1)
                FB[i,j] = FB[j,i] = an.FB
                FBprob[i,j] = FBprob[j,i] = an.FBprob
                FAB[i,j] = FAB[j,i] = an.FAB
                FABprob[i,j] = FABprob[j,i] = an.FABprob
        return FB, FBprob, FAB, FABprob

865 | ||

866 | ||

class AnovaRM12LR(Anova2wayLRBase):
    """2 way ANOVA with REPEATED MEASURES on factor A using multivariate linear regression.

    Axis 0 corresponds to different levels of factor A; subjects are given at axis 1
    and are nested inside factor B according to the given group lengths.
    Factor A is a within-subject effect, factor B is a between-subject effect.
    Example:
        arr2d = [[a0b0 a0b0 a0b1 a0b1 a0b1], [a1b0 a1b0 a1b1 a1b1 a1b1]]: 2 levels of factor A, 2 levels of factor B, 5 subjects
        groupLens = [2,3]

    Factor levels and subjects without any observations can be dropped
    automatically (controlled by allowReductA / allowReductB); whenever the
    analysis cannot proceed, _giveUp() installs neutral results (F = 0, p = 1).
    TODO: fix the F and DF for factor B when there are missing values (see Glantz, pp. 462-5)
    """

    def __init__(self, arr2d, groupLens, addInteraction=0, allowReductA=True, allowReductB=False):
        """arr2d: 2D masked array: [[a0b0 a0b0 a0b1 a0b1 a0b1], [a1b0 a1b0 a1b1 a1b1 a1b1]];
        groupLens: lengths of axis 1 index runs that belong to the same level of factor B, e.g. [2,3];
        addInteraction: include / exclude the interaction between factors A and B;
        allowReduct[A|B]: whether or not to allow the reduction of levels of factor A (B).
        """
        arr2d = MA.asarray(arr2d)
        groupLens = Numeric.array(groupLens) # make a copy for the case if subjects or levels of factor B are removed
        assert len(arr2d.shape) == 2, "len(arr2d.shape) != 2"
        assert arr2d.shape[1] == Numeric.add.reduce(groupLens), "arr2d.shape[1] != Numeric.add.reduce(groupLens)"

        # check if there exist factor levels with all values missing (for A, B and Subj);
        # if so, reduce the corresponding DF and fix the number of dummy variables
        missIndA = Numeric.compress(MA.count(arr2d, 1) == 0, Numeric.arange(arr2d.shape[0]))
        if missIndA.shape[0] > 0:
            if allowReductA:
                print "Warning: removig factor A level(s) %s" % str(missIndA.tolist())
                takeIndA = Numeric.compress(MA.count(arr2d, 1) != 0, Numeric.arange(arr2d.shape[0]))
                arr2d = arr2d.take(takeIndA, 0)
            else:
                self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: the following level(s) of factor A have no values: %s" % str(missIndA.tolist()))
                return
        # remove subjects (columns) that carry no observations at all
        missIndSubj = Numeric.compress(MA.count(arr2d, 0) == 0, Numeric.arange(arr2d.shape[1]))
        if missIndSubj.shape[0] > 0:
            print "Warning: removig subject(s) %s" % str(missIndSubj.tolist())
            takeIndSubj = Numeric.compress(MA.count(arr2d, 0) != 0, Numeric.arange(arr2d.shape[1]))
            # fix groupLens: decrement the size of the group each removed subject belongs to
##            mapSubj2Group = -1*Numeric.ones(arr2d.shape[1])
##            groupLensAcc0 = [0] + Numeric.add.accumulate(groupLens).tolist()
##            for i in range(len(groupLensAcc0) - 1):
##                mapSubj2Group[groupLensAcc0[i]:groupLensAcc0[i+1]] = i
            mapSubj2Group = Numeric.repeat(range(len(groupLens)), groupLens)
            for subjIdx in missIndSubj:
                groupLens[mapSubj2Group[subjIdx]] -= 1
            # remove data columns that are missing all the values
            arr2d = arr2d.take(takeIndSubj, 1)
            # fix number of factor B levels (if the length of any group became 0 due to removed subjects)
            missIndB = Numeric.compress(groupLens <= 0, Numeric.arange(groupLens.shape[0]))
            if missIndB.shape[0] > 0:
                if allowReductB:
                    print "Warning: removig factor B level(s) %s" % str(missIndB.tolist())
                    takeIndB = Numeric.compress(groupLens > 0, Numeric.arange(groupLens.shape[0]))
                    groupLens = Numeric.take(groupLens, takeIndB)
                    # arr2d has already been taken care of by removing the subjects without observations
                else:
                    self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: the following level(s) of factor B have no values: %s" % str(missIndB))
                    return
        # remove levels of factor B with a single observation per cell
        grpIndSingle = Numeric.compress(Numeric.equal(groupLens,1), Numeric.arange(groupLens.shape[0]))
        if grpIndSingle.shape[0] > 0:
            if allowReductB:
                # fix arr2d: drop the single column of each single-subject group
                print "Warning: removing factor B level(s) with single observation: %s" % str(grpIndSingle)
                groupLensAcc = [0] + list(Numeric.add.accumulate(groupLens))
                arr2dRemInd = map(lambda i: groupLensAcc[i], grpIndSingle)
                arr2dTakeInd = Numeric.ones(arr2d.shape[1])
                Numeric.put(arr2dTakeInd, arr2dRemInd, 0)
                arr2d = MA.compress(arr2dTakeInd, arr2d, 1)
                # fix groupLens
                groupLens = Numeric.compress(Numeric.not_equal(groupLens,1), groupLens)
            else:
                self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: the following level(s) of factor B have single observations: %s" % str(grpIndSingle))
                return
        # check that there exist at least 2 levels of factors A and B
        # and that there is at least a single duplicated observation
        if arr2d.shape[0] < 2 or len(groupLens) < 2: ## or MA.count(arr2d) <= arr2d.shape[0] * len(groupLens):
            self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: not enough levels of factor A (%i) or factor B (%i)" % (arr2d.shape[0],len(groupLens)))
            return

        # remove other missing observations and the corresponding dummy variable codes
        self.y = MA.ravel(arr2d) # [a0b0, a0b1, .., a1b0, ...]
        noMissing = MA.count(self.y) == self.y.shape[0]
        self.y, takeInd = numpyExtn.compressIndices(self.y)

        # check if data is balanced (all factor B groups of equal size)
        isBalanced = (1. * Numeric.add.reduce((1.*groupLens/groupLens[0]) == Numeric.ones(groupLens.shape[0])) / groupLens.shape[0]) == 1

        # dummy variables (effects coding); subjects are nested within factor B
        self.dummyA = self.getDummyEE(arr2d.shape[0], 1)
        self.dummyB = self.getDummyEE(groupLens.shape[0], groupLens)
        self.dummySubjInB = self.getSubjectDummyEE(groupLens, 1)
        dummyB_Subj = Numeric.concatenate((self.dummyB, self.dummySubjInB), 1)
        self.dummyA, dummyB_Subj = self.getDummiesJoin(self.dummyA, dummyB_Subj)
        self.dummyB = dummyB_Subj[:, 0:self.dummyB.shape[1]]
        self.dummySubjInB = dummyB_Subj[:, self.dummyB.shape[1]:]
        if addInteraction:
            self.dummyAB = self.getDummyInteraction(self.dummyA, self.dummyB)
            self.dummyAB = Numeric.take(self.dummyAB, takeInd, 0)
        else:
            # zero-column placeholder so the concatenations below are uniform
            # NOTE(review): it is sized with the pre-take row count; if takeInd
            # removed rows the concatenate would seem to disagree in axis 0 and
            # end up in the catch-all below -- verify this is intended.
            self.dummyAB = Numeric.zeros((self.dummyA.shape[0],0))
        self.dummyA = Numeric.take(self.dummyA, takeInd, 0)
        self.dummyB = Numeric.take(self.dummyB, takeInd, 0)
        self.dummySubjInB = Numeric.take(self.dummySubjInB, takeInd, 0)

        # check that there is enough observations to allow for variability (DFres > 0)
        if self.dummyA.shape[0] - sum([self.dummySubjInB.shape[1], self.dummyA.shape[1], self.dummyB.shape[1], self.dummyAB.shape[1]]) - 1 <= 0:
            self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: not values (dummy condition)")
            return

        # run analysis
        try:
##            if addInteraction:
            LR_treat = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyB, self.dummyAB),1), self.y)
##            else:
##                LR_treat = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyB),1), self.y)

            # NOTE: branch deliberately disabled by the trailing "and False";
            # the extra-sum-of-squares path below is always taken
            if isBalanced and noMissing and False:
                # non-repeated-measures factor (B), tested against subjects-within-B
                LR_B = MultLinReg(self.dummyB, self.y)
                LR_SubjInB = MultLinReg(self.dummySubjInB, self.y)
                self.FB = LR_B.MSreg / LR_SubjInB.MSreg
                # repeated measures factor (A)
                LR_A = MultLinReg(self.dummyA, self.y)
                self.FA = LR_A.MSreg / LR_treat.MSres
                # interaction (AB)
                if addInteraction:
                    LR_AB = MultLinReg(self.dummyAB, self.y)
                    self.FAB = LR_AB.MSreg / LR_treat.MSres
                # store variables needed for posthoc tests
                MS_SubjInB = LR_SubjInB.MSreg
            else:
                ######################################################################################################################################
##                print "Warning: missing data with 2 way repeated measures ANOVA: F and DF should be adjusted due to random effect factor (subjects)"
                ######################################################################################################################################
                if addInteraction:
                    # non-repeated-measures factor (B): extra SS of B, tested
                    # against the subjects-within-B term rather than the residual
                    LR_S_A_AB = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyAB),1), self.y)
                    LR_A_B_AB = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB, self.dummyAB),1), self.y)
                    self.FB = (LR_treat.SSreg - LR_S_A_AB.SSreg) / self.dummyB.shape[1] / (LR_treat.SSreg - LR_A_B_AB.SSreg) * self.dummySubjInB.shape[1]
                    # repeated measures factor (A)
                    LR_S_B_AB = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyB, self.dummyAB),1), self.y)
                    self.FA = (LR_treat.SSreg - LR_S_B_AB.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
                    # interaction (AB)
                    LR_S_A_B = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA, self.dummyB),1), self.y)
                    self.FAB = (LR_treat.SSreg - LR_S_A_B.SSreg) / self.dummyAB.shape[1] / LR_treat.MSres
                    # store variables needed for posthoc tests
                    MS_SubjInB = (LR_treat.SSreg - LR_A_B_AB.SSreg) / self.dummySubjInB.shape[1]
                else:
                    # non-repeated-measures factor (B)
                    LR_S_A = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyA),1), self.y)
                    LR_A_B = MultLinReg(Numeric.concatenate((self.dummyA, self.dummyB),1), self.y)
                    self.FB = (LR_treat.SSreg - LR_S_A.SSreg) / self.dummyB.shape[1] / (LR_treat.SSreg - LR_A_B.SSreg) * self.dummySubjInB.shape[1]
                    # repeated measures factor (A)
                    LR_S_B = MultLinReg(Numeric.concatenate((self.dummySubjInB, self.dummyB),1), self.y)
                    self.FA = (LR_treat.SSreg - LR_S_B.SSreg) / self.dummyA.shape[1] / LR_treat.MSres
                    # store variables needed for posthoc tests
                    MS_SubjInB = (LR_treat.SSreg - LR_A_B.SSreg) / self.dummySubjInB.shape[1]
        except:
            # catch-all: singular designs make the regression fail; report neutral results
            self._giveUp(arr2d, groupLens, addInteraction, output="Giving up ANOVA: miltivariate linear regression failed, probably due to lack of variability")
            return

        # p-values from the F distribution; a ValueError (e.g. bad DF) yields p = 1
        try:
            self.FBprob = scipy.stats.fprob(self.dummyB.shape[1], self.dummySubjInB.shape[1], self.FB)
        except ValueError:
            self.FBprob = 1
        try:
            self.FAprob = scipy.stats.fprob(self.dummyA.shape[1], LR_treat.DFres, self.FA)
        except ValueError:
            self.FAprob = 1
        self.ps = [self.FAprob, self.FBprob]
        if addInteraction:
            try:
                self.FABprob = scipy.stats.fprob(self.dummyAB.shape[1], LR_treat.DFres, self.FAB)
            except ValueError:
                self.FABprob = 1
            self.ps.append(self.FABprob)

        # estimate the variance and degrees of freedom by pooling residual variance for factors A and B
        MS_res = LR_treat.MSres
        a = self.dummyA.shape[1] + 1 # number of levels of factor A
        DF_SubjInB = self.dummySubjInB.shape[1] # = LR_SubjInB.DFreg
        DF_res = LR_treat.DFres
        self._MSpool = (MS_SubjInB + (a-1)*MS_res) / a
        self._DFpool = ((MS_SubjInB + (a-1)*MS_res)**2) / ((MS_SubjInB**2 / DF_SubjInB) + (((a-1)*MS_res)**2 / DF_res))

        # store some variables (presumably read by posthoc helpers in Anova2wayLRBase -- not shown here)
        self._addInteraction = addInteraction
        self._arr2d = arr2d
        self._groupLens = groupLens


    def _giveUp(self, arr2d, groupLens, addInteraction, output=None):
        """Abort the analysis: print the reason (if given) and install neutral
        statistics (F = 0, p = 1) so callers can still read the result attributes.
        """
        if output != None:
            print output
        self.FA = 0
        self.FAprob = 1
        self.FB = 0
        self.FBprob = 1
        self.ps = [self.FAprob, self.FBprob]
        if addInteraction:
            self.FAB = 0
            self.FABprob = 1
            self.ps.append(self.FABprob)
        # keep the inputs around for any later posthoc helpers
        self._addInteraction = addInteraction
        self._arr2d = arr2d
        self._groupLens = groupLens

1074 | ||

1075 | ||

1076 | ####################################################################################### | |

1077 | ## Traditional ANOVA | |

1078 | ####################################################################################### | |

1079 | ||

class Anova2way:
    """Two-way ANOVA with multiple observations per cell for balanced designs.

    Factor A runs along axis 0 of the input array; axis 1 holds b cells of
    factor B, each packed as numRepeats consecutive observation columns.
    """

    def __init__(self, arr2d, numRepeats):
        """Input: 2d (masked) array with multiple observations per cell given
        at axis 1; numRepeats is the number of observations per cell and must
        divide arr2d.shape[1] evenly.
        """
        arr2d = MA.asarray(arr2d)
        # BUG FIX: the original check was math.modf(arr2d.shape[1] / numRepeats)[0] == 0.0,
        # but "/" on two ints truncates under Python 2, so the fractional part was
        # always 0.0 and the assert could never fire; test the remainder instead.
        assert arr2d.shape[1] % numRepeats == 0, "arr2d.shape[1] / numRepeats not a whole number"
        self.a = arr2d.shape[0]                  # number of levels of factor A
        self.b = arr2d.shape[1] / numRepeats     # number of levels of factor B
        self.n = numRepeats                      # number of observations per cell
        self.Y = arr2d
        self._init_sums()
        self._init_SS()
        self._init_F()

    def _init_sums(self):
        """Initializes degrees of freedom and the marginal sums of Y.

        Index convention: a capital letter means that index has been summed
        over (e.g. YiJK = cell sums further summed over j and replicates K).
        """
        self.dfA = self.a - 1
        self.dfB = self.b - 1
        self.dfAB = self.dfA * self.dfB
        self.dfTotal = self.a * self.b * self.n - 1
        self.dfError = self.dfTotal - self.dfA - self.dfB - self.dfAB
        # YijK: per-cell sums over the n replicates
        self.YijK = Numeric.zeros((self.a, self.b), Numeric.Float)
        for idx, bIdx in enumerate(range(0, self.b * self.n, self.n)):
            self.YijK[:, idx] = Numeric.add.reduce(self.Y[:, bIdx:bIdx + self.n], 1)
        # YiJK: factor A marginal sums
        self.YiJK = Numeric.add.reduce(self.YijK, 1)
        # YIjK: factor B marginal sums
        self.YIjK = Numeric.add.reduce(self.YijK, 0)
        # YIJK: grand total
        self.YIJK = Numeric.add.reduce(self.YIjK, 0)

    def _init_SS(self):
        """Initializes the sums of squares: total, A, B, AB interaction, error."""
        # correction term: (grand total)^2 / (total number of observations)
        y2IJKdivnab = Numeric.power(self.YIJK, 2) / (self.n * self.a * self.b)
        self.TSS = Numeric.add.reduce(Numeric.add.reduce(Numeric.power(self.Y, 2))) - y2IJKdivnab
        self.SSA = Numeric.add.reduce(Numeric.power(self.YiJK, 2)) / (self.n * self.b) - y2IJKdivnab
        self.SSB = Numeric.add.reduce(Numeric.power(self.YIjK, 2)) / (self.n * self.a) - y2IJKdivnab
        self.SSAB = Numeric.add.reduce(Numeric.add.reduce(Numeric.power(self.YijK, 2))) / (self.n) - y2IJKdivnab - self.SSA - self.SSB
        self.SSE = self.TSS - self.SSA - self.SSB - self.SSAB

    def _init_F(self):
        """Initializes the F statistics and their tail probabilities."""
        MSE = self.SSE / self.dfError   # mean square error, shared by all three tests
        self.FA = (self.SSA / self.dfA) / MSE
        self.FAprob = scipy.stats.fprob(self.dfA, self.dfError, self.FA)
        self.FB = (self.SSB / self.dfB) / MSE
        self.FBprob = scipy.stats.fprob(self.dfB, self.dfError, self.FB)
        self.FAB = (self.SSAB / self.dfAB) / MSE
        self.FABprob = scipy.stats.fprob(self.dfAB, self.dfError, self.FAB)

1135 | ||

1136 | ||

class Anova3way:
    """Three-way ANOVA with a single observation per cell (balanced design);
    the AB interaction term is optional.

    The caller is expected to leave out factor levels containing masked
    (missing) observations before constructing this object.
    """

    def __init__(self, arr3d, addInterAB):
        """arr3d: 3d masked array whose axes correspond to factors A, B, C.
        addInterAB: if true, split the AB interaction out of the error term.
        """
        arr3d = MA.asarray(arr3d)
        assert len(arr3d.shape) == 3, "len(arr3d.shape) != 3"
        a, b, c = arr3d.shape
        self.Y = arr3d

        sumAx = Numeric.add.reduce            # sum along an axis (default axis 0)
        sq = lambda v: Numeric.power(v, 2)    # elementwise square

        # marginal sums; a capital index letter means that index was summed over
        self.YijK = sumAx(self.Y, 2)
        self.YiJK = sumAx(self.YijK, 1)
        self.YIjK = sumAx(self.YijK, 0)
        self.YIJk = sumAx(sumAx(self.Y))
        # correction term: squared grand total over the number of cells
        self.YIJK2_abc = sq(sumAx(self.YIJk)) / (a * b * c)

        # degrees of freedom for the main effects and the total
        self.dfA = a - 1
        self.dfB = b - 1
        self.dfC = c - 1
        self.dfTotal = a * b * c - 1

        # sums of squares
        self.TSS = sumAx(sumAx(sumAx(sq(self.Y)))) - self.YIJK2_abc
        self.SSA = sumAx(sq(self.YiJK)) / (b * c) - self.YIJK2_abc
        self.SSB = sumAx(sq(self.YIjK)) / (a * c) - self.YIJK2_abc
        self.SSC = sumAx(sq(self.YIJk)) / (a * b) - self.YIJK2_abc

        if addInterAB:
            # the AB interaction is modelled: remove it from the error term
            self.SSAB = sumAx(sumAx(sq(self.YijK))) / c - self.YIJK2_abc - self.SSA - self.SSB
            self.dfAB = self.dfA * self.dfB
            self.SSE = self.TSS - self.SSA - self.SSB - self.SSC - self.SSAB
            self.dfError = self.dfTotal - self.dfA - self.dfB - self.dfC - self.dfAB
            self.FAB = (self.SSAB / self.dfAB) / (self.SSE / self.dfError)
            self.FABprob = scipy.stats.fprob(self.dfAB, self.dfError, self.FAB)
        else:
            # no interaction term: its variability stays in the error
            self.SSE = self.TSS - self.SSA - self.SSB - self.SSC
            self.dfError = self.dfTotal - self.dfA - self.dfB - self.dfC

        # F statistics and tail probabilities for the three main effects
        MSerr = self.SSE / self.dfError
        self.FA = (self.SSA / self.dfA) / MSerr
        self.FAprob = scipy.stats.fprob(self.dfA, self.dfError, self.FA)
        self.FB = (self.SSB / self.dfB) / MSerr
        self.FBprob = scipy.stats.fprob(self.dfB, self.dfError, self.FB)
        self.FC = (self.SSC / self.dfC) / MSerr
        self.FCprob = scipy.stats.fprob(self.dfC, self.dfError, self.FC)

1189 | ||

1190 | ||

1191 | ####################################################################################### | |

1192 | ## Multivariate Linear Regression | |

1193 | ####################################################################################### | |

1194 | ||

1195 | ||

class MultLinReg:
    """Multivariate linear regression by ordinary least squares.

    Fits y = b0 + b1*x1 + ... + bk*xk and exposes the ANOVA decomposition of
    the fit (SS/MS/DF for regression, residual and total).  Call
    getRegressionSummary() (or construct with summary=1) for standard errors,
    per-coefficient t-tests and the overall F test.
    """

    def __init__(self, X, y, summary=0):
        """
        X[i,j]: ith observation for the jth independent variable, shape (n,k)
        y[i]: ith observation of the dependent variable, shape (n,)
        summary: if true, compute the regression summary immediately.
        """
        X = Numeric.asarray(X)
        y = Numeric.asarray(y)
        assert len(X.shape) == 2, "len(X.shape) != 2"
        assert len(y.shape) == 1, "len(y.shape) != 1"
        self.DFreg = X.shape[1]                     # number of independent variables (k)
        self.DFres = X.shape[0] - (X.shape[1]) - 1  # observations (n) - variables (k) - 1
        self.DFtot = X.shape[0] - 1                 # observations (n) - 1
        # prepend a column of ones for the intercept term
        X = Numeric.concatenate((Numeric.ones((X.shape[0], 1), Numeric.Float), X), 1)
        XT = Numeric.transpose(X)
        try:
            self._XTXinv = LinearAlgebra.inverse(Numeric.dot(XT, X))
        except LinearAlgebra.LinAlgError:
            # parenthesized print: same output on Python 2, valid on Python 3
            print("Warning: singular matrix, using generalized_inverse")
            self._XTX = Numeric.dot(XT, X)  # store the singular matrix for inspection
            self._XTXinv = LinearAlgebra.generalized_inverse(self._XTX)
        y_mean = Numeric.average(y)
        # regression: b = (X'X)^-1 X'y, fitted values y_hat = X b
        self.b = Numeric.dot(Numeric.dot(self._XTXinv, XT), y)
        self.y_hat = Numeric.dot(X, self.b)
        # release the large intermediates before computing the sums of squares
        X = None
        XT = None
        # sums of squares / mean squares
        self.SSreg = Numeric.add.reduce((self.y_hat - y_mean)**2)
        self.MSreg = self.SSreg / self.DFreg
        self.SSres = Numeric.add.reduce((y - self.y_hat)**2)
        if self.DFres > 0:
            self.MSres = self.SSres / self.DFres  # equals square std. error: s^2_{y|x}
        else:
            # saturated fit: no residual degrees of freedom left
            print("Warning MultLinReg: SSres=%f, DFres=%f" % (self.SSres, self.DFres))
            self.MSres = 0
        self.SStot = self.SSreg + self.SSres  # equals Numeric.add.reduce((y - y_mean)**2)
        self.MStot = self.SStot / self.DFtot
        # regression summary
        if summary: self.getRegressionSummary()

    def __call__(self, X):
        """Predict y for new observations X: a single observation of shape
        (k,) or a matrix of shape (m,k).  The intercept column is added
        automatically.
        """
        X = Numeric.asarray(X)
        if len(X.shape) == 1:
            X = Numeric.concatenate(([1], X), 0)
        elif len(X.shape) == 2:
            X = Numeric.concatenate((Numeric.ones((X.shape[0], 1), Numeric.Float), X), 1)
        return Numeric.dot(X, self.b)

    def getRegressionSummary(self):
        """Computes std. errors, per-coefficient t-tests, coefficient
        correlations, the overall F test and the R^2 statistics.
        """
        self.stdErr = math.sqrt(self.MSres)         # std. error of the regression estimate: s_{y|x}
        self.VarCovar = self.MSres * self._XTXinv   # variance-covariance matrix: s^2_b
        self.b_se = Numeric.sqrt(Numeric.diagonal(self.VarCovar))  # std. error of regression coef.: s_b_i
        self.b_t = self.b / self.b_se               # t-test whether each regression coef. differs from 0
        self.b_tprob = scipy.stats.abetai(0.5*self.DFres, 0.5, float(self.DFres)/(self.DFres + self.b_t**2))
        # correlation between coefficients (b's)
        # ERROR in Glantz et al.: self.Corr = Numeric.sqrt(self.VarCovar) / Numeric.sqrt(Numeric.dot(self.b_se[:,Numeric.NewAxis],self.b_se[Numeric.NewAxis,:]))
        self.Corr = self.VarCovar / Numeric.dot(self.b_se[:, Numeric.NewAxis], self.b_se[Numeric.NewAxis, :])
        self.F = self.MSreg / self.MSres            # overall goodness of fit
        self.Fprob = scipy.stats.fprob(self.DFreg, self.DFres, self.F)
        self.R2 = self.SSreg / self.SStot           # coef. of determination: variance explained by the fit
        self.R2_adj = 1 - (self.MSres / self.MStot) # adjusted R^2 (unbiased estimator of population R^2)
        self.R = math.sqrt(self.R2)                 # multiple correlation coefficient

1268 | ||

1269 | ||

1270 | ||

1271 | ####################################################################################### | |

1272 | ## FDR and q-value | |

1273 | ####################################################################################### | |

1274 | ||

def qVals(pVals, estimatePi0=False, verbose=False):
    """Returns q-values calculated from the given p-values.

    Input: array of p-values; masked entries are passed through as masked
        q-values (the computation runs on the compressed values only).
    estimatePi0: False    -> pi0 == 1 (conservative)
                 "spline" -> cubic spline fit of the pi0(lambda) curve,
                             estimated at lambda=1
                 "loess"  -> loess fit of the pi0(lambda) curve, estimated
                             at lambda=1
    verbose: if true (and pi0 is estimated), plot the pi0(lambda) fit.
    Reference: Storey et al. (2003) Statistical significance for genomewide
        studies.  PNAS 100(16), pp. 9440-5.
    TODO: remove calculation of flmbd in all points except for lmbd=1.
    NOTE(review): the "loess" branch calls statc.loess and the verbose branch
        uses M (matplotlib.pylab); neither name is imported at module level,
        so those paths raise NameError unless the caller supplies them.
    """
    assert estimatePi0 in [False, "spline", "loess"]
    pValsMA = MA.asarray(pVals)
    qVals = MA.ones(pValsMA.shape, Numeric.Float) * MA.masked
    # indices of the unmasked p-values; the q-values are written back there
    putInd = Numeric.compress(Numeric.logical_not(MA.getmaskarray(pValsMA)), Numeric.arange(pValsMA.shape[0]))
    pValsMA = pValsMA.compressed()
    m = pValsMA.shape[0]
    x = Numeric.arange(0, 1.01, 0.01)   # evaluation grid for the pi0(lambda) fit
    if estimatePi0 == "spline":
        import scipy.interpolate
        lmbd = Numeric.arange(0, 0.96, 0.01, Numeric.Float)
        # pi0(lambda) = #{p > lambda} / (m * (1 - lambda))
        pi0lmbd = map(lambda l: Numeric.add.reduce(Numeric.greater(pValsMA, l)) / (m*(1-l)), lmbd)
        splineRep = scipy.interpolate.splrep(lmbd, pi0lmbd, k=3, s=0.01)  # spline representation: (knots, coefficients, degree)
        flmbd = scipy.interpolate.splev(x, splineRep, der=0)
        pi0 = flmbd[-1]
        if pi0 <= 0:
            print("Warning: pi0<=0, trying smaller lambda...")
            while pi0 <= 0:
                # shrink the lambda range until the extrapolated pi0 is positive
                lmbd = lmbd[:-1]
                pi0lmbd = map(lambda l: Numeric.add.reduce(Numeric.greater(pValsMA, l)) / (m*(1-l)), lmbd)
                splineRep = scipy.interpolate.splrep(lmbd, pi0lmbd, k=3, s=0.1)  # spline representation: (knots, coefficients, degree)
                flmbd = scipy.interpolate.splev(x, splineRep, der=0)
                pi0 = flmbd[-1]
    elif estimatePi0 == "loess":
        lmbd = Numeric.arange(0, 1.0, 0.01, Numeric.Float)
        pi0lmbd = map(lambda l: Numeric.add.reduce(Numeric.greater(pValsMA, l)) / (m*(1-l)), lmbd)
        flmbd = Numeric.asarray(statc.loess(zip(lmbd, pi0lmbd), list(x), 0.4))[:, 1]
        pi0 = flmbd[-1]
        if pi0 <= 0:
            print("Warning: pi0<=0, trying smaller lambda...")
            while pi0 <= 0:
                lmbd = lmbd[:-1]
                pi0lmbd = map(lambda l: Numeric.add.reduce(Numeric.greater(pValsMA, l)) / (m*(1-l)), lmbd)
                flmbd = Numeric.asarray(statc.loess(zip(lmbd, pi0lmbd), list(x), 0.4))[:, 1]
                pi0 = flmbd[-1]
    else:
        pi0 = 1
    if verbose and estimatePi0:
        M.plot(lmbd, pi0lmbd)
        M.plot(x, flmbd)
        M.legend(["pi0(lambda", "fit, len(lmbd)==%i" % len(lmbd)])
        M.title("pi0: %.4f" % pi0)
        M.show()
    # step-up q-value computation over the sorted p-values:
    # q(m) = pi0 * p(m); q(i) = min(pi0 * m * p(i) / i, q(i+1))
    args = Numeric.argsort(pValsMA)
    q = Numeric.ones(pValsMA.shape, Numeric.Float)
    q[args[m-1]] = pi0 * pValsMA[args[m-1]]
    for i in range(m-1, 0, -1):
        q[args[i-1]] = min(pi0*m*pValsMA[args[i-1]]/i, q[args[i]])
    MA.put(qVals, putInd, q)
    return qVals

1340 | ||

1341 | ||

1342 | ||

1343 | if __name__=="__main__": | |

1344 | ||

1345 | #====================== MULTIVARIATE LINEAR REGRESSION ========================== | |

1346 | ||

1347 | # MLR: compare with orange.LinRegLearner | |

1348 | ## import Dicty.DData, Meda.Preproc, orange | |

1349 | ## DD = Dicty.DData.DData_Nancy() | |

1350 | ## p = DD.getMean2d("pkaC") | |

1351 | ## po = Meda.Preproc.ma2orng(p, Meda.Preproc.getTcDomain(13)) | |

1352 | ## po.domain.classVar = po.domain.attributes[-1] | |

1353 | ## lr = orange.LinRegLearner(po) | |

1354 | ## pn = Numeric.array(p) | |

1355 | ## mlr = MultLinReg(pn[:,0:12], pn[:,12]) | |

1356 | ||

1357 | # mlr: simple test | |

1358 | ## xy1 = Numeric.array([[1,1.1],[0,-0.01],[-1,-1.1],[2,2.3],[-2,-1.5]], Numeric.Float) | |

1359 | ## ml1 = MultLinReg(xy1[:,:-1],xy1[:,-1]) | |

1360 | ## et1 = Meda.Preproc.ma2orng(xy1, Meda.Preproc.getTcDomain(2,False,[])) | |

1361 | ## et1.domain.classVar = et1.domain.attributes[-1] | |

1362 | ## lr1 = orange.LinRegLearner(et1) | |

1363 | ||

1364 | # mlr: test with random numbers | |

1365 | ## import numpy.oldnumeric.random_array as RandomArray | |

1366 | ## xy2 = RandomArray.random((5,4)) | |

1367 | ## ml2 = MultLinReg(xy2[:,:-1],xy2[:,-1],1) | |

1368 | ## et2 = Meda.Preproc.ma2orng(xy2, Meda.Preproc.getTcDomain(4,False,[])) | |

1369 | ## et2.domain.classVar = et2.domain.attributes[-1] | |

1370 | ## d3 = orange.Domain(et2.domain.attributes[:-1], et2.domain.attributes[-1]) | |

1371 | ## et3 = orange.ExampleTable(d3, et2) | |

1372 | ## lr2 = orange.LinRegLearner(et3) | |

1373 | ||

1374 | ||

1375 | #====================== 2 WAY ANOVA ========================== | |

1376 | ||

1377 | def print3d(arr3d): | |

1378 | for k in range(arr3d.shape[1]): | |

1379 | print "----",k,"----" | |

1380 | for j in range(arr3d.shape[2]): | |

1381 | for i in range(arr3d.shape[0]): | |

1382 | print arr3d[i,k,j] | |

1383 | ||

1384 | ## import Dicty.DData | |

1385 | ## DN = Dicty.DData.DData_Nancy() | |

1386 | ## rawN = DN.getRaw3dAll() | |

1387 | ## # subset of data | |

1388 | ## rawSub = rawN[0,0:3,0:6] | |

1389 | ## rgiSub = [[0,1,2],[3,4,5]] | |

1390 | ||

1391 | ## # compare traditional ANOVA and ANOVA using linear regression on a balanced design | |

1392 | ## as = Anova2way(rawSub, 3) | |

1393 | ## print "\ntrad.anova, interact", as.FAprob, as.FBprob, as.FABprob | |

1394 | ## ar0 = Anova2wayLR(rawSub, rgiSub, 0) | |

1395 | ## print "regr.anova, no inter", ar0.FAprob, ar0.FBprob | |

1396 | ## ar1 = Anova2wayLR(rawSub, rgiSub, 1) | |

1397 | ## print "regr.anova, interact", ar1.FAprob, ar1.FBprob, ar1.FABprob | |

1398 | ## | |

1399 | ## # missing data, no empty cells | |

1400 | ## dSubM1 = MA.array(rawSub) | |

1401 | ## dSubM1[-1,-1] = MA.masked | |

1402 | ## arM1_0 = Anova2wayLR(dSubM1, rgiSub, 0) | |

1403 | ## print "\nregr.anova, no inter", arM1_0.FAprob, arM1_0.FBprob | |

1404 | ## arM1_1 = Anova2wayLR(dSubM1, rgiSub, 1) | |

1405 | ## print "regr.anova, interact", arM1_1.FAprob, arM1_1.FBprob, arM1_1.FABprob | |

1406 | ## | |

1407 | ## # missing data and empty cell | |

1408 | ## dSubM3 = MA.array(rawSub) | |

1409 | ## dSubM3[-1,-3:] = MA.masked | |

1410 | ## arM3_0 = Anova2wayLR(dSubM3, rgiSub, 0) | |

1411 | ## print "\nregr.anova, no inter", arM3_0.FAprob, arM3_0.FBprob | |

1412 | ## arM3_1 = Anova2wayLR(dSubM3, rgiSub, 1) | |

1413 | ## print "regr.anova, interact", arM3_1.FAprob, arM3_1.FBprob, arM3_1.FABprob | |

1414 | ||

1415 | # test LR ANOVA for selection of genes on a whole dataset | |

1416 | import time | |

1417 | ||

1418 | def anova_2fact_testSpeed(rawAll, rgi): | |

1419 | """factors: time, strain | |

1420 | """ | |

[1073] | 1421 | as1 = Numeric.zeros((rawAll.shape[0],2), Numeric.Float) |

[188] | 1422 | print "2 fact start", time.ctime() |

1423 | for i in range(rawAll.shape[0]): | |

1424 | a0 = Anova2wayLR(rawAll[i], rgi, 0) | |

[1073] | 1425 | as1[i] = [a0.FAprob, a0.FBprob] |

[188] | 1426 | print "2 fact end ", time.ctime() |

[1073] | 1427 | return as1 |

[188] | 1428 | |

1429 | def anova_fulfact_testSpeed(rawAll, rgi): | |

1430 | """factors: time, strain & their interaction | |

1431 | """ | |

1432 | try: | |

[1073] | 1433 | as1 = Numeric.zeros((rawAll.shape[0],3), Numeric.Float) |

[188] | 1434 | print "2 fact start", time.ctime() |

1435 | for i in range(rawAll.shape[0]): | |

1436 | a0 = Anova2wayLR(rawAll[i], rgi, 1) | |

[1073] | 1437 | as1[i] = [a0.FAprob, a0.FBprob, a0.FABprob] |

[188] | 1438 | print "2 fact end ", time.ctime() |

1439 | except: | |

1440 | print "gene", i, time.ctime() | |

1441 | raise | |

[1073] | 1442 | return as1 |

[188] | 1443 | |

1444 | ## asff = anova_fulfact_testSpeed(rawN, DN.replicaGroupInd) # takes appx. 3 hours! | |

1445 | ## as2f = anova_2fact_testSpeed(rawN, DN.replicaGroupInd) # takes appx 2 minutes | |

1446 | ||

1447 | #====================== 3 WAY ANOVA ========================== | |

1448 | ||

1449 | ## import Dicty.DData | |

1450 | ## DN = Dicty.DData.DData_Nancy() | |

1451 | ## rawN = DN.getRaw3dAll() | |

1452 | ||

1453 | # small test: Anova3way | |

1454 | ## rawSub = rawN[0:3, 0:3, 0:3] | |

1455 | ## a3Sub = Anova3way(rawSub, True) | |

1456 | ## print "FprobGene, FprobTime, FprobRep", a3Sub.FAprob, a3Sub.FBprob, a3Sub.FCprob | |

1457 | ||

1458 | # are replicas equal? if FprobRep == 0: different, else equal | |

1459 | ## DN = Dicty.DData.DData_Nancy() | |

1460 | ## DDPJ = Dicty.DData.DData_PJ1() | |

1461 | ## DDMEDSTD = Dicty.DData.DData_MedStd() | |

1462 | ## def anova_testRepEq(DD, addInterAB): | |

1463 | ||

1464 | ## for st in DD.strains: | |

1465 | ## a3 = Anova3way(DD.getRaw3dStrains([st]).filled(0),addInterAB) | |

1466 | ## print "FpG %.10f FpT %.10f FpGxT %.10f FpRep(F) %.20f(%3.5f)\t%12s (%i rep)" % (a3.FAprob, a3.FBprob, a3.FABprob, a3.FCprob, a3.FC, st, len(DD.replicaGroupInd[DD.getIdxStrain(st)])) | |

1467 | ## | |

1468 | ## print "------------------ START ---------------" | |

1469 | ## anova_testRepEq(DN, True) | |

1470 | ## anova_testRepEq(DDPJ, True) | |

1471 | ## anova_testRepEq(DDMEDSTD, True) | |

1472 | ## print "------------------ END ---------------" | |

1473 | ||

1474 | #====================== 1 WAY ANOVA LR ========================== | |

1475 | ||

1476 | ## a1 = Anova1wayLR(Numeric.ravel(rawSub), [range(0,10), range(10,15), range(15,27)]) | |

1477 | ## subT = MA.transpose(rawN[0,:,8:11]) | |

1478 | ||

1479 | #====================== 1 WAY REPEATED MEASURES ANOVA LR ========================== | |

1480 | ||

1481 | ## a1rm = AnovaRM11LR(subT) | |

1482 | ## a1rmf = AnovaRM11LR(subT.filled(-1)) | |

1483 | ||

1484 | #====================== 2 WAY REPEATED MEASURES (on factor A) ANOVA LR ========================== | |

1485 | ||

1486 | ## ab = AnovaLRBase() | |

1487 | ||

1488 | ## print Numeric.concatenate((a2rm1.dummyA,a2rm1.dummyB,a2rm1.dummySubjInB),1) | |

1489 | ||

1490 | ## | |

1491 | ## import Dicty.DData | |

1492 | ## DN = Dicty.DData.DData_Nancy() | |

1493 | ## rawN = DN.getRaw3dAll() | |

1494 | ## rawSub = rawN[0, 0:5, 0:9] | |

1495 | ## gi = [[0,1,2],[3,4,5],[6,7,8]] | |

1496 | ||

1497 | ## a2rm1 = AnovaRM12LR(rawSub,gi,0) | |

1498 | ## print "---no missing---" | |

1499 | ## print a2rm1.FAprob, a2rm1.FBprob#, a2rm1.FABprob | |

1500 | ## | |

1501 | ## rawMiss = MA.array(rawSub) | |

1502 | ## rawMiss[0,-1] = MA.masked | |

1503 | ## a2m = AnovaRM12LR(rawMiss,gi,1) | |

1504 | ## print "---with missing---" | |

1505 | ## print a2m.FAprob, a2m.FBprob, a2m.FABprob | |

1506 | ||

1507 | ||

1508 | #====================== q-values ========================== | |

1509 | ||

1510 | import matplotlib.pylab as M | |

1511 | ||

1512 | for st, epist in epist_MMAG_nonRM.items(): | |

1513 | for i in range(3): | |

1514 | print st, i | |

1515 | ps1 = epist.posthoc_pIntByStrains[:,i].compressed() | |

1516 | qs1 = qVals(ps, "loess", True) | |

1517 | ps2 = epist.posthoc_pIntByStrains[:,i].compressed() | |

1518 | qs2 = qVals(ps2, "loess", True) | |

1519 | M.title("%s, %i" %(st,i)) | |

1520 | M.subplot(1,2,1) | |

1521 | M.scatter(ps1, qs1, 0.001) | |

1522 | M.subplot(1,2,2) | |

1523 | M.scatter(ps2, qs2, 0.001) | |

1524 | M.show() | |

1525 | ||

1526 | # increasing pi0lmbd function | |

1527 | ## qqc = qVals(ppc) | |

1528 | ||

1529 | # negative pi0 | |

1530 | ## pp = epist_DS_nonRM["pkaRregA"].posthoc_pIntByStrains[:,0].compressed() | |

1531 | ## qq = qVals(pp) | |

1532 | ## M.scatter(pp,qq) | |

1533 | ## M.show() | |

1534 |

**Note:** See TracBrowser for help on using the repository browser.