#
source:
orange-bioinformatics/_bioinformatics/obiProb.py
@
1636:10d234fdadb9

Revision 1636:10d234fdadb9, 4.6 KB checked in by mitar, 2 years ago (diff) |
---|

Rev | Line | |
---|---|---|

[537] | 1 | import math |

[472] | 2 | |

[720] | 3 | def _lngamma(z): |

4 | x = 0 | |

5 | x += 0.1659470187408462e-06 / (z + 7) | |

6 | x += 0.9934937113930748e-05 / (z + 6) | |

7 | x -= 0.1385710331296526 / (z + 5) | |

8 | x += 12.50734324009056 / (z + 4) | |

9 | x -= 176.6150291498386 / (z + 3) | |

10 | x += 771.3234287757674 / (z + 2) | |

11 | x -= 1259.139216722289 / (z + 1) | |

12 | x += 676.5203681218835 / (z) | |

13 | x += 0.9999999999995183 | |

14 | ||

15 | return math.log(x) - 5.58106146679532777 - z + (z - 0.5) * math.log(z + 6.5) | |

16 | ||

[489] | 17 | class LogBin(object): |

18 | _max = 2 | |

[537] | 19 | _lookup = [0.0, 0.0] |

[560] | 20 | _max_factorial = 1 |

[472] | 21 | def __init__(self, max=1000): |

22 | self._extend(max) | |

23 | ||

[837] | 24 | @staticmethod |

25 | def _extend(max): | |

26 | if max <= LogBin._max: | |

27 | return | |

28 | for i in range(LogBin._max, max): | |

[720] | 29 | if i > 1000: ## an arbitrary cuttof |

[837] | 30 | LogBin._lookup.append(LogBin._logfactorial(i)) |

[720] | 31 | else: |

[837] | 32 | LogBin._max_factorial *= i |

33 | LogBin._lookup.append(math.log(LogBin._max_factorial)) | |

34 | LogBin._max = max | |

[537] | 35 | |

[506] | 36 | def _logbin(self, n, k): |

[489] | 37 | if n >= self._max: |

38 | self._extend(n + 100) | |

[720] | 39 | if k < n and k >= 0: |

[560] | 40 | return self._lookup[n] - self._lookup[n - k] - self._lookup[k] |

41 | else: | |

[561] | 42 | return 0.0 |

[472] | 43 | |

[837] | 44 | @staticmethod |

45 | def _logfactorial(n): | |

[720] | 46 | if (n <= 1): |

47 | return 0.0 | |

48 | else: | |

49 | return _lngamma(n + 1) | |

50 | ||

[489] | 51 | class Binomial(LogBin): |

52 | ||

53 | def __call__(self, k, N, m, n): | |

54 | p = 1.0 * m / N | |

55 | if p == 0.0: | |

56 | if k == 0: | |

57 | return 1.0 | |

58 | else: | |

[472] | 59 | return 0.0 |

[489] | 60 | elif p == 1.0: |

61 | if n == k: | |

62 | return 1.0 | |

[472] | 63 | else: |

64 | return 0.0 | |

[543] | 65 | try: |

[725] | 66 | return min(math.exp(self._logbin(n, k) + k * math.log(p) + (n - k) * math.log(1.0 - p)), 1.0) |

[543] | 67 | except (OverflowError, ValueError), er: |

[560] | 68 | print k, N, m, n |

69 | raise | |

[537] | 70 | ## return math.exp(self._logbin(n, k) + math.log((p**k) * (1.0 - p)**(n - k))) |

[472] | 71 | |

[489] | 72 | def p_value(self, k, N, m, n): |

[537] | 73 | subtract = n - k + 1 > k |

74 | result = sum([self.__call__(i, N, m, n) for i in (range(k) if subtract else range(k, n+1))]) | |

[702] | 75 | return max(1.0 - result if subtract else result, 0.0) |

[489] | 76 | |

77 | class Hypergeometric(LogBin): | |

[537] | 78 | |

[489] | 79 | def __call__(self, k, N, m, n): |

[720] | 80 | if k < max(0, n + m - N) or k > min(n, m): |

81 | return 0.0 | |

[543] | 82 | try: |

[725] | 83 | return min(math.exp(self._logbin(m, k) + self._logbin(N - m, n - k) - self._logbin(N, n)), 1.0) |

[543] | 84 | except (OverflowError, ValueError), er: |

[560] | 85 | print k, N, m, n |

86 | raise | |

[489] | 87 | |

88 | def p_value(self, k, N, m, n): | |

[537] | 89 | subtract = n - k + 1 > k |

[560] | 90 | ## result = sum([math.exp(self._logbin(m, i) + self._logbin(N - m, n - i)) for i in (range(k) if subtract else range(k, n+1))]) |

91 | result = sum([self.__call__(i, N, m, n) for i in (range(k) if subtract else range(k, n+1))]) | |

92 | ## result /= math.exp(self._logbin(N, n)) | |

[702] | 93 | return max(1.0 - result if subtract else result, 0.0) |

[506] | 94 | |

95 | ||

96 | ## to speed-up FDR, calculate ahead sum([1/i for i in range(1, m+1)]), for m in [1,100000]. For higher values of m use an approximation, with error less or equal to 4.99999157277e-006. (sum([1/i for i in range(1, m+1)]) ~ log(m) + 0.5772..., 0.5572 is an Euler-Mascheroni constant) | |

97 | c = [1.0] | |

98 | for m in range(2, 100000): | |

99 | c.append( c[-1] + 1.0/m) | |

100 | ||

[1506] | 101 | def is_sorted(l): |

102 | return all(l[i] <= l[i+1] for i in xrange(len(l)-1)) | |

103 | ||

104 | def FDR(p_values, dependent=False, m=None, ordered=False): | |

105 | """ | |

106 | If the user is sure that pvalues as already sorted nondescendingly | |

107 | setting ordered=True will make the computation faster. | |

108 | """ | |

109 | ||

110 | if not ordered: | |

111 | ordered = is_sorted(p_values) | |

112 | ||

113 | if not ordered: | |

114 | joined = [ (v,i) for i,v in enumerate(p_values) ] | |

115 | joined.sort() | |

116 | p_values = [ p[0] for p in joined ] | |

117 | indices = [ p[1] for p in joined ] | |

118 | ||

[688] | 119 | if not m: |

[506] | 120 | m = len(p_values) |

[1146] | 121 | if m <= 0 or not p_values: |

[506] | 122 | return [] |

123 | ||

124 | if dependent: # correct q for dependent tests | |

125 | k = c[m-1] if m <= len(c) else math.log(m) + 0.57721566490153286060651209008240243104215933593992 | |

[688] | 126 | m = m * k |

[506] | 127 | |

[688] | 128 | tmp_fdrs = [p*m/(i+1.0) for (i, p) in enumerate(p_values)] |

129 | fdrs = [] | |

130 | cmin = tmp_fdrs[-1] | |

131 | for f in reversed(tmp_fdrs): | |

132 | cmin = min(f, cmin) | |

133 | fdrs.append( cmin) | |

134 | fdrs.reverse() | |

[1506] | 135 | |

136 | if not ordered: | |

137 | new = [ None ] * len(fdrs) | |

138 | for v,i in zip(fdrs, indices): | |

139 | new[i] = v | |

140 | fdrs = new | |

141 | ||

[688] | 142 | return fdrs |

[506] | 143 | |

[688] | 144 | def Bonferroni(p_values, m=None): |

145 | if not m: | |

[506] | 146 | m = len(p_values) |

147 | if m == 0: | |

148 | return [] | |

[688] | 149 | m = float(m) |

150 | return [p/m for p in p_values] |

**Note:**See TracBrowser for help on using the repository browser.