#
source:
orange/Orange/distance/__init__.py
@
11459:fc07a5c346be

Revision 11459:fc07a5c346be, 9.0 KB checked in by Ales Erjavec <ales.erjavec@…>, 12 months ago (diff) |
---|

Rev | Line | |
---|---|---|

[9663] | 1 | import Orange |

2 | ||

3 | from Orange.core import \ | |

[9722] | 4 | DistanceMap, \ |

5 | DistanceMapConstructor, \ | |

6 | ExamplesDistance as Distance, \ | |

7 | ExamplesDistance_Normalized as DistanceNormalized, \ | |

8 | ExamplesDistanceConstructor as DistanceConstructor, \ | |

9 | ExamplesDistance_Hamming as HammingDistance, \ | |

10 | ExamplesDistance_DTW as DTWDistance, \ | |

11 | ExamplesDistance_Euclidean as EuclideanDistance, \ | |

12 | ExamplesDistance_Manhattan as ManhattanDistance, \ | |

13 | ExamplesDistance_Maximal as MaximalDistance, \ | |

14 | ExamplesDistance_Relief as ReliefDistance, \ | |

15 | ExamplesDistanceConstructor_DTW as DTW, \ | |

16 | ExamplesDistanceConstructor_Euclidean as Euclidean, \ | |

17 | ExamplesDistanceConstructor_Hamming as Hamming, \ | |

18 | ExamplesDistanceConstructor_Manhattan as Manhattan, \ | |

19 | ExamplesDistanceConstructor_Maximal as Maximal, \ | |

20 | ExamplesDistanceConstructor_Relief as Relief | |

[9663] | 21 | |

[9725] | 22 | from Orange import statc |

[10580] | 23 | from Orange.utils import progress_bar_milestones |

[9752] | 24 | |

[9663] | 25 | import numpy |

26 | from numpy import linalg | |

27 | ||

class PearsonR(DistanceConstructor):
    """Constructor of :obj:`PearsonRDistance` measures.

    When instantiated with ``data`` the constructor is applied to it
    immediately and the resulting distance measure is returned instead
    of the constructor itself.
    """

    def __new__(cls, data=None, **argkw):
        constructor = DistanceConstructor.__new__(cls, **argkw)
        # Keyword arguments are also stored as instance attributes.
        constructor.__dict__.update(argkw)
        if data is None:
            return constructor
        return constructor.__call__(data)

    def __call__(self, table):
        """Return a :obj:`PearsonRDistance` restricted to the continuous
        attributes of ``table.domain``.
        """
        indxs = []
        for position, feature in enumerate(table.domain.attributes):
            if feature.varType == Orange.feature.Type.Continuous:
                indxs.append(position)
        return PearsonRDistance(domain=table.domain, indxs=indxs)

[9663] | 42 | |

class PearsonRDistance(Distance):
    """
    Distance based on the `Pearson correlation coefficient
    <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient>`_.

    The measure reads ``self.indxs`` (the attribute indices to compare),
    which is expected to be supplied as a keyword argument.
    """

    def __init__(self, **argkw):
        # Every keyword argument (e.g. ``domain``, ``indxs``) becomes an
        # instance attribute.
        self.__dict__.update(argkw)

    def __call__(self, e1, e2):
        """
        :param e1: data instance.
        :param e2: data instance.

        Returns Pearson's dissimilarity between e1 and e2,
        i.e. (1-r)/2 where r is Pearson's correlation coefficient.

        Attributes for which either instance has a special value (as
        reported by ``isSpecial()``) are skipped. If no attribute
        remains, or the coefficient cannot be computed, 1.0 is returned.
        """
        X1 = []
        X2 = []
        for i in self.indxs:
            if not (e1[i].isSpecial() or e2[i].isSpecial()):
                X1.append(float(e1[i]))
                X2.append(float(e2[i]))
        if not X1:
            return 1.0
        try:
            return (1.0 - statc.pearsonr(X1, X2)[0]) / 2.
        except Exception:
            # Best effort: any failure of the correlation routine maps to
            # the fallback distance, but KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            return 1.0

73 | ||

class SpearmanR(DistanceConstructor):
    """Constructor of :obj:`SpearmanRDistance` measures.

    When instantiated with ``data`` the constructor is applied to it
    immediately and the resulting distance measure is returned instead
    of the constructor itself.
    """

    def __new__(cls, data=None, **argkw):
        constructor = DistanceConstructor.__new__(cls, **argkw)
        # Keyword arguments are also stored as instance attributes.
        constructor.__dict__.update(argkw)
        if data is None:
            return constructor
        return constructor.__call__(data)

    def __call__(self, table):
        """Return a :obj:`SpearmanRDistance` restricted to the continuous
        attributes of ``table.domain``.
        """
        indxs = []
        for position, feature in enumerate(table.domain.attributes):
            if feature.varType == Orange.feature.Type.Continuous:
                indxs.append(position)
        return SpearmanRDistance(domain=table.domain, indxs=indxs)

[9663] | 88 | |

class SpearmanRDistance(Distance):
    """
    Distance based on `Spearman's rank correlation coefficient
    <http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_.

    The measure reads ``self.indxs`` (the attribute indices to compare),
    which is expected to be supplied as a keyword argument.
    """

    def __init__(self, **argkw):
        # Every keyword argument (e.g. ``domain``, ``indxs``) becomes an
        # instance attribute.
        self.__dict__.update(argkw)

    def __call__(self, e1, e2):
        """
        :param e1: data instance.
        :param e2: data instance.

        Returns Spearman's dissimilarity between e1 and e2,
        i.e. (1-r)/2 where r is Spearman's rank coefficient.

        Attributes for which either instance has a special value (as
        reported by ``isSpecial()``) are skipped. If no attribute
        remains, or the coefficient cannot be computed, 1.0 is returned.
        """
        X1 = []
        X2 = []
        for i in self.indxs:
            if not (e1[i].isSpecial() or e2[i].isSpecial()):
                X1.append(float(e1[i]))
                X2.append(float(e2[i]))
        if not X1:
            return 1.0
        try:
            return (1.0 - statc.spearmanr(X1, X2)[0]) / 2.
        except Exception:
            # Best effort: any failure of the correlation routine maps to
            # the fallback distance, but KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            return 1.0

117 | ||

class Mahalanobis(DistanceConstructor):
    """Constructor of :obj:`MahalanobisDistance` measures.

    When instantiated with ``data`` the constructor is applied to it
    immediately and the resulting distance measure is returned instead
    of the constructor itself.
    """

    def __new__(cls, data=None, **argkw):
        constructor = DistanceConstructor.__new__(cls, **argkw)
        # Keyword arguments are also stored as instance attributes.
        constructor.__dict__.update(argkw)
        if data is None:
            return constructor
        return constructor.__call__(data)

    # NOTE: a, b, c and **argkw are accepted but not used by this method
    # (the original carried a "check attributes a, b, c" reminder here).
    def __call__(self, table, a=None, b=None, c=None, **argkw):
        """Return a :obj:`MahalanobisDistance` for ``table``.

        The table is translated to a continuized domain, converted to a
        numpy array, and the pseudo-inverse of its covariance matrix is
        handed to the distance measure.
        """
        continuizer_type = Orange.core.DomainContinuizer
        continuizer = continuizer_type()
        continuizer.classTreatment = continuizer_type.Ignore
        continuizer.continuousTreatment = continuizer_type.NormalizeBySpan
        continuizer.multinomialTreatment = continuizer_type.NValues

        newdomain = continuizer(table)
        translated = table.translate(newdomain)

        # Only the first of the three returned arrays is needed.
        features, _classes, _unused = translated.to_numpy()

        covariance = numpy.cov(features, rowvar=0, bias=1)
        inverse_covariance = linalg.pinv(covariance, rcond=1e-10)

        return MahalanobisDistance(domain=newdomain, icm=inverse_covariance)

[9663] | 145 | |

class MahalanobisDistance(Distance):
    """`Mahalanobis distance
    <http://en.wikipedia.org/wiki/Mahalanobis_distance>`_"""

    def __init__(self, domain, icm, **argkw):
        """
        :param domain: domain into which instances are converted before
            they are compared.
        :param icm: matrix used in the quadratic form
            ``diff * icm * diff^T`` (the inverse covariance matrix).

        Any further keyword arguments become instance attributes.
        """
        self.domain = domain
        self.icm = icm
        self.__dict__.update(argkw)

    def __call__(self, e1, e2):
        """
        :param e1: data instance.
        :param e2: data instance.

        Returns Mahalanobis distance between e1 and e2.

        An attribute for which either instance has a special value (as
        reported by ``isSpecial()``) contributes a difference of 0.0.
        """
        e1 = Orange.data.Instance(self.domain, e1)
        e2 = Orange.data.Instance(self.domain, e2)

        diff = []
        for i in range(len(self.domain.attributes)):
            v1, v2 = e1[i], e2[i]
            if v1.isSpecial() or v2.isSpecial():
                # The 0.0 must be appended (the original evaluated and
                # discarded it), otherwise ``diff`` is shorter than the
                # dimension of ``icm`` and the product below breaks.
                diff.append(0.0)
            else:
                diff.append(v1.value - v2.value)
        diff = numpy.asmatrix(diff)
        res = diff * self.icm * diff.transpose()
        return res[0, 0] ** 0.5

171 | ||

172 | ||

class PearsonRAbsolute(PearsonR):
    """Constructor of :obj:`PearsonRAbsoluteDistance` measures."""

    def __call__(self, data):
        """Return a :obj:`PearsonRAbsoluteDistance` restricted to the
        continuous attributes of ``data.domain``.
        """
        indxs = []
        for position, feature in enumerate(data.domain.attributes):
            if feature.varType == Orange.feature.Type.Continuous:
                indxs.append(position)
        return PearsonRAbsoluteDistance(domain=data.domain, indxs=indxs)

[9663] | 180 | |

181 | ||

class PearsonRAbsoluteDistance(PearsonRDistance):
    """ An example distance estimator using absolute value of Pearson
    correlation coefficient.
    """

    def __call__(self, e1, e2):
        """
        Return absolute Pearson's dissimilarity between e1 and e2,
        i.e.

        .. math:: 1 - abs(r)

        where r is Pearson's correlation coefficient.

        Attributes for which either instance has a special value (as
        reported by ``isSpecial()``) are skipped. If no attribute
        remains, or the coefficient cannot be computed, 1.0 is returned.
        """
        X1 = []
        X2 = []
        for i in self.indxs:
            if not (e1[i].isSpecial() or e2[i].isSpecial()):
                X1.append(float(e1[i]))
                X2.append(float(e2[i]))
        if not X1:
            return 1.0
        try:
            # NOTE: unlike the non-absolute variant there is no division
            # by 2 here; 1 - |r| already lies in [0, 1].
            return (1.0 - abs(statc.pearsonr(X1, X2)[0]))
        except Exception:
            # Best effort: any failure of the correlation routine maps to
            # the fallback distance, but KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            return 1.0

206 | ||

207 | ||

class SpearmanRAbsolute(SpearmanR):
    """Constructor of :obj:`SpearmanRAbsoluteDistance` measures."""

    def __call__(self, data):
        """Return a :obj:`SpearmanRAbsoluteDistance` restricted to the
        continuous attributes of ``data.domain``.
        """
        indxs = []
        for position, feature in enumerate(data.domain.attributes):
            if feature.varType == Orange.feature.Type.Continuous:
                indxs.append(position)
        return SpearmanRAbsoluteDistance(domain=data.domain, indxs=indxs)

[9663] | 215 | |

216 | ||

class SpearmanRAbsoluteDistance(SpearmanRDistance):
    """ An example distance estimator using absolute value of Spearman's
    rank correlation coefficient.
    """

    def __call__(self, e1, e2):
        """
        Return absolute Spearman's dissimilarity between e1 and e2,
        i.e.

        .. math:: 1 - abs(r)

        where r is Spearman's correlation coefficient.

        Attributes for which either instance has a special value (as
        reported by ``isSpecial()``) are skipped. If no attribute
        remains, or the coefficient cannot be computed, 1.0 is returned.
        """
        X1 = []
        X2 = []
        for i in self.indxs:
            if not (e1[i].isSpecial() or e2[i].isSpecial()):
                X1.append(float(e1[i]))
                X2.append(float(e2[i]))
        if not X1:
            return 1.0
        try:
            # NOTE: unlike the non-absolute variant there is no division
            # by 2 here; 1 - |r| already lies in [0, 1].
            return (1.0 - abs(statc.spearmanr(X1, X2)[0]))
        except Exception:
            # Best effort: any failure of the correlation routine maps to
            # the fallback distance, but KeyboardInterrupt/SystemExit are
            # no longer swallowed.
            return 1.0

238 | ||

[9752] | 239 | def _pairs(seq, same = False): |

240 | """ Return all pairs from elements of `seq`. | |

241 | """ | |

242 | seq = list(seq) | |

243 | same = 0 if same else 1 | |

244 | for i in range(len(seq)): | |

245 | for j in range(i + same, len(seq)): | |

246 | yield seq[i], seq[j] | |

247 | ||

def distance_matrix(data, distance_constructor=Euclidean, progress_callback=None):
    """ A helper function that computes an :obj:`Orange.misc.SymMatrix` of all
    pairwise distances between instances in `data`.

    :param data: A data table
    :type data: :obj:`Orange.data.Table`

    :param distance_constructor: A DistanceConstructor instance (defaults to :obj:`Euclidean`).
    :type distance_constructor: :obj:`Orange.distance.DistanceConstructor`

    :param progress_callback: A function (taking one argument, the
        progress in percent) to use for reporting on the progress.
    :type progress_callback: function

    :rtype: :class:`Orange.misc.SymMatrix`

    """
    matrix = Orange.misc.SymMatrix(len(data))
    dist = distance_constructor(data)

    # Number of unordered pairs of distinct instances; explicit floor
    # division keeps this an integer regardless of division semantics.
    iter_count = matrix.dim * (matrix.dim - 1) // 2
    milestones = progress_bar_milestones(iter_count, 100)

    # Only pairs with i < j are visited; diagonal entries are not assigned.
    for count, ((i, ex1), (j, ex2)) in enumerate(_pairs(enumerate(data))):
        matrix[i, j] = dist(ex1, ex2)
        if progress_callback and count in milestones:
            progress_callback(100.0 * count / iter_count)

    return matrix

**Note:** See TracBrowser for help on using the repository browser.