Changeset 9663:74b63c8ea80c in orange for orange/Orange/distance/__init__.py
 Timestamp:
 02/06/12 09:42:50 (2 years ago)
 Branch:
 default
 Children:
 9664:6638cc93015a, 9719:782cfec5fe88
 rebase_source:
 a78d1d75a2cac951721701298920fd6faa82902a
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/distance/__init__.py
"""Distance measures between data instances.

Re-exports the distance classes implemented in ``Orange.core`` under short
names and adds correlation-based (Pearson, Spearman) and Mahalanobis
distances, plus a helper that fills a pairwise distance matrix.
"""
# NOTE(review): reconstructed from the r9663 side of the changeset view. The
# r8059 line ``import instances`` appears to be the line this changeset
# removes (replaced by ``import Orange``) -- confirm against the repository.

import Orange

from Orange.core import \
    AlignmentList, \
    DistanceMap, \
    DistanceMapConstructor, \
    ExampleDistConstructor, \
    ExampleDistBySorting, \
    ExampleDistVector, \
    ExamplesDistance, \
    ExamplesDistance_Normalized, \
    ExamplesDistanceConstructor

from Orange.core import ExamplesDistance_Hamming as Hamming
from Orange.core import ExamplesDistance_DTW as DTW
from Orange.core import ExamplesDistance_Euclidean as Euclidean
from Orange.core import ExamplesDistance_Manhattan as Manhattan
from Orange.core import ExamplesDistance_Maximal as Maximal
from Orange.core import ExamplesDistance_Relief as Relief

from Orange.core import ExamplesDistanceConstructor_DTW as DTWConstructor
from Orange.core import ExamplesDistanceConstructor_Euclidean as EuclideanConstructor
from Orange.core import ExamplesDistanceConstructor_Hamming as HammingConstructor
from Orange.core import ExamplesDistanceConstructor_Manhattan as ManhattanConstructor
from Orange.core import ExamplesDistanceConstructor_Maximal as MaximalConstructor
from Orange.core import ExamplesDistanceConstructor_Relief as ReliefConstructor

import statc
import numpy
from numpy import linalg


def _continuous_indices(table):
    """Return the positions of continuous attributes in ``table.domain``."""
    return [i for i, a in enumerate(table.domain.attributes)
            if a.varType == Orange.data.Type.Continuous]


def _paired_values(e1, e2, indxs):
    """Return two parallel lists of floats taken from `e1` and `e2` at `indxs`.

    A position is skipped when ``isSpecial()`` is true for the value of
    either instance, so both lists always have the same length.
    """
    X1 = []
    X2 = []
    for i in indxs:
        if not (e1[i].isSpecial() or e2[i].isSpecial()):
            X1.append(float(e1[i]))
            X2.append(float(e2[i]))
    return X1, X2


class PearsonRConstructor(ExamplesDistanceConstructor):
    """Constructs an instance of PearsonR.
    Not all the data needs to be given."""

    def __new__(cls, data=None, **argkw):
        # Keyword arguments are forwarded to the base constructor and also
        # stored as attributes on the new object.
        self = ExamplesDistanceConstructor.__new__(cls, **argkw)
        self.__dict__.update(argkw)
        if data:
            # Shortcut: when data is supplied, return the distance measure
            # itself instead of the constructor.
            return self.__call__(data)
        else:
            return self

    def __call__(self, table):
        """Return a :obj:`PearsonR` over the continuous attributes of
        `table`'s domain."""
        return PearsonR(domain=table.domain,
                        indxs=_continuous_indices(table))


class PearsonR(ExamplesDistance):
    """
    `Pearson correlation coefficient
    <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient>`_
    """

    def __init__(self, **argkw):
        # Expected keywords (set by the constructor): ``domain``, ``indxs``.
        self.__dict__.update(argkw)

    def __call__(self, e1, e2):
        """
        :param e1: data instances.
        :param e2: data instances.

        Returns Pearson's dissimilarity between e1 and e2,
        i.e. (1-r)/2 where r is Pearson's correlation coefficient.

        Returns 1.0 when the instances share no usable values or the
        coefficient cannot be computed.
        """
        X1, X2 = _paired_values(e1, e2, self.indxs)
        if not X1:
            return 1.0
        try:
            return (1.0 - statc.pearsonr(X1, X2)[0]) / 2.
        except Exception:
            # Best effort: any failure in the coefficient computation is
            # reported as the fallback distance 1.0.
            return 1.0


class SpearmanRConstructor(ExamplesDistanceConstructor):
    """Constructs an instance of SpearmanR.
    Not all the data needs to be given."""

    def __new__(cls, data=None, **argkw):
        # Keyword arguments are forwarded to the base constructor and also
        # stored as attributes on the new object.
        self = ExamplesDistanceConstructor.__new__(cls, **argkw)
        self.__dict__.update(argkw)
        if data:
            # Shortcut: when data is supplied, return the distance measure
            # itself instead of the constructor.
            return self.__call__(data)
        else:
            return self

    def __call__(self, table):
        """Return a :obj:`SpearmanR` over the continuous attributes of
        `table`'s domain."""
        return SpearmanR(domain=table.domain,
                         indxs=_continuous_indices(table))


class SpearmanR(ExamplesDistance):

    """`Spearman's rank correlation coefficient
    <http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_"""

    def __init__(self, **argkw):
        # Expected keywords (set by the constructor): ``domain``, ``indxs``.
        self.__dict__.update(argkw)

    def __call__(self, e1, e2):
        """
        :param e1: data instances.
        :param e2: data instances.

        Returns Spearman's dissimilarity between e1 and e2,
        i.e. (1-r)/2 where r is Spearman's rank coefficient.

        Returns 1.0 when the instances share no usable values or the
        coefficient cannot be computed.
        """
        X1, X2 = _paired_values(e1, e2, self.indxs)
        if not X1:
            return 1.0
        try:
            return (1.0 - statc.spearmanr(X1, X2)[0]) / 2.
        except Exception:
            # Best effort: any failure in the coefficient computation is
            # reported as the fallback distance 1.0.
            return 1.0


class MahalanobisConstructor(ExamplesDistanceConstructor):
    """ Construct instance of Mahalanobis.
    """

    def __new__(cls, data=None, **argkw):
        # Keyword arguments are forwarded to the base constructor and also
        # stored as attributes on the new object.
        self = ExamplesDistanceConstructor.__new__(cls, **argkw)
        self.__dict__.update(argkw)
        if data:
            # Shortcut: when data is supplied, return the distance measure
            # itself instead of the constructor.
            return self.__call__(data)
        else:
            return self

    # Check attributes a, b, c
    def __call__(self, table, a=None, b=None, c=None, **argkw):
        """Return a :obj:`Mahalanobis` distance fitted to `table`.

        The table is translated to a domain produced by a
        ``DomainContinuizer`` (class ignored, continuous attributes
        normalized by span, multinomial attributes treated with ``NValues``)
        and the pseudo-inverse of the covariance matrix of the translated
        data is stored in the returned object.

        `a`, `b`, `c` and extra keyword arguments are accepted but not used
        by this method.
        """
        # Process data
        dc = Orange.core.DomainContinuizer()
        dc.classTreatment = Orange.core.DomainContinuizer.Ignore
        dc.continuousTreatment = Orange.core.DomainContinuizer.NormalizeBySpan
        dc.multinomialTreatment = Orange.core.DomainContinuizer.NValues

        newdomain = dc(table)
        newtable = table.translate(newdomain)

        # Only the attribute array is needed; the other two items are unused.
        data, _, _ = newtable.to_numpy()

        # rowvar=0: columns are variables; bias=1: normalize by N.
        covariance_matrix = numpy.cov(data, rowvar=0, bias=1)
        # Pseudo-inverse so that a singular covariance matrix does not fail;
        # singular values below rcond * largest are treated as zero.
        inverse_covariance_matrix = linalg.pinv(covariance_matrix, rcond=1e-10)

        return Mahalanobis(domain=newdomain, icm=inverse_covariance_matrix)


class Mahalanobis(ExamplesDistance):
    """`Mahalanobis distance
    <http://en.wikipedia.org/wiki/Mahalanobis_distance>`_"""

    def __init__(self, domain, icm, **argkw):
        self.domain = domain    # domain the instances are converted to
        self.icm = icm          # inverse covariance matrix
        self.__dict__.update(argkw)

    def __call__(self, e1, e2):
        """
        :param e1: data instances.
        :param e2: data instances.

        Returns Mahalanobis distance between e1 and e2. An attribute whose
        value is special in either instance contributes a difference of 0.
        """
        e1 = Orange.data.Instance(self.domain, e1)
        e2 = Orange.data.Instance(self.domain, e2)

        diff = []
        for i in range(len(self.domain.attributes)):
            # Always append exactly one entry per attribute so the vector
            # length matches the dimension of ``self.icm``.
            if e1[i].isSpecial() or e2[i].isSpecial():
                diff.append(0.0)
            else:
                diff.append(e1[i].value - e2[i].value)
        diff = numpy.asmatrix(diff)
        res = diff * self.icm * diff.transpose()
        return res[0, 0] ** 0.5


class PearsonRAbsoluteConstructor(PearsonRConstructor):
    """ Construct an instance of PearsonRAbsolute example distance estimator.
    """
    def __call__(self, data):
        """Return a :obj:`PearsonRAbsolute` over the continuous attributes
        of `data`'s domain."""
        return PearsonRAbsolute(domain=data.domain,
                                indxs=_continuous_indices(data))


class PearsonRAbsolute(PearsonR):
    """ An example distance estimator using absolute value of Pearson
    correlation coefficient.
    """
    def __call__(self, e1, e2):
        """
        Return absolute Pearson's dissimilarity between e1 and e2,
        i.e.

        .. math:: 1 - abs(r)

        where r is Pearson's correlation coefficient.

        Returns 1.0 when the instances share no usable values or the
        coefficient cannot be computed.
        """
        X1, X2 = _paired_values(e1, e2, self.indxs)
        if not X1:
            return 1.0
        try:
            return (1.0 - abs(statc.pearsonr(X1, X2)[0]))
        except Exception:
            # Best effort: any failure in the coefficient computation is
            # reported as the fallback distance 1.0.
            return 1.0


class SpearmanRAbsoluteConstructor(SpearmanRConstructor):
    """ Construct an instance of SpearmanRAbsolute example distance estimator.
    """
    def __call__(self, data):
        """Return a :obj:`SpearmanRAbsolute` over the continuous attributes
        of `data`'s domain."""
        return SpearmanRAbsolute(domain=data.domain,
                                 indxs=_continuous_indices(data))


class SpearmanRAbsolute(SpearmanR):
    """ An example distance estimator using absolute value of Spearman's
    rank correlation coefficient.
    """
    def __call__(self, e1, e2):
        """
        Return absolute Spearman's dissimilarity between e1 and e2,
        i.e.

        .. math:: 1 - abs(r)

        where r is Spearman's correlation coefficient.

        Returns 1.0 when the instances share no usable values or the
        coefficient cannot be computed.
        """
        X1, X2 = _paired_values(e1, e2, self.indxs)
        if not X1:
            return 1.0
        try:
            return (1.0 - abs(statc.spearmanr(X1, X2)[0]))
        except Exception:
            # Best effort: any failure in the coefficient computation is
            # reported as the fallback distance 1.0.
            return 1.0


def distance_matrix(data, distance_constructor, progress_callback=None):
    """ A helper function that computes an :obj:`Orange.core.SymMatrix` of all
    pairwise distances between instances in `data`.

    :param data: A data table
    :type data: :obj:`Orange.data.Table`

    :param distance_constructor: An ExamplesDistanceConstructor instance;
        it is called with `data` to obtain the distance measure.
    :type distance_constructor: :obj:`Orange.distance.ExamplesDistanceConstructor`

    :param progress_callback: Optional callable; at selected pair counts it
        is called with the percentage (0-100, float) of pairs processed.

    :returns: the matrix, with entry ``[i, j]`` assigned for every ``i < j``.
    """
    from Orange.misc import progressBarMilestones as progress_milestones
    size = len(data)
    matrix = Orange.core.SymMatrix(size)
    dist = distance_constructor(data)

    # Number of unordered pairs; size * (size - 1) is always even.
    msize = size * (size - 1) // 2
    milestones = progress_milestones(msize, 100)
    count = 0
    for i in range(size):
        first = data[i]
        for j in range(i + 1, size):
            matrix[i, j] = dist(first, data[j])

            if progress_callback and count in milestones:
                progress_callback(100.0 * count / msize)
            count += 1

    return matrix
Note: See TracChangeset
for help on using the changeset viewer.