Changeset 10613:f34d92efb695 in orange
 Timestamp:
 03/22/12 10:40:15 (2 years ago)
 Branch:
 default
 Children:
 10614:be7cf2cf74c8, 10644:68e7c096ec40, 10738:4f410edd688e
 Parents:
 10612:85f3705b313d (diff), 10609:4fe22b62beac (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.  Location:
 Orange
 Files:

 2 deleted
 1 edited
Legend:
 Unmodified
 Added
 Removed

Orange/projection/linear.py
r10604 r10613 8 8 import numpy 9 9 10 from Orange import classification, data, feature 11 from Orange.classification import knn 12 10 13 from Orange.data.preprocess.scaling import ScaleLinProjData 11 14 from Orange.orng import orngVisFuncts as visfuncts 12 from Orange.utils import deprecated_keywords 13 from Orange.utils import deprecated_members 15 from Orange.utils import deprecated_keywords, deprecated_members 14 16 15 17 try: 16 18 import numpy.ma as MA 17 except :19 except ImportError: 18 20 import numpy.core.ma as MA 21 22 class enum(int): 23 def set_name(self, name): 24 self.name = name 25 return self 26 27 def __repr__(self): 28 return getattr(self, "name", str(self)) 29 19 30 20 31 #implementation … … 23 34 LDA_IMPLEMENTATION = 2 24 35 25 LAW_LINEAR = 036 LAW_LINEAR = enum(0).set_name("LAW_LINEAR") 26 37 LAW_SQUARE = 1 27 38 LAW_GAUSSIAN = 2 … … 36 47 return x / numpy.linalg.norm(x) 37 48 49 38 50 def center(matrix): 39 '''centers all variables, i.e. subtracts averages in colomns40 and divides them by their standard deviations '''41 n, m = numpy.shape(matrix)42 return (matrix  numpy.multiply(matrix.mean(axis =0),43 numpy.ones((n, m))))/numpy.std(matrix,44 axis =0)51 """centers all variables, i.e. subtracts averages in colomns 52 and divides them by their standard deviations""" 53 n, m = numpy.shape(matrix) 54 return (matrix  numpy.multiply(matrix.mean(axis=0), 55 numpy.ones((n, m)))) / numpy.std(matrix, 56 axis=0) 45 57 46 58 … … 51 63 to make iterative optimization and visualization possible. It can, however, still be used as any other 52 64 projection optimizer by calling (:obj:`~Orange.projection.linear.FreeViz.__call__`) it. 53 54 .. attribute:: attract_g55 56 Coefficient for the attractive forces. By increasing or decreasing the ratio57 between :obj:`attract_g` and :obj:`repel_g`, you can make one kind of the58 forces stronger. Default: 1.59 60 .. attribute:: repel_g61 62 Coefficient for the repulsive forces. By increasing or decreasing the ratio63 between :obj:`attract_g` and :obj:`repel_g`, you can make one kind of the64 forces stronger. Default: 1.65 66 .. attribute:: force_balancing67 68 If set (default is False), the forces are balanced so that the total sum of69 the attractive equals the total of repulsive, before they are multiplied by70 the above factors. (By our experience, this gives bad results so you may71 want to leave this alone.)72 73 .. attribute:: law74 75 Can be LAW_LINEAR, LAW_SQUARE, LAW_GAUSSIAN, LAW_KNN or LAW_LINEAR_PLUS.76 Default is LAW_LINEAR, which means that the attractive forces increase77 linearly by the distance and the repulsive forces are inversely78 proportional to the distance. LAW_SQUARE would make them rise or fall with79 the square of the distance, LAW_GAUSSIAN is based on a kind of80 loglikelihood estimation, LAW_KNN tries to directly optimize the81 classification accuracy of the kNN classifier in the projection space, and82 in LAW_LINEAR_PLUS both forces rise with the square of the distance,83 yielding a method that is somewhat similar to PCA. We found the first law84 perform the best, with the second to not far behind.85 86 .. attribute:: force_sigma87 88 The sigma to be used in LAW_GAUSSIAN and LAW_KNN.89 90 .. attribute:: mirror_symmetry91 92 If enabled, it keeps the projection of the second attribute on the upper93 side of the graph (the first is always on the righthand xaxis). This is94 useful when comparing whether two projections are the same, but has no95 effect on the projection's clarity or its classification accuracy.96 97 There are some more, undescribed, methods of a more internal nature.98 99 65 """ 100 101 def __init__(self, graph = None): 66 67 #: Coefficient for the attractive forces. By increasing or decreasing the ratio 68 #: between :obj:`attract_g` and :obj:`repel_g`, you can make one kind of the 69 #: forces stronger. 70 attract_g = 1. 71 72 #: Coefficient for the repulsive forces. By increasing or decreasing the ratio 73 #: between :obj:`attract_g` and :obj:`repel_g`, you can make one kind of the 74 #: forces stronger. 75 repel_g = 1. 76 77 #: If set, the forces are balanced so that the total sum of 78 #: the attractive equals the total of repulsive, before they are multiplied by 79 #: the above factors. (By our experience, this gives bad results so you may 80 #: want to leave this alone.) 81 force_balancing = False 82 83 #: Can be LAW_LINEAR, LAW_SQUARE, LAW_GAUSSIAN, LAW_KNN or LAW_LINEAR_PLUS. 84 #: Default is LAW_LINEAR, which means that the attractive forces increase 85 #: linearly by the distance and the repulsive forces are inversely 86 #: proportional to the distance. LAW_SQUARE would make them rise or fall with 87 #: the square of the distance, LAW_GAUSSIAN is based on a kind of 88 #: loglikelihood estimation, LAW_KNN tries to directly optimize the 89 #: classification accuracy of the kNN classifier in the projection space, and 90 #: in LAW_LINEAR_PLUS both forces rise with the square of the distance, 91 #: yielding a method that is somewhat similar to PCA. We found the first law 92 #: perform the best, with the second to not far behind. 93 law = LAW_LINEAR 94 95 #: The sigma to be used in LAW_GAUSSIAN and LAW_KNN. 96 force_sigma = 1. 97 98 #: If enabled, it keeps the projection of the second attribute on the upper 99 #: side of the graph (the first is always on the righthand xaxis). This is 100 #: useful when comparing whether two projections are the same, but has no 101 #: effect on the projection's clarity or its classification accuracy. 102 103 #: There are some more, undescribed, methods of a more internal nature. 104 mirror_symmetry = True 105 106 implementation = FAST_IMPLEMENTATION 107 restrain = False 108 use_generalized_eigenvectors = True 109 110 # s2n heuristics parameters 111 steps_before_update = 10 112 s2n_spread = 5 113 s2n_place_attributes = 50 114 s2n_mix_data = None 115 auto_set_parameters = 1 116 class_permutation_list = None 117 attrs_num = (5, 10, 20, 30, 50, 70, 100, 150, 200, 300, 500, 750, 1000) 118 119 cancel_optimization = False 120 121 122 def __init__(self, graph=None): 102 123 if not graph: 103 124 graph = ScaleLinProjData() 104 125 self.graph = graph 105 106 self.implementation = 0107 self.attract_g = 1.0108 self.repel_g = 1.0109 self.law = LAW_LINEAR110 self.restrain = 0111 self.force_balancing = 0112 self.force_sigma = 1.0113 self.mirror_symmetry = 1114 self.use_generalized_eigenvectors = 1115 116 # s2n heuristics parameters117 self.steps_before_update = 10118 self.s2n_spread = 5119 self.s2n_place_attributes = 50120 self.s2n_mix_data = None121 self.auto_set_parameters = 1122 self.class_permutation_list = None123 self.attrs_num = [5, 10, 20, 30, 50, 70, 100, 150, 200, 300, 500, 750,124 1000]125 126 126 127 def __call__(self, dataset=None): … … 157 158 domain = graph.data_domain 158 159 if len(domain) > len(self.graph.anchor_data): 159 domain = Orange.data.Domain([graph.data_domain[a]160 for _,_,a in self.graph.anchor_data],161 graph.data_domain.class_var)162 163 return Orange.projection.linear.Projector(input_domain =domain,164 mean =Xm,165 stdev =stdev,166 standardize =False,167 projection =U)160 domain = data.Domain([graph.data_domain[a] 161 for _, _, a in self.graph.anchor_data], 162 graph.data_domain.class_var) 163 164 return Projector(input_domain=domain, 165 mean=Xm, 166 stdev=stdev, 167 standardize=False, 168 projection=U) 168 169 169 170 … … 171 172 self.s2n_mix_data = None 172 173 self.class_permutation_list = None 173 174 174 175 clearData = clear_data 175 176 176 177 def set_statusbar_text(self, *args): 177 178 pass 178 179 179 180 setStatusBarText = set_statusbar_text 180 181 181 182 def show_all_attributes(self): 182 self.graph.anchor_data = [(0, 0, a.name)183 183 self.graph.anchor_data = [(0, 0, a.name) 184 for a in self.graph.data_domain.attributes] 184 185 self.radial_anchors() 185 186 186 187 showAllAttributes = show_all_attributes 187 188 … … 200 201 if not attr_list: 201 202 return 202 if hasattr(self, "parentName") and "3d" in self.parentName.lower():203 if "3d" in getattr(self, "parentName", "").lower(): 203 204 self.graph.anchor_data = self.graph.create_anchors(len(attr_list), attr_list) 204 205 return 205 phi = 2 *math.pi/len(attr_list)206 self.graph.anchor_data = [(math.cos(i *phi), math.sin(i*phi), a)207 206 phi = 2 * math.pi / len(attr_list) 207 self.graph.anchor_data = [(math.cos(i * phi), math.sin(i * phi), a) 208 for i, a in enumerate(attr_list)] 208 209 209 210 radialAnchors = radial_anchors … … 219 220 if not attr_list: 220 221 return 221 if "3d" in self.parentName.lower():222 if "3d" in getattr(self, "parentName", "").lower(): 222 223 if self.restrain == 0: 223 224 def ranch(i, label): 224 r = 0.3 +0.7*random.random()225 phi = 2 *math.pi*random.random()226 theta = math.pi *random.random()227 return (r *math.sin(theta)*math.cos(phi),228 r *math.sin(theta)*math.sin(phi),229 r *math.cos(theta),225 r = 0.3 + 0.7 * random.random() 226 phi = 2 * math.pi * random.random() 227 theta = math.pi * random.random() 228 return (r * math.sin(theta) * math.cos(phi), 229 r * math.sin(theta) * math.sin(phi), 230 r * math.cos(theta), 230 231 label) 231 232 elif self.restrain == 1: 232 233 def ranch(i, label): 233 phi = 2 *math.pi*random.random()234 theta = math.pi *random.random()234 phi = 2 * math.pi * random.random() 235 theta = math.pi * random.random() 235 236 r = 1. 236 return (r *math.sin(theta)*math.cos(phi),237 r *math.sin(theta)*math.sin(phi),238 r *math.cos(theta),237 return (r * math.sin(theta) * math.cos(phi), 238 r * math.sin(theta) * math.sin(phi), 239 r * math.cos(theta), 239 240 label) 240 241 else: 241 242 self.graph.anchor_data = self.graph.create_anchors(len(attr_list), attr_list) 243 242 244 def ranch(i, label): 243 r = 0.3 +0.7*random.random()244 return (r *self.graph.anchor_data[i][0],245 r *self.graph.anchor_data[i][1],246 r *self.graph.anchor_data[i][2],245 r = 0.3 + 0.7 * random.random() 246 return (r * self.graph.anchor_data[i][0], 247 r * self.graph.anchor_data[i][1], 248 r * self.graph.anchor_data[i][2], 247 249 label) 248 250 … … 250 252 251 253 if not self.restrain == 1: 252 maxdist = math.sqrt(max([x[0] **2+x[1]**2+x[2]**2 for x in anchors]))253 anchors = [(x[0] /maxdist, x[1]/maxdist, x[2]/maxdist, x[3]) for x in anchors]254 maxdist = math.sqrt(max([x[0] ** 2 + x[1] ** 2 + x[2] ** 2 for x in anchors])) 255 anchors = [(x[0] / maxdist, x[1] / maxdist, x[2] / maxdist, x[3]) for x in anchors] 254 256 255 257 self.graph.anchor_data = anchors … … 258 260 if self.restrain == 0: 259 261 def ranch(i, label): 260 r = 0.3 +0.7*random.random()261 phi = 2 *math.pi*random.random()262 return (r*math.cos(phi), r*math.sin(phi), label)262 r = 0.3 + 0.7 * random.random() 263 phi = 2 * math.pi * random.random() 264 return r * math.cos(phi), r * math.sin(phi), label 263 265 264 266 elif self.restrain == 1: 265 267 def ranch(i, label): 266 phi = 2 *math.pi*random.random()267 return (math.cos(phi), math.sin(phi), label)268 phi = 2 * math.pi * random.random() 269 return math.cos(phi), math.sin(phi), label 268 270 269 271 else: 270 272 def ranch(i, label): 271 r = 0.3 +0.7*random.random()272 phi = 2 *math.pi * i / max(1, len(attr_list))273 return (r*math.cos(phi), r*math.sin(phi), label)273 r = 0.3 + 0.7 * random.random() 274 phi = 2 * math.pi * i / max(1, len(attr_list)) 275 return r * math.cos(phi), r * math.sin(phi), label 274 276 275 277 anchors = [ranch(*a) for a in enumerate(attr_list)] 276 278 277 279 if not self.restrain == 1: 278 maxdist = math.sqrt(max([x[0] **2+x[1]**2 for x in anchors]))279 anchors = [(x[0] /maxdist, x[1]/maxdist, x[2]) for x in anchors]280 maxdist = math.sqrt(max([x[0] ** 2 + x[1] ** 2 for x in anchors])) 281 anchors = [(x[0] / maxdist, x[1] / maxdist, x[2]) for x in anchors] 280 282 281 283 if not self.restrain == 2 and self.mirror_symmetry: 282 # ###Need to rotate and mirror here284 #TODO: Need to rotate and mirror here 283 285 pass 284 286 … … 288 290 289 291 @deprecated_keywords({"singleStep": "single_step"}) 290 def optimize_separation(self, steps = 10, single_step =False, distances=None):292 def optimize_separation(self, steps=10, single_step=False, distances=None): 291 293 """ 292 294 Optimize the class separation. If you did not change any of the settings … … 312 314 if self.implementation == FAST_IMPLEMENTATION and not hasattr(self, '_use_3D'): # TODO 313 315 return self.optimize_fast_separation(steps, single_step, distances) 316 elif self.implementation == LDA_IMPLEMENTATION: 317 impl = self.optimize_lda_separation 318 else: 319 impl = self.optimize_slow_separation 314 320 315 321 if self.__class__ != FreeViz: from PyQt4.QtGui import qApp 316 322 if single_step: steps = 1 317 if self.implementation == SLOW_IMPLEMENTATION:318 impl = self.optimize_slow_separation319 elif self.implementation == LDA_IMPLEMENTATION:320 impl = self.optimize_lda_separation321 323 xanchors = None 322 324 yanchors = None … … 337 339 return 338 340 self.graph.anchor_data, (xanchors, yanchors, zanchors) = impl(attr_indices, 339 self.graph.anchor_data,340 xanchors,341 yanchors,342 zanchors)341 self.graph.anchor_data, 342 xanchors, 343 yanchors, 344 zanchors) 343 345 if self.__class__ != FreeViz: qApp.processEvents() 344 346 if hasattr(self.graph, "updateGraph"): self.graph.updateData() … … 349 351 return 350 352 self.graph.anchor_data, (xanchors, yanchors) = impl(attr_indices, 351 self.graph.anchor_data,352 xanchors,353 yanchors)353 self.graph.anchor_data, 354 xanchors, 355 yanchors) 354 356 if self.__class__ != FreeViz: qApp.processEvents() 355 357 if hasattr(self.graph, "updateGraph"): self.graph.updateData() … … 358 360 359 361 @deprecated_keywords({"singleStep": "single_step"}) 360 def optimize_fast_separation(self, steps = 10, single_step =False, distances=None):362 def optimize_fast_separation(self, steps=10, single_step=False, distances=None): 361 363 optimizer = [orangeom.optimizeAnchors, orangeom.optimizeAnchorsRadial, 362 364 orangeom.optimizeAnchorsR][self.restrain] … … 370 372 371 373 valid_data = self.graph.get_valid_list(attr_indices) 372 n_valid = sum(valid_data) 374 n_valid = sum(valid_data) 373 375 if not n_valid: 374 376 return 0 375 377 376 data = numpy.compress(valid_data, self.graph.no_jittering_scaled_data,377 axis=1)378 data = numpy.transpose(data).tolist()378 dataset = numpy.compress(valid_data, self.graph.no_jittering_scaled_data, 379 axis=1) 380 dataset = numpy.transpose(dataset).tolist() 379 381 if self.__class__ != FreeViz: from PyQt4.QtGui import qApp 380 382 … … 391 393 classes[r, c] = distances[ro, co] 392 394 c += 1 393 r += 1 395 r += 1 394 396 else: 395 397 classes = distances … … 398 400 self.graph.original_data[self.graph.data_class_index]).tolist() 399 401 while 1: 400 self.graph.anchor_data = optimizer(data , classes,401 self.graph.anchor_data,402 attr_indices,403 attractG =self.attract_g,404 repelG =self.repel_g,405 law =self.law,406 sigma2 =self.force_sigma,407 dynamicBalancing =self.force_balancing,408 steps =steps,409 normalizeExamples =self.graph.normalize_examples,410 contClass =2 if distances411 else self.graph.data_has_continuous_class,412 mirrorSymmetry =self.mirror_symmetry)402 self.graph.anchor_data = optimizer(dataset, classes, 403 self.graph.anchor_data, 404 attr_indices, 405 attractG=self.attract_g, 406 repelG=self.repel_g, 407 law=self.law, 408 sigma2=self.force_sigma, 409 dynamicBalancing=self.force_balancing, 410 steps=steps, 411 normalizeExamples=self.graph.normalize_examples, 412 contClass=2 if distances 413 else self.graph.data_has_continuous_class, 414 mirrorSymmetry=self.mirror_symmetry) 413 415 needed_steps += steps 414 416 … … 420 422 self.graph.updateData() 421 423 422 positions = positions[49:] +[numpy.array([x[:2] for x423 in self.graph.anchor_data])]424 if len(positions) ==50:425 m = max(numpy.sum((positions[0] positions[49])**2), 0)424 positions = positions[49:] + [numpy.array([x[:2] for x 425 in self.graph.anchor_data])] 426 if len(positions) == 50: 427 m = max(numpy.sum((positions[0]  positions[49]) ** 2), 0) 426 428 if m < 1e3: break 427 429 if single_step or (self.__class__ != FreeViz … … 436 438 "XAnchors": "xanchors", 437 439 "YAnchors": "yanchors"}) 438 def optimize_lda_separation(self, attr_indices, anchor_data, xanchors = None, yanchors =None):440 def optimize_lda_separation(self, attr_indices, anchor_data, xanchors=None, yanchors=None): 439 441 if (not self.graph.have_data or len(self.graph.raw_data) == 0 440 or not self.graph.data_has_discrete_class): 442 or not self.graph.data_has_discrete_class): 441 443 return anchor_data, (xanchors, yanchors) 442 444 class_count = len(self.graph.data_domain.classVar.values) … … 444 446 selected_data = numpy.compress(valid_data, 445 447 numpy.take(self.graph.no_jittering_scaled_data, 446 attr_indices, axis =0),447 axis =1)448 449 if xanchors ==None:448 attr_indices, axis=0), 449 axis=1) 450 451 if xanchors is None: 450 452 xanchors = numpy.array([a[0] for a in anchor_data], numpy.float) 451 if yanchors ==None:453 if yanchors is None: 452 454 yanchors = numpy.array([a[1] for a in anchor_data], numpy.float) 453 455 454 456 trans_proj_data = self.graph.create_projection_as_numeric_array(attr_indices, 455 validData =valid_data,456 xanchors =xanchors,457 yanchors =yanchors,458 scaleFactor =self.graph.scale_factor,459 normalize =self.graph.normalize_examples,460 useAnchorData =1)461 if trans_proj_data ==None:457 validData=valid_data, 458 xanchors=xanchors, 459 yanchors=yanchors, 460 scaleFactor=self.graph.scale_factor, 461 normalize=self.graph.normalize_examples, 462 useAnchorData=1) 463 if trans_proj_data is None: 462 464 return anchor_data, (xanchors, yanchors) 463 465 … … 471 473 xpos = numpy.compress(ind, x_positions) 472 474 ypos = numpy.compress(ind, y_positions) 473 xave = numpy.sum(xpos) /len(xpos)474 yave = numpy.sum(ypos) /len(ypos)475 xave = numpy.sum(xpos) / len(xpos) 476 yave = numpy.sum(ypos) / len(ypos) 475 477 averages.append((xave, yave)) 476 478 … … 478 480 x_center_vector = numpy.sum(x_positions) / len(x_positions) 479 481 y_center_vector = numpy.sum(y_positions) / len(y_positions) 480 center_vector_length = math.sqrt(x_center_vector*x_center_vector +481 y_center_vector*y_center_vector)482 482 483 483 mean_destination_vectors = [] 484 484 485 485 for i in range(class_count): 486 xdir = 0.0; ydir = 0.0; rs = 0.0486 xdir, ydir = 0., 0. 487 487 for j in range(class_count): 488 if i ==j: continue489 r = math.sqrt((averages[i][0]  averages[j][0]) **2 +490 (averages[i][1]  averages[j][1]) **2)488 if i == j: continue 489 r = math.sqrt((averages[i][0]  averages[j][0]) ** 2 + 490 (averages[i][1]  averages[j][1]) ** 2) 491 491 if r == 0.0: 492 xdir += math.cos((i/float(class_count))*2*math.pi) 493 ydir += math.sin((i/float(class_count))*2*math.pi) 494 r = 0.0001 492 xdir += math.cos((i / float(class_count)) * 2 * math.pi) 493 ydir += math.sin((i / float(class_count)) * 2 * math.pi) 495 494 else: 496 xdir += (1/r**3) * ((averages[i][0]  averages[j][0])) 497 ydir += (1/r**3) * ((averages[i][1]  averages[j][1])) 498 #rs += 1/r 499 #actualDirAmpl = math.sqrt(xDir**2 + yDir**2) 500 #s = abs(xDir)+abs(yDir) 501 #xDir = rs * (xDir/s) 502 #yDir = rs * (yDir/s) 495 xdir += (1 / r ** 3) * ((averages[i][0]  averages[j][0])) 496 ydir += (1 / r ** 3) * ((averages[i][1]  averages[j][1])) 497 503 498 mean_destination_vectors.append((xdir, ydir)) 504 499 505 506 maxlength = math.sqrt(max([x**2 + y**2 for (x,y) 500 maxlength = math.sqrt(max([x ** 2 + y ** 2 for (x, y) 507 501 in mean_destination_vectors])) 508 mean_destination_vectors = [(x/(2*maxlength), y/(2*maxlength)) for (x,y) 509 in mean_destination_vectors] # normalize destination vectors to some normal values 510 mean_destination_vectors = [(mean_destination_vectors[i][0]+averages[i][0], 511 mean_destination_vectors[i][1]+averages[i][1]) 512 for i in range(len(mean_destination_vectors))] # add destination vectors to the class averages 513 #mean_destination_vectors = [(x + x_center_vector/5, y + y_center_vector/5) for (x,y) in mean_destination_vectors] # center mean values 502 mean_destination_vectors = [(x / (2 * maxlength), y / (2 * maxlength)) 503 for (x, y) in mean_destination_vectors] #normalize destination vectors to some normal values 504 mean_destination_vectors = [(mean_destination_vectors[i][0] + averages[i][0], 505 mean_destination_vectors[i][1] + averages[i][1]) 506 for i in range(len(mean_destination_vectors))] # add destination vectors to the class averages 514 507 mean_destination_vectors = [(x + x_center_vector, y + y_center_vector) 515 for (x,y) in mean_destination_vectors] # center mean values508 for (x, y) in mean_destination_vectors] # center mean values 516 509 517 510 fxs = numpy.zeros(len(x_positions), numpy.float) # forces … … 520 513 for c in range(class_count): 521 514 ind = (classData == c) 522 numpy.putmask(fxs, ind, mean_destination_vectors[c][0] x_positions)523 numpy.putmask(fys, ind, mean_destination_vectors[c][1] y_positions)515 numpy.putmask(fxs, ind, mean_destination_vectors[c][0]  x_positions) 516 numpy.putmask(fys, ind, mean_destination_vectors[c][1]  y_positions) 524 517 525 518 # compute gradient for all anchors … … 530 523 531 524 m = max(max(abs(gxs)), max(abs(gys))) 532 gxs /= (20*m); gys /= (20*m) 525 gxs /= (20 * m) 526 gys /= (20 * m) 533 527 534 528 newxanchors = xanchors + gxs … … 536 530 537 531 # normalize so that the anchor most far away will lie on the circle 538 m = math.sqrt(max(newxanchors **2 + newyanchors**2))532 m = math.sqrt(max(newxanchors ** 2 + newyanchors ** 2)) 539 533 newxanchors /= m 540 534 newyanchors /= m 541 535 542 #self.parentWidget.updateGraph()543 544 """545 for a in range(len(anchor_data)):546 x = anchor_data[a][0]; y = anchor_data[a][1];547 self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [x, x+gxs[a]], yData = [y, y+gys[a]], forceFilledSymbols = 1, lineWidth=3)548 549 for i in range(class_count):550 self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [averages[i][0], mean_destination_vectors[i][0]], yData = [averages[i][1], mean_destination_vectors[i][1]], forceFilledSymbols = 1, lineWidth=3)551 self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, xData = [averages[i][0], averages[i][0]], yData = [averages[i][1], averages[i][1]], forceFilledSymbols = 1, lineWidth=5)552 """553 #self.parentWidget.graph.repaint()554 #self.graph.anchor_data = [(newxanchors[i], newyanchors[i], anchor_data[i][2]) for i in range(len(anchor_data))]555 #self.graph.updateData(attrs, 0)556 536 return [(newxanchors[i], newyanchors[i], anchor_data[i][2]) 557 537 for i in range(len(anchor_data))], (newxanchors, newyanchors) 558 538 559 539 optimize_LDA_Separation = optimize_lda_separation … … 563 543 "XAnchors": "xanchors", 564 544 "YAnchors": "yanchors"}) 565 def optimize_slow_separation(self, attr_indices, anchor_data, xanchors = None, yanchors =None):545 def optimize_slow_separation(self, attr_indices, anchor_data, xanchors=None, yanchors=None): 566 546 if (not self.graph.have_data or len(self.graph.raw_data) == 0 567 or not self.graph.data_has_discrete_class): 547 or not self.graph.data_has_discrete_class): 568 548 return anchor_data, (xanchors, yanchors) 569 549 valid_data = self.graph.get_valid_list(attr_indices) 570 550 selected_data = numpy.compress(valid_data, numpy.take(self.graph.no_jittering_scaled_data, 571 551 attr_indices, 572 axis =0),573 axis =1)574 575 if xanchors ==None:552 axis=0), 553 axis=1) 554 555 if xanchors is None: 576 556 xanchors = numpy.array([a[0] for a in anchor_data], numpy.float) 577 if yanchors ==None:557 if yanchors is None: 578 558 yanchors = numpy.array([a[1] for a in anchor_data], numpy.float) 579 559 580 560 trans_proj_data = self.graph.create_projection_as_numeric_array(attr_indices, 581 validData =valid_data,582 xanchors =xanchors,583 yanchors =yanchors,584 scaleFactor =self.graph.scale_factor,585 normalize =self.graph.normalize_examples,586 useAnchorData =1)587 if trans_proj_data ==None:561 validData=valid_data, 562 xanchors=xanchors, 563 yanchors=yanchors, 564 scaleFactor=self.graph.scale_factor, 565 normalize=self.graph.normalize_examples, 566 useAnchorData=1) 567 if trans_proj_data is None: 588 568 return anchor_data, (xanchors, yanchors) 589 569 590 570 proj_data = numpy.transpose(trans_proj_data) 591 x_positions = proj_data[0]; x_positions2 = numpy.array(x_positions) 592 y_positions = proj_data[1]; y_positions2 = numpy.array(y_positions) 593 class_data = proj_data[2] ; class_data2 = numpy.array(class_data) 571 x_positions = proj_data[0] 572 x_positions2 = numpy.array(x_positions) 573 y_positions = proj_data[1] 574 y_positions2 = numpy.array(y_positions) 575 class_data = proj_data[2] 576 class_data2 = numpy.array(class_data) 594 577 595 578 fxs = numpy.zeros(len(x_positions), numpy.float) # forces 596 579 fys = numpy.zeros(len(x_positions), numpy.float) 597 gxs = numpy.zeros(len(anchor_data), numpy.float) # gradients598 gys = numpy.zeros(len(anchor_data), numpy.float)599 580 600 581 rotate_array = range(len(x_positions)) 601 582 rotate_array = rotate_array[1:] + [0] 602 for i in range(len(x_positions) 1):583 for i in range(len(x_positions)  1): 603 584 x_positions2 = numpy.take(x_positions2, rotate_array) 604 585 y_positions2 = numpy.take(y_positions2, rotate_array) … … 606 587 dx = x_positions2  x_positions 607 588 dy = y_positions2  y_positions 608 rs2 = dx **2 + dy**2589 rs2 = dx ** 2 + dy ** 2 609 590 rs2 += numpy.where(rs2 == 0.0, 0.0001, 0.0) # replace zeros to avoid divisions by zero 610 591 rs = numpy.sqrt(rs2) … … 612 593 F = numpy.zeros(len(x_positions), numpy.float) 613 594 classDiff = numpy.where(class_data == class_data2, 1, 0) 614 numpy.putmask(F, classDiff, 150 *self.attract_g*rs2)615 numpy.putmask(F, 1 classDiff, self.repel_g/rs2)595 numpy.putmask(F, classDiff, 150 * self.attract_g * rs2) 596 numpy.putmask(F, 1  classDiff, self.repel_g / rs2) 616 597 fxs += F * dx / rs 617 598 fys += F * dy / rs … … 624 605 625 606 m = max(max(abs(gxs)), max(abs(gys))) 626 gxs /= (20*m); gys /= (20*m) 607 gxs /= (20 * m) 608 gys /= (20 * m) 627 609 628 610 newxanchors = xanchors + gxs … … 630 612 631 613 # normalize so that the anchor most far away will lie on the circle 632 m = math.sqrt(max(newxanchors **2 + newyanchors**2))614 m = math.sqrt(max(newxanchors ** 2 + newyanchors ** 2)) 633 615 newxanchors /= m 634 616 newyanchors /= m 635 617 return [(newxanchors[i], newyanchors[i], anchor_data[i][2]) 636 618 for i in range(len(anchor_data))], (newxanchors, newyanchors) 637 619 638 620 optimize_SLOW_Separation = optimize_slow_separation … … 643 625 "XAnchors": "xanchors", 644 626 "YAnchors": "yanchors"}) 645 def optimize_lda_separation_3D(self, attr_indices, anchor_data, xanchors = None, yanchors = None, zanchors =None):627 def optimize_lda_separation_3D(self, attr_indices, anchor_data, xanchors=None, yanchors=None, zanchors=None): 646 628 if (not self.graph.have_data or len(self.graph.raw_data) == 0 647 or not self.graph.data_has_discrete_class): 629 or not self.graph.data_has_discrete_class): 648 630 return anchor_data, (xanchors, yanchors, zanchors) 649 631 class_count = len(self.graph.data_domain.classVar.values) … … 651 633 selected_data = numpy.compress(valid_data, 652 634 numpy.take(self.graph.no_jittering_scaled_data, 653 attr_indices, axis =0),654 axis =1)655 656 if xanchors ==None:635 attr_indices, axis=0), 636 axis=1) 637 638 if xanchors is None: 657 639 xanchors = numpy.array([a[0] for a in anchor_data], numpy.float) 658 if yanchors ==None:640 if yanchors is None: 659 641 yanchors = numpy.array([a[1] for a in anchor_data], numpy.float) 660 if zanchors ==None:642 if zanchors is None: 661 643 zanchors = numpy.array([a[2] for a in anchor_data], numpy.float) 662 644 663 645 trans_proj_data = self.graph.create_projection_as_numeric_array(attr_indices, 664 validData =valid_data,665 xanchors =xanchors,666 yanchors =yanchors,667 zanchors =zanchors,668 scaleFactor =self.graph.scale_factor,669 normalize =self.graph.normalize_examples,670 useAnchorData =1)671 if trans_proj_data ==None:646 validData=valid_data, 647 xanchors=xanchors, 648 yanchors=yanchors, 649 zanchors=zanchors, 650 scaleFactor=self.graph.scale_factor, 651 normalize=self.graph.normalize_examples, 652 useAnchorData=1) 653 if trans_proj_data is None: 672 654 return anchor_data, (xanchors, yanchors, zanchors) 673 655 … … 684 666 ypos = numpy.compress(ind, y_positions) 685 667 zpos = numpy.compress(ind, z_positions) 686 xave = numpy.sum(xpos) /len(xpos)687 yave = numpy.sum(ypos) /len(ypos)688 zave = numpy.sum(zpos) /len(zpos)668 xave = numpy.sum(xpos) / len(xpos) 669 yave = numpy.sum(ypos) / len(ypos) 670 zave = numpy.sum(zpos) / len(zpos) 689 671 averages.append((xave, yave, zave)) 690 672 … … 692 674 x_center_vector = numpy.sum(x_positions) / len(x_positions) 693 675 y_center_vector = numpy.sum(y_positions) / len(y_positions) 694 z_center_vector = numpy.sum(z_positions) / len(z_positions)695 center_vector_length = math.sqrt(x_center_vector*x_center_vector +696 y_center_vector*y_center_vector +697 z_center_vector*z_center_vector)698 676 699 677 mean_destination_vectors = [] 700 678 701 679 for i in range(class_count): 702 xdir = 0.0; ydir = 0.0; zdir = 0.0; rs = 0.0680 xdir, ydir = 0., 0. 703 681 for j in range(class_count): 704 if i ==j: continue705 r = math.sqrt((averages[i][0]  averages[j][0]) **2 +706 (averages[i][1]  averages[j][1]) **2)682 if i == j: continue 683 r = math.sqrt((averages[i][0]  averages[j][0]) ** 2 + 684 (averages[i][1]  averages[j][1]) ** 2) 707 685 if r == 0.0: 708 xdir += math.cos((i/float(class_count))*2*math.pi) 709 ydir += math.sin((i/float(class_count))*2*math.pi) 710 r = 0.0001 686 xdir += math.cos((i / float(class_count)) * 2 * math.pi) 687 ydir += math.sin((i / float(class_count)) * 2 * math.pi) 711 688 else: 712 xdir += (1/r**3) * ((averages[i][0]  averages[j][0])) 713 ydir += (1/r**3) * ((averages[i][1]  averages[j][1])) 714 #rs += 1/r 715 #actualDirAmpl = math.sqrt(xDir**2 + yDir**2) 716 #s = abs(xDir)+abs(yDir) 717 #xDir = rs * (xDir/s) 718 #yDir = rs * (yDir/s) 689 xdir += (1 / r ** 3) * ((averages[i][0]  averages[j][0])) 690 ydir += (1 / r ** 3) * ((averages[i][1]  averages[j][1])) 691 719 692 mean_destination_vectors.append((xdir, ydir)) 720 693 721 722 maxlength = math.sqrt(max([x**2 + y**2 for (x,y) 694 maxlength = math.sqrt(max([x ** 2 + y ** 2 for (x, y) 723 695 in mean_destination_vectors])) 724 mean_destination_vectors = [(x /(2*maxlength), y/(2*maxlength)) for (x,y)725 in mean_destination_vectors] # normalize destination vectors to some normal values726 mean_destination_vectors = [(mean_destination_vectors[i][0]+averages[i][0],727 mean_destination_vectors[i][1]+averages[i][1])728 for i in range(len(mean_destination_vectors))] # add destination vectors to the class averages729 #mean_destination_vectors = [(x + x_center_vector/5, y + y_center_vector/5) for (x,y) in mean_destination_vectors] # center mean values696 mean_destination_vectors = [(x / (2 * maxlength), y / (2 * maxlength)) for (x, y) 697 in 698 mean_destination_vectors] # normalize destination vectors to some normal values 699 mean_destination_vectors = [(mean_destination_vectors[i][0] + averages[i][0], 700 mean_destination_vectors[i][1] + averages[i][1]) 701 for i in range(len(mean_destination_vectors))] # add destination vectors to the class averages 730 702 mean_destination_vectors = [(x + x_center_vector, y + y_center_vector) 731 for (x,y) in mean_destination_vectors] # center mean values703 for (x, y) in mean_destination_vectors] # center mean values 732 704 733 705 fxs = numpy.zeros(len(x_positions), numpy.float) # forces … … 736 708 for c in range(class_count): 737 709 ind = (classData == c) 738 numpy.putmask(fxs, ind, mean_destination_vectors[c][0] x_positions)739 numpy.putmask(fys, ind, mean_destination_vectors[c][1] y_positions)710 numpy.putmask(fxs, ind, mean_destination_vectors[c][0]  x_positions) 711 numpy.putmask(fys, ind, mean_destination_vectors[c][1]  y_positions) 740 712 741 713 # compute gradient for all anchors … … 746 718 747 719 m = max(max(abs(gxs)), max(abs(gys))) 748 gxs /= (20*m); gys /= (20*m) 720 gxs /= (20 * m) 721 gys /= (20 * m) 749 722 750 723 newxanchors = xanchors + gxs … … 752 725 753 726 # normalize so that the anchor most far away will lie on the circle 754 m = math.sqrt(max(newxanchors **2 + newyanchors**2))727 m = math.sqrt(max(newxanchors ** 2 + newyanchors ** 2)) 755 728 newxanchors /= m 756 729 newyanchors /= m 757 730 758 #self.parentWidget.updateGraph()759 760 """761 for a in range(len(anchor_data)):762 x = anchor_data[a][0]; y = anchor_data[a][1];763 self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [x, x+gxs[a]], yData = [y, y+gys[a]], forceFilledSymbols = 1, lineWidth=3)764 765 for i in range(class_count):766 self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, symbol = QwtSymbol.NoSymbol, xData = [averages[i][0], mean_destination_vectors[i][0]], yData = [averages[i][1], mean_destination_vectors[i][1]], forceFilledSymbols = 1, lineWidth=3)767 self.parentWidget.graph.addCurve("lll%i" % i, QColor(0, 0, 0), QColor(0, 0, 0), 10, style = QwtPlotCurve.Lines, xData = [averages[i][0], averages[i][0]], yData = [averages[i][1], averages[i][1]], forceFilledSymbols = 1, lineWidth=5)768 """769 #self.parentWidget.graph.repaint()770 #self.graph.anchor_data = [(newxanchors[i], newyanchors[i], anchor_data[i][2]) for i in range(len(anchor_data))]771 #self.graph.updateData(attrs, 0)772 731 return [(newxanchors[i], newyanchors[i], anchor_data[i][2]) 773 732 for i in range(len(anchor_data))], (newxanchors, newyanchors) 774 733 775 734 optimize_LDA_Separation_3D = optimize_lda_separation_3D … … 779 738 "XAnchors": "xanchors", 780 739 "YAnchors": "yanchors"}) 781 def optimize_slow_separation_3D(self, attr_indices, anchor_data, xanchors = None, yanchors = None, zanchors =None):740 def optimize_slow_separation_3D(self, attr_indices, anchor_data, xanchors=None, yanchors=None, zanchors=None): 782 741 if (not self.graph.have_data or len(self.graph.raw_data) == 0 783 or not self.graph.data_has_discrete_class): 742 or not self.graph.data_has_discrete_class): 784 743 return anchor_data, (xanchors, yanchors, zanchors) 785 744 valid_data = self.graph.get_valid_list(attr_indices) 786 745 selected_data = numpy.compress(valid_data, numpy.take(self.graph.no_jittering_scaled_data, 787 746 attr_indices, 788 axis =0),789 axis =1)790 791 if xanchors ==None:747 axis=0), 748 axis=1) 749 750 if xanchors is None: 792 751 xanchors = numpy.array([a[0] for a in anchor_data], numpy.float) 793 if yanchors ==None:752 if yanchors is None: 794 753 yanchors = numpy.array([a[1] for a in anchor_data], numpy.float) 795 if zanchors ==None:754 if zanchors is None: 796 755 zanchors = numpy.array([a[2] for a in anchor_data], numpy.float) 797 756 798 757 trans_proj_data = self.graph.create_projection_as_numeric_array(attr_indices, 799 validData =valid_data,800 XAnchors =xanchors,801 YAnchors =yanchors,802 ZAnchors =zanchors,803 scaleFactor =self.graph.scale_factor,804 normalize =self.graph.normalize_examples,805 useAnchorData =1)806 if trans_proj_data ==None:758 validData=valid_data, 759 XAnchors=xanchors, 760 YAnchors=yanchors, 761 ZAnchors=zanchors, 762 scaleFactor=self.graph.scale_factor, 763 normalize=self.graph.normalize_examples, 764 useAnchorData=1) 765 if trans_proj_data is None: 807 766 return anchor_data, (xanchors, yanchors, zanchors) 808 767 809 768 proj_data = numpy.transpose(trans_proj_data) 810 x_positions = proj_data[0]; x_positions2 = numpy.array(x_positions) 811 y_positions = proj_data[1]; y_positions2 = numpy.array(y_positions) 812 z_positions = proj_data[2]; z_positions2 = numpy.array(z_positions) 813 class_data = proj_data[3]; class_data2 = numpy.array(class_data) 769 x_positions = proj_data[0] 770 x_positions2 = numpy.array(x_positions) 771 y_positions = proj_data[1] 772 y_positions2 = numpy.array(y_positions) 773 z_positions = proj_data[2] 774 z_positions2 = numpy.array(z_positions) 775 class_data = proj_data[3] 776 class_data2 = numpy.array(class_data) 814 777 815 778 fxs = numpy.zeros(len(x_positions), numpy.float) # forces 816 779 fys = numpy.zeros(len(x_positions), numpy.float) 817 780 fzs = numpy.zeros(len(x_positions), numpy.float) 818 gxs = numpy.zeros(len(anchor_data), numpy.float) # gradients819 gys = numpy.zeros(len(anchor_data), numpy.float)820 gzs = numpy.zeros(len(anchor_data), numpy.float)821 781 822 782 rotate_array = range(len(x_positions)) 823 783 rotate_array = rotate_array[1:] + [0] 824 for i in range(len(x_positions) 1):784 for i in range(len(x_positions)  1): 825 785 x_positions2 = numpy.take(x_positions2, rotate_array) 826 786 y_positions2 = numpy.take(y_positions2, rotate_array) … … 830 790 dy = y_positions2  y_positions 831 791 dz = z_positions2  z_positions 832 rs2 = dx **2 + dy**2 + dz**2792 rs2 = dx ** 2 + dy ** 2 + dz ** 2 833 793 rs2 += numpy.where(rs2 == 0.0, 0.0001, 0.0) # replace zeros to avoid divisions by zero 834 794 rs = numpy.sqrt(rs2) … … 836 796 F = numpy.zeros(len(x_positions), numpy.float) 837 797 classDiff = numpy.where(class_data == class_data2, 1, 0) 838 numpy.putmask(F, classDiff, 150 *self.attract_g*rs2)839 numpy.putmask(F, 1 classDiff, self.repel_g/rs2)798 numpy.putmask(F, classDiff, 150 * self.attract_g * rs2) 799 numpy.putmask(F, 1  classDiff, self.repel_g / rs2) 840 800 fxs += F * dx / rs 841 801 fys += F * dy / rs … … 851 811 852 812 m = max(max(abs(gxs)), max(abs(gys)), max(abs(gzs))) 853 gxs /= (20 *m)854 gys /= (20 *m)855 gzs /= (20 *m)813 gxs /= (20 * m) 814 gys /= (20 * m) 815 gzs /= (20 * m) 856 816 857 817 newxanchors = xanchors + gxs … … 860 820 861 821 # normalize so that the anchor most far away will lie on the circle 862 m = math.sqrt(max(newxanchors **2 + newyanchors**2 + newzanchors**2))822 m = math.sqrt(max(newxanchors ** 2 + newyanchors ** 2 + newzanchors ** 2)) 863 823 newxanchors /= m 864 824 newyanchors /= m 865 825 newzanchors /= m 866 826 return [(newxanchors[i], newyanchors[i], newzanchors[i], anchor_data[i][3]) 867 827 for i in range(len(anchor_data))], (newxanchors, newyanchors, newzanchors) 868 828 869 829 optimize_SLOW_Separation_3D = optimize_slow_separation_3D … … 879 839 # place a subset of attributes around the circle. this subset must contain "good" attributes for each of the class values 880 840 @deprecated_keywords({"setAttributeListInRadviz": 881 "set_attribute_list_in_radviz"})882 def s2n_mix_anchors(self, set_attribute_list_in_radviz =1):841 "set_attribute_list_in_radviz"}) 842 def s2n_mix_anchors(self, set_attribute_list_in_radviz=1): 883 843 # check if we have data and a discrete class 884 844 if (not self.graph.have_data or len(self.graph.raw_data) == 0 885 or not self.graph.data_has_discrete_class): 845 or not self.graph.data_has_discrete_class): 886 846 self.set_statusbar_text("S2N only works on data with a discrete class value") 887 847 return 888 848 889 849 # compute the quality of attributes only once 890 if self.s2n_mix_data ==None:850 if self.s2n_mix_data is None: 891 851 ranked_attrs, ranked_attrs_by_class = visfuncts.findAttributeGroupsForRadviz(self.graph.raw_data, 892 852 visfuncts.S2NMeasureMix()) 893 853 self.s2n_mix_data = (ranked_attrs, ranked_attrs_by_class) 894 854 class_count = len(ranked_attrs_by_class) 895 attrs = ranked_attrs[:(self.s2n_place_attributes /class_count)*896 class_count] # select appropriate number of attributes855 attrs = ranked_attrs[:(self.s2n_place_attributes / class_count) * 856 class_count] # select appropriate number of attributes 897 857 else: 898 858 class_count = len(self.s2n_mix_data[1]) 899 attrs = self.s2n_mix_data[0][:(self.s2n_place_attributes /class_count)*900 class_count]901 902 if len(attrs) == 0:859 attrs = self.s2n_mix_data[0][:(self.s2n_place_attributes / class_count) * 860 class_count] 861 862 if not len(attrs): 903 863 self.set_statusbar_text("No discrete attributes found") 904 864 return 0 905 865 906 866 arr = [0] # array that will tell where to put the next attribute 907 for i in range(1,len(attrs)/2): arr += [i,i] 908 909 phi = (2*math.pi*self.s2n_spread)/(len(attrs)*10.0) 910 anchor_data = []; start = [] 911 arr2 = arr[:(len(attrs)/class_count)+1] 867 for i in range(1, len(attrs) / 2): arr += [i, i] 868 869 phi = (2 * math.pi * self.s2n_spread) / (len(attrs) * 10.0) 870 anchor_data = []; 871 start = [] 872 arr2 = arr[:(len(attrs) / class_count) + 1] 912 873 for cls in range(class_count): 913 start_pos = (2 *math.pi*cls)/class_count874 start_pos = (2 * math.pi * cls) / class_count 914 875 if self.class_permutation_list: cls = self.class_permutation_list[cls] 915 876 attrs_cls = attrs[cls::class_count] 916 temp_data = [(arr2[i], math.cos(start_pos + arr2[i] *phi),917 math.sin(start_pos + arr2[i] *phi),877 temp_data = [(arr2[i], math.cos(start_pos + arr2[i] * phi), 878 math.sin(start_pos + arr2[i] * phi), 918 879 attrs_cls[i]) for i in 919 range(min(len(arr2), len(attrs_cls)))]920 start.append(len(anchor_data) + len(arr2) /2) # starting indices for each class value880 range(min(len(arr2), len(attrs_cls)))] 881 start.append(len(anchor_data) + len(arr2) / 2) # starting indices for each class value 921 882 temp_data.sort() 922 883 anchor_data += [(x, y, name) for (i, x, y, name) in temp_data] 923 884 924 anchor_data = anchor_data[(len(attrs) /(2*class_count)):] + anchor_data[:(len(attrs)/(2*class_count))]885 anchor_data = anchor_data[(len(attrs) / (2 * class_count)):] + anchor_data[:(len(attrs) / (2 * class_count))] 925 886 self.graph.anchor_data = anchor_data 926 887 attrNames = [anchor[2] for anchor in anchor_data] … … 939 900 "setAnchors": "set_anchors", 940 901 "percentDataUsed": "percent_data_used"}) 941 def find_projection(self, method, attr_indices = None, set_anchors = 0, percent_data_used =100):902 def find_projection(self, method, attr_indices=None, set_anchors=0, percent_data_used=100): 942 903 if not self.graph.have_data: return 943 904 ai = self.graph.attribute_name_index 944 if attr_indices ==None:905 if attr_indices is None: 945 906 attributes = self.get_shown_attribute_list() 946 907 attr_indices = [ai[label] for label in attributes] 947 if len(attr_indices) == 0: return None908 if not len(attr_indices): return None 948 909 949 910 valid_data = self.graph.get_valid_list(attr_indices) … … 952 913 data_matrix = numpy.compress(valid_data, numpy.take(self.graph.no_jittering_scaled_data, 953 914 attr_indices, 954 axis =0),955 axis =1)915 axis=0), 916 axis=1) 956 917 if self.graph.data_has_class: 957 918 class_array = numpy.compress(valid_data, … … 959 920 960 921 if percent_data_used != 100: 961 indices = Orange.data.sample.SubsetIndices2(self.graph.raw_data,962 1.0(float(percent_data_used)/100.0))922 indices = data.sample.SubsetIndices2(self.graph.raw_data, 923 1.0  (float(percent_data_used) / 100.0)) 963 924 try: 964 data_matrix = numpy.compress(indices, data_matrix, axis =1)965 except :925 data_matrix = numpy.compress(indices, data_matrix, axis=1) 926 except ValueError: 966 927 pass 967 928 if self.graph.data_has_class: … … 972 933 if method == DR_PCA: 973 934 pca = Pca(standardize=False, max_components=ncomps, 974 use_generalized_eigenvectors=0)975 domain = Orange.data.Domain([Orange.feature.Continuous("g%d"%i) for i976 977 pca = pca( Orange.data.Table(domain, data_matrix.T))935 use_generalized_eigenvectors=False) 936 domain = data.Domain([feature.Continuous("g%d" % i) for i 937 in xrange(len(data_matrix))], False) 938 pca = pca(data.Table(domain, data_matrix.T)) 978 939 vals, vectors = pca.eigen_values, pca.projection 979 940 elif method == DR_SPCA and self.graph.data_has_class: 980 941 pca = Spca(standardize=False, max_components=ncomps, 981 use_generalized_eigenvectors=self.use_generalized_eigenvectors)982 domain = Orange.data.Domain([Orange.feature.Continuous("g%d"%i) for i983 in xrange(len(data_matrix))], Orange.feature.Continuous("c"))984 pca = pca( Orange.data.Table(domain,985 numpy.hstack([data_matrix.T, numpy.array(class_array, ndmin=2).T])))942 use_generalized_eigenvectors=self.use_generalized_eigenvectors) 943 domain = data.Domain([feature.Continuous("g%d" % i) for i 944 in xrange(len(data_matrix))], feature.Continuous("c")) 945 pca = pca(data.Table(domain, 946 numpy.hstack([data_matrix.T, numpy.array(class_array, ndmin=2).T]))) 986 947 vals, vectors = pca.eigen_values, pca.projection 987 948 elif method == DR_PLS and self.graph.data_has_class: … … 994 955 if (vectors is None or not vectors.any() or 995 956 False in numpy.isfinite(vectors) or False in numpy.isreal(vectors)): 996 self.set_statusbar_text("Unable to compute anchor positions for the selected attributes") 957 self.set_statusbar_text("Unable to compute anchor positions for the selected attributes") 997 958 return None 998 959 999 960 xanchors = vectors[0] 1000 961 yanchors = vectors[1] 1001 962 1002 963 if ncomps == 3: 1003 964 zanchors = vectors[2] 1004 m = math.sqrt(max(xanchors **2 + yanchors**2 + zanchors**2))965 m = math.sqrt(max(xanchors ** 2 + yanchors ** 2 + zanchors ** 2)) 1005 966 zanchors /= m 1006 967 else: 1007 m = math.sqrt(max(xanchors **2 + yanchors**2))968 m = math.sqrt(max(xanchors ** 2 + yanchors ** 2)) 1008 969 1009 970 xanchors /= m … … 1047 1008 1048 1009 @deprecated_keywords({"X": "x", "Y": "y", "Ncomp": "ncomp"}) 1049 def create_pls_projection(x, y, ncomp =2):1050 '''Predict y from x using first ncomp principal components'''1010 def create_pls_projection(x, y, ncomp=2): 1011 """Predict y from x using first ncomp principal components""" 1051 1012 1052 1013 # data dimensions … … 1056 1017 # Zscores of original matrices 1057 1018 ymean = y.mean() 1058 x, y = center(x), center(y)1059 1060 p = numpy.empty((mx, ncomp))1061 w = numpy.empty((mx, ncomp))1062 c = numpy.empty((my, ncomp))1063 t = numpy.empty((n, ncomp))1064 u = numpy.empty((n, ncomp))1065 b = numpy.zeros((ncomp, ncomp))1066 1067 e, f = x,y1019 x, y = center(x), center(y) 1020 1021 p = numpy.empty((mx, ncomp)) 1022 w = numpy.empty((mx, ncomp)) 1023 c = numpy.empty((my, ncomp)) 1024 t = numpy.empty((n, ncomp)) 1025 u = numpy.empty((n, ncomp)) 1026 b = numpy.zeros((ncomp, ncomp)) 1027 1028 e, f = x, y 1068 1029 1069 1030 # main algorithm 1070 1031 for i in range(ncomp): 1071 1072 u = numpy.random.random_sample((n,1)) 1073 w = normalize(numpy.dot(e.T,u)) 1074 t = normalize(numpy.dot(e,w)) 1075 c = normalize(numpy.dot(f.T,t)) 1032 u = numpy.random.random_sample((n, 1)) 1033 w = normalize(numpy.dot(e.T, u)) 1034 t = normalize(numpy.dot(e, w)) 1035 c = normalize(numpy.dot(f.T, t)) 1076 1036 1077 1037 dif = t 1078 1038 # iterations for loading vector t 1079 1039 while numpy.linalg.norm(dif) > 10e16: 1080 c = normalize(numpy.dot(f.T, t))1081 u = numpy.dot(f, c)1082 w = normalize(numpy.dot(e.T, u))1083 t0 = normalize(numpy.dot(e, w))1040 c = normalize(numpy.dot(f.T, t)) 1041 u = numpy.dot(f, c) 1042 w = normalize(numpy.dot(e.T, u)) 1043 t0 = normalize(numpy.dot(e, w)) 1084 1044 dif = t  t0 1085 1045 t = t0 1086 1046 1087 t[:, i] = t.T1088 u[:, i] = u.T1089 c[:, i] = c.T1090 w[:, i] = w.T1091 1092 b = numpy.dot(t.T, u)[0,0]1047 t[:, i] = t.T 1048 u[:, i] = u.T 1049 c[:, i] = c.T 1050 w[:, i] = w.T 1051 1052 b = numpy.dot(t.T, u)[0, 0] 1093 1053 b[i][i] = b 1094 p = numpy.dot(e.T, t)1095 p[:, i] = p.T1096 e = e  numpy.dot(t, p.T)1097 xx = b * numpy.dot(t, c.T)1054 p = numpy.dot(e.T, t) 1055 p[:, i] = p.T 1056 e = e  numpy.dot(t, p.T) 1057 xx = b * numpy.dot(t, c.T) 1098 1058 f = f  xx 1099 1100 # esimated y1101 #YE = numpy.dot(numpy.dot(t,b),c.t)*numpy.std(y, axis = 0) + ymean1102 #y = y*numpy.std(y, axis = 0)+ ymean1103 #BPls = numpy.dot(numpy.dot(numpy.linalg.pinv(p.t),b),c.t)1104 1059 1105 1060 return w … … 1109 1064 # ############################################################################# 1110 1065 # class that represents freeviz classifier 1111 class FreeVizClassifier( Orange.classification.Classifier):1066 class FreeVizClassifier(classification.Classifier): 1112 1067 """ 1113 1068 A kNN classifier on the 2D projection of the data, optimized by FreeViz. … … 1120 1075 be passed: 1121 1076 1122 :param data : table of data instances to project to a 2D plane and use for1077 :param dataset: table of data instances to project to a 2D plane and use for 1123 1078 classification. 1124 :type data : :class:`Orange.data.Table`1079 :type dataset: :class:`Orange.data.Table` 1125 1080 1126 1081 :param freeviz: the FreeViz algorithm instance to use to optimize the 2D … … 1129 1084 1130 1085 """ 1131 1132 def __init__(self, data , freeviz):1086 1087 def __init__(self, dataset, freeviz): 1133 1088 self.freeviz = freeviz 1134 1089 1135 1090 if self.freeviz.__class__ != FreeViz: 1136 self.freeviz.parentWidget.setData(data )1091 self.freeviz.parentWidget.setData(dataset) 1137 1092 self.freeviz.parentWidget.showAllAttributes = 1 1138 1093 else: 1139 self.freeviz.graph.set_data(data )1094 self.freeviz.graph.set_data(dataset) 1140 1095 self.freeviz.show_all_attributes() 1141 1096 1142 #self.FreeViz.randomAnchors()1143 1097 self.freeviz.radial_anchors() 1144 1098 self.freeviz.optimize_separation() … … 1150 1104 1151 1105 valid_data = graph.get_valid_list(indices) 1152 domain = Orange.data.Domain([graph.data_domain[i].name for i in indices]+1153 1154 1106 domain = data.Domain([graph.data_domain[i].name for i in indices] + 1107 [graph.data_domain.classVar.name], 1108 graph.data_domain) 1155 1109 offsets = [graph.attr_values[graph.attribute_names[i]][0] 1156 1110 for i in indices] 1157 1111 normalizers = [graph.get_min_max_val(i) for i in indices] 1158 selected_data = numpy.take(graph.original_data, indices, axis =0)1112 selected_data = numpy.take(graph.original_data, indices, axis=0) 1159 1113 averages = numpy.average(numpy.compress(valid_data, selected_data, 1160 1114 axis=1), 1) … … 1162 1116 graph.original_data[graph.data_class_index]) 1163 1117 1164 graph.create_projection_as_numeric_array(indices, use_anchor_data =1,1165 remove_missing_data =0,1166 valid_data =valid_data,1167 jitter_size =1)1168 self.classifier = Orange.classification.knn.P2NN(domain,1169 1170 1171 1172 1173 1174 1175 1118 graph.create_projection_as_numeric_array(indices, use_anchor_data=1, 1119 remove_missing_data=0, 1120 valid_data=valid_data, 1121 jitter_size=1) 1122 self.classifier = knn.P2NN(domain, 1123 numpy.transpose(numpy.array([numpy.compress(valid_data, 1124 graph.unscaled_x_positions), 1125 numpy.compress(valid_data, 1126 graph.unscaled_y_positions), 1127 class_data])), 1128 graph.anchor_data, offsets, normalizers, 1129 averages, graph.normalize_examples, law=1) 1176 1130 1177 1131 # for a given instance run argumentation and find out to which class it most often fall 1178 1132 @deprecated_keywords({"example": "instance", "returnType": "return_type"}) 1179 def __call__(self, instance, return_type=Orange.classification.Classifier.GetValue): 1180 #instance.setclass(0) 1133 def __call__(self, instance, return_type=classification.Classifier.GetValue): 1181 1134 return self.classifier(instance, return_type) 1182 1135 1183 FreeVizClassifier = deprecated_members({"FreeViz": "freeviz"})(FreeVizClassifier)1184 1185 class FreeVizLearner( Orange.classification.Learner):1136 FreeVizClassifier = deprecated_members({"FreeViz": "freeviz"})(FreeVizClassifier) 1137 1138 class FreeVizLearner(classification.Learner): 1186 1139 """ 1187 1140 A learner that builds a :class:`FreeVizClassifier` on given data. An … … 1193 1146 1194 1147 """ 1195 def __new__(cls, freeviz = None, instances = None, weight_id = 0, **argkw): 1196 self = Orange.classification.Learner.__new__(cls, **argkw) 1148 1149 def __new__(cls, freeviz=None, instances=None, weight_id=0, **argkw): 1150 self = classification.Learner.__new__(cls, **argkw) 1197 1151 if instances: 1198 1152 self.__init__(freeviz, **argkw) … … 1201 1155 return self 1202 1156 1203 def __init__(self, freeviz = None):1157 def __init__(self, freeviz=None, **kwd): 1204 1158 if not freeviz: 1205 1159 freeviz = FreeViz() … … 1208 1162 1209 1163 @deprecated_keywords({"examples": "instances", "weightID": "weight_id"}) 1210 def __call__(self, instances, weight_id =0):1164 def __call__(self, instances, weight_id=0): 1211 1165 return FreeVizClassifier(instances, self.freeviz) 1212 1166 1213 FreeVizLearner = deprecated_members({"FreeViz": "freeviz"})(FreeVizLearner)1214 1215 1216 class S2NHeuristicLearner( Orange.classification.Learner):1167 FreeVizLearner = deprecated_members({"FreeViz": "freeviz"})(FreeVizLearner) 1168 1169 1170 class S2NHeuristicLearner(classification.Learner): 1217 1171 """ 1218 1172 This class is not documented yet. 1219 1173 1220 1174 """ 1221 def __new__(cls, freeviz = None, instances = None, weight_id = 0, **argkw): 1222 self = Orange.classification.Learner.__new__(cls, **argkw) 1175 1176 def __new__(cls, freeviz=None, instances=None, weight_id=0, **argkw): 1177 self = classification.Learner.__new__(cls, **argkw) 1223 1178 if instances: 1224 1179 self.__init__(freeviz, **argkw) … … 1227 1182 return self 1228 1183 1229 def __init__(self, freeviz = None):1184 def __init__(self, freeviz=None, **kwd): 1230 1185 if not freeviz: 1231 1186 freeviz = FreeViz() … … 1234 1189 1235 1190 @deprecated_keywords({"examples": "instances", "weightID": "weight_id"}) 1236 def __call__(self, instances, weight_id =0):1191 def __call__(self, instances, weight_id=0): 1237 1192 return S2NHeuristicClassifier(instances, self.freeviz) 1238 1193 1239 1194 S2NHeuristicLearner = deprecated_members({"FreeViz": 1240 "freeviz"})(S2NHeuristicLearner)1195 "freeviz"})(S2NHeuristicLearner) 1241 1196 1242 1197 class Projector(object): 1243 1198 """ 1244 1199 Stores a linear projection of data and uses it to transform any given data with matching input domain. 1245 1246 .. attribute:: input_domain1247 1248 Domain of the data set that was used to construct principal component1249 subspace.1250 1251 .. attribute:: output_domain1252 1253 Domain used in returned data sets. This domain has a continuous1254 variable for each axis in the projected space,1255 and no class variable(s).1256 1257 .. attribute:: mean1258 1259 Array containing means of each variable in the data set that was used1260 to construct the projection.1261 1262 .. attribute:: stdev1263 1264 An array containing standard deviations of each variable in the data1265 set that was used to construct the projection.1266 1267 .. attribute:: standardize1268 1269 True, if standardization was used when constructing the projection. If1270 set, instances will be standardized before being projected.1271 1272 .. attribute:: projection1273 1274 Array containing projection (vectors that describe the1275 transformation from input to output domain).1276 1277 1200 """ 1201 #: Domain of the data set that was used to construct principal component subspace. 1202 input_domain = None 1203 1204 #: Domain used in returned data sets. This domain has a continuous 1205 #: variable for each axis in the projected space, 1206 #: and no class variable(s). 1207 output_domain = None 1208 1209 #: Array containing means of each variable in the data set that was used 1210 #: to construct the projection. 1211 mean = numpy.array(()) 1212 1213 #: An array containing standard deviations of each variable in the data 1214 #: set that was used to construct the projection. 1215 stdev = numpy.array(()) 1216 1217 #: True, if standardization was used when constructing the projection. If 1218 #: set, instances will be standardized before being projected. 1219 standardize = True 1220 1221 #: Array containing projection (vectors that describe the 1222 #: transformation from input to output domain). 1223 projection = numpy.array(()).reshape(0, 0) 1224 1278 1225 def __init__(self, **kwds): 1279 1226 self.__dict__.update(kwds) 1280 if not hasattr(self, "output_domain"): 1281 self.output_domain = Orange.data.Domain([Orange.feature.Continuous("a.%d"%(i+1)) for i in range(len(self.projection))], False) 1282 1283 1284 def __call__(self, data): 1227 1228 features = [] 1229 for i in range(len(self.projection)): 1230 f = feature.Continuous("Comp.%d" % (i + 1)) 1231 f.get_value_from = lambda ex, w: self._project_single(ex, w, f, i) 1232 features.append(f) 1233 1234 self.output_domain = Orange.data.Domain(features, 1235 self.input_domain.class_var, 1236 class_vars=self.input_domain.class_vars) 1237 1238 def _project_single(self, example, return_what, new_feature, feature_idx): 1239 ex = Orange.data.Table([example]).to_numpy("a")[0] 1240 ex = self.mean 1241 if self.standardize: 1242 ex /= self.stdev 1243 return new_feature(numpy.dot(self.projection[feature_idx, :], ex.T)[0]) 1244 1245 def __call__(self, dataset): 1285 1246 """ 1286 1247 Project data. 1287 1248 1288 :param data : input data set1289 :type data : :class:`Orange.data.Table`1249 :param dataset: input data set 1250 :type dataset: :class:`Orange.data.Table` 1290 1251 1291 1252 :rtype: :class:`Orange.data.Table` 1292 1253 """ 1293 if type(data) != Orange.data.Table:1294 data = Orange.data.Table([data])1295 if len(self.projection.T) != len(data .domain.features):1296 data = Orange.data.Table(self.input_domain, data)1297 1298 X = data.to_numpy_MA("a")[0]1254 if not isinstance(dataset, data.Table): 1255 dataset = data.Table([dataset]) 1256 if len(self.projection.T) != len(dataset.domain.features): 1257 dataset = data.Table(self.input_domain, dataset) 1258 1259 X, = dataset.to_numpy("a") 1299 1260 Xm, U = self.mean, self.projection 1300 1261 n, m = X.shape 1301 1262 1302 1263 if m != len(self.projection.T): 1303 raise Orange.core.KernelException, "Invalid number of features"1264 raise ValueError, "Invalid number of features" 1304 1265 1305 1266 Xd = X  Xm … … 1308 1269 Xd /= self.stdev 1309 1270 1310 self.A = numpy.ma.dot(Xd, U.T) 1311 1312 return Orange.data.Table(self.output_domain, self.A.tolist()) 1271 self.A = numpy.dot(Xd, U.T) 1272 1273 # TODO: Delete when orange will support creating data.Table from masked array. 1274 self.A = self.A.filled(0.) if isinstance(self.A, numpy.ma.core.MaskedArray) else self.A 1275 # append class variable 1276 1277 class_, classes = dataset.to_numpy("c")[0], dataset.to_numpy("m")[0] 1278 return data.Table(self.output_domain, numpy.hstack((self.A, class_, classes))) 1279 1313 1280 1314 1281 #color table for biplot 1315 Colors = ['bo', 'go','yo','co','mo']1316 1317 class P ca(object):1282 Colors = ['bo', 'go', 'yo', 'co', 'mo'] 1283 1284 class PCA(object): 1318 1285 """ 1319 1286 Orthogonal transformation of data into a set of uncorrelated variables called 1320 1287 principal components. This transformation is defined in such a way that the 1321 1288 first variable has as high variance as possible. 1322 1323 If data instances are provided to the constructor,1324 the optimization algorithm is called and the resulting projector1325 (:class:`~Orange.projection.linear.PcaProjector`) is1326 returned instead of the optimizer (instance of this class).1327 1289 1328 1290 :param standardize: perform standardization of the data set. … … 1335 1297 multiply data matrix with inverse of its covariance matrix). 1336 1298 :type use_generalized_eigenvectors: boolean 1337 1338 :rtype: :class:`~Orange.projection.linear.Pca` or1339 :class:`~Orange.projection.linear.PcaProjector`1340 1299 """ 1341 1300 1342 1301 def __new__(cls, dataset=None, **kwds): 1343 1302 optimizer = object.__new__(cls) 1344 optimizer.__init__(**kwds)1345 1303 1346 1304 if dataset: 1305 optimizer.__init__(**kwds) 1347 1306 return optimizer(dataset) 1348 1307 else: … … 1356 1315 self.use_generalized_eigenvectors = use_generalized_eigenvectors 1357 1316 1358 def _pca(self, dataset, Xd, Xg):1359 n,m = Xd.shape1360 if n < m:1361 C = numpy.ma.dot(Xg.T, Xd.T)1362 V, D, T = numpy.linalg.svd(C)1363 U = numpy.ma.dot(V.T, Xd) / numpy.sqrt(D.reshape(1, 1))1364 else:1365 C = numpy.ma.dot(Xg, Xd)1366 U, D, T = numpy.linalg.svd(C)1367 U = U.T # eigenvectors are now in rows1368 return U, D1369 1370 1317 def __call__(self, dataset): 1371 1318 """ … … 1378 1325 :rtype: :class:`~Orange.projection.linear.PcaProjector` 1379 1326 """ 1380 1381 X = dataset.to_numpy_MA("a")[0] 1382 N,M = X.shape 1383 Xm = numpy.mean(X, axis=0) 1384 Xd = X  Xm 1385 1386 #take care of the constant features 1387 stdev = numpy.std(Xd, axis=0) 1388 relevant_features = stdev != 0 1389 Xd = Xd[:, relevant_features] 1390 if self.standardize: 1391 Xd /= stdev[relevant_features] 1327 Xd, stdev, mean, relevant_features = self._normalize_data(dataset) 1392 1328 1393 1329 #use generalized eigenvectors … … 1398 1334 Xg = Xd.T 1399 1335 1400 #actual pca 1336 components, variances = self._perform_pca(dataset, Xd, Xg) 1337 components = self._insert_zeros_for_constant_features(len(dataset.domain.features), 1338 components, 1339 relevant_features) 1340 1341 variances, components, variance_sum = self._select_components(variances, components) 1342 1343 n, m = components.shape 1344 1345 return PcaProjector(input_domain=dataset.domain, 1346 mean=mean, 1347 stdev=stdev, 1348 standardize=self.standardize, 1349 eigen_vectors=components, 1350 projection=components, 1351 eigen_values=variances, 1352 variance_sum=variance_sum) 1353 1354 def _normalize_data(self, dataset): 1355 if not len(dataset) or not len(dataset.domain.features): 1356 raise ValueError("Empty dataset") 1357 X = dataset.to_numpy("a")[0] 1358 1359 Xm = numpy.mean(X, axis=0) 1360 Xd = X  Xm 1361 1362 if not Xd.any(): 1363 raise ValueError("All features are constant") 1364 1365 #take care of the constant features 1366 stdev = numpy.std(Xd, axis=0) 1367 stdev[stdev == 0] = 1. # to prevent division by zero 1368 relevant_features = stdev != 0 1369 Xd = Xd[:, relevant_features] 1370 if self.standardize: 1371 Xd /= stdev[relevant_features] 1372 return Xd, stdev, Xm, relevant_features 1373 1374 def _perform_pca(self, dataset, Xd, Xg): 1401 1375 n, m = Xd.shape 1402 U, D = self._pca(dataset, Xd, Xg) 1403 1404 #insert zeros for constant features 1405 n, m = U.shape 1406 if m != M: 1407 U_ = numpy.zeros((n, M)) 1408 U_[:, relevant_features] = U 1409 U = U_ 1410 1376 if n < m: 1377 C = numpy.dot(Xg.T, Xd.T) 1378 V, D, T = numpy.linalg.svd(C) 1379 U = numpy.dot(V.T, Xd) / numpy.sqrt(D.reshape(1, 1)) 1380 else: 1381 C = numpy.dot(Xg, Xd) 1382 U, D, T = numpy.linalg.svd(C) 1383 U = U.T # eigenvectors are now in rows 1384 return U, D 1385 1386 def _select_components(self, D, U): 1411 1387 variance_sum = D.sum() 1412 1413 1388 #select eigen vectors 1414 1389 if self.variance_covered != 1: … … 1417 1392 U = U[:nfeatures, :] 1418 1393 D = D[:nfeatures] 1419 1420 1394 if self.max_components > 0: 1421 1395 U = U[:self.max_components, :] 1422 1396 D = D[:self.max_components] 1423 1397 return D, U, variance_sum 1398 1399 def _insert_zeros_for_constant_features(self, original_dimension, U, relevant_features): 1424 1400 n, m = U.shape 1425 pc_domain = Orange.data.Domain([Orange.feature.Continuous("Comp.%d"% 1426 (i + 1)) for i in range(n)], False) 1427 1428 return PcaProjector(input_domain=dataset.domain, 1429 output_domain = pc_domain, 1430 pc_domain = pc_domain, 1431 mean = Xm, 1432 stdev = stdev, 1433 standardize = self.standardize, 1434 eigen_vectors = U, 1435 projection = U, 1436 eigen_values = D, 1437 variance_sum = variance_sum) 1401 if m != original_dimension: 1402 U_ = numpy.zeros((n, original_dimension)) 1403 U_[:, relevant_features] = U 1404 U = U_ 1405 return U 1406 1407 Pca = PCA 1438 1408 1439 1409 1440 1410 class Spca(Pca): 1441 def _p ca(self, dataset, Xd, Xg):1411 def _perform_pca(self, dataset, Xd, Xg): 1442 1412 # define the Laplacian matrix 1443 1413 c = dataset.to_numpy("c")[0] … … 1447 1417 Xg = numpy.dot(Xg, l) 1448 1418 1449 return Pca._pca(self, dataset, Xd, Xg) 1450 1419 return Pca._perform_pca(self, dataset, Xd, Xg) 1420 1421 1422 @deprecated_members({"pc_domain": "output_domain"}) 1451 1423 class PcaProjector(Projector): 1452 """ 1453 .. attribute:: pc_domain 1454 1455 Synonymous for :obj:`~Orange.projection.linear.Projector.output_domain`. 1456 1457 .. attribute:: eigen_vectors 1458 1459 Synonymous for :obj:`~Orange.projection.linear.Projector.projection`. 1460 1461 .. attribute:: eigen_values 1462 1463 Array containing standard deviations of principal components. 1464 1465 .. attribute:: variance_sum 1466 1467 Sum of all variances in the data set that was used to construct the PCA 1468 space. 1469 1470 """ 1471 1472 def __init__(self, **kwds): 1473 self.__dict__.update(kwds) 1424 #: Synonymous for :obj:`~Orange.projection.linear.Projector.projection`. 1425 eigen_vectors = numpy.array(()).reshape(0, 0) 1426 1427 #: Array containing standard deviations of principal components. 1428 eigen_values = numpy.array(()) 1429 1430 #: Sum of all variances in the data set that was used to construct the PCA space. 1431 variance_sum = 0. 1474 1432 1475 1433 def __str__(self): … … 1482 1440 "Std. deviation of components:", 1483 1441 " ".join([" "] + 1484 ["%10s" % a.name for a in self. pc_domain.attributes]),1442 ["%10s" % a.name for a in self.output_domain.attributes]), 1485 1443 " ".join(["Std. deviation"] + 1486 1444 ["%10.3f" % a for a in self.eigen_values]), 1487 1445 " ".join(["Proportion Var"] + 1488 ["%10.3f" % a for a in 1446 ["%10.3f" % a for a in self.eigen_values / s * 100]), 1489 1447 " ".join(["Cumulative Var"] + 1490 1448 ["%10.3f" % a for a in cs * 100]), 1491 1449 "", 1492 #"Loadings:", 1493 #" ".join(["%10s"%""] + ["%10s" % a.name for a in self.pc_domain]), 1494 #"\n".join([ 1495 # " ".join([a.name] + ["%10.3f" % b for b in self.eigen_vectors.T[i]]) 1496 # for i, a in enumerate(self.input_domain.attributes) 1497 # ]) 1498 ]) if len(self.pc_domain) <= ncomponents else\ 1450 ]) if len(self.output_domain) <= ncomponents else\ 1499 1451 "\n".join([ 1500 1452 "PCA SUMMARY", … … 1502 1454 "Std. deviation of components:", 1503 1455 " ".join([" "] + 1504 ["%10s" % a.name for a in self. pc_domain.attributes[:ncomponents]] +1456 ["%10s" % a.name for a in self.output_domain.attributes[:ncomponents]] + 1505 1457 ["%10s" % "..."] + 1506 ["%10s" % self. pc_domain.attributes[1].name]),1458 ["%10s" % self.output_domain.attributes[1].name]), 1507 1459 " ".join(["Std. deviation"] + 1508 1460 ["%10.3f" % a for a in self.eigen_values[:ncomponents]] + … … 1518 1470 ["%10.3f" % (cs[1] * 100)]), 1519 1471 "", 1520 #"Loadings:", 1521 #" ".join(["%16s" % ""] + 1522 # ["%8s" % a.name for a in self.pc_domain.attributes[:ncomponents]] + 1523 # ["%8s" % "..."] + 1524 # ["%8s" % self.pc_domain.attributes[1].name]), 1525 #"\n".join([ 1526 # " ".join(["%16.16s" %a.name] + 1527 # ["%8.3f" % b for b in self.eigen_vectors.T[i, :ncomponents]] + 1528 # ["%8s" % ""] + 1529 # ["%8.3f" % self.eigen_vectors.T[i, 1]]) 1530 # for i, a in enumerate(self.input_domain.attributes) 1531 # ]) 1532 ]) 1533 1534 1535 1536 ################ Plotting functions ################### 1537 1538 def scree_plot(self, filename = None, title = 'Scree Plot'): 1472 ]) 1473 1474 1475 def scree_plot(self, filename=None, title='Scree Plot'): 1539 1476 """ 1540 1477 Draw a scree plot of principal components 1541 1478 1542 :param filename: Name of the file to which the plot will be saved. \1543 If None, plot will be displayed instead.1479 :param filename: Name of the file to which the plot will be saved. 1480 If None, plot will be displayed instead. 1544 1481 :type filename: str 1545 1482 :param title: Plot title … … 1556 1493 1557 1494 x_axis = range(len(self.eigen_values)) 1558 if len(x_axis) < 5:1559 ax.set_xticks(x_axis)1560 1495 plt.grid(True) 1561 1496 … … 1577 1512 plt.show() 1578 1513 1579 def biplot(self, filename = None, components = [0,1], title ='Biplot'):1514 def biplot(self, filename=None, components=(0, 1), title='Biplot'): 1580 1515 """ 1581 1516 Draw biplot for PCA. Actual projection must be performed via pca(data) 1582 1517 before bipot can be used. 1583 1518 1584 :param filename: Name of the file to which the plot will be saved. \1585 If None, plot will be displayed instead.1586 :type plot: str1519 :param filename: Name of the file to which the plot will be saved. 1520 If None, plot will be displayed instead. 1521 :type filename: str 1587 1522 :param components: List of two components to plot. 1588 1523 :type components: list … … 1593 1528 1594 1529 if len(components) < 2: 1595 raise orange.KernelException, 'Two components are needed for biplot'1530 raise ValueError, 'Two components are needed for biplot' 1596 1531 1597 1532 if not (0 <= min(components) <= max(components) < len(self.eigen_values)): 1598 raise orange.KernelException, 'Invalid components' 1599 1600 X = self.A[:,components[0]] 1601 Y = self.A[:,components[1]] 1602 1603 vectorsX = self.eigen_vectors[:,components[0]] 1604 vectorsY = self.eigen_vectors[:,components[1]] 1605 1606 1607 #TO DO > pc.biplot (maybe) 1608 #trDataMatrix = dataMatrix / lam 1609 #trLoadings = loadings * lam 1610 1611 #max_data_value = numpy.max(abs(trDataMatrix)) * 1.05 1533 raise ValueError, 'Invalid components' 1534 1535 X = self.A[:, components[0]] 1536 Y = self.A[:, components[1]] 1537 1538 vectorsX = self.eigen_vectors[:, components[0]] 1539 vectorsY = self.eigen_vectors[:, components[1]] 1540 1612 1541 max_load_value = self.eigen_vectors.max() * 1.5 1613 1542 1614 #plt.clf()1615 1543 fig = plt.figure() 1616 1544 ax1 = fig.add_subplot(111) … … 1623 1551 ax1.yaxis.set_ticks_position('left') 1624 1552 1625 #if self._classArray == None:1626 #trDataMatrix = transpose(trDataMatrix)1627 1553 ax1.plot(X, Y, Colors[0]) 1628 #else: 1629 #suboptimal 1630 # classValues = [] 1631 # for classValue in self._classArray: 1632 # if classValue not in classValues: 1633 # classValues.append(classValue) 1634 # for i in range(len(classValues)): 1635 # choice = numpy.array([classValues[i] == cv for cv in self._classArray]) 1636 # partialDataMatrix = transpose(trDataMatrix[choice]) 1637 # ax1.plot(partialDataMatrix[0], partialDataMatrix[1], 1638 # Colors[i % len(Colors)], label = str(classValues[i])) 1639 # ax1.legend() 1640 1641 #ax1.set_xlim(max_data_value, max_data_value) 1642 #ax1.set_ylim(max_data_value, max_data_value) 1554 1643 1555 1644 1556 #eliminate double axis on right … … 1656 1568 tl.set_color('r') 1657 1569 1658 arrowprops = dict(facecolor = 'red', edgecolor = 'red', width = 1, headwidth =4)1659 1660 for (x, y, a) in zip(vectorsX, vectorsY, self.input_domain.attributes):1570 arrowprops = dict(facecolor='red', edgecolor='red', width=1, headwidth=4) 1571 1572 for (x, y, a) in zip(vectorsX, vectorsY, self.input_domain.attributes): 1661 1573 if max(x, y) < 0.1: 1662 1574 continue 1663 1575 print x, y, a 1664 ax2.annotate('', (x, y), (0, 0), arrowprops =arrowprops)1665 ax2.text(x * 1.1, y * 1.2, a.name, color ='red')1576 ax2.annotate('', (x, y), (0, 0), arrowprops=arrowprops) 1577 ax2.text(x * 1.1, y * 1.2, a.name, color='red') 1666 1578 1667 1579 ax2.set_xlim(max_load_value, max_load_value) … … 1688 1600 """ 1689 1601 1690 def __new__(cls, data =None):1602 def __new__(cls, dataset=None): 1691 1603 self = object.__new__(cls) 1692 if data :1604 if dataset: 1693 1605 self.__init__() 1694 return self.__call__(data )1606 return self.__call__(dataset) 1695 1607 else: 1696 1608 return self … … 1709 1621 1710 1622 Xm = numpy.mean(X, axis=0) 1711 X = X Xm1623 X = Xm 1712 1624 1713 1625 #take care of the constant features … … 1716 1628 stdev[stdev == 0] = 1. 1717 1629 X /= stdev 1718 X = X[:, relevant_features]1630 X = X[:, relevant_features] 1719 1631 1720 1632 instances, features = X.shape … … 1733 1645 for v in set(Y): 1734 1646 d = MA.take(X, numpy.argwhere(Y == v).flatten(), axis=0) 1735 d = d numpy.mean(d, axis=0)1647 d = numpy.mean(d, axis=0) 1736 1648 Sw += MA.dot(d.T, d) 1737 1649 Sw /= instances 1738 total = MA.dot(X.T, X) /float(instances)1650 total = MA.dot(X.T, X) / float(instances) 1739 1651 Sb = total  Sw 1740 1652 1741 matrix = numpy.linalg.inv(Sw) *Sb1653 matrix = numpy.linalg.inv(Sw) * Sb 1742 1654 D, U = numpy.linalg.eigh(matrix) 1743 1655 1744 sorted_indices = [i for _, i in sorted([(ev, i)1745 1746 U = numpy.take(U, sorted_indices, axis =1)1656 sorted_indices = [i for _, i in sorted([(ev, i) 1657 for i, ev in enumerate(D)], reverse=True)] 1658 U = numpy.take(U, sorted_indices, axis=1) 1747 1659 D = numpy.take(D, sorted_indices) 1748 1660 … … 1750 1662 n, m = U.shape 1751 1663 if m != M: 1752 U_ = numpy.zeros((n, M))1753 U_[:, relevant_features] = U1664 U_ = numpy.zeros((n, M)) 1665 U_[:, relevant_features] = U 1754 1666 U = U_ 1755 1667 1756 out_domain = Orange.data.Domain([Orange.feature.Continuous("Comp.%d"% 1757 (i+1)) for 1758 i in range(len(D))], False) 1759 1760 return FdaProjector(input_domain = dataset.domain, 1761 output_domain = out_domain, 1762 mean = Xm, 1763 stdev = stdev, 1764 standardize = True, 1765 eigen_vectors = U, 1766 projection = U, 1767 eigen_values = D) 1668 out_domain = data.Domain([feature.Continuous("Comp.%d" % 1669 (i + 1)) for 1670 i in range(len(D))], False) 1671 1672 return FdaProjector(input_domain=dataset.domain, 1673 output_domain=out_domain, 1674 mean=Xm, 1675 stdev=stdev, 1676 standardize=True, 1677 eigen_vectors=U, 1678 projection=U, 1679 eigen_values=D) 1680 1768 1681 1769 1682 class FdaProjector(Projector): … … 1778 1691 1779 1692 """ 1780 1781 def __init__(self, **kwds):1782 self.__dict__.update(kwds)1783 1784 1693 1785 1694 … … 1788 1697 "NComps": "ncomps", 1789 1698 "useGeneralizedEigenvectors": "use_generalized_eigenvectors"}) 1790 def create_pca_projection(data_matrix, class_array = None, ncomps = 1, use_generalized_eigenvectors = 1):1699 def create_pca_projection(data_matrix, class_array=None, ncomps=1, use_generalized_eigenvectors=True): 1791 1700 import warnings 1701 1792 1702 warnings.warn("Deprecated in favour of Orange" 1793 1703 ".projection.linear.Pca.", 1794 DeprecationWarning)1795 if type(data_matrix) == numpy.ma.core.MaskedArray:1704 DeprecationWarning) 1705 if isinstance(data_matrix, numpy.ma.core.MaskedArray): 1796 1706 data_matrix = numpy.array(data_matrix) 1797 if class_array != None and type(class_array) == numpy.ma.core.MaskedArray:1707 if isinstance(class_array, numpy.ma.core.MaskedArray): 1798 1708 class_array = numpy.array(class_array) 1799 1709 1800 1710 data_matrix = numpy.transpose(data_matrix) 1801 1711 1802 s = numpy.sum(data_matrix, axis=0) /float(len(data_matrix))1712 s = numpy.sum(data_matrix, axis=0) / float(len(data_matrix)) 1803 1713 data_matrix = s # substract average value to get zero mean 1804 1714 1805 if class_array !=None and use_generalized_eigenvectors:1715 if class_array is not None and use_generalized_eigenvectors: 1806 1716 covarMatrix = numpy.dot(numpy.transpose(data_matrix), data_matrix) 1807 1717 try: 1808 matrix = inv(covarMatrix)1809 except :1718 matrix = numpy.linalg.inv(covarMatrix) 1719 except numpy.linalg.LinAlgError: 1810 1720 return None, None 1811 1721 matrix = numpy.dot(matrix, numpy.transpose(data_matrix)) … … 1814 1724 1815 1725 # compute dataMatrixT * L * dataMatrix 1816 if class_array !=None:1726 if class_array is not None: 1817 1727 # define the Laplacian matrix 1818 1728 l = numpy.zeros((len(data_matrix), len(data_matrix))) 1819 1729 for i in range(len(data_matrix)): 1820 for j in range(i +1, len(data_matrix)):1821 l[i, j] = int(class_array[i] != class_array[j])1822 l[j, i] = int(class_array[i] != class_array[j])1730 for j in range(i + 1, len(data_matrix)): 1731 l[i, j] = int(class_array[i] != class_array[j]) 1732 l[j, i] = int(class_array[i] != class_array[j]) 1823 1733 1824 1734 s = numpy.sum(l, axis=0) # doesn't matter which axis since the matrix l is symmetrical 1825 1735 for i in range(len(data_matrix)): 1826 l[i, i] = s[i]1736 l[i, i] = s[i] 1827 1737 1828 1738 matrix = numpy.dot(matrix, l) … … 1847 1757 vals[bestind] = 1 1848 1758 1849 return ret_vals, numpy.take(vectors.T, ret_indices, axis = 0) # ith eigenvector is the ith column in vectors so we have to transpose the array 1759 return ret_vals, numpy.take(vectors.T, ret_indices, 1760 axis=0) # ith eigenvector is the ith column in vectors so we have to transpose the array 1850 1761 1851 1762 createPCAProjection = create_pca_projection
Note: See TracChangeset
for help on using the changeset viewer.