Changeset 7337:c8511c1bf231 in orange
 Timestamp:
 02/03/11 19:42:20 (3 years ago)
 Branch:
 default
 Convert:
 f3d035c06e77e9388cce2ed50f2a216612177914
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

orange/Orange/projection/som.py
r7281 r7337 21 21 22 22 import Orange 23 som = Orange.projection.som.SOMLearner(map_shape=(8, 8), initialize=Orange.projection.som.InitializeRandom) 23 som = Orange.projection.som.SOMLearner(map_shape=(8, 8), 24 initialize=Orange.projection.som.InitializeRandom) 24 25 data = Orange.data.table("iris.tab") 25 26 map = som(data) 26 27 28 .. autodata:: NeighbourhoodGaussian 29 30 .. autodata:: HexagonalTopology 31 32 .. autodata:: RectangularTopology 33 34 .. autodata:: InitializeLinear 35 36 .. autodata:: InitializeRandom 37 38 .. autodata:: NeighbourhoodGaussian 39 40 .. autodata:: NeighbourhoodBubble 41 42 .. autodata:: NeighbourhoodEpanechicov 43 27 44 .. autoclass:: SOMLearner 28 45 :members: … … 30 47 .. autoclass:: Solver 31 48 :members: 32 49 33 50 .. autoclass:: SOMMap 34 51 :members: … … 61 78 ================== 62 79 63 Class :obj:`Map` stores the selforganizing map composed of :obj:`Node` objects. 80 Class :obj:`Map` stores the selforganizing map composed of :obj:`Node` objects. The code below 81 (`somnode.py`_, uses `iris.tab`_) shows an example how to access the information stored in the 82 node of the map: 83 84 .. literalinclude:: code/somnode.py 85 :lines: 7 64 86 65 87 .. autoclass:: Map … … 73 95 ======== 74 96 75 The following code (`code/sommapping.py`_, uses `iris.tab`_) infers selforganizing map from Iris data set. The map is rather small, and consists 76 of only 9 cells. We optimize the network, and then report how many data instances were mappped 97 .. _sommapping.py: code/sommapping.py 98 .. _somnode.py: code/somnode.py 99 .. _iris.tab: code/iris.tab 100 101 The following code (`sommapping.py`_, uses `iris.tab`_) infers selforganizing map from Iris data set. The map is rather small, and consists 102 of only 9 cells. We optimize the network, and then report how many data instances were mapped 77 103 into each cell. The second part of the code reports on data instances from one of the corner cells: 78 104 79 105 .. 
literalinclude:: code/sommapping.py 106 :lines: 7 80 107 81 108 The output of this code is:: … … 113 140 114 141 HexagonalTopology = 0 142 """Hexagonal topology, cells are hexagonshaped.""" 115 143 RectangularTopology = 1 144 """Rectangular topology, cells are squareshaped""" 145 116 146 InitializeLinear = 0 147 """Data instances are initially assigned to cells according to their twodimensional PCA projection.""" 117 148 InitializeRandom = 1 118 NeighbourhoodGaussian = 0 149 """Data instances are initially randomly assigned to cells.""" 150 151 NeighbourhoodGaussian = 0 152 """Gaussian (smoothed) neighborhood.""" 119 153 NeighbourhoodBubble = 1 154 """Bubble (crisp) neighborhood.""" 120 155 NeighbourhoodEpanechicov = 2 121 122 class Node(object): 123 """An object holding the information about the node in the map. 124 """ 125 def __init__(self, pos, map=None, vector=None): 126 self.pos = pos 127 self.map = map 128 self.vector = vector 129 130 class Map(object): 131 """Self organizing map (the structure). Includes methods for data initialization. 132 """ 133 134 HexagonalTopology = HexagonalTopology 135 RectangularTopology = RectangularTopology 136 InitializeLinear = InitializeLinear 137 InitializeRandom = InitializeRandom 138 NeighbourhoodGaussian = NeighbourhoodGaussian 139 NeighbourhoodBubble = NeighbourhoodBubble 140 NeighbourhoodEpanechicov = NeighbourhoodEpanechicov 141 142 def __init__(self, map_shape=(20, 40), topology=HexagonalTopology): 143 self.map_shape = map_shape 144 self.topology = topology 145 self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])] 146 147 def __getitem__(self, pos): 148 """ Return the node at position x, y. 149 """ 150 x, y = pos 151 return self.map[x][y] 152 153 def __iter__(self): 154 """ Iterate over all nodes in the map. 155 """ 156 for row in self.map: 157 for node in row: 158 yield node 159 160 def vectors(self): 161 """Return all vectors of the map as rows in an numpy.array. 
162 """ 163 return numpy.array([node.vector for node in self]) 164 165 def unit_distances(self): 166 """Return a NxN numpy.array of internode distances (based on 167 node position in the map, not vector space) where N is the number of 168 nodes. 169 """ 170 nodes = list(self) 171 dist = numpy.zeros((len(nodes), len(nodes))) 172 173 coords = self.unit_coords() 174 for i in range(len(nodes)): 175 for j in range(len(nodes)): 176 dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j])) 177 return numpy.array(dist) 178 179 def unit_coords(self): 180 """ Return the unit coordinates of all nodes in the map as an numpy.array. 181 """ 182 nodes = list(self) 183 coords = numpy.zeros((len(nodes), len(self.map_shape))) 184 coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[0]) 185 coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1]) 186 187 ## in hexagonal topology we move every odd map row by 0.5 and multiply all by sqrt(0.75) 188 if self.topology == Map.HexagonalTopology: 189 ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2)) 190 coords[ind] = coords[ind] + 0.5 191 coords = coords * numpy.sqrt(0.75) 192 return coords 193 194 195 def initialize_map_random(self, data=None, dimension=5): 196 """Initialize the map nodes vectors randomly, by supplying 197 either training data or dimension of the data. 198 """ 199 if data is not None: 200 min, max = ma.min(data, 0), ma.max(data, 0); 201 dimension = data.shape[1] 202 else: 203 min, max = numpy.zeros(dimension), numpy.ones(dimension) 204 for node in self: 205 # node.vector = min + numpy.random.rand(dimension) * (max - min) 206 node.vector = min + random.randint(0, dimension) * (max - min) 207 208 def initialize_map_linear(self, data, map_shape=(10, 20)): 209 """ Initialize the map node vectors linearly over the subspace 210 of the two most significant eigenvectors. 
211 """ 212 data = data.copy() #ma.array(data) 213 dim = data.shape[1] 214 mdim = len(map_shape) 215 munits = len(list(self)) 216 me = ma.mean(data, 0) 217 A = numpy.zeros((dim ,dim)) 218 219 for i in range(dim): 220 data[:, i] = data[:, i] - me[i] 221 222 for i in range(dim): 223 for j in range(dim): 224 c = data[:, i] * data[:, j] 225 A[i, j] = ma.sum(c) / len(c) 226 A[j, i] = A[i, j] 227 228 eigval, eigvec = numpy.linalg.eig(A) 229 ind = list(reversed(numpy.argsort(eigval))) 230 eigval = eigval[ind[:mdim]] 231 eigvec = eigvec[:, ind[:mdim]] 232 233 for i in range(mdim): 234 eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i]) 235 236 unit_coords = self.unit_coords() 237 for d in range(mdim): 238 max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d]) 239 unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min) 240 unit_coords = (unit_coords - 0.5) * 2 241 242 vectors = numpy.array([me for i in range(munits)]) 243 for i in range(munits): 244 for d in range(mdim): 245 vectors[i] = vectors[i] + unit_coords[i][d] * numpy.transpose(eigvec[:, d]) 246 247 for i, node in enumerate(self): 248 node.vector = vectors[i] 249 250 def getUMat(self): 251 return getUMat(self) 252 156 """Epanechicov (cut and smoothed) neighborhood.""" 157 158 ########################################################################## 159 # Inference of Self-Organizing Maps 160 253 161 class Solver(object): 254 162 """ SOM Solver class used to train the map. Supports batch and sequential training. 255 163 Based on ideas from `SOM Toolkit for Matlab <http://www.cis.hut.fi/somtoolbox>`_. 
256 164 257 :param neighbourhood: neighborhood function (NeighbourhoodGaussian, or NeighbourhoodBubble) 165 :param neighbourhood: neighborhood function id 166 :type neighbourhood: :obj:`NeighbourhoodGaussian`, :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov` 258 167 :param radius_ini: initial radius 168 :type radius_ini: int 259 169 :param raduis_fin: final radius 170 :type raduis_fin: int 260 171 :param epoch: number of training interactions 172 :type epoch: int 261 173 :param batch_train: if True run the batch training algorithm (default), else use the sequential one 174 :type batch_train: bool 262 175 :param learning_rate: learning rate for the sequential training algorithm 176 :type learning_rate: float 263 177 """ 264 178 … … 284 198 285 199 def __call__(self, data, map, progressCallback=None): 286 """ Train the map from data. SetprogressCallback function to report on the progress.200 """ Train the map from data. Pass progressCallback function to report on the progress. 287 201 """ 288 202 self.data = data … … 413 327 414 328 self.vectors[nonzero] = S[nonzero] / A[nonzero] 415 416 329 330 417 331 class SOMLearner(orange.Learner): 418 332 """An implementation of selforganizing map. 
Considers an input data set, projects the data … … 423 337 424 338 :param map_shape: dimension of the map 425 :param initialize: type of initialization (InitializeLinear or InitializeRandom), linear 339 :type map_shape: tuple 340 :param initialize: initialization type id; linear 426 341 initialization assigns the data to the cells according to its position in twodimensional 427 342 principal component projection 428 :param topology: topology type (HexagonalTopology or RectangularTopology) 429 :param neighbourhood: cell neighborhood type (NeighbourhoodGaussian, NeighbourhoodBubble or NeighbourhoodEpanechicov), 343 :type initialize: :obj:`InitializeRandom` or :obj:`InitializeLinear` 344 :param topology: topology type id 345 :type topology: :obj:`HexagonalTopology` or :obj:`RectangularTopology` 346 :param neighbourhood: cell neighborhood type id 347 :type neighbourhood: :obj:`NeighbourhoodGaussian`, obj:`NeighbourhoodBubble`, or obj:`NeighbourhoodEpanechicov` 430 348 :param batch_train: perform batch training? 349 :type batch_train: bool 431 350 :param learning_rate: learning rate 351 :type learning_rate: float 432 352 :param radius_ini: initial radius 353 :type radius_ini: int 433 354 :param radius_fin: final radius 355 :type radius_fin: int 434 356 :param epochs: number of epochs (iterations of a training steps) 435 :param solver: a class that executes the optimization algorithm 357 :type epochs: int 358 :param solver: a class with the optimization algorithm 436 359 """ 437 360 … … 473 396 return SOMMap(map, data) 474 397 398 class SOMMap(orange.Classifier): 399 """Project the data onto the inferred selforganizing map. 
400 401 :param map: a trained self-organizing map 402 :type map: :obj:`SOMMap` 403 :param data: the data to be mapped on the map 404 :type data: :obj:`Orange.data.Table` 405 """ 406 407 def __init__(self, map=[], data=[]): 408 self.map = map 409 self.examples = data 410 for node in map: 411 node.referenceExample = orange.Example(orange.Domain(self.examples.domain.attributes, False), 412 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \ 413 for var, value in zip(self.examples.domain.attributes, node.vector)]) 414 node.examples = orange.ExampleTable(self.examples.domain) 415 416 for ex in self.examples: 417 node = self.getBestMatchingNode(ex) 418 node.examples.append(ex) 419 420 if self.examples and self.examples.domain.classVar: 421 for node in self.map: 422 node.classifier = orange.MajorityLearner(node.examples if node.examples else self.examples) 423 424 self.classVar = self.examples.domain.classVar 425 else: 426 self.classVar = None 427 428 def getBestMatchingNode(self, example): 429 """Return the best matching node for a given data instance 430 """ 431 example, c, w = orange.ExampleTable([example]).toNumpyMA() 432 vectors = self.map.vectors() 433 Dist = vectors - example 434 bmu = ma.argmin(ma.sum(Dist**2, 1)) 435 return list(self.map)[bmu] 436 437 def __call__(self, example, what=orange.GetValue): 438 bmu = self.getBestMatchingNode(example) 439 return bmu.classifier(example, what) 440 441 def __getattr__(self, name): 442 try: 443 return getattr(self.__dict__["map"], name) 444 except (KeyError, AttributeError): 445 raise AttributeError(name) 446 447 def __iter__(self): 448 """ Iterate over all nodes in the map 449 """ 450 return iter(self.map) 451 452 def __getitem__(self, val): 453 """ Return the node at position x, y 454 """ 455 return self.map.__getitem__(val) 456 457 ########################################################################## 458 # Supervised learning 459 475 460 class SOMSupervisedLearner(SOMLearner): 476 461 
"""SOMSupervisedLearner is a class used to learn SOM from orange.ExampleTable, by using the … … 480 465 481 466 :param data: class-labeled data set 482 :param progressCallback: a function to report on inference progress 467 :type data: :obj:`Orange.data.Table` 468 :param progressCallback: a one argument function to report on inference progress (in %) 483 469 """ 484 470 def __call__(self, examples, weightID=0, progressCallback=None): … … 500 486 return SOMMap(map, examples) 501 487 502 class SOMMap(orange.Classifier): 503 """Project the data onto the inferred self-organizing map. 504 505 :param map: a trained self-organizing map 506 :param data: the data to be mapped on the map 488 ########################################################################## 489 # Supporting Classes 490 491 class Node(object): 492 """An object holding the information about the node in the map. 493 494 .. attribute:: pos 495 496 Node position. 497 498 .. attribute:: referenceExample 499 500 Reference data instance (a prototype). 501 502 .. attribute:: examples 503 504 Data set with instances training instances that were mapped to the node. 
507 505 """ 508 509 def __init__(self, map=[], data=[]):506 def __init__(self, pos, map=None, vector=None): 507 self.pos = pos 510 508 self.map = map 511 self.examples = data 512 for node in map: 513 node.referenceExample = orange.Example(orange.Domain(self.examples.domain.attributes, False), 514 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \ 515 for var, value in zip(self.examples.domain.attributes, node.vector)]) 516 node.examples = orange.ExampleTable(self.examples.domain) 517 518 for ex in self.examples: 519 node = self.getBestMatchingNode(ex) 520 node.examples.append(ex) 521 522 if self.examples and self.examples.domain.classVar: 523 for node in self.map: 524 node.classifier = orange.MajorityLearner(node.examples if node.examples else self.examples) 525 526 self.classVar = self.examples.domain.classVar 509 self.vector = vector 510 511 class Map(object): 512 """Self organizing map (the structure). Includes methods for data initialization. 513 514 .. attribute:: map 515 516 Self orginzing map. A list of lists of :obj:`Node`. 517 518 .. attribute:: examples 519 520 Data set that was considered when optimizing the map. 521 """ 522 523 HexagonalTopology = HexagonalTopology 524 RectangularTopology = RectangularTopology 525 InitializeLinear = InitializeLinear 526 InitializeRandom = InitializeRandom 527 NeighbourhoodGaussian = NeighbourhoodGaussian 528 NeighbourhoodBubble = NeighbourhoodBubble 529 NeighbourhoodEpanechicov = NeighbourhoodEpanechicov 530 531 def __init__(self, map_shape=(20, 40), topology=HexagonalTopology): 532 self.map_shape = map_shape 533 self.topology = topology 534 self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])] 535 536 def __getitem__(self, pos): 537 """ Return the node at position x, y. 538 """ 539 x, y = pos 540 return self.map[x][y] 541 542 def __iter__(self): 543 """ Iterate over all nodes in the map. 
544 """ 545 for row in self.map: 546 for node in row: 547 yield node 548 549 def vectors(self): 550 """Return all vectors of the map as rows in an numpy.array. 551 """ 552 return numpy.array([node.vector for node in self]) 553 554 def unit_distances(self): 555 """Return a NxN numpy.array of internode distances (based on 556 node position in the map, not vector space) where N is the number of 557 nodes. 558 """ 559 nodes = list(self) 560 dist = numpy.zeros((len(nodes), len(nodes))) 561 562 coords = self.unit_coords() 563 for i in range(len(nodes)): 564 for j in range(len(nodes)): 565 dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j])) 566 return numpy.array(dist) 567 568 def unit_coords(self): 569 """ Return the unit coordinates of all nodes in the map as an numpy.array. 570 """ 571 nodes = list(self) 572 coords = numpy.zeros((len(nodes), len(self.map_shape))) 573 coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[0]) 574 coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1]) 575 576 ## in hexagonal topology we move every odd map row by 0.5 and multiply all by sqrt(0.75) 577 if self.topology == Map.HexagonalTopology: 578 ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2)) 579 coords[ind] = coords[ind] + 0.5 580 coords = coords * numpy.sqrt(0.75) 581 return coords 582 583 584 def initialize_map_random(self, data=None, dimension=5): 585 """Initialize the map nodes vectors randomly, by supplying 586 either training data or dimension of the data. 
587 """ 588 if data is not None: 589 min, max = ma.min(data, 0), ma.max(data, 0); 590 dimension = data.shape[1] 527 591 else: 528 self.classVar = None 529 530 def getBestMatchingNode(self, example): 531 """Return the best matching node for a given data instance 532 """ 533 example, c, w = orange.ExampleTable([example]).toNumpyMA() 534 vectors = self.map.vectors() 535 Dist = vectors - example 536 bmu = ma.argmin(ma.sum(Dist**2, 1)) 537 return list(self.map)[bmu] 538 539 def __call__(self, example, what=orange.GetValue): 540 bmu = self.getBestMatchingNode(example) 541 return bmu.classifier(example, what) 542 543 def __getattr__(self, name): 544 try: 545 return getattr(self.__dict__["map"], name) 546 except (KeyError, AttributeError): 547 raise AttributeError(name) 548 549 def __iter__(self): 550 """ Iterate over all nodes in the map 551 """ 552 return iter(self.map) 553 554 def __getitem__(self, val): 555 """ Return the node at position x, y 556 """ 557 return self.map.__getitem__(val) 592 min, max = numpy.zeros(dimension), numpy.ones(dimension) 593 for node in self: 594 # node.vector = min + numpy.random.rand(dimension) * (max - min) 595 node.vector = min + random.randint(0, dimension) * (max - min) 596 597 def initialize_map_linear(self, data, map_shape=(10, 20)): 598 """ Initialize the map node vectors linearly over the subspace 599 of the two most significant eigenvectors. 
600 """ 601 data = data.copy() #ma.array(data) 602 dim = data.shape[1] 603 mdim = len(map_shape) 604 munits = len(list(self)) 605 me = ma.mean(data, 0) 606 A = numpy.zeros((dim ,dim)) 607 608 for i in range(dim): 609 data[:, i] = data[:, i] - me[i] 610 611 for i in range(dim): 612 for j in range(dim): 613 c = data[:, i] * data[:, j] 614 A[i, j] = ma.sum(c) / len(c) 615 A[j, i] = A[i, j] 616 617 eigval, eigvec = numpy.linalg.eig(A) 618 ind = list(reversed(numpy.argsort(eigval))) 619 eigval = eigval[ind[:mdim]] 620 eigvec = eigvec[:, ind[:mdim]] 621 622 for i in range(mdim): 623 eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i]) 624 625 unit_coords = self.unit_coords() 626 for d in range(mdim): 627 max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d]) 628 unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min) 629 unit_coords = (unit_coords - 0.5) * 2 630 631 vectors = numpy.array([me for i in range(munits)]) 632 for i in range(munits): 633 for d in range(mdim): 634 vectors[i] = vectors[i] + unit_coords[i][d] * numpy.transpose(eigvec[:, d]) 635 636 for i, node in enumerate(self): 637 node.vector = vectors[i] 638 639 def getUMat(self): 640 return getUMat(self) 641 642 ########################################################################## 643 # Supporting functions 558 644 559 645 def getUMat(som): … … 609 695 return array 610 696 697 ########################################################################## 698 # Testing (deprecated, use regression tests instead 699 611 700 if __name__ == "__main__": 612 data = orange.ExampleTable(" doc//datasets//iris.tab") 701 data = orange.ExampleTable("iris.tab") 613 702 learner = SOMLearner() 614 703 learner = SOMLearner(batch_train=True, initialize=InitializeLinear, radius_ini=3, radius_fin=1, neighbourhood=Map.NeighbourhoodGaussian, epochs=1000)
Note: See TracChangeset
for help on using the changeset viewer.