Changeset 7337:c8511c1bf231 in orange


Ignore:
Timestamp:
02/03/11 19:42:20 (3 years ago)
Author:
blaz <blaz.zupan@…>
Branch:
default
Convert:
f3d035c06e77e9388cce2ed50f2a216612177914
Message:

added few more examples, descriptions of attributes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/projection/som.py

    r7281 r7337  
    2121 
    2222   import Orange 
    23    som = Orange.projection.som.SOMLearner(map_shape=(8, 8), initialize=Orange.projection.som.InitializeRandom) 
     23   som = Orange.projection.som.SOMLearner(map_shape=(8, 8),  
     24            initialize=Orange.projection.som.InitializeRandom) 
    2425   data = Orange.data.table("iris.tab") 
    2526   map = som(data) 
    2627 
     28.. autodata:: NeighbourhoodGaussian 
     29 
     30.. autodata:: HexagonalTopology 
     31 
     32.. autodata:: RectangularTopology 
     33 
     34.. autodata:: InitializeLinear 
     35 
     36.. autodata:: InitializeRandom 
     37 
     38.. autodata:: NeighbourhoodGaussian  
     39 
     40.. autodata:: NeighbourhoodBubble 
     41 
     42.. autodata:: NeighbourhoodEpanechicov  
     43 
    2744.. autoclass:: SOMLearner 
    2845   :members: 
     
    3047.. autoclass:: Solver 
    3148   :members: 
    32  
     49    
    3350.. autoclass:: SOMMap 
    3451   :members: 
     
    6178================== 
    6279 
    63 Class :obj:`Map` stores the self-organizing map composed of :obj:`Node` objects. 
     80Class :obj:`Map` stores the self-organizing map composed of :obj:`Node` objects. The code below 
     81(`som-node.py`_, uses `iris.tab`_) shows an example of how to access the information stored in the 
     82node of the map: 
     83 
     84.. literalinclude:: code/som-node.py 
     85    :lines: 7- 
    6486 
    6587.. autoclass:: Map 
     
    7395======== 
    7496 
    75 The following code  (`code/som-mapping.py`_, uses `iris.tab`_) infers self-organizing map from Iris data set. The map is rather small, and consists  
    76 of only 9 cells. We optimize the network, and then report how many data instances were mappped 
     97.. _som-mapping.py: code/som-mapping.py 
     98.. _som-node.py: code/som-node.py 
     99.. _iris.tab: code/iris.tab 
     100 
     101The following code (`som-mapping.py`_, uses `iris.tab`_) infers a self-organizing map from the Iris data set. The map is rather small, and consists 
     102of only 9 cells. We optimize the network, and then report how many data instances were mapped 
    77103into each cell. The second part of the code reports on data instances from one of the corner cells: 
    78104 
    79105.. literalinclude:: code/som-mapping.py 
     106    :lines: 7- 
    80107 
    81108The output of this code is:: 
     
    113140 
    114141HexagonalTopology = 0 
     142"""Hexagonal topology, cells are hexagon-shaped.""" 
    115143RectangularTopology = 1 
     144"""Rectangular topology, cells are square-shaped""" 
     145 
    116146InitializeLinear = 0 
     147"""Data instances are initially assigned to cells according to their two-dimensional PCA projection.""" 
    117148InitializeRandom = 1 
    118 NeighbourhoodGaussian = 0 
     149"""Data instances are initially randomly assigned to cells.""" 
     150 
     151NeighbourhoodGaussian = 0  
     152"""Gaussian (smoothed) neighborhood.""" 
    119153NeighbourhoodBubble = 1 
     154"""Bubble (crisp) neighborhood.""" 
    120155NeighbourhoodEpanechicov = 2 
    121  
    122 class Node(object): 
    123     """An object holding the information about the node in the map. 
    124     """ 
    125     def __init__(self, pos, map=None, vector=None): 
    126         self.pos = pos 
    127         self.map = map 
    128         self.vector = vector 
    129  
    130 class Map(object): 
    131     """Self organizing map (the structure). Includes methods for data initialization. 
    132     """ 
    133      
    134     HexagonalTopology = HexagonalTopology 
    135     RectangularTopology = RectangularTopology 
    136     InitializeLinear = InitializeLinear 
    137     InitializeRandom = InitializeRandom 
    138     NeighbourhoodGaussian = NeighbourhoodGaussian 
    139     NeighbourhoodBubble = NeighbourhoodBubble 
    140     NeighbourhoodEpanechicov = NeighbourhoodEpanechicov 
    141          
    142     def __init__(self, map_shape=(20, 40), topology=HexagonalTopology): 
    143         self.map_shape = map_shape 
    144         self.topology = topology 
    145         self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])] 
    146          
    147     def __getitem__(self, pos): 
    148         """ Return the node at position x, y. 
    149         """ 
    150         x, y = pos 
    151         return self.map[x][y] 
    152  
    153     def __iter__(self): 
    154         """ Iterate over all nodes in the map. 
    155         """ 
    156         for row in self.map: 
    157             for node in row: 
    158                 yield node 
    159  
    160     def vectors(self): 
    161         """Return all vectors of the map as rows in an numpy.array. 
    162         """ 
    163         return numpy.array([node.vector for node in self]) 
    164  
    165     def unit_distances(self): 
    166         """Return a NxN numpy.array of internode distances (based on 
    167         node position in the map, not vector space) where N is the number of 
    168         nodes. 
    169         """ 
    170         nodes = list(self) 
    171         dist = numpy.zeros((len(nodes), len(nodes))) 
    172  
    173         coords = self.unit_coords() 
    174         for i in range(len(nodes)): 
    175             for j in range(len(nodes)): 
    176                 dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j])) 
    177         return numpy.array(dist) 
    178  
    179     def unit_coords(self): 
    180         """ Return the unit coordinates of all nodes in the map as an numpy.array. 
    181         """ 
    182         nodes = list(self) 
    183         coords = numpy.zeros((len(nodes), len(self.map_shape))) 
    184         coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[0]) 
    185         coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1]) 
    186          
    187         ## in hexagonal topology we move every odd map row by 0.5 and multiply all by sqrt(0.75) 
    188         if self.topology == Map.HexagonalTopology: 
    189             ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2)) 
    190             coords[ind] = coords[ind] + 0.5 
    191             coords = coords * numpy.sqrt(0.75) 
    192         return coords 
    193  
    194  
    195     def initialize_map_random(self, data=None, dimension=5): 
    196         """Initialize the map nodes vectors randomly, by supplying 
    197         either training data or dimension of the data. 
    198         """ 
    199         if data is not None: 
    200             min, max = ma.min(data, 0), ma.max(data, 0); 
    201             dimension = data.shape[1] 
    202         else: 
    203             min, max = numpy.zeros(dimension), numpy.ones(dimension) 
    204         for node in self: 
    205 #            node.vector = min + numpy.random.rand(dimension) * (max - min) 
    206             node.vector = min + random.randint(0, dimension) * (max - min) 
    207  
    208     def initialize_map_linear(self, data, map_shape=(10, 20)): 
    209         """ Initialize the map node vectors linearly over the subspace 
    210         of the two most significant eigenvectors. 
    211         """ 
    212         data = data.copy() #ma.array(data) 
    213         dim = data.shape[1] 
    214         mdim = len(map_shape) 
    215         munits = len(list(self)) 
    216         me = ma.mean(data, 0) 
    217         A = numpy.zeros((dim ,dim)) 
    218  
    219         for i in range(dim): 
    220             data[:, i] = data[:, i] - me[i] 
    221          
    222         for i in range(dim): 
    223             for j in range(dim): 
    224                 c = data[:, i] * data[:, j] 
    225                 A[i, j] = ma.sum(c) / len(c) 
    226                 A[j, i] = A[i, j] 
    227  
    228         eigval, eigvec = numpy.linalg.eig(A) 
    229         ind = list(reversed(numpy.argsort(eigval))) 
    230         eigval = eigval[ind[:mdim]] 
    231         eigvec = eigvec[:, ind[:mdim]] 
    232  
    233         for i in range(mdim): 
    234             eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i]) 
    235  
    236         unit_coords = self.unit_coords() 
    237         for d in range(mdim): 
    238             max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d]) 
    239             unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min) 
    240         unit_coords = (unit_coords - 0.5) * 2 
    241  
    242         vectors = numpy.array([me for i in range(munits)]) 
    243         for i in range(munits): 
    244             for d in range(mdim): 
    245                 vectors[i] = vectors[i] +  unit_coords[i][d] * numpy.transpose(eigvec[:, d]) 
    246  
    247         for i, node in enumerate(self): 
    248             node.vector = vectors[i] 
    249  
    250     def getUMat(self): 
    251         return getUMat(self) 
    252          
     156"""Epanechicov (cut and smoothed) neighborhood.""" 
     157 
     158########################################################################## 
     159# Inference of Self-Organizing Maps  
     160 
    253161class Solver(object): 
    254162    """ SOM Solver class used to train the map. Supports batch and sequential training. 
    255163    Based on ideas from `SOM Toolkit for Matlab <http://www.cis.hut.fi/somtoolbox>`_. 
    256164 
    257     :param neighbourhood: neighborhood function (NeighbourhoodGaussian, or NeighbourhoodBubble) 
     165    :param neighbourhood: neighborhood function id 
     166    :type neighbourhood: :obj:`NeighbourhoodGaussian`, :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov` 
    258167    :param radius_ini: initial radius 
     168    :type radius_ini: int 
    259169    :param radius_fin: final radius 
     170    :type radius_fin: int 
    260171    :param epoch: number of training iterations 
     172    :type epoch: int 
    261173    :param batch_train: if True run the batch training algorithm (default), else use the sequential one 
     174    :type batch_train: bool 
    262175    :param learning_rate: learning rate for the sequential training algorithm 
     176    :type learning_rate: float 
    263177    """ 
    264178     
     
    284198             
    285199    def __call__(self, data, map, progressCallback=None): 
    286         """ Train the map from data. Set progressCallback function to report on the progress. 
     200        """ Train the map from data. Pass progressCallback function to report on the progress. 
    287201        """ 
    288202        self.data = data 
     
    413327         
    414328        self.vectors[nonzero] = S[nonzero] / A[nonzero] 
    415   
    416              
     329 
     330 
    417331class SOMLearner(orange.Learner): 
    418332    """An implementation of self-organizing map. Considers an input data set, projects the data  
     
    423337     
    424338    :param map_shape: dimension of the map 
    425     :param initialize: type of initialization (InitializeLinear or InitializeRandom), linear  
     339    :type map_shape: tuple 
     340    :param initialize: initialization type id; linear  
    426341      initialization assigns the data to the cells according to its position in two-dimensional 
    427342      principal component projection 
    428     :param topology: topology type (HexagonalTopology or RectangularTopology) 
    429     :param neighbourhood: cell neighborhood type (NeighbourhoodGaussian, NeighbourhoodBubble or NeighbourhoodEpanechicov), 
     343    :type initialize: :obj:`InitializeRandom` or :obj:`InitializeLinear` 
     344    :param topology: topology type id 
     345    :type topology: :obj:`HexagonalTopology` or :obj:`RectangularTopology` 
     346    :param neighbourhood: cell neighborhood type id 
     347    :type neighbourhood: :obj:`NeighbourhoodGaussian`, :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov` 
    430348    :param batch_train: perform batch training? 
     349    :type batch_train: bool 
    431350    :param learning_rate: learning rate 
     351    :type learning_rate: float 
    432352    :param radius_ini: initial radius 
     353    :type radius_ini: int 
    433354    :param radius_fin: final radius 
     355    :type radius_fin: int 
    434356    :param epochs: number of epochs (iterations of the training step) 
    435     :param solver: a class that executes the optimization algorithm 
     357    :type epochs: int 
     358    :param solver: a class with the optimization algorithm 
    436359    """ 
    437360     
     
    473396        return SOMMap(map, data) 
    474397 
     398class SOMMap(orange.Classifier): 
     399    """Project the data onto the inferred self-organizing map. 
     400     
     401    :param map: a trained self-organizing map 
     402    :type map: :obj:`Map` 
     403    :param data: the data to be mapped on the map 
     404    :type data: :obj:`Orange.data.Table` 
     405    """ 
     406     
     407    def __init__(self, map=[], data=[]): 
     408        self.map = map 
     409        self.examples = data 
     410        for node in map: 
     411            node.referenceExample = orange.Example(orange.Domain(self.examples.domain.attributes, False), 
     412                                                 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \ 
     413                                                  for var, value in zip(self.examples.domain.attributes, node.vector)]) 
     414            node.examples = orange.ExampleTable(self.examples.domain) 
     415 
     416        for ex in self.examples: 
     417            node = self.getBestMatchingNode(ex) 
     418            node.examples.append(ex) 
     419 
     420        if self.examples and self.examples.domain.classVar: 
     421            for node in self.map: 
     422                node.classifier = orange.MajorityLearner(node.examples if node.examples else self.examples) 
     423                      
     424            self.classVar = self.examples.domain.classVar 
     425        else: 
     426            self.classVar = None 
     427 
     428    def getBestMatchingNode(self, example): 
     429        """Return the best matching node for a given data instance 
     430        """ 
     431        example, c, w = orange.ExampleTable([example]).toNumpyMA() 
     432        vectors = self.map.vectors() 
     433        Dist = vectors - example 
     434        bmu = ma.argmin(ma.sum(Dist**2, 1)) 
     435        return list(self.map)[bmu] 
     436         
     437    def __call__(self, example, what=orange.GetValue): 
     438        bmu = self.getBestMatchingNode(example) 
     439        return bmu.classifier(example, what) 
     440 
     441    def __getattr__(self, name): 
     442        try: 
     443            return getattr(self.__dict__["map"], name) 
     444        except (KeyError, AttributeError): 
     445            raise AttributeError(name) 
     446 
     447    def __iter__(self): 
     448        """ Iterate over all nodes in the map 
     449        """ 
     450        return iter(self.map) 
     451 
     452    def __getitem__(self, val): 
     453        """ Return the node at position x, y 
     454        """ 
     455        return self.map.__getitem__(val) 
     456 
     457########################################################################## 
     458# Supervised learning 
     459 
    475460class SOMSupervisedLearner(SOMLearner): 
    476461    """SOMSupervisedLearner is a class used to learn SOM from orange.ExampleTable, by using the 
     
    480465     
    481466    :param data: class-labeled data set 
    482     :param progressCallback: a function to report on inference progress 
     467    :type data: :obj:`Orange.data.Table` 
     468    :param progressCallback: a one argument function to report on inference progress (in %) 
    483469    """ 
    484470    def __call__(self, examples, weightID=0, progressCallback=None): 
     
    500486        return SOMMap(map, examples) 
    501487 
    502 class SOMMap(orange.Classifier): 
    503     """Project the data onto the inferred self-organizing map. 
    504      
    505     :param map: a trained self-organizing map 
    506     :param data: the data to be mapped on the map 
     488########################################################################## 
     489# Supporting Classes  
     490 
     491class Node(object): 
     492    """An object holding the information about the node in the map. 
     493 
     494    .. attribute:: pos 
     495 
     496        Node position. 
     497 
     498    .. attribute:: referenceExample 
     499 
     500        Reference data instance (a prototype). 
     501         
     502    .. attribute:: examples 
     503     
     504        Data set with training instances that were mapped to the node. 
    507505    """ 
    508      
    509     def __init__(self, map=[], data=[]): 
     506    def __init__(self, pos, map=None, vector=None): 
     507        self.pos = pos 
    510508        self.map = map 
    511         self.examples = data 
    512         for node in map: 
    513             node.referenceExample = orange.Example(orange.Domain(self.examples.domain.attributes, False), 
    514                                                  [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \ 
    515                                                   for var, value in zip(self.examples.domain.attributes, node.vector)]) 
    516             node.examples = orange.ExampleTable(self.examples.domain) 
    517  
    518         for ex in self.examples: 
    519             node = self.getBestMatchingNode(ex) 
    520             node.examples.append(ex) 
    521  
    522         if self.examples and self.examples.domain.classVar: 
    523             for node in self.map: 
    524                 node.classifier = orange.MajorityLearner(node.examples if node.examples else self.examples) 
    525                       
    526             self.classVar = self.examples.domain.classVar 
     509        self.vector = vector 
     510 
     511class Map(object): 
     512    """Self organizing map (the structure). Includes methods for data initialization. 
     513     
     514    .. attribute:: map 
     515 
     516        Self-organizing map. A list of lists of :obj:`Node`. 
     517         
     518    .. attribute:: examples 
     519     
     520        Data set that was considered when optimizing the map. 
     521    """ 
     522     
     523    HexagonalTopology = HexagonalTopology 
     524    RectangularTopology = RectangularTopology 
     525    InitializeLinear = InitializeLinear 
     526    InitializeRandom = InitializeRandom 
     527    NeighbourhoodGaussian = NeighbourhoodGaussian 
     528    NeighbourhoodBubble = NeighbourhoodBubble 
     529    NeighbourhoodEpanechicov = NeighbourhoodEpanechicov 
     530         
     531    def __init__(self, map_shape=(20, 40), topology=HexagonalTopology): 
     532        self.map_shape = map_shape 
     533        self.topology = topology 
     534        self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])] 
     535         
     536    def __getitem__(self, pos): 
     537        """ Return the node at position x, y. 
     538        """ 
     539        x, y = pos 
     540        return self.map[x][y] 
     541 
     542    def __iter__(self): 
     543        """ Iterate over all nodes in the map. 
     544        """ 
     545        for row in self.map: 
     546            for node in row: 
     547                yield node 
     548 
     549    def vectors(self): 
     550        """Return all vectors of the map as rows in a numpy.array. 
     551        """ 
     552        return numpy.array([node.vector for node in self]) 
     553 
     554    def unit_distances(self): 
     555        """Return a NxN numpy.array of internode distances (based on 
     556        node position in the map, not vector space) where N is the number of 
     557        nodes. 
     558        """ 
     559        nodes = list(self) 
     560        dist = numpy.zeros((len(nodes), len(nodes))) 
     561 
     562        coords = self.unit_coords() 
     563        for i in range(len(nodes)): 
     564            for j in range(len(nodes)): 
     565                dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j])) 
     566        return numpy.array(dist) 
     567 
     568    def unit_coords(self): 
     569        """ Return the unit coordinates of all nodes in the map as a numpy.array. 
     570        """ 
     571        nodes = list(self) 
     572        coords = numpy.zeros((len(nodes), len(self.map_shape))) 
     573        coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[0]) 
     574        coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1]) 
     575         
     576        ## in hexagonal topology we move every odd map row by 0.5 and multiply all by sqrt(0.75) 
     577        if self.topology == Map.HexagonalTopology: 
     578            ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2)) 
     579            coords[ind] = coords[ind] + 0.5 
     580            coords = coords * numpy.sqrt(0.75) 
     581        return coords 
     582 
     583 
     584    def initialize_map_random(self, data=None, dimension=5): 
     585        """Initialize the map nodes vectors randomly, by supplying 
     586        either training data or dimension of the data. 
     587        """ 
     588        if data is not None: 
     589            min, max = ma.min(data, 0), ma.max(data, 0); 
     590            dimension = data.shape[1] 
    527591        else: 
    528             self.classVar = None 
    529  
    530     def getBestMatchingNode(self, example): 
    531         """Return the best matching node for a given data instance 
    532         """ 
    533         example, c, w = orange.ExampleTable([example]).toNumpyMA() 
    534         vectors = self.map.vectors() 
    535         Dist = vectors - example 
    536         bmu = ma.argmin(ma.sum(Dist**2, 1)) 
    537         return list(self.map)[bmu] 
    538          
    539     def __call__(self, example, what=orange.GetValue): 
    540         bmu = self.getBestMatchingNode(example) 
    541         return bmu.classifier(example, what) 
    542  
    543     def __getattr__(self, name): 
    544         try: 
    545             return getattr(self.__dict__["map"], name) 
    546         except (KeyError, AttributeError): 
    547             raise AttributeError(name) 
    548  
    549     def __iter__(self): 
    550         """ Iterate over all nodes in the map 
    551         """ 
    552         return iter(self.map) 
    553  
    554     def __getitem__(self, val): 
    555         """ Return the node at position x, y 
    556         """ 
    557         return self.map.__getitem__(val) 
     592            min, max = numpy.zeros(dimension), numpy.ones(dimension) 
     593        for node in self: 
     594#            node.vector = min + numpy.random.rand(dimension) * (max - min) 
     595            node.vector = min + random.randint(0, dimension) * (max - min) 
     596 
     597    def initialize_map_linear(self, data, map_shape=(10, 20)): 
     598        """ Initialize the map node vectors linearly over the subspace 
     599        of the two most significant eigenvectors. 
     600        """ 
     601        data = data.copy() #ma.array(data) 
     602        dim = data.shape[1] 
     603        mdim = len(map_shape) 
     604        munits = len(list(self)) 
     605        me = ma.mean(data, 0) 
     606        A = numpy.zeros((dim ,dim)) 
     607 
     608        for i in range(dim): 
     609            data[:, i] = data[:, i] - me[i] 
     610         
     611        for i in range(dim): 
     612            for j in range(dim): 
     613                c = data[:, i] * data[:, j] 
     614                A[i, j] = ma.sum(c) / len(c) 
     615                A[j, i] = A[i, j] 
     616 
     617        eigval, eigvec = numpy.linalg.eig(A) 
     618        ind = list(reversed(numpy.argsort(eigval))) 
     619        eigval = eigval[ind[:mdim]] 
     620        eigvec = eigvec[:, ind[:mdim]] 
     621 
     622        for i in range(mdim): 
     623            eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i]) 
     624 
     625        unit_coords = self.unit_coords() 
     626        for d in range(mdim): 
     627            max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d]) 
     628            unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min) 
     629        unit_coords = (unit_coords - 0.5) * 2 
     630 
     631        vectors = numpy.array([me for i in range(munits)]) 
     632        for i in range(munits): 
     633            for d in range(mdim): 
     634                vectors[i] = vectors[i] +  unit_coords[i][d] * numpy.transpose(eigvec[:, d]) 
     635 
     636        for i, node in enumerate(self): 
     637            node.vector = vectors[i] 
     638 
     639    def getUMat(self): 
     640        return getUMat(self) 
     641         
     642########################################################################## 
     643# Supporting functions  
    558644 
    559645def getUMat(som): 
     
    609695    return array 
    610696 
     697########################################################################## 
     698# Testing (deprecated, use regression tests instead)
     699 
    611700if __name__ == "__main__": 
    612     data = orange.ExampleTable("doc//datasets//iris.tab") 
     701    data = orange.ExampleTable("iris.tab") 
    613702    learner = SOMLearner() 
    614703    learner = SOMLearner(batch_train=True, initialize=InitializeLinear, radius_ini=3, radius_fin=1, neighbourhood=Map.NeighbourhoodGaussian, epochs=1000) 
Note: See TracChangeset for help on using the changeset viewer.