Changeset 7261:3c086359bd69 in orange


Ignore:
Timestamp:
02/02/11 22:46:36 (3 years ago)
Author:
blaz <blaz.zupan@…>
Branch:
default
Convert:
7ce2217c7738742d4adf7005a7012752bb66a435
Message:

with some more documentation

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/projection/som.py

    r7192 r7261  
     1""" 
     2 
     3.. index:: self-organizing map (SOM) 
     4 
     5.. index::  
     6   single: projection; self-organizing map (SOM) 
     7 
     8An implementation of the `self-organizing map <http://en.wikipedia.org/wiki/Self-organizing_map>`_ algorithm (SOM).  
     9SOM is an unsupervised learning  
     10algorithm that infers a low-dimensional, typically two-dimensional, discretized representation of the input space, 
     11called a map. The map preserves topological properties of the input space, such that 
     12the cells that are close in the map include data instances that are similar to each other. 
     13 
     14================================= 
     15Inference of Self-Organizing Maps 
     16================================= 
     17 
     18The main class for inference of self-organizing maps is :obj:`SOMLearner`. The class initializes 
     19the topology of the map and returns an inference object which, given the data, performs the  
     20optimization of the map::  
     21 
     22   import Orange 
     23   som = Orange.projection.som.SOMLearner(map_shape=(10, 20), initialize=Orange.projection.som.InitializeRandom) 
     24   data = Orange.data.table("iris.tab") 
     25   map = som(data) 
     26 
     27.. autoclass:: SOMLearner 
     28   :members: 
     29 
     30.. autoclass:: Solver 
     31   :members: 
     32 
     33.. autoclass:: SOMMap 
     34   :members: 
     35 
     36============================================= 
     37Supervised Learning with Self-Organizing Maps 
     38============================================= 
     39 
     40.. autoclass:: SOMSupervisedLearner 
     41   :members: 
     42    
     43================== 
     44Supporting Classes 
     45================== 
     46 
     47.. autoclass:: Map 
     48   :members: 
     49    
     50.. autoclass:: Node 
     51   :members: 
     52  
     53======== 
     54Examples 
     55======== 
     56 
     57The following code runs k-means clustering and prints out the cluster indexes for the last 10 data instances (`kmeans-run.py`_, uses `iris.tab`_): 
     58 
     59.. literalinclude:: code/som1.py 
     60 
     61The output of this code is:: 
     62 
     63    [1, 1, 2, 1, 1, 1, 2, 1, 1, 2] 
     64 
     65""" 
     66 
    167import sys, os 
    268 
     
    1480 
    1581class Node(object): 
     82    """An object holding the information about the node in the map. 
     83    """ 
    1684    def __init__(self, pos, map=None, vector=None): 
    1785        self.pos = pos 
     
    2088 
    2189class Map(object): 
     90    """Self organizing map (the structure). Includes methods for data initialization. 
     91    """ 
     92     
    2293    HexagonalTopology = HexagonalTopology 
    2394    RectangularTopology = RectangularTopology 
     
    34105         
    35106    def __getitem__(self, pos): 
    36         """ Return the node at position x, y 
     107        """ Return the node at position x, y. 
    37108        """ 
    38109        x, y = pos 
     
    40111 
    41112    def __iter__(self): 
    42         """ Iterate over all nodes in the map 
     113        """ Iterate over all nodes in the map. 
    43114        """ 
    44115        for row in self.map: 
     
    47118 
    48119    def vectors(self): 
    49         """ Return all vectors of the map as rows in an numpy.array 
     120        """Return all vectors of the map as rows in a numpy.array. 
    50121        """ 
    51122        return numpy.array([node.vector for node in self]) 
    52123 
    53124    def unit_distances(self): 
    54         """ Return a NxN numpy.array of internode distances (based on 
     125        """Return an NxN numpy.array of internode distances (based on 
    55126        node position in the map, not vector space) where N is the number of 
    56         nodes 
     127        nodes. 
    57128        """ 
    58129        nodes = list(self) 
     
    66137 
    67138    def unit_coords(self): 
    68         """ Return the unit coordinates of all nodes in the map as an numpy.array 
     139        """ Return the unit coordinates of all nodes in the map as a numpy.array. 
    69140        """ 
    70141        nodes = list(self) 
     
    82153 
    83154    def initialize_map_random(self, data=None, dimension=5): 
    84         """ Initialize the map nodes vectors randomly, by supplying 
    85         either training data or dimension of the data 
     155        """Initialize the map node vectors randomly, by supplying 
     156        either training data or dimension of the data. 
    86157        """ 
    87158        if data is not None: 
     
    94165 
    95166    def initialize_map_linear(self, data, map_shape=(10, 20)): 
    96         """ Initialize the map node vectors lineary over the subspace 
    97         of the two most significant eigenvectors 
     167        """ Initialize the map node vectors linearly over the subspace 
     168        of the two most significant eigenvectors. 
    98169        """ 
    99170        data = data.copy() #ma.array(data) 
     
    120191        for i in range(mdim): 
    121192            eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i]) 
    122 ##        print eigvec, eigval 
    123193 
    124194        unit_coords = self.unit_coords() 
     
    140210         
    141211class Solver(object): 
    142     """ SOM Solver class used to train the map. 
    143     Arguments: 
    144         * neighbourhood - Neighbourhood function (NeighbourhoodGaussian, or NeighbourhoodBubble) 
    145         * radius_ini    - Inttial radius 
    146         * raduis_fin    - Final radius 
    147         * epoch         - Number of training iterations 
    148         * batch_train   - If True run the batch training algorithem (default), else use the sequential one 
    149         * learning_rate - If learning rate for the sequential training algorithem 
    150  
    151     Both the batch ans sequential algorithems are based on SOM Toolkit for Matlab 
     212    """ SOM Solver class used to train the map. Supports batch and sequential training. 
     213    Based on ideas from `SOM Toolkit for Matlab <http://www.cis.hut.fi/somtoolbox>`_. 
     214 
     215    :param neighbourhood: neighborhood function (NeighbourhoodGaussian, or NeighbourhoodBubble) 
     216    :param radius_ini: initial radius 
     217    :param radius_fin: final radius 
     218    :param epochs: number of training iterations 
     219    :param batch_train: if True run the batch training algorithm (default), else use the sequential one 
     220    :param learning_rate: learning rate for the sequential training algorithm 
    152221    """ 
     222     
    153223    def __init__(self, **kwargs): 
    154224        self.neighbourhood = NeighbourhoodGaussian 
     
    167237 
    168238    def alpha(self, epoch): 
     239        """Compute the learning rate for the given epoch, decreasing from learning_rate at the start to 0 at the end of training.  
     240        """ 
    169241        return (1 - epoch/self.epochs)*self.learning_rate 
    170242             
    171243    def __call__(self, data, map, progressCallback=None): 
    172         """ Train the map on data. Use progressCallback to Report on the progress. 
     244        """ Train the map from data. Set progressCallback function to report on the progress. 
    173245        """ 
    174246        self.data = data 
     
    184256 
    185257    def train_sequential(self, progressCallback): 
     258        """Sequential training algorithm.  
     259        """ 
    186260        self.vectors = self.map.vectors() 
    187261        self.unit_distances = self.map.unit_distances() 
     
    211285 
    212286    def train_step_sequential(self, epoch, indices=None): 
     287        """A single step of sequential training algorithm. 
     288        """ 
    213289        indices = range(len(self.data)) if indices == None else indices 
    214290        for ind in indices: 
     
    235311 
    236312    def train_batch(self, progressCallback=None): 
     313        """Batch training algorithm. 
     314        """ 
     315         
    237316        self.unit_distances = self.map.unit_distances() 
    238317        self.constant_matrix = 2 * ma.dot(numpy.eye(self.data.shape[1]), numpy.transpose(self.data)) 
     
    261340 
    262341    def train_step_batch(self, epoch): 
     342        """A single step of batch training algorithm. 
     343        """ 
    263344        D1 = ma.dot(self.vectors**2, self.weight_matrix) 
    264345        D2 = ma.dot(self.vectors, self.constant_matrix) 
     
    293374             
    294375class SOMLearner(orange.Learner): 
    295     """ SOMLearner is a class used to learn SOM from orange.ExampleTable 
    296  
    297     Example: 
    298         >>> som = orngSOM.SOMLearner(map_shape=(10, 20), initialize=orngSOM.InitializeRandom) 
    299         >>> map = som(orange.ExampleTable("iris.tab")) 
     376    """An implementation of a self-organizing map. Considers an input data set, projects the data  
     377    instances onto a map, and returns a result in the form of a classifier holding projection 
     378    information together with an algorithm to project new data instances. Uses :obj:`Map` for 
     379    representation of projection space, :obj:`Solver` for training, and returns a trained  
     380    map with information on projection of the training data as crafted by :obj:`SOMMap`. 
     381     
     382    :param map_shape: dimension of the map 
     383    :param initialize: type of initialization (InitializeLinear or InitializeRandom), linear  
     384      initialization assigns the data to the cells according to its position in two-dimensional 
     385      principal component projection 
     386    :param topology: topology type (HexagonalTopology or RectangularTopology) 
     387    :param neighbourhood: cell neighborhood type (NeighbourhoodGaussian, NeighbourhoodBubble or NeighbourhoodEpanechicov), 
     388    :param batch_train: perform batch training? 
     389    :param learning_rate: learning rate 
     390    :param radius_ini: initial radius 
     391    :param radius_fin: final radius 
     392    :param epochs: number of epochs (iterations of training steps) 
     393    :param solver: a class that executes the optimization algorithm 
    300394    """ 
    301395     
     
    309403         
    310404    def __init__(self, map_shape=(5, 10), initialize=InitializeLinear, topology=HexagonalTopology, neighbourhood=NeighbourhoodGaussian, 
    311                  batch_train=True, learning_rate=0.05, radius_ini=3.0, radius_fin=1.0, epochs=1000, **kwargs): 
     405                 batch_train=True, learning_rate=0.05, radius_ini=3.0, radius_fin=1.0, epochs=1000, solver=Solver, **kwargs): 
     406 
    312407        self.map_shape = map_shape 
    313408        self.initialize = initialize 
     
    319414        self.radius_fin = radius_fin 
    320415        self.epochs = epochs 
     416        self.solver = solver 
    321417        self.eps = 1e-4 
    322418         
    323419        orange.Learner.__init__(self, **kwargs) 
    324420         
    325     def __call__(self, examples, weightID=0, progressCallback=None): 
    326         data, classes, w = examples.toNumpyMA() 
     421    def __call__(self, data, weightID=0, progressCallback=None): 
     422        numdata, classes, w = data.toNumpyMA() 
    327423        map = Map(self.map_shape, topology=self.topology) 
    328424        if self.initialize == Map.InitializeLinear: 
    329             map.initialize_map_linear(data) 
    330         else: 
    331             map.initialize_map_random(data) 
    332         map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood, 
     425            map.initialize_map_linear(numdata) 
     426        else: 
     427            map.initialize_map_random(numdata) 
     428        map = self.solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood, 
    333429                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate, 
    334                      epochs=self.epochs)(data, map, progressCallback=progressCallback) 
    335         return SOMMap(map, examples) 
     430                     epochs=self.epochs)(numdata, map, progressCallback=progressCallback) 
     431        return SOMMap(map, data) 
    336432 
    337433class SOMSupervisedLearner(SOMLearner): 
    338     """ SOMSupervisedLearner is a class used to learn SOM from orange.ExampleTable, by using the 
     434    """SOMSupervisedLearner is a class used to learn SOM from orange.ExampleTable, by using the 
    339435    class information in the learning process. This is achieved by adding a value for each class 
    340436    to the training instances, where 1.0 signals class membership and all other values are 0.0. 
    341437    After the training, the new values are discarded from the node vectors. 
     438     
     439    :param data: class-labeled data set 
     440    :param progressCallback: a function to report on inference progress 
    342441    """ 
    343442    def __call__(self, examples, weightID=0, progressCallback=None): 
     
    360459 
    361460class SOMMap(orange.Classifier): 
    362     def __init__(self, map=[], examples=[]): 
     461    """Project the data onto the inferred self-organizing map. 
     462     
     463    :param map: a trained self-organizing map 
     464    :param data: the data to be mapped on the map 
     465    """ 
     466     
     467    def __init__(self, map=[], data=[]): 
    363468        self.map = map 
    364         self.examples = examples 
     469        self.examples = data 
    365470        for node in map: 
    366             node.referenceExample = orange.Example(orange.Domain(examples.domain.attributes, False), 
     471            node.referenceExample = orange.Example(orange.Domain(self.examples.domain.attributes, False), 
    367472                                                 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \ 
    368                                                   for var, value in zip(examples.domain.attributes, node.vector)]) 
    369             node.examples = orange.ExampleTable(examples.domain) 
    370  
    371         for ex in examples: 
     473                                                  for var, value in zip(self.examples.domain.attributes, node.vector)]) 
     474            node.examples = orange.ExampleTable(self.examples.domain) 
     475 
     476        for ex in self.examples: 
    372477            node = self.getBestMatchingNode(ex) 
    373478            node.examples.append(ex) 
    374479 
    375         if examples and examples.domain.classVar: 
     480        if self.examples and self.examples.domain.classVar: 
    376481            for node in self.map: 
    377                 node.classifier = orange.MajorityLearner(node.examples if node.examples else examples) 
     482                node.classifier = orange.MajorityLearner(node.examples if node.examples else self.examples) 
    378483                      
    379             self.classVar = examples.domain.classVar 
     484            self.classVar = self.examples.domain.classVar 
    380485        else: 
    381486            self.classVar = None 
    382487 
    383488    def getBestMatchingNode(self, example): 
    384         """ Return the best matching node 
     489        """Return the best matching node for a given data instance. 
    385490        """ 
    386491        example, c, w = orange.ExampleTable([example]).toNumpyMA() 
Note: See TracChangeset for help on using the changeset viewer.