Changeset 7348:a36ebe30d7be in orange


Ignore:
Timestamp:
02/03/11 22:48:59 (3 years ago)
Author:
blaz <blaz.zupan@…>
Branch:
default
Convert:
dc672fdeb54866b79ffa5b7ab944329660083f1b
Message:

cosmetic update (some empty lines removed)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/orngSOM.py

    r7093 r7348  
    1 import sys, os 
    2  
    3 import numpy 
    4 import numpy.ma as ma 
    5 import orange 
    6 ##import math 
    7  
    8 ##from numpy import dot, abs, sqrt, random, array 
    9  
    10 HexagonalTopology = 0 
    11 RectangularTopology = 1 
    12 InitializeLinear = 0 
    13 InitializeRandom = 1 
    14 NeighbourhoodGaussian = 0 
    15 NeighbourhoodBubble = 1 
    16 NeighbourhoodEpanechicov = 2 
    17  
    18 class Node(object): 
    19     def __init__(self, pos, map=None, vector=None): 
    20         self.pos = pos 
    21         self.map = map 
    22         self.vector = vector 
    23  
    24 class Map(object): 
    25     HexagonalTopology = HexagonalTopology 
    26     RectangularTopology = RectangularTopology 
    27     InitializeLinear = InitializeLinear 
    28     InitializeRandom = InitializeRandom 
    29     NeighbourhoodGaussian = NeighbourhoodGaussian 
    30     NeighbourhoodBubble = NeighbourhoodBubble 
    31     NeighbourhoodEpanechicov = NeighbourhoodEpanechicov 
    32          
    33     def __init__(self, map_shape=(20, 40), topology=HexagonalTopology): 
    34         self.map_shape = map_shape 
    35         self.topology = topology 
    36         self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])] 
    37          
    38     def __getitem__(self, pos): 
    39         """ Return the node at position x, y 
    40         """ 
    41         x, y = pos 
    42         return self.map[x][y] 
    43  
    44     def __iter__(self): 
    45         """ Iterate over all nodes in the map 
    46         """ 
    47         for row in self.map: 
    48             for node in row: 
    49                 yield node 
    50  
    51     def vectors(self): 
    52         """ Return all vectors of the map as rows in an numpy.array 
    53         """ 
    54         return numpy.array([node.vector for node in self]) 
    55  
    56     def unit_distances(self): 
    57         """ Return a NxN numpy.array of internode distances (based on 
    58         node position in the map, not vector space) where N is the number of 
    59         nodes 
    60         """ 
    61         nodes = list(self) 
    62         dist = numpy.zeros((len(nodes), len(nodes))) 
    63  
    64         coords = self.unit_coords() 
    65         for i in range(len(nodes)): 
    66             for j in range(len(nodes)): 
    67                 dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j])) 
    68         return numpy.array(dist) 
    69  
    70     def unit_coords(self): 
    71         """ Return the unit coordinates of all nodes in the map as an numpy.array 
    72         """ 
    73         nodes = list(self) 
    74         coords = numpy.zeros((len(nodes), len(self.map_shape))) 
    75         coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[0]) 
    76         coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1]) 
    77          
    78         ## in hexagonal topology we move every odd map row by 0.5 and multiply all by sqrt(0.75) 
    79         if self.topology == Map.HexagonalTopology: 
    80             ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2)) 
    81             coords[ind] = coords[ind] + 0.5 
    82             coords = coords * numpy.sqrt(0.75) 
    83         return coords 
    84  
    85  
    86     def initialize_map_random(self, data=None, dimension=5): 
    87         """ Initialize the map nodes vectors randomly, by supplying 
    88         either training data or dimension of the data 
    89         """ 
    90         if data is not None: 
    91             min, max = ma.min(data, 0), ma.max(data, 0); 
    92             dimension = data.shape[1] 
    93         else: 
    94             min, max = numpy.zeros(dimension), numpy.ones(dimension) 
    95         for node in self: 
    96             node.vector = min + numpy.random.rand(dimension) * (max - min) 
    97  
    98     def initialize_map_linear(self, data, map_shape=(10, 20)): 
    99         """ Initialize the map node vectors lineary over the subspace 
    100         of the two most significant eigenvectors 
    101         """ 
    102         data = data.copy() #ma.array(data) 
    103         dim = data.shape[1] 
    104         mdim = len(map_shape) 
    105         munits = len(list(self)) 
    106         me = ma.mean(data, 0) 
    107         A = numpy.zeros((dim ,dim)) 
    108  
    109         for i in range(dim): 
    110             data[:, i] = data[:, i] - me[i] 
    111          
    112         for i in range(dim): 
    113             for j in range(dim): 
    114                 c = data[:, i] * data[:, j] 
    115                 A[i, j] = ma.sum(c) / len(c) 
    116                 A[j, i] = A[i, j] 
    117  
    118         eigval, eigvec = numpy.linalg.eig(A) 
    119         ind = list(reversed(numpy.argsort(eigval))) 
    120         eigval = eigval[ind[:mdim]] 
    121         eigvec = eigvec[:, ind[:mdim]] 
    122  
    123         for i in range(mdim): 
    124             eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i]) 
    125 ##        print eigvec, eigval 
    126  
    127         unit_coords = self.unit_coords() 
    128         for d in range(mdim): 
    129             max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d]) 
    130             unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min) 
    131         unit_coords = (unit_coords - 0.5) * 2 
    132  
    133         vectors = numpy.array([me for i in range(munits)]) 
    134         for i in range(munits): 
    135             for d in range(mdim): 
    136                 vectors[i] = vectors[i] +  unit_coords[i][d] * numpy.transpose(eigvec[:, d]) 
    137  
    138         for i, node in enumerate(self): 
    139             node.vector = vectors[i] 
    140  
    141     def getUMat(self): 
    142         return getUMat(self) 
    143          
    144 class Solver(object): 
    145     """ SOM Solver class used to train the map. 
    146     Arguments: 
    147         * neighbourhood - Neighbourhood function (NeighbourhoodGaussian, or NeighbourhoodBubble) 
    148         * radius_ini    - Inttial radius 
    149         * raduis_fin    - Final radius 
    150         * epoch         - Number of training iterations 
    151         * batch_train   - If True run the batch training algorithem (default), else use the sequential one 
    152         * learning_rate - If learning rate for the sequential training algorithem 
    153  
    154     Both the batch ans sequential algorithems are based on SOM Toolkit for Matlab 
    155     """ 
    156     def __init__(self, **kwargs): 
    157         self.neighbourhood = NeighbourhoodGaussian 
    158         self.learning_rate = 0.05 
    159         self.radius_ini = 2 
    160         self.radius_fin = 1 
    161         self.epochs = 100 
    162         self.random_order = False 
    163         self.batch_train = True 
    164         self.eps = 1e-5 
    165         self.qerror = [] 
    166         self.__dict__.update(kwargs) 
    167  
    168     def radius(self, epoch): 
    169         return self.radius_ini - (float(self.radius_ini) - self.radius_fin)*(float(epoch) / self.epochs) 
    170  
    171     def alpha(self, epoch): 
    172         return (1 - epoch/self.epochs)*self.learning_rate 
    173              
    174     def __call__(self, data, map, progressCallback=None): 
    175         """ Train the map on data. Use progressCallback to Report on the progress. 
    176         """ 
    177         self.data = data 
    178         self.map = map 
    179  
    180         self.qerror = [] 
    181         self.bmu_cache = {} 
    182         if self.batch_train: 
    183             self.train_batch(progressCallback) 
    184         else: 
    185             self.train_sequential(progressCallback) 
    186         return self.map 
    187  
    188     def train_sequential(self, progressCallback): 
    189         self.vectors = self.map.vectors() 
    190         self.unit_distances = self.map.unit_distances() 
    191          
    192 ##        from pylab import plot, show, draw, ion 
    193 ##        ion() 
    194 ##        plot(self.data[:, 0], self.data[:, 1], "ro") 
    195 ##        vec_plot = plot(self.vectors[:, 0], self.vectors[:, 1], "bo")[0] 
    196          
    197         for epoch in range(self.epochs): 
    198             self.distances = [] 
    199             ind = range(len(self.data)) 
    200             if self.random_order: 
    201                 random.shuffle(ind) 
    202             self.train_step_sequential(epoch, ind) 
    203             if progressCallback: 
    204                 progressCallback(100.0*epoch/self.epochs) 
    205             self.qerror.append(numpy.mean(numpy.sqrt(self.distances))) 
    206 ##            print epoch, "q error:", numpy.mean(numpy.sqrt(self.distances)), self.radius(epoch) 
    207             if epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps: 
    208                 break 
    209              
    210 ##            vec_plot.set_xdata(self.vectors[:, 0]) 
    211 ##            vec_plot.set_ydata(self.vectors[:, 1]) 
    212 ##            draw() 
    213 ##        show() 
    214  
    215     def train_step_sequential(self, epoch, indices=None): 
    216         indices = range(len(self.data)) if indices == None else indices 
    217         for ind in indices: 
    218             x = self.data[ind] 
    219             Dx = self.vectors - self.data[ind] 
    220             Dist = ma.sum(Dx**2, 1) 
    221             min_dist = ma.min(Dist) 
    222             bmu = ma.argmin(Dist) 
    223             self.distances.append(min_dist) 
    224  
    225             if self.neighbourhood == Map.NeighbourhoodGaussian: 
    226                 h = numpy.exp(-self.unit_distances[:, bmu]/(2*self.radius(epoch))) * (self.unit_distances[:, bmu] <= self.radius(epoch)) 
    227             elif self.neighbourhood == Map.NeighbourhoodEpanechicov: 
    228                 h = 1.0 - (self.unit_distances[:bmu]/self.radius(epoch))**2 
    229                 h = h * (h >= 0.0) 
    230             else: 
    231                 h = 1.0*(self.unit_distances[:, bmu] <= self.radius(epoch)) 
    232             h = h * self.alpha(epoch) 
    233  
    234             nonzero = ma.nonzero(h) 
    235             h = h[nonzero] 
    236  
    237             self.vectors[nonzero] = self.vectors[nonzero] - Dx[nonzero] * numpy.reshape(h, (len(h), 1)) 
    238  
    239     def train_batch(self, progressCallback=None): 
    240         self.unit_distances = self.map.unit_distances() 
    241         self.constant_matrix = 2 * ma.dot(numpy.eye(self.data.shape[1]), numpy.transpose(self.data)) 
    242         self.dist_cons = numpy.transpose(ma.dot(self.data**2, numpy.ones(self.data.shape[1]))) 
    243         self.weight_matrix = numpy.ones((self.data.shape[1], self.data.shape[0])) 
    244         self.vectors = self.map.vectors() 
    245          
    246 ##        from pylab import plot, show, draw, ion 
    247 ##        ion() 
    248 ##        plot(self.data[:, 0], self.data[:, 1], "ro") 
    249 ##        vec_plot = plot(self.vectors[:, 0], self.vectors[:, 1], "bo")[0] 
    250          
    251         for epoch in range(self.epochs): 
    252             self.train_step_batch(epoch) 
    253             if progressCallback: 
    254                 progressCallback(100.0*epoch/self.epochs) 
    255             if False and epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps: 
    256                 break 
    257 ##            vec_plot.set_xdata(self.vectors[:, 0]) 
    258 ##            vec_plot.set_ydata(self.vectors[:, 1]) 
    259 ##            draw() 
    260 ##        show() 
    261          
    262         for node, vector in zip(self.map, self.vectors): 
    263             node.vector = vector 
    264  
    265     def train_step_batch(self, epoch): 
    266         D1 = ma.dot(self.vectors**2, self.weight_matrix) 
    267         D2 = ma.dot(self.vectors, self.constant_matrix) 
    268         Dist = D1 - D2 
    269  
    270         best_nodes = ma.argmin(Dist, 0) 
    271         distances = ma.min(Dist, 0) 
    272 ##        print "q error:", ma.mean(ma.sqrt(distances + self.dist_cons)), self.radius(epoch) 
    273         self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons))) 
    274  
    275         if self.neighbourhood == Map.NeighbourhoodGaussian:         
    276             H = numpy.exp(-self.unit_distances/(2*self.radius(epoch))) * (self.unit_distances <= self.radius(epoch)) 
    277         elif self.neighbourhood == Map.NeighbourhoodEpanechicov: 
    278             H = 1.0 - (self.unit_distances/self.radius(epoch))**2 
    279             H = H * (H >= 0.0) 
    280         else: 
    281             H = 1.0*(self.unit_distances <= self.radius(epoch)) 
    282  
    283         P =  numpy.zeros((self.vectors.shape[0], self.data.shape[0])) 
    284          
    285         P[(best_nodes, range(len(best_nodes)))] = numpy.ones(len(best_nodes)) 
    286          
    287         S = ma.dot(H, ma.dot(P, self.data)) 
    288          
    289         A = ma.dot(H, ma.dot(P, ~self.data._mask)) 
    290  
    291 ##        nonzero = (range(epoch%2, len(self.vectors), 2), ) 
    292         nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), ) 
    293          
    294         self.vectors[nonzero] = S[nonzero] / A[nonzero] 
    295   
    296              
    297 class SOMLearner(orange.Learner): 
    298     """ SOMLearner is a class used to learn SOM from orange.ExampleTable 
    299  
    300     Example: 
    301         >>> som = orngSOM.SOMLearner(map_shape=(10, 20), initialize=orngSOM.InitializeRandom) 
    302         >>> map = som(orange.ExampleTable("iris.tab")) 
    303     """ 
    304      
    305     def __new__(cls, examples=None, weightId=0, **kwargs): 
    306         self = orange.Learner.__new__(cls, **kwargs) 
    307         if examples is not None: 
    308             self.__init__(**kwargs) 
    309             return self.__call__(examples, weightId) 
    310         else: 
    311             return self 
    312          
    313     def __init__(self, map_shape=(5, 10), initialize=InitializeLinear, topology=HexagonalTopology, neighbourhood=NeighbourhoodGaussian, 
    314                  batch_train=True, learning_rate=0.05, radius_ini=3.0, radius_fin=1.0, epochs=1000, **kwargs): 
    315         self.map_shape = map_shape 
    316         self.initialize = initialize 
    317         self.topology = topology 
    318         self.neighbourhood = neighbourhood 
    319         self.batch_train = batch_train 
    320         self.learning_rate = learning_rate 
    321         self.radius_ini = radius_ini 
    322         self.radius_fin = radius_fin 
    323         self.epochs = epochs 
    324         self.eps = 1e-4 
    325          
    326         orange.Learner.__init__(self, **kwargs) 
    327          
    328     def __call__(self, examples, weightID=0, progressCallback=None): 
    329         data, classes, w = examples.toNumpyMA() 
    330         map = Map(self.map_shape, topology=self.topology) 
    331         if self.initialize == Map.InitializeLinear: 
    332             map.initialize_map_linear(data) 
    333         else: 
    334             map.initialize_map_random(data) 
    335         map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood, 
    336                      radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate, 
    337                      epochs=self.epochs)(data, map, progressCallback=progressCallback) 
    338         return SOMMap(map, examples) 
    339  
    340 class SOMSupervisedLearner(SOMLearner): 
    341     """ SOMSupervisedLearner is a class used to learn SOM from orange.ExampleTable, by using the 
    342     class information in the learning process. This is achieved by adding a value for each class 
    343     to the training instances, where 1.0 signals class membership and all other values are 0.0. 
    344     After the training, the new values are discarded from the node vectors. 
    345     """ 
    346     def __call__(self, examples, weightID=0, progressCallback=None): 
    347         data, classes, w = examples.toNumpyMA() 
    348         nval = len(examples.domain.classVar.values) 
    349         ext = ma.zeros((len(data), nval)) 
    350         ext[([i for i, m in enumerate(classes.mask) if m], [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0 
    351         data = ma.hstack((data, ext)) 
    352         map = Map(self.map_shape, topology=self.topology) 
    353         if self.initialize == Map.InitializeLinear: 
    354             map.initialize_map_linear(data) 
    355         else: 
    356             map.initialize_map_random(data) 
    357         map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood, 
    358                      radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate, 
    359                      epoch=self.epochs)(data, map, progressCallback=progressCallback) 
    360         for node in map: 
    361             node.vector = node.vector[:-nval] 
    362         return SOMMap(map, examples) 
    363  
    364 class SOMMap(orange.Classifier): 
    365     def __init__(self, map=[], examples=[]): 
    366         self.map = map 
    367         self.examples = examples 
    368         for node in map: 
    369             node.referenceExample = orange.Example(orange.Domain(examples.domain.attributes, False), 
    370                                                  [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \ 
    371                                                   for var, value in zip(examples.domain.attributes, node.vector)]) 
    372             node.examples = orange.ExampleTable(examples.domain) 
    373  
    374         for ex in examples: 
    375             node = self.getBestMatchingNode(ex) 
    376             node.examples.append(ex) 
    377  
    378         if examples and examples.domain.classVar: 
    379             for node in self.map: 
    380                 node.classifier = orange.MajorityLearner(node.examples if node.examples else examples) 
    381                       
    382             self.classVar = examples.domain.classVar 
    383         else: 
    384             self.classVar = None 
    385  
    386     def getBestMatchingNode(self, example): 
    387         """ Return the best matching node 
    388         """ 
    389         example, c, w = orange.ExampleTable([example]).toNumpyMA() 
    390         vectors = self.map.vectors() 
    391         Dist = vectors - example 
    392         bmu = ma.argmin(ma.sum(Dist**2, 1)) 
    393         return list(self.map)[bmu] 
    394          
    395     def __call__(self, example, what=orange.GetValue): 
    396         bmu = self.getBestMatchingNode(example) 
    397         return bmu.classifier(example, what) 
    398  
    399     def __getattr__(self, name): 
    400         try: 
    401             return getattr(self.__dict__["map"], name) 
    402         except (KeyError, AttributeError): 
    403             raise AttributeError(name) 
    404  
    405     def __iter__(self): 
    406         """ Iterate over all nodes in the map 
    407         """ 
    408         return iter(self.map) 
    409  
    410     def __getitem__(self, val): 
    411         """ Return the node at position x, y 
    412         """ 
    413         return self.map.__getitem__(val) 
    414  
    415 def getUMat(som): 
    416     dim1=som.map_shape[0]*2-1 
    417     dim2=som.map_shape[1]*2-1 
    418  
    419     a=numpy.zeros((dim1, dim2)) 
    420     if som.topology == HexagonalTopology: 
    421         return __fillHex(a, som) 
    422     else: 
    423         return __fillRect(a, som) 
    424  
    425 def __fillHex(array, som): 
    426     xDim, yDim = som.map_shape 
    427 ##    for n in som.nodes: 
    428 ##        d[tuple(n.pos)]=n 
    429     d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)]) 
    430     check=lambda x,y:x>=0 and x<(xDim*2-1) and y>=0 and y<(yDim*2-1) 
    431     dx=[1,0,-1] 
    432     dy=[0,1, 1] 
    433     for i in range(0, xDim*2,2): 
    434         for j in range(0, yDim*2,2): 
    435             for ddx, ddy in zip(dx, dy): 
    436                 if check(i+ddx, j+ddy): 
    437 ##                    array[i+ddx][j+ddy]=d[(i/2, j/2)].getDistance(d[(i/2+ddx, j/2+ddy)].referenceExample) 
    438                     array[i+ddx][j+ddy] = numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - d[(i/2+ddx, j/2+ddy)].vector)**2)) 
    439     dx=[1,-1,0,-1, 0, 1] 
    440     dy=[0, 0,1, 1,-1,-1] 
    441     for i in range(0, xDim*2, 2): 
    442         for j in range(0, yDim*2, 2): 
    443             l=[array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) if check(i+ddx, j+ddy)] 
    444             array[i][j]=sum(l)/len(l) 
    445     return array 
    446  
    447 def __fillRect(array, som): 
    448     xDim, yDim = som.map_shape 
    449     d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)]) 
    450     check=lambda x,y:x>=0 and x<xDim*2-1 and y>=0 and y<yDim*2-1 
    451     dx=[1, 0, 1] 
    452     dy=[0, 1, 1] 
    453     for i in range(0, xDim*2, 2): 
    454         for j in range(0, yDim*2, 2): 
    455             for ddx, ddy in zip(dx, dy): 
    456                 if check(i+ddx, j+ddy): 
    457 ##                    array[i+ddx][j+ddy]=d[(i/2, j/2)].getDistance(d[(i/2+ddx, j/2+ddy)].referenceExample) 
    458                     array[i+ddx][j+ddy] = numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - d[(i/2+ddx, j/2+ddy)].vector)**2)) 
    459     dx=[1,-1, 0,0,1,-1,-1, 1] 
    460     dy=[0, 0,-1,1,1,-1, 1,-1] 
    461     for i in range(0, xDim*2, 2): 
    462         for j in range(0, yDim*2, 2): 
    463             l=[array[i+ddx,j+ddy] for ddx,ddy in zip(dx,dy) if check(i+ddx, j+ddy)] 
    464             array[i][j]=sum(l)/len(l) 
    465     return array 
    466  
    467 if __name__ == "__main__": 
    468     data = orange.ExampleTable("doc//datasets//iris.tab") 
    469     learner = SOMLearner() 
    470     learner = SOMLearner(batch_train=True, initialize=InitializeLinear, radius_ini=3, radius_fin=1, neighbourhood=Map.NeighbourhoodGaussian, epochs=1000) 
    471     map = learner(data) 
    472     for e in data: 
    473         print map(e), e.getclass() 
    474      
     1from Orange.projection.som import * 
Note: See TracChangeset for help on using the changeset viewer.