source: orange/orange/Orange/projection/som.py @ 9349:fa13a2c52fcd

1"""
2******************************
3Self-organizing maps (``som``)
4******************************
5
6.. index:: self-organizing map (SOM)
7
8.. index::
9   single: projection; self-organizing map (SOM)
10
11An implementation of `self-organizing map <http://en.wikipedia.org/wiki/Self-organizing_map>`_ algorithm (SOM).
12SOM is an unsupervised learning
13algorithm that infers low, typically two-dimensional discretized representation of the input space,
14called a map. The map preserves topological properties of the input space, such that
15the cells that are close in the map include data instances that are similar to each other.
16
=================================
Inference of Self-Organizing Maps
=================================

The main class for inference of self-organizing maps is :obj:`SOMLearner`. The class initializes
the topology of the map and returns an inference object which, given the data, performs the
optimization of the map::

   import Orange
   som = Orange.projection.som.SOMLearner(map_shape=(8, 8),
            initialize=Orange.projection.som.InitializeRandom)
   data = Orange.data.Table("iris.tab")
   map = som(data)

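The trained map can then be inspected directly; a minimal continuation of the sketch
above (the per-node counts depend on the data and the initialization)::

   for node in map:
       print node.pos, len(node.examples)
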
.. autodata:: HexagonalTopology

.. autodata:: RectangularTopology

.. autodata:: InitializeLinear

.. autodata:: InitializeRandom

.. autodata:: NeighbourhoodGaussian

.. autodata:: NeighbourhoodBubble

.. autodata:: NeighbourhoodEpanechicov

.. autoclass:: SOMLearner
   :members:

.. autoclass:: Solver
   :members:

.. autoclass:: SOMMap
   :members:

=============================================
Supervised Learning with Self-Organizing Maps
=============================================

Supervised learning requires class-labeled data. For training,
the class information is first added to the data instances as a regular feature
by extending their feature vectors accordingly. Next, the map is trained and the
training data projected to its nodes. Each node then classifies with the majority
class of the instances that were mapped to it.
For classification, a data instance is projected to its best matching cell, which returns
the associated class.
An example of the code that trains and then classifies on the same data set is::

    import Orange
    import random
    learner = Orange.projection.som.SOMSupervisedLearner(map_shape=(4, 4))
    data = Orange.data.Table("iris.tab")
    classifier = learner(data)
    random.seed(50)
    for d in random.sample(data, 5):
        print "%-15s originally %-15s" % (classifier(d), d.getclass())

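Internally, the class information is encoded with one indicator column per class
value: with the three Iris classes, for instance, an ``Iris-versicolor`` instance is
extended roughly as::

    [5.0, 2.0, 3.5, 1.0]  ->  [5.0, 2.0, 3.5, 1.0, 0.0, 1.0, 0.0]

The indicator columns are used only during optimization and are removed from the
node vectors after training.
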
.. autoclass:: SOMSupervisedLearner
   :members:

==================
Supporting Classes
==================

Class :obj:`Map` stores the self-organizing map composed of :obj:`Node` objects. The code below
(:download:`som-node.py <code/som-node.py>`, uses :download:`iris.tab <code/iris.tab>`) shows an example of how to access the
information stored in a node of the map:

.. literalinclude:: code/som-node.py
    :lines: 7-
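
Roughly, the node information can be accessed like this (a sketch only; the
downloadable som-node.py may differ)::

    import Orange
    som = Orange.projection.som.SOMLearner(map_shape=(3, 3))
    map = som(Orange.data.Table("iris.tab"))

    node = map[0, 0]
    print "Node position:", node.pos
    print "Reference instance:", node.referenceExample
    print "Data instances in the node:", len(node.examples)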

.. autoclass:: Map
   :members:

.. autoclass:: Node
   :members:

========
Examples
========

The following code (:download:`som-mapping.py <code/som-mapping.py>`, uses :download:`iris.tab <code/iris.tab>`) infers a
self-organizing map from the Iris data set. The map is rather small and consists
of only 9 cells. We optimize the network and then report how many data instances were mapped
into each cell. The second part of the code reports on the data instances from one of the cells:

.. literalinclude:: code/som-mapping.py
    :lines: 7-

The output of this code is::

    Node    Instances
    (0, 0)  21
    (0, 1)  1
    (0, 2)  23
    (1, 0)  22
    (1, 1)  7
    (1, 2)  6
    (2, 0)  32
    (2, 1)  16
    (2, 2)  22

    Data instances in cell (1, 2):
    [4.9, 2.4, 3.3, 1.0, 'Iris-versicolor']
    [5.0, 2.0, 3.5, 1.0, 'Iris-versicolor']
    [5.6, 2.9, 3.6, 1.3, 'Iris-versicolor']
    [5.7, 2.6, 3.5, 1.0, 'Iris-versicolor']
    [5.5, 2.4, 3.7, 1.0, 'Iris-versicolor']
    [5.0, 2.3, 3.3, 1.0, 'Iris-versicolor']
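
A sketch of code that can produce this kind of report with the classes documented
above (the actual som-mapping.py may differ, and the exact counts depend on the
training run)::

    import Orange
    som = Orange.projection.som.SOMLearner(map_shape=(3, 3))
    map = som(Orange.data.Table("iris.tab"))

    print "Node    Instances"
    for node in map:
        print "%s  %d" % (node.pos, len(node.examples))

    print
    print "Data instances in cell (1, 2):"
    for d in map[1, 2].examples:
        print d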
127"""
128
129import sys, os
130
131import numpy
132import numpy.ma as ma
133import orange
134import random
135
136random.seed(42)
137
HexagonalTopology = 0
"""Hexagonal topology, cells are hexagon-shaped."""
RectangularTopology = 1
"""Rectangular topology, cells are square-shaped."""

InitializeLinear = 0
"""Data instances are initially assigned to cells according to their two-dimensional PCA projection."""
InitializeRandom = 1
"""Data instances are initially randomly assigned to cells."""

NeighbourhoodGaussian = 0
"""Gaussian (smoothed) neighborhood."""
NeighbourhoodBubble = 1
"""Bubble (crisp) neighborhood."""
NeighbourhoodEpanechicov = 2
"""Epanechnikov (cut and smoothed) neighborhood."""

##########################################################################
# Inference of Self-Organizing Maps

class Solver(object):
    """SOM solver used to train the map. Supports batch and sequential training.
    Based on ideas from the `SOM Toolbox for Matlab <http://www.cis.hut.fi/somtoolbox>`_.

    :param neighbourhood: neighborhood function id
    :type neighbourhood: :obj:`NeighbourhoodGaussian`, :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov`
    :param radius_ini: initial radius
    :type radius_ini: int
    :param radius_fin: final radius
    :type radius_fin: int
    :param epochs: number of training iterations
    :type epochs: int
    :param batch_train: if True, run the batch training algorithm (default); otherwise use the sequential one
    :type batch_train: bool
    :param learning_rate: learning rate for the sequential training algorithm
    :type learning_rate: float
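
    A minimal usage sketch (the solver is normally invoked through
    :obj:`SOMLearner`; names are assumed to be imported from this module and the
    data is expected as a masked numpy array, e.g. from ``Orange.data.Table.toNumpyMA``)::

        data, classes, w = Orange.data.Table("iris.tab").toNumpyMA()
        map = Map((5, 5), topology=RectangularTopology)
        map.initialize_map_random(data)
        map = Solver(batch_train=True, radius_ini=3.0, radius_fin=1.0,
                     epochs=50)(data, map)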
174    """
175   
176    def __init__(self, **kwargs):
177        self.neighbourhood = NeighbourhoodGaussian
178        self.learning_rate = 0.05
179        self.radius_ini = 2
180        self.radius_fin = 1
181        self.epochs = 100
182        self.random_order = False
183        self.batch_train = True
184        self.eps = 1e-5
185        self.qerror = []
186        self.__dict__.update(kwargs)
187
    def radius(self, epoch):
        """Neighborhood radius at a given epoch, decaying linearly from radius_ini to radius_fin."""
        return self.radius_ini - (float(self.radius_ini) - self.radius_fin)*(float(epoch) / self.epochs)

    def alpha(self, epoch):
        """Learning rate at a given epoch, decaying linearly from learning_rate to 0 at the end of training.
        """
        return (1 - float(epoch)/self.epochs)*self.learning_rate

    def __call__(self, data, map, progressCallback=None):
        """Train the map on the data. Pass a progressCallback function to report on the progress.
        """
        self.data = data
        self.map = map

        self.qerror = []
        self.bmu_cache = {}
        if self.batch_train:
            self.train_batch(progressCallback)
        else:
            self.train_sequential(progressCallback)
        return self.map

    def train_sequential(self, progressCallback):
        """Sequential training algorithm.
        """
        self.vectors = self.map.vectors()
        self.unit_distances = self.map.unit_distances()

        for epoch in range(self.epochs):
            self.distances = []
            ind = range(len(self.data))
            if self.random_order:
                random.shuffle(ind)
            self.train_step_sequential(epoch, ind)
            if progressCallback:
                progressCallback(100.0*epoch/self.epochs)
            self.qerror.append(numpy.mean(numpy.sqrt(self.distances)))
            # Stop early once the quantization error has stabilized.
            if epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps:
                break

    def train_step_sequential(self, epoch, indices=None):
        """A single step of the sequential training algorithm.
        """
        indices = range(len(self.data)) if indices is None else indices
        for ind in indices:
            x = self.data[ind]
            Dx = self.vectors - self.data[ind]
            Dist = ma.sum(Dx**2, 1)
            min_dist = ma.min(Dist)
            # The best matching unit (BMU) is the node closest to the instance.
            bmu = ma.argmin(Dist)
            self.distances.append(min_dist)

            # Neighborhood weights of all nodes relative to the BMU.
            if self.neighbourhood == Map.NeighbourhoodGaussian:
                h = numpy.exp(-self.unit_distances[:, bmu]/(2*self.radius(epoch))) * (self.unit_distances[:, bmu] <= self.radius(epoch))
            elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
                h = 1.0 - (self.unit_distances[:, bmu]/self.radius(epoch))**2
                h = h * (h >= 0.0)
            else:
                h = 1.0*(self.unit_distances[:, bmu] <= self.radius(epoch))
            h = h * self.alpha(epoch)

            # Move the nodes within the neighborhood towards the instance.
            nonzero = ma.nonzero(h)
            h = h[nonzero]

            self.vectors[nonzero] = self.vectors[nonzero] - Dx[nonzero] * numpy.reshape(h, (len(h), 1))

    def train_batch(self, progressCallback=None):
        """Batch training algorithm.
        """
        self.unit_distances = self.map.unit_distances()
        # Precomputed terms of the squared Euclidean distances between node
        # vectors and data instances (see train_step_batch).
        self.constant_matrix = 2 * ma.dot(numpy.eye(self.data.shape[1]), numpy.transpose(self.data))
        self.dist_cons = numpy.transpose(ma.dot(self.data**2, numpy.ones(self.data.shape[1])))
        self.weight_matrix = numpy.ones((self.data.shape[1], self.data.shape[0]))
        self.vectors = self.map.vectors()

        for epoch in range(self.epochs):
            self.train_step_batch(epoch)
            if progressCallback:
                progressCallback(100.0*epoch/self.epochs)
            # Early stopping is currently disabled for batch training.
            if False and epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps:
                break

        # Copy the trained vectors back into the map nodes.
        for node, vector in zip(self.map, self.vectors):
            node.vector = vector

    def train_step_batch(self, epoch):
        """A single step of the batch training algorithm.
        """
        # Squared distances between node vectors and data instances, up to the
        # constant ||x||**2 term (stored in self.dist_cons), which does not
        # affect the argmin below.
        D1 = ma.dot(self.vectors**2, self.weight_matrix)
        D2 = ma.dot(self.vectors, self.constant_matrix)
        Dist = D1 - D2

        best_nodes = ma.argmin(Dist, 0)
        distances = ma.min(Dist, 0)
        self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons)))

        # Neighborhood matrix over all pairs of nodes.
        if self.neighbourhood == Map.NeighbourhoodGaussian:
            H = numpy.exp(-self.unit_distances/(2*self.radius(epoch))) * (self.unit_distances <= self.radius(epoch))
        elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
            H = 1.0 - (self.unit_distances/self.radius(epoch))**2
            H = H * (H >= 0.0)
        else:
            H = 1.0*(self.unit_distances <= self.radius(epoch))

        # P is the assignment matrix: P[i, j] = 1 if node i is the best
        # matching unit of instance j.
        P = numpy.zeros((self.vectors.shape[0], self.data.shape[0]))

        P[(best_nodes, range(len(best_nodes)))] = numpy.ones(len(best_nodes))

        # New node vectors are neighborhood-weighted means of the assigned
        # instances; A holds the normalization terms (non-missing counts).
        S = ma.dot(H, ma.dot(P, self.data))

        A = ma.dot(H, ma.dot(P, ~self.data._mask))

        nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), )

        self.vectors[nonzero] = S[nonzero] / A[nonzero]


class SOMLearner(orange.Learner):
    """An implementation of self-organizing maps. Takes an input data set, projects the data
    instances onto a map, and returns the result as a classifier that holds the projection
    information together with an algorithm to project new data instances. Uses :obj:`Map` for
    the representation of the projection space and :obj:`Solver` for training, and returns a
    trained map with information on the projection of the training data as crafted by :obj:`SOMMap`.

    :param map_shape: dimensions of the map
    :type map_shape: tuple
    :param initialize: initialization type id; linear
      initialization assigns the data to the cells according to its position in the two-dimensional
      principal component projection
    :type initialize: :obj:`InitializeRandom` or :obj:`InitializeLinear`
    :param topology: topology type id
    :type topology: :obj:`HexagonalTopology` or :obj:`RectangularTopology`
    :param neighbourhood: cell neighborhood type id
    :type neighbourhood: :obj:`NeighbourhoodGaussian`, :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov`
    :param batch_train: perform batch training?
    :type batch_train: bool
    :param learning_rate: learning rate
    :type learning_rate: float
    :param radius_ini: initial radius
    :type radius_ini: int
    :param radius_fin: final radius
    :type radius_fin: int
    :param epochs: number of epochs (iterations of the training step)
    :type epochs: int
    :param solver: a class with the optimization algorithm
    """

    def __new__(cls, examples=None, weightId=0, **kwargs):
        self = orange.Learner.__new__(cls, **kwargs)
        if examples is not None:
            self.__init__(**kwargs)
            return self.__call__(examples, weightId)
        else:
            return self

    def __init__(self, map_shape=(5, 10), initialize=InitializeLinear, topology=HexagonalTopology, neighbourhood=NeighbourhoodGaussian,
                 batch_train=True, learning_rate=0.05, radius_ini=3.0, radius_fin=1.0, epochs=1000, solver=Solver, **kwargs):

        self.map_shape = map_shape
        self.initialize = initialize
        self.topology = topology
        self.neighbourhood = neighbourhood
        self.batch_train = batch_train
        self.learning_rate = learning_rate
        self.radius_ini = radius_ini
        self.radius_fin = radius_fin
        self.epochs = epochs
        self.solver = solver
        self.eps = 1e-4

        orange.Learner.__init__(self, **kwargs)

    def __call__(self, data, weightID=0, progressCallback=None):
        # Convert the data to a masked numpy array, initialize the map and
        # train it with the configured solver.
        numdata, classes, w = data.toNumpyMA()
        map = Map(self.map_shape, topology=self.topology)
        if self.initialize == Map.InitializeLinear:
            map.initialize_map_linear(numdata)
        else:
            map.initialize_map_random(numdata)
        map = self.solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
                     epochs=self.epochs)(numdata, map, progressCallback=progressCallback)
        return SOMMap(map, data)

class SOMMap(orange.Classifier):
    """Project the data onto the inferred self-organizing map.

    :param map: a trained self-organizing map
    :type map: :obj:`Map`
    :param data: the data to be mapped onto the map
    :type data: :obj:`Orange.data.Table`
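
    A trained :obj:`SOMMap` can be used to find the best matching node of a new
    data instance and, if the training data was class-labeled, to classify it;
    a short sketch, assuming ``smap`` is a trained map and ``instance`` a data
    instance from the same domain::

        node = smap.getBestMatchingNode(instance)
        prediction = smap(instance)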
402    """
403   
404    def __init__(self, map=[], data=[]):
405        self.map = map
406        self.examples = data
407        for node in map:
408            node.referenceExample = orange.Example(orange.Domain(self.examples.domain.attributes, False),
409                                                 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \
410                                                  for var, value in zip(self.examples.domain.attributes, node.vector)])
411            node.examples = orange.ExampleTable(self.examples.domain)
412
413        for ex in self.examples:
414            node = self.getBestMatchingNode(ex)
415            node.examples.append(ex)
416
417        if self.examples and self.examples.domain.classVar:
418            for node in self.map:
419                node.classifier = orange.MajorityLearner(node.examples if node.examples else self.examples)
420                     
421            self.classVar = self.examples.domain.classVar
422        else:
423            self.classVar = None
424
    def getBestMatchingNode(self, example):
        """Return the best matching node for a given data instance.
        """
        example, c, w = orange.ExampleTable([example]).toNumpyMA()
        vectors = self.map.vectors()
        Dist = vectors - example
        bmu = ma.argmin(ma.sum(Dist**2, 1))
        return list(self.map)[bmu]

    def __call__(self, example, what=orange.GetValue):
        """Classify the instance with the classifier of its best matching node.
        """
        bmu = self.getBestMatchingNode(example)
        return bmu.classifier(example, what)

    def __getattr__(self, name):
        try:
            return getattr(self.__dict__["map"], name)
        except (KeyError, AttributeError):
            raise AttributeError(name)

    def __iter__(self):
        """Iterate over all nodes in the map.
        """
        return iter(self.map)

    def __getitem__(self, val):
        """Return the node at position x, y.
        """
        return self.map.__getitem__(val)

##########################################################################
# Supervised learning

class SOMSupervisedLearner(SOMLearner):
    """SOMSupervisedLearner is a class used to learn a SOM from an orange.ExampleTable, using the
    class information in the learning process. This is achieved by adding a value for each class
    to the training instances, where 1.0 signals class membership and all other values are 0.0.
    After the training, the new values are discarded from the node vectors.

    :param data: class-labeled data set
    :type data: :obj:`Orange.data.Table`
    :param progressCallback: a one-argument function to report on inference progress (in %)
    """
    def __call__(self, examples, weightID=0, progressCallback=None):
        data, classes, w = examples.toNumpyMA()
        # Extend the feature vectors with one indicator column per class value.
        nval = len(examples.domain.classVar.values)
        ext = ma.zeros((len(data), nval))
        ext[([i for i, m in enumerate(classes.mask) if m], [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0
        data = ma.hstack((data, ext))
        map = Map(self.map_shape, topology=self.topology)
        if self.initialize == Map.InitializeLinear:
            map.initialize_map_linear(data)
        else:
            map.initialize_map_random(data)
        map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
                     epochs=self.epochs)(data, map, progressCallback=progressCallback)
        # Strip the class indicator columns from the trained node vectors.
        for node in map:
            node.vector = node.vector[:-nval]
        return SOMMap(map, examples)

##########################################################################
# Supporting Classes

class Node(object):
    """An object holding the information about a node in the map.

    .. attribute:: pos

        Node position.

    .. attribute:: referenceExample

        Reference data instance (a prototype).

    .. attribute:: examples

        Data set with the training instances that were mapped to the node.
    """
    def __init__(self, pos, map=None, vector=None):
        self.pos = pos
        self.map = map
        self.vector = vector

class Map(object):
    """Self-organizing map (the structure). Includes methods for data initialization.

    .. attribute:: map

        Self-organizing map. A list of lists of :obj:`Node`.

    .. attribute:: examples

        Data set that was considered when optimizing the map.
    """

    HexagonalTopology = HexagonalTopology
    RectangularTopology = RectangularTopology
    InitializeLinear = InitializeLinear
    InitializeRandom = InitializeRandom
    NeighbourhoodGaussian = NeighbourhoodGaussian
    NeighbourhoodBubble = NeighbourhoodBubble
    NeighbourhoodEpanechicov = NeighbourhoodEpanechicov

    def __init__(self, map_shape=(20, 40), topology=HexagonalTopology):
        self.map_shape = map_shape
        self.topology = topology
        self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])]

    def __getitem__(self, pos):
        """Return the node at position x, y.
        """
        x, y = pos
        return self.map[x][y]

    def __iter__(self):
        """Iterate over all nodes in the map.
        """
        for row in self.map:
            for node in row:
                yield node

    def vectors(self):
        """Return all vectors of the map as rows in a numpy.array.
        """
        return numpy.array([node.vector for node in self])

    def unit_distances(self):
        """Return an NxN numpy.array of inter-node distances (based on
        node positions in the map, not in the vector space), where N is the
        number of nodes.
        """
        nodes = list(self)
        dist = numpy.zeros((len(nodes), len(nodes)))

        coords = self.unit_coords()
        for i in range(len(nodes)):
            for j in range(len(nodes)):
                dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j]))
        return numpy.array(dist)

    def unit_coords(self):
        """Return the unit coordinates of all nodes in the map as a numpy.array.
        """
        nodes = list(self)
        coords = numpy.zeros((len(nodes), len(self.map_shape)))
        # Nodes are iterated row by row (see __iter__), so the row index of a
        # node is its index divided by the number of columns.
        coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[1])
        coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1])

        ## in hexagonal topology every second map row is offset by 0.5 and all coordinates are multiplied by sqrt(0.75)
        if self.topology == Map.HexagonalTopology:
            ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2))
            coords[ind] = coords[ind] + 0.5
            coords = coords * numpy.sqrt(0.75)
        return coords


    def initialize_map_random(self, data=None, dimension=5):
        """Initialize the map node vectors randomly, by supplying
        either training data or the dimension of the data.
        """
        if data is not None:
            min, max = ma.min(data, 0), ma.max(data, 0)
            dimension = data.shape[1]
        else:
            min, max = numpy.zeros(dimension), numpy.ones(dimension)
        # Each node vector is drawn uniformly at random from the per-feature range.
        for node in self:
            node.vector = min + numpy.random.rand(dimension) * (max - min)

    def initialize_map_linear(self, data, map_shape=(10, 20)):
        """Initialize the map node vectors linearly over the subspace
        of the two most significant eigenvectors.
        """
        data = data.copy() #ma.array(data)
        dim = data.shape[1]
        mdim = len(map_shape)
        munits = len(list(self))
        me = ma.mean(data, 0)
        A = numpy.zeros((dim, dim))

        # Center the data and estimate its covariance matrix.
        for i in range(dim):
            data[:, i] = data[:, i] - me[i]

        for i in range(dim):
            for j in range(dim):
                c = data[:, i] * data[:, j]
                A[i, j] = ma.sum(c) / len(c)
                A[j, i] = A[i, j]

        # Keep the two principal eigenvectors, scaled by the square roots of
        # the corresponding eigenvalues.
        eigval, eigvec = numpy.linalg.eig(A)
        ind = list(reversed(numpy.argsort(eigval)))
        eigval = eigval[ind[:mdim]]
        eigvec = eigvec[:, ind[:mdim]]

        for i in range(mdim):
            eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i])

        # Spread the node vectors linearly along the principal subspace,
        # centered at the data mean.
        unit_coords = self.unit_coords()
        for d in range(mdim):
            max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d])
            unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min)
        unit_coords = (unit_coords - 0.5) * 2

        vectors = numpy.array([me for i in range(munits)])
        for i in range(munits):
            for d in range(mdim):
                vectors[i] = vectors[i] + unit_coords[i][d] * numpy.transpose(eigvec[:, d])

        for i, node in enumerate(self):
            node.vector = vectors[i]

    def getUMat(self):
        """Return the unified distance matrix (U-matrix) of the map."""
        return getUMat(self)


##########################################################################
# Supporting functions

def getUMat(som):
    """Compute the U-matrix (unified distance matrix) of the map: a grid that
    interleaves node cells with the distances between neighboring node vectors."""
    dim1 = som.map_shape[0]*2 - 1
    dim2 = som.map_shape[1]*2 - 1

    a = numpy.zeros((dim1, dim2))
    if som.topology == HexagonalTopology:
        return __fillHex(a, som)
    else:
        return __fillRect(a, som)

def __fillHex(array, som):
    xDim, yDim = som.map_shape
    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
    check = lambda x, y: x >= 0 and x < (xDim*2 - 1) and y >= 0 and y < (yDim*2 - 1)
    # Distances between vectors of neighboring nodes fill the in-between cells.
    dx = [1, 0, -1]
    dy = [0, 1,  1]
    for i in range(0, xDim*2, 2):
        for j in range(0, yDim*2, 2):
            for ddx, ddy in zip(dx, dy):
                if check(i+ddx, j+ddy):
                    array[i+ddx][j+ddy] = numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - d[(i/2+ddx, j/2+ddy)].vector)**2))
    # Each node cell gets the average of the surrounding distance cells.
    dx = [1, -1, 0, -1,  0,  1]
    dy = [0,  0, 1,  1, -1, -1]
    for i in range(0, xDim*2, 2):
        for j in range(0, yDim*2, 2):
            l = [array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) if check(i+ddx, j+ddy)]
            array[i][j] = sum(l)/len(l)
    return array

def __fillRect(array, som):
    xDim, yDim = som.map_shape
    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
    check = lambda x, y: x >= 0 and x < xDim*2 - 1 and y >= 0 and y < yDim*2 - 1
    dx = [1, 0, 1]
    dy = [0, 1, 1]
    for i in range(0, xDim*2, 2):
        for j in range(0, yDim*2, 2):
            for ddx, ddy in zip(dx, dy):
                if check(i+ddx, j+ddy):
                    array[i+ddx][j+ddy] = numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - d[(i/2+ddx, j/2+ddy)].vector)**2))
    dx = [1, -1,  0, 0, 1, -1, -1,  1]
    dy = [0,  0, -1, 1, 1, -1,  1, -1]
    for i in range(0, xDim*2, 2):
        for j in range(0, yDim*2, 2):
            l = [array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) if check(i+ddx, j+ddy)]
            array[i][j] = sum(l)/len(l)
    return array

##########################################################################
# Testing (deprecated, use regression tests instead)

if __name__ == "__main__":
    data = orange.ExampleTable("iris.tab")
    learner = SOMLearner(batch_train=True, initialize=InitializeLinear, radius_ini=3, radius_fin=1, neighbourhood=Map.NeighbourhoodGaussian, epochs=1000)
    map = learner(data)
    for e in data:
        print map(e), e.getclass()