source: orange/Orange/projection/som.py @ 10580:c4cbae8dcf8b

Revision 10580:c4cbae8dcf8b, 29.7 KB checked in by markotoplak, 2 years ago (diff)

Moved deprecation functions, progress bar support and environ into Orange.utils. Orange imports cleanly, although it is not tested yet.

Line 
1"""
2******************************
3Self-organizing maps (``som``)
4******************************
5
6.. index:: self-organizing map (SOM)
7
8.. index::
9   single: projection; self-organizing map (SOM)
10
11
12`Self-organizing map <http://en.wikipedia.org/wiki/Self-organizing_map>`_
13(SOM) is an unsupervised learning  algorithm that infers low,
14typically two-dimensional discretized representation of the input
15space, called a map. The map preserves topological properties of the
16input space, such that the cells that are close in the map include data
17instances that are similar to each other.
18
19=================================
20Inference of Self-Organizing Maps
21=================================
22
23The main class for inference of self-organizing maps is :obj:`SOMLearner`.
24The class initializes the topology of the map and returns an inference
25objects which, given the data, performs the optimization of the map::
26
27   import Orange
28   som = Orange.projection.som.SOMLearner(map_shape=(8, 8),
29            initialize=Orange.projection.som.InitializeRandom)
30   data = Orange.data.table("iris.tab")
31   map = som(data)
32
33
34.. autoclass:: SOMLearner
35   :members:
36   
37.. autoclass:: SOMMap
38   :members:
39   
40Topology
41--------
42
43.. autodata:: HexagonalTopology
44
45.. autodata:: RectangularTopology
46
47
48Map initialization
49------------------
50
51.. autodata:: InitializeLinear
52
53.. autodata:: InitializeRandom
54
55Node neighbourhood
56------------------
57
58.. autodata:: NeighbourhoodGaussian
59
60.. autodata:: NeighbourhoodBubble
61
62.. autodata:: NeighbourhoodEpanechicov
63
64   
65=============================================
66Supervised Learning with Self-Organizing Maps
67=============================================
68
69Supervised learning requires class-labeled data. For training,
70class information is first added to data instances as a regular
71feature by extending the feature vectors accordingly. Next, the
72map is trained, and the training data projected to nodes. Each
73node then classifies to the majority class. The dimensions
74corresponding to the class features are then removed from the
75prototype vector of each node in the map. For classification,
76the data instance is projected to the best matching cell, returning
77the associated class.
78
79An example of the code that trains and then classifies on the same
80data set is::
81
82    import Orange
83    import random
84    learner = Orange.projection.som.SOMSupervisedLearner(map_shape=(4, 4))
85    data = Orange.data.Table("iris.tab")
86    classifier = learner(data)
87    random.seed(50)
88    for d in random.sample(data, 5):
89        print "%-15s originally %-15s" % (classifier(d), d.getclass())
90
91.. autoclass:: SOMSupervisedLearner
92   :members:
93   
94==================
95Supporting Classes
96==================
97
98The actual map optimization algorithm is implemented by :class:`Solver`
99class which is used by both the :class:`SOMLearner` and the
100:class:`SOMSupervisedLearner`.
101
102.. autoclass:: Solver
103   :members:
104   
105Class :obj:`Map` stores the self-organizing map composed of :obj:`Node`
106objects. The code below (:download:`som-node.py <code/som-node.py>`)
107shows an example how to access the information stored in the node of the
108map:
109
110.. literalinclude:: code/som-node.py
111    :lines: 7-
112
113.. autoclass:: Map
114   :members:
115   
116.. autoclass:: Node
117   :members:
118 
119========
120Examples
121========
122
123The following code  (:download:`som-mapping.py <code/som-mapping.py>`)
124infers self-organizing map from Iris data set. The map is rather small,
125and consists of only 9 cells. We optimize the network, and then report
126how many data instances were mapped into each cell. The second part
127of the code reports on data instances from one of the corner cells:
128
129.. literalinclude:: code/som-mapping.py
130    :lines: 7-
131
132The output of this code is::
133
134    Node    Instances
135    (0, 0)  31
136    (0, 1)  7
137    (0, 2)  0
138    (1, 0)  24
139    (1, 1)  7
140    (1, 2)  50
141    (2, 0)  10
142    (2, 1)  21
143    (2, 2)  0
144
145    Data instances in cell (0, 1):
146    [6.9, 3.1, 4.9, 1.5, 'Iris-versicolor']
147    [6.7, 3.0, 5.0, 1.7, 'Iris-versicolor']
148    [6.3, 2.9, 5.6, 1.8, 'Iris-virginica']
149    [6.5, 3.2, 5.1, 2.0, 'Iris-virginica']
150    [6.4, 2.7, 5.3, 1.9, 'Iris-virginica']
151    [6.1, 2.6, 5.6, 1.4, 'Iris-virginica']
152    [6.5, 3.0, 5.2, 2.0, 'Iris-virginica']
153   
154"""
155
156import sys, os
157
158import numpy
159import numpy.ma as ma
160import orange
161import random
162
163import Orange
164from Orange.utils import deprecated_keywords, \
165                        deprecated_attribute
166
167
168HexagonalTopology = 0
169"""Hexagonal topology, cells are hexagon-shaped."""
170RectangularTopology = 1
171"""Rectangular topology, cells are square-shaped"""
172
173InitializeLinear = 0
174"""Data instances are initially assigned to cells according to their two-dimensional PCA projection."""
175InitializeRandom = 1
176"""Data instances are initially randomly assigned to cells."""
177
178NeighbourhoodGaussian = 0 
179"""Gaussian (smoothed) neighborhood."""
180NeighbourhoodBubble = 1
181"""Bubble (crisp) neighborhood."""
182NeighbourhoodEpanechicov = 2
183"""Epanechicov (cut and smoothed) neighborhood."""
184
185##########################################################################
186# Inference of Self-Organizing Maps
187
188class Solver(object):
189    """ SOM Solver class used to train the map. Supports batch
190    and sequential training. Based on ideas from
191    `SOM Toolkit for Matlab <http://www.cis.hut.fi/somtoolbox>`_.
192
193    :param neighbourhood: neighborhood function id
194    :type neighbourhood: :obj:`NeighbourhoodGaussian`,
195        :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov`
196       
197    :param radius_ini: initial radius
198    :type radius_ini: int
199   
200    :param raduis_fin: final radius
201    :type raduis_fin: int
202   
203    :param epoch: number of training interactions
204    :type epoch: int
205   
206    :param batch_train: if True run the batch training algorithm
207        (default), else use the sequential one
208    :type batch_train: bool
209   
210    :param learning_rate: learning rate for the sequential training algorithm
211    :type learning_rate: float
212   
213    """
214   
215    def __init__(self, **kwargs):
216        self.neighbourhood = NeighbourhoodGaussian
217        self.learning_rate = 0.05
218        self.radius_ini = 2
219        self.radius_fin = 1
220        self.epochs = 100
221        self.random_order = False
222        self.batch_train = True
223        self.eps = 1e-5
224        self.qerror = []
225        self.__dict__.update(kwargs)
226
227    def radius(self, epoch):
228        return self.radius_ini - (float(self.radius_ini) - self.radius_fin)*(float(epoch) / (self.epochs-1))
229
230    def radius_seq(self, iter):
231        """Compute the radius regarding the iterations, not epochs."""
232        iterations = len(self.data)*self.epochs
233        return self.radius_ini - (float(self.radius_ini) - self.radius_fin)*(float(iter) / (iterations-1))
234
235    def alpha(self, iter):
236        """Compute the learning rate from iterations, starting with learning_rate to 0 at the end of training.
237        """
238        iterations = len(self.data)*self.epochs
239        return (1 - float(iter)/(iterations-1))*self.learning_rate
240
241    @deprecated_keywords({"progressCallback": "progress_callback"})
242    def __call__(self, data, map, progress_callback=None):
243        """ Train the map from data. Pass progress_callback function to report on the progress.
244        """
245        self.data = data
246        self.map = map
247
248        self.qerror = []
249        if self.batch_train:
250            self.train_batch(progress_callback)
251        else:
252            self.train_sequential(progress_callback)
253        return self.map
254
255    def train_sequential(self, progress_callback):
256        """Sequential training algorithm.
257        """
258        self.vectors = self.map.vectors()
259        self.unit_distances = self.map.unit_distances()
260       
261#        from pylab import plot, show, draw, ion
262#        ion()
263#        plot(self.data[:, 0], self.data[:, 1], "ro")
264#        vec_plot = plot(self.vectors[:, 0], self.vectors[:, 1], "bo")[0]
265       
266        for epoch in range(self.epochs):
267            self.distances = []
268            ind = range(len(self.data))
269            if self.random_order:
270                random.shuffle(ind)
271            self.train_step_sequential(epoch, ind)
272            if progress_callback:
273                progress_callback(100.0*epoch/self.epochs)
274            self.qerror.append(numpy.mean(numpy.sqrt(self.distances)))
275#            print epoch, "q error:", numpy.mean(numpy.sqrt(self.distances)), self.radius(epoch)
276            if epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps:
277                break
278           
279#            vec_plot.set_xdata(self.vectors[:, 0])
280#            vec_plot.set_ydata(self.vectors[:, 1])
281#            draw()
282#        show()
283
284    def train_step_sequential(self, epoch, indices=None):
285        """A single step of sequential training algorithm.
286        """
287        indices = range(len(self.data)) if indices == None else indices
288        for ind in indices:
289            x = self.data[ind]
290            Dx = self.vectors - self.data[ind]
291            Dist = ma.sum(Dx**2, 1)
292            min_dist = ma.min(Dist)
293            bmu = ma.argmin(Dist)
294            self.distances.append(min_dist)
295
296            iter = epoch*len(self.data)+ind
297
298            if self.neighbourhood == Map.NeighbourhoodGaussian:
299                h = numpy.exp(-self.unit_distances[:, bmu]**2/(2*self.radius_seq(iter)**2)) * (self.unit_distances[:, bmu]**2 <= self.radius_seq(iter)**2)
300            elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
301                h = 1.0 - (self.unit_distances[:bmu]/self.radius_seq(iter))**2
302                h = h * (h >= 0.0)
303            else:
304                h = 1.0*(self.unit_distances[:, bmu] <= self.radius_seq(iter))
305            h = h * self.alpha(iter)
306
307            nonzero = ma.nonzero(h)
308            h = h[nonzero]
309
310            self.vectors[nonzero] = self.vectors[nonzero] - Dx[nonzero] * numpy.reshape(h, (len(h), 1))
311
312    @deprecated_keywords({"progressCallback": "progress_callback"})
313    def train_batch(self, progress_callback=None):
314        """Batch training algorithm.
315        """
316       
317        self.unit_distances = self.map.unit_distances()
318        self.constant_matrix = 2 * ma.dot(numpy.eye(self.data.shape[1]), numpy.transpose(self.data))
319        self.dist_cons = numpy.transpose(ma.dot(self.data**2, numpy.ones(self.data.shape[1])))
320        self.weight_matrix = numpy.ones((self.data.shape[1], self.data.shape[0]))
321        self.vectors = self.map.vectors()
322       
323##        from pylab import plot, show, draw, ion
324##        ion()
325##        plot(self.data[:, 0], self.data[:, 1], "ro")
326##        vec_plot = plot(self.vectors[:, 0], self.vectors[:, 1], "bo")[0]
327       
328        for epoch in range(self.epochs):
329            self.train_step_batch(epoch)
330            if progress_callback:
331                progress_callback(100.0*epoch/self.epochs)
332            if False and epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps:
333                break
334##            vec_plot.set_xdata(self.vectors[:, 0])
335##            vec_plot.set_ydata(self.vectors[:, 1])
336##            draw()
337##        show()
338       
339        for node, vector in zip(self.map, self.vectors):
340            node.vector = vector
341
342    def train_step_batch(self, epoch):
343        """A single step of batch training algorithm.
344        """
345        D1 = ma.dot(self.vectors**2, self.weight_matrix)
346        D2 = ma.dot(self.vectors, self.constant_matrix)
347        Dist = D1 - D2
348
349        best_nodes = ma.argmin(Dist, 0)
350        distances = ma.min(Dist, 0)
351##        print "q error:", ma.mean(ma.sqrt(distances + self.dist_cons)), self.radius(epoch)
352        self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons)))
353
354        if self.neighbourhood == Map.NeighbourhoodGaussian:       
355            H = numpy.exp(-self.unit_distances**2/(2*self.radius(epoch)**2)) * (self.unit_distances**2 <= self.radius(epoch)**2)
356        elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
357            H = 1.0 - (self.unit_distances/self.radius(epoch))**2
358            H = H * (H >= 0.0)
359        else:
360            H = 1.0*(self.unit_distances <= self.radius(epoch))
361
362        P =  numpy.zeros((self.vectors.shape[0], self.data.shape[0]))
363       
364        P[(best_nodes, range(len(best_nodes)))] = numpy.ones(len(best_nodes))
365       
366        S = ma.dot(H, ma.dot(P, self.data))
367       
368        A = ma.dot(H, ma.dot(P, ~self.data._mask))
369
370##        nonzero = (range(epoch%2, len(self.vectors), 2), )
371        nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), )
372       
373        self.vectors[nonzero] = S[nonzero] / A[nonzero]
374       
375
376class SOMLearner(orange.Learner):
377    """Considers an input data set, projects the data instances
378    onto a map, and returns a result in the form of a classifier
379    holding  projection information together with an algorithm to
380    project new data instances. Uses :obj:`Map` for representation of
381    projection space, :obj:`Solver` for training, and returns a
382    trained map with information on projection of the training
383    data as crafted by :obj:`SOMMap`.
384   
385    :param map_shape: dimension of the map
386    :type map_shape: tuple
387   
388    :param initialize: initialization type id; linear initialization
389        assigns the data to the cells according to its position in
390        two-dimensional principal component projection   
391    :type initialize: :obj:`InitializeRandom` or :obj:`InitializeLinear`
392   
393    :param topology: topology type id
394    :type topology: :obj:`HexagonalTopology` or :obj:`RectangularTopology`
395   
396    :param neighbourhood: cell neighborhood type id
397    :type neighbourhood: :obj:`NeighbourhoodGaussian`,
398        :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov`
399       
400    :param batch_train: perform batch training?
401    :type batch_train: bool
402   
403    :param learning_rate: learning rate
404    :type learning_rate: float
405   
406    :param radius_ini: initial radius
407    :type radius_ini: int
408   
409    :param radius_fin: final radius
410    :type radius_fin: int
411   
412    :param epochs: number of epochs (iterations of a training steps)
413    :type epochs: int
414   
415    :param solver: a class with the optimization algorithm
416   
417    """
418    @deprecated_keywords({"examples": "data",
419                          "weightId": "weight_id"})
420    def __new__(cls, data=None, weight_id=0, **kwargs):
421        self = orange.Learner.__new__(cls, **kwargs)
422        if data is not None:
423            self.__init__(**kwargs)
424            return self.__call__(data, weight_id)
425        else:
426            return self
427       
428    def __init__(self, map_shape=(5, 10), initialize=InitializeLinear, topology=HexagonalTopology, neighbourhood=NeighbourhoodGaussian,
429                 batch_train=True, learning_rate=0.05, radius_ini=3.0, radius_fin=1.0, epochs=1000, solver=Solver, **kwargs):
430
431        self.map_shape = map_shape
432        self.initialize = initialize
433        self.topology = topology
434        self.neighbourhood = neighbourhood
435        self.batch_train = batch_train
436        self.learning_rate = learning_rate
437        self.radius_ini = radius_ini
438        self.radius_fin = radius_fin
439        self.epochs = epochs
440        self.solver = solver
441        self.eps = 1e-4
442       
443        orange.Learner.__init__(self, **kwargs)
444       
445    @deprecated_keywords({"weightID": "weight_id",
446                          "progressCallback": "progress_callback"})
447    def __call__(self, data, weight_id=0, progress_callback=None):
448        numdata, classes, w = data.toNumpyMA()
449        map = Map(self.map_shape, topology=self.topology)
450        if self.initialize == Map.InitializeLinear:
451            map.initialize_map_linear(numdata)
452        else:
453            map.initialize_map_random(numdata)
454        map = self.solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
455                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
456                     epochs=self.epochs)(numdata, map, progress_callback=progress_callback)
457        return SOMMap(map, data)
458
459class SOMMap(orange.Classifier):
460    """Project the data onto the inferred self-organizing map.
461   
462    :param map: a trained self-organizing map
463    :type map: :obj:`SOMMap`
464    :param data: the data to be mapped on the map
465    :type data: :obj:`Orange.data.Table`
466   
467    """
468   
469    def __init__(self, map=[], data=[]):
470        self.map = map
471        self.data = data
472        for node in map:
473            node.reference_instance = orange.Example(orange.Domain(self.data.domain.attributes, False),
474                                                 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \
475                                                  for var, value in zip(self.data.domain.attributes, node.vector)])
476           
477            node.instances = orange.ExampleTable(self.data.domain)
478
479        for inst in self.data:
480            node = self.get_best_matching_node(inst)
481            node.instances.append(inst)
482
483        if self.data and self.data.domain.class_var:
484            for node in self.map:
485                node.classifier = orange.MajorityLearner(node.instances if node.instances else self.data)
486                     
487            self.class_var = self.data.domain.class_var
488        else:
489            self.class_var = None
490           
491    classVar = deprecated_attribute("classVar", "class_var")
492    examples = deprecated_attribute("examples", "data")
493
494    def get_best_matching_node(self, instance):
495        """Return the best matching node for a given data instance
496        """
497        instance, c, w = orange.ExampleTable([instance]).toNumpyMA()
498        vectors = self.map.vectors()
499        Dist = vectors - instance
500        bmu = ma.argmin(ma.sum(Dist**2, 1))
501        return list(self.map)[bmu]
502   
503    getBestMatchingNode = \
504        deprecated_attribute("getBestMatchingNode",
505                             "get_best_matching_node")
506       
507    def __call__(self, instance, what=orange.GetValue):
508        """Map `instance` onto the best matching node and predict
509        its class using the majority/mean of the training data in
510        that node.
511         
512        """
513        bmu = self.get_best_matching_node(instance)
514        return bmu.classifier(instance, what)
515
516    def __getattr__(self, name):
517        try:
518            return getattr(self.__dict__["map"], name)
519        except (KeyError, AttributeError):
520            raise AttributeError(name)
521
522    def __iter__(self):
523        """ Iterate over all nodes in the map
524        """
525        return iter(self.map)
526
527    def __getitem__(self, val):
528        """ Return the node at position x, y
529        """
530        return self.map.__getitem__(val)
531
532##########################################################################
533# Supervised learning
534
535class SOMSupervisedLearner(SOMLearner):
536    """SOMSupervisedLearner is a class used to learn SOM from
537    orange.ExampleTable, by using the class information in the
538    learning process. This is achieved by adding a value for each
539    class to the training instances, where 1.0 signals class membership
540    and all other values are 0.0. After the training, the new values
541    are discarded from the node vectors.
542   
543    :param data: class-labeled data set
544    :type data: :obj:`Orange.data.Table`
545    :param progress_callback: a one argument function to report
546        on inference progress (in %)
547       
548    """
549    @deprecated_keywords({"weightID": "weight_id",
550                          "progressCallback": "progress_callback"})
551    def __call__(self, data, weight_id=0, progress_callback=None):
552        array, classes, w = data.toNumpyMA()
553        domain = data.domain
554        if isinstance(domain.class_var, Orange.feature.Discrete):
555            # Discrete class (extend the data with class indicator matrix)
556            nval = len(data.domain.class_var.values)
557            ext = ma.zeros((len(array), nval))
558            ext[([i for i, m in enumerate(classes.mask) if m],
559                 [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0
560        elif isinstance(domain.class_var, Orange.feature.Continuous):
561            # Continuous class, just add the one column (what about multitarget)
562            nval = 1
563            ext = ma.zeros((len(array), nval))
564            ext[:,0] = classes
565        elif domain.class_var is None:
566            # No class var
567            nval = 0
568            ext = ma.zeros((len(array), nval))
569        else:
570            raise TypeError("Unsuported `class_var` %r" % domain.class_var) 
571        array = ma.hstack((array, ext))
572       
573        map = Map(self.map_shape, topology=self.topology)
574        if self.initialize == Map.InitializeLinear:
575            map.initialize_map_linear(array)
576        else:
577            map.initialize_map_random(array)
578        map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
579                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
580                     epoch=self.epochs)(array, map, progress_callback=progress_callback)
581        # Remove class columns from the vectors
582        for node in map:
583            node.vector = node.vector[:-nval]
584        return SOMMap(map, data)
585
586##########################################################################
587# Supporting Classes
588
589class Node(object):
590    """An object holding the information about the node in the map.
591
592    .. attribute:: pos
593
594        Node position.
595
596    .. attribute:: reference_instance
597
598        Reference data instance (a prototype).
599       
600    .. attribute:: instances
601   
602        Data set with training instances that were mapped to the node.
603         
604    """
605    def __init__(self, pos, map=None, vector=None):
606        self.pos = pos
607        self.map = map
608        self.vector = vector
609        self.reference_instance = None
610        self.instances = None
611       
612    referenceExample = deprecated_attribute("referenceExample", "reference_instance")
613    examples = deprecated_attribute("examples", "instances")
614
615class Map(object):
616    """Self organizing map (the structure). Includes methods for
617    data initialization.
618   
619    .. attribute:: map_shape
620   
621        A two element tuple containing the map width and height.
622         
623    .. attribute:: topology
624   
625        Topology of the map (``HexagonalTopology`` or
626        ``RectangularTopology``)
627       
628    .. attribute:: map
629
630        Self orginzing map. A list of lists of :obj:`Node`.
631       
632    """
633   
634    HexagonalTopology = HexagonalTopology
635    RectangularTopology = RectangularTopology
636    InitializeLinear = InitializeLinear
637    InitializeRandom = InitializeRandom
638    NeighbourhoodGaussian = NeighbourhoodGaussian
639    NeighbourhoodBubble = NeighbourhoodBubble
640    NeighbourhoodEpanechicov = NeighbourhoodEpanechicov
641       
642    def __init__(self, map_shape=(20, 40), topology=HexagonalTopology):
643        self.map_shape = map_shape
644        self.topology = topology
645        self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])]
646       
647    def __getitem__(self, pos):
648        """ Return the node at position x, y.
649        """
650        x, y = pos
651        return self.map[x][y]
652
653    def __iter__(self):
654        """ Iterate over all nodes in the map.
655        """
656        for row in self.map:
657            for node in row:
658                yield node
659
660    def vectors(self):
661        """Return all vectors of the map as rows in an numpy.array.
662        """
663        return numpy.array([node.vector for node in self])
664
665    def unit_distances(self):
666        """Return a NxN numpy.array of internode distances (based on
667        node position in the map, not vector space) where N is the
668        number of nodes.
669       
670        """
671        nodes = list(self)
672        dist = numpy.zeros((len(nodes), len(nodes)))
673
674        coords = self.unit_coords()
675        for i in range(len(nodes)):
676            for j in range(len(nodes)):
677                dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j]))
678        return numpy.array(dist)
679
680    def unit_coords(self):
681        """ Return the unit coordinates of all nodes in the map
682        as an numpy.array.
683       
684        """
685        nodes = list(self)
686        coords = numpy.zeros((len(nodes), len(self.map_shape)))
687
688        k = [self.map_shape[1],1]
689        inds = numpy.arange(len(nodes))
690        for i in range(0,len(self.map_shape)):
691            coords[:,i] = numpy.transpose(numpy.floor(inds/k[i]))
692            inds = numpy.mod(inds,k[i])
693
694        ## in hexagonal topology we move every odd map row by 0.5 (only the second coordinate)
695        ## and multiply all the first coordinates by sqrt(0.75) to assure that
696        ## distances between neighbours are of unit size
697        if self.topology == Map.HexagonalTopology:
698            ind = numpy.nonzero(numpy.mod(coords[:, 0], 2))
699            coords[ind,1] = coords[ind,1] + 0.5
700            coords[:,0] = coords[:,0] * numpy.sqrt(0.75)
701        return coords
702
703
704    def initialize_map_random(self, data=None, dimension=5):
705        """Initialize the map nodes vectors randomly, by supplying
706        either training data or dimension of the data.
707       
708        """
709        if data is not None:
710            min, max = ma.min(data, 0), ma.max(data, 0)
711            dimension = data.shape[1]
712        else:
713            min, max = numpy.zeros(dimension), numpy.ones(dimension)
714        for node in self:
715#            node.vector = min + numpy.random.rand(dimension) * (max - min)
716            node.vector = min + random.randint(0, dimension) * (max - min)
717
718    def initialize_map_linear(self, data, map_shape=(10, 20)):
719        """ Initialize the map node vectors linearly over the subspace
720        of the two most significant eigenvectors.
721       
722        """
723        data = data.copy() #ma.array(data)
724        dim = data.shape[1]
725        mdim = len(map_shape)
726        munits = len(list(self))
727        me = ma.mean(data, 0)
728        A = numpy.zeros((dim, dim))
729
730        for i in range(dim):
731            data[:, i] = data[:, i] - me[i]
732       
733        for i in range(dim):
734            for j in range(dim):
735                c = data[:, i] * data[:, j]
736                A[i, j] = ma.sum(c) / len(c)
737                A[j, i] = A[i, j]
738
739        eigval, eigvec = numpy.linalg.eig(A)
740        ind = list(reversed(numpy.argsort(eigval)))
741        eigval = eigval[ind[:mdim]]
742        eigvec = eigvec[:, ind[:mdim]]
743
744        for i in range(mdim):
745            eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i])
746
747        unit_coords = self.unit_coords()
748        for d in range(mdim):
749            max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d])
750            if max > min:
751                unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min)
752            ## in case of one-dimensional SOM
753            else:
754                unit_coords[:, d] = 0.5
755
756        unit_coords = (unit_coords - 0.5) * 2
757
758        vectors = numpy.array([me for i in range(munits)])
759        for i in range(munits):
760            for d in range(mdim):
761                vectors[i] = vectors[i] +  unit_coords[i][d] * numpy.transpose(eigvec[:, d])
762
763        for i, node in enumerate(self):
764            node.vector = vectors[i]
765
766    def get_u_matrix(self):
767        return getUMat(self)
768   
769    getUMat = deprecated_attribute("getUMat", "get_u_matrix")
770       
771##########################################################################
772# Supporting functions
773
774def get_u_matrix(som):
775    dim1 = som.map_shape[0]*2-1
776    dim2 = som.map_shape[1]*2-1
777
778    a = numpy.zeros((dim1, dim2))
779    if som.topology == HexagonalTopology:
780        return __fill_hex(a, som)
781    else:
782        return __fill_rect(a, som)
783   
784def getUMat(som):
785    import warnings
786    warnings.warn("Deprecated function name 'getUMat'. Use 'get_u_matrix' instead.",
787                  DeprecationWarning)
788    return get_u_matrix(som)
789
790def __fill_hex(array, som):
791    xDim, yDim = som.map_shape
792    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
793    check = lambda x, y: x >= 0 and x < (xDim * 2 - 1) and \
794                            y >= 0 and y < (yDim * 2 - 1)
795    dx = [1, 0, -1]
796    dy = [0, 1, 1]
797    for i in range(0, xDim*2, 2):
798        for j in range(0, yDim*2, 2):
799            for ddx, ddy in zip(dx, dy):
800                if check(i+ddx, j+ddy):
801                    array[i+ddx][j+ddy] = \
802                        numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - \
803                                           d[(i/2+ddx, j/2+ddy)].vector)**2))
804    dx = [1, -1, 0, -1, 0, 1]
805    dy = [0, 0, 1, 1, -1, -1]
806    for i in range(0, xDim*2, 2):
807        for j in range(0, yDim*2, 2):
808            l = [array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) \
809                 if check(i+ddx, j+ddy)]
810            array[i][j] = sum(l)/len(l)
811    return array
812
813def __fill_rect(array, som):
814    xDim, yDim = som.map_shape
815    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
816    check = lambda x, y: x >= 0 and x < xDim*2 - 1 and y >= 0 and y < yDim*2 - 1
817    dx = [1, 0, 1]
818    dy = [0, 1, 1]
819    for i in range(0, xDim*2, 2):
820        for j in range(0, yDim*2, 2):
821            for ddx, ddy in zip(dx, dy):
822                if check(i+ddx, j+ddy):
823                    array[i+ddx][j+ddy] = \
824                        numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - \
825                                           d[(i/2+ddx, j/2+ddy)].vector)**2))
826    dx = [1, -1, 0, 0, 1, -1, -1, 1]
827    dy = [0, 0, -1, 1, 1, -1, 1, -1]
828    for i in range(0, xDim*2, 2):
829        for j in range(0, yDim*2, 2):
830            l = [array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) \
831                 if check(i+ddx, j+ddy)]
832            array[i][j] = sum(l)/len(l)
833    return array
834
835##########################################################################
836# Testing (deprecated, use regression tests instead 
837
838if __name__ == "__main__":
839    data = orange.ExampleTable("iris.tab")
840    learner = SOMLearner()
841    learner = SOMLearner(batch_train=True,
842                         initialize=InitializeLinear, 
843                         radius_ini=3,
844                         radius_fin=1,
845                         neighbourhood=Map.NeighbourhoodGaussian, 
846                         epochs=1000)
847    map = learner(data)
848    for e in data:
849        print map(e), e.getclass()
Note: See TracBrowser for help on using the repository browser.