source: orange/Orange/projection/som.py @ 10316:cae20013e4c8

Revision 10316:cae20013e4c8, 29.0 KB checked in by Ales Erjavec <ales.erjavec@…>, 2 years ago (diff)

Added support for continuous class in SOMSupervisedLearner (fixes #1101).

Line 
1"""
2******************************
3Self-organizing maps (``som``)
4******************************
5
6.. index:: self-organizing map (SOM)
7
8.. index::
9   single: projection; self-organizing map (SOM)
10
11
12`Self-organizing map <http://en.wikipedia.org/wiki/Self-organizing_map>`_
13(SOM) is an unsupervised learning  algorithm that infers low,
14typically two-dimensional discretized representation of the input
15space, called a map. The map preserves topological properties of the
16input space, such that the cells that are close in the map include data
17instances that are similar to each other.
18
19=================================
20Inference of Self-Organizing Maps
21=================================
22
23The main class for inference of self-organizing maps is :obj:`SOMLearner`.
24The class initializes the topology of the map and returns an inference
25objects which, given the data, performs the optimization of the map::
26
27   import Orange
28   som = Orange.projection.som.SOMLearner(map_shape=(8, 8),
29            initialize=Orange.projection.som.InitializeRandom)
30   data = Orange.data.table("iris.tab")
31   map = som(data)
32
33
34.. autoclass:: SOMLearner
35   :members:
36   
37.. autoclass:: SOMMap
38   :members:
39   
40Topology
41--------
42
43.. autodata:: HexagonalTopology
44
45.. autodata:: RectangularTopology
46
47
48Map initialization
49------------------
50
51.. autodata:: InitializeLinear
52
53.. autodata:: InitializeRandom
54
55Node neighbourhood
56------------------
57
58.. autodata:: NeighbourhoodGaussian
59
60.. autodata:: NeighbourhoodBubble
61
62.. autodata:: NeighbourhoodEpanechicov
63
64   
65=============================================
66Supervised Learning with Self-Organizing Maps
67=============================================
68
69Supervised learning requires class-labeled data. For training,
70class information is first added to data instances as a regular
71feature by extending the feature vectors accordingly. Next, the
72map is trained, and the training data projected to nodes. Each
73node then classifies to the majority class. The dimensions
74corresponding to the class features are then removed from the
75prototype vector of each node in the map. For classification,
76the data instance is projected to the best matching cell, returning
77the associated class.
78
79An example of the code that trains and then classifies on the same
80data set is::
81
82    import Orange
83    import random
84    learner = Orange.projection.som.SOMSupervisedLearner(map_shape=(4, 4))
85    data = Orange.data.Table("iris.tab")
86    classifier = learner(data)
87    random.seed(50)
88    for d in random.sample(data, 5):
89        print "%-15s originally %-15s" % (classifier(d), d.getclass())
90
91.. autoclass:: SOMSupervisedLearner
92   :members:
93   
94==================
95Supporting Classes
96==================
97
98The actual map optimization algorithm is implemented by :class:`Solver`
99class which is used by both the :class:`SOMLearner` and the
100:class:`SOMSupervisedLearner`.
101
102.. autoclass:: Solver
103   :members:
104   
105Class :obj:`Map` stores the self-organizing map composed of :obj:`Node`
106objects. The code below (:download:`som-node.py <code/som-node.py>`)
107shows an example how to access the information stored in the node of the
108map:
109
110.. literalinclude:: code/som-node.py
111    :lines: 7-
112
113.. autoclass:: Map
114   :members:
115   
116.. autoclass:: Node
117   :members:
118 
119========
120Examples
121========
122
123The following code  (:download:`som-mapping.py <code/som-mapping.py>`)
124infers self-organizing map from Iris data set. The map is rather small,
125and consists of only 9 cells. We optimize the network, and then report
126how many data instances were mapped into each cell. The second part
127of the code reports on data instances from one of the corner cells:
128
129.. literalinclude:: code/som-mapping.py
130    :lines: 7-
131
132The output of this code is::
133
134    Node    Instances
135    (0, 0)  21
136    (0, 1)  1
137    (0, 2)  23
138    (1, 0)  22
139    (1, 1)  7
140    (1, 2)  6
141    (2, 0)  32
142    (2, 1)  16
143    (2, 2)  22
144   
145    Data instances in cell (1, 2):
146    [4.9, 2.4, 3.3, 1.0, 'Iris-versicolor']
147    [5.0, 2.0, 3.5, 1.0, 'Iris-versicolor']
148    [5.6, 2.9, 3.6, 1.3, 'Iris-versicolor']
149    [5.7, 2.6, 3.5, 1.0, 'Iris-versicolor']
150    [5.5, 2.4, 3.7, 1.0, 'Iris-versicolor']
151    [5.0, 2.3, 3.3, 1.0, 'Iris-versicolor']
152   
153"""
154
155import sys, os
156
157import numpy
158import numpy.ma as ma
159import orange
160import random
161
162import Orange
163from Orange.misc import deprecated_keywords, \
164                        deprecated_attribute
165
166
167HexagonalTopology = 0
168"""Hexagonal topology, cells are hexagon-shaped."""
169RectangularTopology = 1
170"""Rectangular topology, cells are square-shaped"""
171
172InitializeLinear = 0
173"""Data instances are initially assigned to cells according to their two-dimensional PCA projection."""
174InitializeRandom = 1
175"""Data instances are initially randomly assigned to cells."""
176
177NeighbourhoodGaussian = 0 
178"""Gaussian (smoothed) neighborhood."""
179NeighbourhoodBubble = 1
180"""Bubble (crisp) neighborhood."""
181NeighbourhoodEpanechicov = 2
182"""Epanechicov (cut and smoothed) neighborhood."""
183
184##########################################################################
185# Inference of Self-Organizing Maps
186
187class Solver(object):
188    """ SOM Solver class used to train the map. Supports batch
189    and sequential training. Based on ideas from
190    `SOM Toolkit for Matlab <http://www.cis.hut.fi/somtoolbox>`_.
191
192    :param neighbourhood: neighborhood function id
193    :type neighbourhood: :obj:`NeighbourhoodGaussian`,
194        :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov`
195       
196    :param radius_ini: initial radius
197    :type radius_ini: int
198   
199    :param raduis_fin: final radius
200    :type raduis_fin: int
201   
202    :param epoch: number of training interactions
203    :type epoch: int
204   
205    :param batch_train: if True run the batch training algorithm
206        (default), else use the sequential one
207    :type batch_train: bool
208   
209    :param learning_rate: learning rate for the sequential training algorithm
210    :type learning_rate: float
211   
212    """
213   
214    def __init__(self, **kwargs):
215        self.neighbourhood = NeighbourhoodGaussian
216        self.learning_rate = 0.05
217        self.radius_ini = 2
218        self.radius_fin = 1
219        self.epochs = 100
220        self.random_order = False
221        self.batch_train = True
222        self.eps = 1e-5
223        self.qerror = []
224        self.__dict__.update(kwargs)
225
226    def radius(self, epoch):
227        return self.radius_ini - (float(self.radius_ini) - self.radius_fin)*(float(epoch) / self.epochs)
228
229    def alpha(self, epoch):
230        """Compute the learning rate from epoch, starting with learning_rate to 0 at the end of training.
231        """
232        return (1 - epoch/self.epochs)*self.learning_rate
233           
234    @deprecated_keywords({"progressCallback": "progress_callback"})
235    def __call__(self, data, map, progress_callback=None):
236        """ Train the map from data. Pass progress_callback function to report on the progress.
237        """
238        self.data = data
239        self.map = map
240
241        self.qerror = []
242        if self.batch_train:
243            self.train_batch(progress_callback)
244        else:
245            self.train_sequential(progress_callback)
246        return self.map
247
248    def train_sequential(self, progress_callback):
249        """Sequential training algorithm.
250        """
251        self.vectors = self.map.vectors()
252        self.unit_distances = self.map.unit_distances()
253       
254#        from pylab import plot, show, draw, ion
255#        ion()
256#        plot(self.data[:, 0], self.data[:, 1], "ro")
257#        vec_plot = plot(self.vectors[:, 0], self.vectors[:, 1], "bo")[0]
258       
259        for epoch in range(self.epochs):
260            self.distances = []
261            ind = range(len(self.data))
262            if self.random_order:
263                random.shuffle(ind)
264            self.train_step_sequential(epoch, ind)
265            if progress_callback:
266                progress_callback(100.0*epoch/self.epochs)
267            self.qerror.append(numpy.mean(numpy.sqrt(self.distances)))
268#            print epoch, "q error:", numpy.mean(numpy.sqrt(self.distances)), self.radius(epoch)
269            if epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps:
270                break
271           
272#            vec_plot.set_xdata(self.vectors[:, 0])
273#            vec_plot.set_ydata(self.vectors[:, 1])
274#            draw()
275#        show()
276
277    def train_step_sequential(self, epoch, indices=None):
278        """A single step of sequential training algorithm.
279        """
280        indices = range(len(self.data)) if indices == None else indices
281        for ind in indices:
282            x = self.data[ind]
283            Dx = self.vectors - self.data[ind]
284            Dist = ma.sum(Dx**2, 1)
285            min_dist = ma.min(Dist)
286            bmu = ma.argmin(Dist)
287            self.distances.append(min_dist)
288
289            if self.neighbourhood == Map.NeighbourhoodGaussian:
290                h = numpy.exp(-self.unit_distances[:, bmu]/(2*self.radius(epoch))) * (self.unit_distances[:, bmu] <= self.radius(epoch))
291            elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
292                h = 1.0 - (self.unit_distances[:bmu]/self.radius(epoch))**2
293                h = h * (h >= 0.0)
294            else:
295                h = 1.0*(self.unit_distances[:, bmu] <= self.radius(epoch))
296            h = h * self.alpha(epoch)
297
298            nonzero = ma.nonzero(h)
299            h = h[nonzero]
300
301            self.vectors[nonzero] = self.vectors[nonzero] - Dx[nonzero] * numpy.reshape(h, (len(h), 1))
302
303    @deprecated_keywords({"progressCallback": "progress_callback"})
304    def train_batch(self, progress_callback=None):
305        """Batch training algorithm.
306        """
307       
308        self.unit_distances = self.map.unit_distances()
309        self.constant_matrix = 2 * ma.dot(numpy.eye(self.data.shape[1]), numpy.transpose(self.data))
310        self.dist_cons = numpy.transpose(ma.dot(self.data**2, numpy.ones(self.data.shape[1])))
311        self.weight_matrix = numpy.ones((self.data.shape[1], self.data.shape[0]))
312        self.vectors = self.map.vectors()
313       
314##        from pylab import plot, show, draw, ion
315##        ion()
316##        plot(self.data[:, 0], self.data[:, 1], "ro")
317##        vec_plot = plot(self.vectors[:, 0], self.vectors[:, 1], "bo")[0]
318       
319        for epoch in range(self.epochs):
320            self.train_step_batch(epoch)
321            if progress_callback:
322                progress_callback(100.0*epoch/self.epochs)
323            if False and epoch > 5 and numpy.mean(numpy.abs(numpy.array(self.qerror[-5:-1]) - self.qerror[-1])) <= self.eps:
324                break
325##            vec_plot.set_xdata(self.vectors[:, 0])
326##            vec_plot.set_ydata(self.vectors[:, 1])
327##            draw()
328##        show()
329       
330        for node, vector in zip(self.map, self.vectors):
331            node.vector = vector
332
333    def train_step_batch(self, epoch):
334        """A single step of batch training algorithm.
335        """
336        D1 = ma.dot(self.vectors**2, self.weight_matrix)
337        D2 = ma.dot(self.vectors, self.constant_matrix)
338        Dist = D1 - D2
339
340        best_nodes = ma.argmin(Dist, 0)
341        distances = ma.min(Dist, 0)
342##        print "q error:", ma.mean(ma.sqrt(distances + self.dist_cons)), self.radius(epoch)
343        self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons)))
344
345        if self.neighbourhood == Map.NeighbourhoodGaussian:       
346            H = numpy.exp(-self.unit_distances/(2*self.radius(epoch))) * (self.unit_distances <= self.radius(epoch))
347        elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
348            H = 1.0 - (self.unit_distances/self.radius(epoch))**2
349            H = H * (H >= 0.0)
350        else:
351            H = 1.0*(self.unit_distances <= self.radius(epoch))
352
353        P =  numpy.zeros((self.vectors.shape[0], self.data.shape[0]))
354       
355        P[(best_nodes, range(len(best_nodes)))] = numpy.ones(len(best_nodes))
356       
357        S = ma.dot(H, ma.dot(P, self.data))
358       
359        A = ma.dot(H, ma.dot(P, ~self.data._mask))
360
361##        nonzero = (range(epoch%2, len(self.vectors), 2), )
362        nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), )
363       
364        self.vectors[nonzero] = S[nonzero] / A[nonzero]
365       
366
367class SOMLearner(orange.Learner):
368    """Considers an input data set, projects the data instances
369    onto a map, and returns a result in the form of a classifier
370    holding  projection information together with an algorithm to
371    project new data instances. Uses :obj:`Map` for representation of
372    projection space, :obj:`Solver` for training, and returns a
373    trained map with information on projection of the training
374    data as crafted by :obj:`SOMMap`.
375   
376    :param map_shape: dimension of the map
377    :type map_shape: tuple
378   
379    :param initialize: initialization type id; linear initialization
380        assigns the data to the cells according to its position in
381        two-dimensional principal component projection   
382    :type initialize: :obj:`InitializeRandom` or :obj:`InitializeLinear`
383   
384    :param topology: topology type id
385    :type topology: :obj:`HexagonalTopology` or :obj:`RectangularTopology`
386   
387    :param neighbourhood: cell neighborhood type id
388    :type neighbourhood: :obj:`NeighbourhoodGaussian`,
389        :obj:`NeighbourhoodBubble`, or :obj:`NeighbourhoodEpanechicov`
390       
391    :param batch_train: perform batch training?
392    :type batch_train: bool
393   
394    :param learning_rate: learning rate
395    :type learning_rate: float
396   
397    :param radius_ini: initial radius
398    :type radius_ini: int
399   
400    :param radius_fin: final radius
401    :type radius_fin: int
402   
403    :param epochs: number of epochs (iterations of a training steps)
404    :type epochs: int
405   
406    :param solver: a class with the optimization algorithm
407   
408    """
409    @deprecated_keywords({"examples": "data",
410                          "weightId": "weight_id"})
411    def __new__(cls, data=None, weight_id=0, **kwargs):
412        self = orange.Learner.__new__(cls, **kwargs)
413        if data is not None:
414            self.__init__(**kwargs)
415            return self.__call__(data, weight_id)
416        else:
417            return self
418       
419    def __init__(self, map_shape=(5, 10), initialize=InitializeLinear, topology=HexagonalTopology, neighbourhood=NeighbourhoodGaussian,
420                 batch_train=True, learning_rate=0.05, radius_ini=3.0, radius_fin=1.0, epochs=1000, solver=Solver, **kwargs):
421
422        self.map_shape = map_shape
423        self.initialize = initialize
424        self.topology = topology
425        self.neighbourhood = neighbourhood
426        self.batch_train = batch_train
427        self.learning_rate = learning_rate
428        self.radius_ini = radius_ini
429        self.radius_fin = radius_fin
430        self.epochs = epochs
431        self.solver = solver
432        self.eps = 1e-4
433       
434        orange.Learner.__init__(self, **kwargs)
435       
436    @deprecated_keywords({"weightID": "weight_id",
437                          "progressCallback": "progress_callback"})
438    def __call__(self, data, weight_id=0, progress_callback=None):
439        numdata, classes, w = data.toNumpyMA()
440        map = Map(self.map_shape, topology=self.topology)
441        if self.initialize == Map.InitializeLinear:
442            map.initialize_map_linear(numdata)
443        else:
444            map.initialize_map_random(numdata)
445        map = self.solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
446                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
447                     epochs=self.epochs)(numdata, map, progress_callback=progress_callback)
448        return SOMMap(map, data)
449
450class SOMMap(orange.Classifier):
451    """Project the data onto the inferred self-organizing map.
452   
453    :param map: a trained self-organizing map
454    :type map: :obj:`SOMMap`
455    :param data: the data to be mapped on the map
456    :type data: :obj:`Orange.data.Table`
457   
458    """
459   
460    def __init__(self, map=[], data=[]):
461        self.map = map
462        self.data = data
463        for node in map:
464            node.reference_instance = orange.Example(orange.Domain(self.data.domain.attributes, False),
465                                                 [(var(value) if var.varType == orange.VarTypes.Continuous else var(int(value))) \
466                                                  for var, value in zip(self.data.domain.attributes, node.vector)])
467           
468            node.instances = orange.ExampleTable(self.data.domain)
469
470        for inst in self.data:
471            node = self.get_best_matching_node(inst)
472            node.instances.append(inst)
473
474        if self.data and self.data.domain.class_var:
475            for node in self.map:
476                node.classifier = orange.MajorityLearner(node.instances if node.instances else self.data)
477                     
478            self.class_var = self.data.domain.class_var
479        else:
480            self.class_var = None
481           
482    classVar = deprecated_attribute("classVar", "class_var")
483    examples = deprecated_attribute("examples", "data")
484
485    def get_best_matching_node(self, instance):
486        """Return the best matching node for a given data instance
487        """
488        instance, c, w = orange.ExampleTable([instance]).toNumpyMA()
489        vectors = self.map.vectors()
490        Dist = vectors - instance
491        bmu = ma.argmin(ma.sum(Dist**2, 1))
492        return list(self.map)[bmu]
493   
494    getBestMatchingNode = \
495        deprecated_attribute("getBestMatchingNode",
496                             "get_best_matching_node")
497       
498    def __call__(self, instance, what=orange.GetValue):
499        """Map `instance` onto the best matching node and predict
500        its class using the majority/mean of the training data in
501        that node.
502         
503        """
504        bmu = self.get_best_matching_node(instance)
505        return bmu.classifier(instance, what)
506
507    def __getattr__(self, name):
508        try:
509            return getattr(self.__dict__["map"], name)
510        except (KeyError, AttributeError):
511            raise AttributeError(name)
512
513    def __iter__(self):
514        """ Iterate over all nodes in the map
515        """
516        return iter(self.map)
517
518    def __getitem__(self, val):
519        """ Return the node at position x, y
520        """
521        return self.map.__getitem__(val)
522
523##########################################################################
524# Supervised learning
525
526class SOMSupervisedLearner(SOMLearner):
527    """SOMSupervisedLearner is a class used to learn SOM from
528    orange.ExampleTable, by using the class information in the
529    learning process. This is achieved by adding a value for each
530    class to the training instances, where 1.0 signals class membership
531    and all other values are 0.0. After the training, the new values
532    are discarded from the node vectors.
533   
534    :param data: class-labeled data set
535    :type data: :obj:`Orange.data.Table`
536    :param progress_callback: a one argument function to report
537        on inference progress (in %)
538       
539    """
540    @deprecated_keywords({"weightID": "weight_id",
541                          "progressCallback": "progress_callback"})
542    def __call__(self, data, weight_id=0, progress_callback=None):
543        array, classes, w = data.toNumpyMA()
544        domain = data.domain
545        if isinstance(domain.class_var, Orange.feature.Discrete):
546            # Discrete class (extend the data with class indicator matrix)
547            nval = len(data.domain.class_var.values)
548            ext = ma.zeros((len(array), nval))
549            ext[([i for i, m in enumerate(classes.mask) if m],
550                 [int(c) for c, m in zip(classes, classes.mask) if m])] = 1.0
551        elif isinstance(domain.class_var, Orange.feature.Continuous):
552            # Continuous class, just add the one column (what about multitarget)
553            nval = 1
554            ext = ma.zeros((len(array), nval))
555            ext[:,0] = classes
556        elif domain.class_var is None:
557            # No class var
558            nval = 0
559            ext = ma.zeros((len(array), nval))
560        else:
561            raise TypeError("Unsuported `class_var` %r" % domain.class_var) 
562        array = ma.hstack((array, ext))
563       
564        map = Map(self.map_shape, topology=self.topology)
565        if self.initialize == Map.InitializeLinear:
566            map.initialize_map_linear(array)
567        else:
568            map.initialize_map_random(array)
569        map = Solver(batch_train=self.batch_train, eps=self.eps, neighbourhood=self.neighbourhood,
570                     radius_ini=self.radius_ini, radius_fin=self.radius_fin, learning_rate=self.learning_rate,
571                     epoch=self.epochs)(array, map, progress_callback=progress_callback)
572        # Remove class columns from the vectors
573        for node in map:
574            node.vector = node.vector[:-nval]
575        return SOMMap(map, data)
576
577##########################################################################
578# Supporting Classes
579
580class Node(object):
581    """An object holding the information about the node in the map.
582
583    .. attribute:: pos
584
585        Node position.
586
587    .. attribute:: reference_instance
588
589        Reference data instance (a prototype).
590       
591    .. attribute:: instances
592   
593        Data set with training instances that were mapped to the node.
594         
595    """
596    def __init__(self, pos, map=None, vector=None):
597        self.pos = pos
598        self.map = map
599        self.vector = vector
600        self.reference_instance = None
601        self.instances = None
602       
603    referenceExample = deprecated_attribute("referenceExample", "reference_instance")
604    examples = deprecated_attribute("examples", "instances")
605
606class Map(object):
607    """Self organizing map (the structure). Includes methods for
608    data initialization.
609   
610    .. attribute:: map_shape
611   
612        A two element tuple containing the map width and height.
613         
614    .. attribute:: topology
615   
616        Topology of the map (``HexagonalTopology`` or
617        ``RectangularTopology``)
618       
619    .. attribute:: map
620
621        Self orginzing map. A list of lists of :obj:`Node`.
622       
623    """
624   
625    HexagonalTopology = HexagonalTopology
626    RectangularTopology = RectangularTopology
627    InitializeLinear = InitializeLinear
628    InitializeRandom = InitializeRandom
629    NeighbourhoodGaussian = NeighbourhoodGaussian
630    NeighbourhoodBubble = NeighbourhoodBubble
631    NeighbourhoodEpanechicov = NeighbourhoodEpanechicov
632       
633    def __init__(self, map_shape=(20, 40), topology=HexagonalTopology):
634        self.map_shape = map_shape
635        self.topology = topology
636        self.map = [[Node((i, j), self) for j in range(map_shape[1])] for i in range(map_shape[0])]
637       
638    def __getitem__(self, pos):
639        """ Return the node at position x, y.
640        """
641        x, y = pos
642        return self.map[x][y]
643
644    def __iter__(self):
645        """ Iterate over all nodes in the map.
646        """
647        for row in self.map:
648            for node in row:
649                yield node
650
651    def vectors(self):
652        """Return all vectors of the map as rows in an numpy.array.
653        """
654        return numpy.array([node.vector for node in self])
655
656    def unit_distances(self):
657        """Return a NxN numpy.array of internode distances (based on
658        node position in the map, not vector space) where N is the
659        number of nodes.
660       
661        """
662        nodes = list(self)
663        dist = numpy.zeros((len(nodes), len(nodes)))
664
665        coords = self.unit_coords()
666        for i in range(len(nodes)):
667            for j in range(len(nodes)):
668                dist[i, j] = numpy.sqrt(numpy.dot(coords[i] - coords[j], coords[i] - coords[j]))
669        return numpy.array(dist)
670
671    def unit_coords(self):
672        """ Return the unit coordinates of all nodes in the map
673        as an numpy.array.
674       
675        """
676        nodes = list(self)
677        coords = numpy.zeros((len(nodes), len(self.map_shape)))
678        coords[:, 0] = numpy.floor(numpy.arange(len(nodes)) / self.map_shape[0])
679        coords[:, 1] = numpy.mod(numpy.arange(len(nodes)), self.map_shape[1])
680       
681        ## in hexagonal topology we move every odd map row by 0.5 and multiply all by sqrt(0.75)
682        if self.topology == Map.HexagonalTopology:
683            ind = numpy.nonzero(1 - numpy.mod(coords[:, 0], 2))
684            coords[ind] = coords[ind] + 0.5
685            coords = coords * numpy.sqrt(0.75)
686        return coords
687
688
689    def initialize_map_random(self, data=None, dimension=5):
690        """Initialize the map nodes vectors randomly, by supplying
691        either training data or dimension of the data.
692       
693        """
694        if data is not None:
695            min, max = ma.min(data, 0), ma.max(data, 0)
696            dimension = data.shape[1]
697        else:
698            min, max = numpy.zeros(dimension), numpy.ones(dimension)
699        for node in self:
700#            node.vector = min + numpy.random.rand(dimension) * (max - min)
701            node.vector = min + random.randint(0, dimension) * (max - min)
702
703    def initialize_map_linear(self, data, map_shape=(10, 20)):
704        """ Initialize the map node vectors linearly over the subspace
705        of the two most significant eigenvectors.
706       
707        """
708        data = data.copy() #ma.array(data)
709        dim = data.shape[1]
710        mdim = len(map_shape)
711        munits = len(list(self))
712        me = ma.mean(data, 0)
713        A = numpy.zeros((dim, dim))
714
715        for i in range(dim):
716            data[:, i] = data[:, i] - me[i]
717       
718        for i in range(dim):
719            for j in range(dim):
720                c = data[:, i] * data[:, j]
721                A[i, j] = ma.sum(c) / len(c)
722                A[j, i] = A[i, j]
723
724        eigval, eigvec = numpy.linalg.eig(A)
725        ind = list(reversed(numpy.argsort(eigval)))
726        eigval = eigval[ind[:mdim]]
727        eigvec = eigvec[:, ind[:mdim]]
728
729        for i in range(mdim):
730            eigvec[:, i] = eigvec[:, i] / numpy.sqrt(numpy.dot(eigvec[:, i], eigvec[:, i])) * numpy.sqrt(eigval[i])
731
732        unit_coords = self.unit_coords()
733        for d in range(mdim):
734            max, min = numpy.max(unit_coords[:, d]), numpy.min(unit_coords[:, d])
735            unit_coords[:, d] = (unit_coords[:, d] - min)/(max - min)
736        unit_coords = (unit_coords - 0.5) * 2
737
738        vectors = numpy.array([me for i in range(munits)])
739        for i in range(munits):
740            for d in range(mdim):
741                vectors[i] = vectors[i] +  unit_coords[i][d] * numpy.transpose(eigvec[:, d])
742
743        for i, node in enumerate(self):
744            node.vector = vectors[i]
745
746    def get_u_matrix(self):
747        return getUMat(self)
748   
749    getUMat = deprecated_attribute("getUMat", "get_u_matrix")
750       
751##########################################################################
752# Supporting functions
753
754def get_u_matrix(som):
755    dim1 = som.map_shape[0]*2-1
756    dim2 = som.map_shape[1]*2-1
757
758    a = numpy.zeros((dim1, dim2))
759    if som.topology == HexagonalTopology:
760        return __fill_hex(a, som)
761    else:
762        return __fill_rect(a, som)
763   
764def getUMat(som):
765    import warnings
766    warnings.warn("Deprecated function name 'getUMat'. Use 'get_u_matrix' instead.",
767                  DeprecationWarning)
768    return get_u_matrix(som)
769
770def __fill_hex(array, som):
771    xDim, yDim = som.map_shape
772    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
773    check = lambda x, y: x >= 0 and x < (xDim * 2 - 1) and \
774                            y >= 0 and y < (yDim * 2 - 1)
775    dx = [1, 0, -1]
776    dy = [0, 1, 1]
777    for i in range(0, xDim*2, 2):
778        for j in range(0, yDim*2, 2):
779            for ddx, ddy in zip(dx, dy):
780                if check(i+ddx, j+ddy):
781                    array[i+ddx][j+ddy] = \
782                        numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - \
783                                           d[(i/2+ddx, j/2+ddy)].vector)**2))
784    dx = [1, -1, 0, -1, 0, 1]
785    dy = [0, 0, 1, 1, -1, -1]
786    for i in range(0, xDim*2, 2):
787        for j in range(0, yDim*2, 2):
788            l = [array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) \
789                 if check(i+ddx, j+ddy)]
790            array[i][j] = sum(l)/len(l)
791    return array
792
793def __fill_rect(array, som):
794    xDim, yDim = som.map_shape
795    d = dict([((i, j), som[i, j]) for i in range(xDim) for j in range(yDim)])
796    check = lambda x, y: x >= 0 and x < xDim*2 - 1 and y >= 0 and y < yDim*2 - 1
797    dx = [1, 0, 1]
798    dy = [0, 1, 1]
799    for i in range(0, xDim*2, 2):
800        for j in range(0, yDim*2, 2):
801            for ddx, ddy in zip(dx, dy):
802                if check(i+ddx, j+ddy):
803                    array[i+ddx][j+ddy] = \
804                        numpy.sqrt(ma.sum((d[(i/2, j/2)].vector - \
805                                           d[(i/2+ddx, j/2+ddy)].vector)**2))
806    dx = [1, -1, 0, 0, 1, -1, -1, 1]
807    dy = [0, 0, -1, 1, 1, -1, 1, -1]
808    for i in range(0, xDim*2, 2):
809        for j in range(0, yDim*2, 2):
810            l = [array[i+ddx, j+ddy] for ddx, ddy in zip(dx, dy) \
811                 if check(i+ddx, j+ddy)]
812            array[i][j] = sum(l)/len(l)
813    return array
814
815##########################################################################
816# Testing (deprecated, use regression tests instead 
817
818if __name__ == "__main__":
819    data = orange.ExampleTable("iris.tab")
820    learner = SOMLearner()
821    learner = SOMLearner(batch_train=True,
822                         initialize=InitializeLinear, 
823                         radius_ini=3,
824                         radius_fin=1,
825                         neighbourhood=Map.NeighbourhoodGaussian, 
826                         epochs=1000)
827    map = learner(data)
828    for e in data:
829        print map(e), e.getclass()
Note: See TracBrowser for help on using the repository browser.