source: orange/Orange/misc/__init__.py @ 10654:cd73789785b5

Revision 10654:cd73789785b5, 9.7 KB checked in by markotoplak, 2 years ago (diff)

Moved selection from misc to utils.

Line 
1"""
2.. index:: misc
3
4.. index:: CostMatrix
5
6-----------------------
7CostMatrix
8-----------------------
9
10CostMatrix is an object that stores costs of (mis)classifications. Costs can be either negative or positive.
11
12.. class:: CostMatrix
13
14    .. attribute:: class_var
15       
16        The (class) attribute to which the matrix applies. This can
17        also be None.
18       
19    .. attribute:: dimension (read only)
20   
21        Matrix dimension, i.e. the number of classes.
22       
23    .. method:: CostMatrix(dimension[, default cost])
24   
25        Constructs a matrix of the given size and initializes it with
26        the default cost (1, if not given). All elements of the matrix
27        are assigned the given cost, except for the diagonal elements, which
28        have the default cost of 0.  (Diagonal elements represent correct
29        classifications and these usually have no price; you can,
30        however, change this.)
31       
32        .. literalinclude:: code/CostMatrix.py
33            :lines: 1-8
34       
35        This initializes the matrix and prints it out:
36       
37        .. literalinclude:: code/CostMatrix.res
38            :lines: 1-3
39   
40    .. method:: CostMatrix(class descriptor[, default cost])
41   
42        Similar to the above, except that class_var is also set to the given descriptor.
43        The number of values of the given attribute (which must be discrete) is used
44        for dimension.
45       
46        .. literalinclude:: code/CostMatrix.py
47            :lines: 10-11
48           
49        This constructs a matrix similar to the one above (the class attribute in iris
50        domain is three-valued) except that the matrix contains 2s instead of 1s.
51       
52    .. method:: CostMatrix([attribute descriptor, ]matrix)
53   
54        Initializes the matrix with the elements given as a sequence of sequences (you
55        can mix lists and tuples if you find it funny). Each subsequence represents a row.
56       
57        .. literalinclude:: code/CostMatrix.py
58            :lines: 13
59
60        If you print this matrix out, it will look like this:
61       
62        .. literalinclude:: code/CostMatrix.res
63            :lines: 5-7
64           
65    .. method:: setcost(predicted, correct, cost)
66   
67        Set the misclassification cost. The matrix above could be
68        constructed by first initializing it with 2s and then changing
69        the prices for virginica's into 1s.
70       
71        .. literalinclude:: code/CostMatrix.py
72            :lines: 15-17
73           
74    .. method:: getcost(predicted, correct)
75   
76        Returns the cost of prediction. Values must be integer
77        indices; if class_var is set, you can also use symbolic values
78        (strings). Note that there's no way to change the size of the
79        matrix. Size is set at construction and does not change.  For
80        the final example, we shall compute the profits of knowing
81        attribute values in the dataset lenses with the same
82        cost-matrix as printed above.
83       
84        .. literalinclude:: code/CostMatrix.py
85            :lines: 19-23
86           
87        As the script shows, you don't have to (and usually won't) call the constructor
88        explicitly. Instead, you will set the corresponding field (in our case meas.cost)
89        to a matrix and let Orange convert it to CostMatrix automatically. Funny as it
90        might look, since Orange uses the constructor to perform such conversions, even
91        the following statement is correct (although the cost matrix is rather dull,
92        with 0s on the diagonal and 1s around):           
93           
94        .. literalinclude:: code/CostMatrix.py
95            :lines: 25
96               
97.. index:: SymMatrix
98
99-----------------------
100SymMatrix
101-----------------------
102
103:obj:`SymMatrix` implements symmetric matrices of size fixed at
104construction time (and stored in :obj:`SymMatrix.dim`).
105
106.. class:: SymMatrix
107
108    .. attribute:: dim
109   
110        Matrix dimension.
111           
112    .. attribute:: matrix_type
113
114        Can be ``SymMatrix.Lower`` (0), ``SymMatrix.Upper`` (1),
115        ``SymMatrix.Symmetric`` (2, default), ``SymMatrix.LowerFilled`` (3) or
116        ``SymMatrix.Upper_Filled`` (4).
117
118        If the matrix type is ``Lower`` or ``Upper``, indexing
119        above or below the diagonal, respectively, will fail.
120        With ``LowerFilled`` and ``Upper_Filled``,
121        the elements above or below the diagonal, respectively, still
122        exist and are set to zero, but they cannot be modified. The
123        default matrix type is ``Symmetric``, but can be changed
124        at any time.
125
126        If matrix type is ``Upper``, it is printed as:
127
128        >>> import Orange
129        >>> m = Orange.misc.SymMatrix(
130        ...     [[1],
131        ...      [2, 4],
132        ...      [3, 6, 9],
133        ...      [4, 8, 12, 16]])
134        >>> m.matrix_type = m.Upper
135        >>> print m
136        (( 1.000,  2.000,  3.000,  4.000),
137         (         4.000,  6.000,  8.000),
138         (                 9.000, 12.000),
139         (                        16.000))
140
141        Changing the type to ``LowerFilled`` changes the printout to
142
143        >>> m.matrix_type = m.LowerFilled
144        >>> print m
145        (( 1.000,  0.000,  0.000,  0.000),
146         ( 2.000,  4.000,  0.000,  0.000),
147         ( 3.000,  6.000,  9.000,  0.000),
148         ( 4.000,  8.000, 12.000, 16.000))
149   
150    .. method:: __init__(dim[, value])
151
152        Construct a symmetric matrix of the given dimension.
153
154        :param dim: matrix dimension
155        :type dim: int
156
157        :param value: default value (0 by default)
158        :type value: double
159       
160       
161    .. method:: __init__(data)
162
163        Construct a new symmetric matrix containing the given data.
164        These can be given as a Python list containing lists or tuples.
165       
166        The following example fills a matrix created above with
167        data in a list::
168
169            import Orange
170            m = [[],
171                 [ 3],
172                 [ 2, 4],
173                 [17, 5, 4],
174                 [ 2, 8, 3, 8],
175                 [ 7, 5, 10, 11, 2],
176                 [ 8, 4, 1, 5, 11, 13],
177                 [ 4, 7, 12, 8, 10, 1, 5],
178                 [13, 9, 14, 15, 7, 8, 4, 6],
179                 [12, 10, 11, 15, 2, 5, 7, 3, 1]]
180                   
181            matrix = Orange.misc.SymMatrix(m)
182
183        SymMatrix also stores diagonal elements. They are set
184        to zero, if they are not specified. The missing elements
185        (shorter lists) are set to zero as well. If a list
186        spreads over the diagonal, the constructor checks
187        for asymmetries. For instance, the matrix
188
189        ::
190
191            m = [[],
192                 [ 3,  0, f],
193                 [ 2,  4]]
194   
195        is only OK if f equals 2. Finally, no row can be longer
196        than matrix size. 
197
198    .. method:: get_values()
199   
200        Return all matrix values in a Python list.
201
202    .. method:: get_KNN(i, k)
203   
204        Return k columns with the lowest value in the i-th row.
205       
206        :param i: i-th row
207        :type i: int
208       
209        :param k: number of neighbors
210        :type k: int
211       
212    .. method:: avg_linkage(clusters)
213   
214        Return a symmetric matrix with average distances between given clusters. 
215     
216        :param clusters: list of clusters
217        :type clusters: list of lists
218       
219    .. method:: invert(type)
220   
221        Invert values in the symmetric matrix.
222       
223        :param type: 0 (-X), 1 (1 - X), 2 (max - X), 3 (1 / X)
224        :type type: int
225
226    .. method:: normalize(type)
227   
228        Normalize values in the symmetric matrix.
229       
230        :param type: 0 (normalize to [0, 1] interval), 1 (Sigmoid)
231        :type type: int
232       
233       
234
235Indexing
236..........
237
238For symmetric matrices the order of indices is not important:
239if ``m`` is a SymMatrix, then ``m[2, 4]`` addresses the same element as ``m[4, 2]``.
240
241..
242    .. literalinclude:: code/symmatrix.py
243        :lines: 1-6
244
245>>> import Orange
246>>> m = Orange.misc.SymMatrix(4)
247>>> for i in range(4):
248...    for j in range(i+1):
249...        m[i, j] = (i+1)*(j+1)
250
251
252Although only the lower left half of the matrix was set explicitly,
253the whole matrix is constructed.
254
255>>> print m
256(( 1.000,  2.000,  3.000,  4.000),
257 ( 2.000,  4.000,  6.000,  8.000),
258 ( 3.000,  6.000,  9.000, 12.000),
259 ( 4.000,  8.000, 12.000, 16.000))
260 
261Entire rows are indexed with a single index. They can be iterated
262over in a for loop or sliced (with, for example, ``m[:3]``):
263
264>>> print m[1]
265(2.0, 4.0, 6.0, 8.0)
266>>> m.matrix_type = m.Lower
267>>> for row in m:
268...     print row
269(1.0,)
270(2.0, 4.0)
271(3.0, 6.0, 9.0)
272(4.0, 8.0, 12.0, 16.0)
273
274.. index:: Random number generator
275
276-----------------------
277Random number generator
278-----------------------
279
280:obj:`Random` uses the
281`Mersenne twister <http://en.wikipedia.org/wiki/Mersenne_twister>`_ algorithm
282to generate random numbers.
283
284::
285
286    >>> import Orange
287    >>> rg = Orange.misc.Random(42)
288    >>> rg(10)
289    4
290    >>> rg(10)
291    7
292    >>> rg.uses  # We called rg two times.
293    2
294    >>> rg.reset()
295    >>> rg(10)
296    4
297    >>> rg(10)
298    7
299    >>> rg.uses
300    2
301
302
303.. class:: Random(initseed)
304
305    :param initseed: Seed used for initializing the random generator.
306    :type initseed: int
307
308    .. method:: __call__(n)
309
310        Return a random integer R such that 0 <= R < n.
311
312        :type n: int
313
314    .. method:: reset([initseed])
315
316        Reinitialize the random generator with `initseed`. If `initseed`
317        is not given, use the existing value of attribute `initseed`.
318
319    .. attribute:: uses
320       
321        The number of times the generator was called after
322        initialization/reset.
323   
324    .. attribute:: initseed
325
326        Random seed.
327
328Two examples of random number generator use found in the documentation
329are :obj:`Orange.evaluation.testing` and :obj:`Orange.data.Table`.
330
331"""
332from functools import wraps
333from Orange.core import RandomGenerator as Random
334from Orange.core import SymMatrix
335from Orange.core import CostMatrix
Note: See TracBrowser for help on using the repository browser.