source:orange/Orange/misc/selection.py@9994:1073e0304a87

Revision 9994:1073e0304a87, 8.6 KB checked in by Matija Polajnar <matija.polajnar@…>, 2 years ago (diff)

Remove links from documentation to datasets. Remove datasets reference directory.

Line
1"""
2=========================
3Selection (``selection``)
4=========================
5
6.. index:: selection
7.. index::
8   single: misc; selection
9
10Many machine learning techniques generate a set of different solutions or
11have to choose, as for instance in classification tree induction, between
12different features. The most trivial solution is to iterate through the
13candidates, compare them and remember the optimal one. The problem occurs,
14however, when there are multiple candidates that are equally good, and the
15naive approaches would select the first or the last one, depending upon the
16formulation of the if-statement.
17
18:class:`Orange.misc.selection` provides a class that makes a random choice
19in such cases. Each new candidate is compared with the currently optimal
20one; it replaces the optimal if it is better, while if they are equal,
21one is chosen by random. The number of competing optimal candidates is stored,
22so in this random choice the probability to select the new candidate (over the
23current one) is 1/w, where w is the current number of equal candidates,
24including the present one. One can easily verify that this gives equal
25chances to all candidates, independent of the order in which they are
26presented.
27
28Example
29--------
30
31The following snippet loads the data set lymphography and prints out the
32feature with the highest information gain.
33
35
36.. literalinclude:: code/misc-selection-bestonthefly.py
37  :lines: 7-16
38
39Our candidates are tuples gain ratios and features, so we set
40:obj:`call_compare_on_1st` to make the compare function compare the first
41element (gain ratios). We could achieve the same by initializing the object
42like this:
43
45
46.. literalinclude:: code/misc-selection-bestonthefly.py
47  :lines: 18-18
48
49
50The other way to do it is through indices.
51
53
54.. literalinclude:: code/misc-selection-bestonthefly.py
55  :lines: 25-
56
57Here we only give gain ratios to :obj:`BestOnTheFly`, so we don't have
58to specify a special compare operator. After checking all features we
59get the index of the  optimal one by calling :obj:`winner_index`.
60
61.. autoclass:: BestOnTheFly
62
63.. autofunction:: select_best
64
65.. autofunction:: select_best_index
66
67.. autofunction:: compare_first_bigger
68
69.. autofunction:: compare_first_smaller
70
71.. autofunction:: compare_last_bigger
72
73.. autofunction:: compare_last_smaller
74
75.. autofunction:: compare_bigger
76
77.. autofunction:: compare_smaller
78
79"""
80
81import random
82
83from Orange.misc import deprecated_members, deprecated_function_name, \
84                        deprecated_keywords
85
86class BestOnTheFly:
87    """
88    Finds the optimal object in a sequence of objects. The class is fed the
89    candidates one by one, and remembers the winner. It can thus be used by
90    methods that generate different solutions to a problem and need to
91    select the optimal one, but do not want to store them all.
92
93    :param compare: compare function.
94    :param seed: If not given, a random seed of 0 is used to ensure that\
95    the same experiment always gives the same results, despite\
96    pseudo-randomness.random seed.
97    :type seed: int
98    :param call_compare_on_1st: If set, :obj:`BestOnTheFly` will suppose\
99    that the candidates are lists are tuples, and it will call compare\
100    with the first element of the tuple.
101    :type call_compare_on_1st: bool
102    """
103
104    def __init__(self, compare=cmp, seed = 0, call_compare_on_1st = False):
105        self.randomGenerator = random.Random(seed)
106        self.compare=compare
107        self.wins=0
108        self.bestIndex, self.index = -1, -1
109        self.best = None
110        self.call_compare_on_1st = call_compare_on_1st
111
112    def candidate(self, x):
114
115        :param x: new candidate.
116        :type x: object
117        """
118        self.index += 1
119        if not self.wins:
120            self.best=x
121            self.wins=1
122            self.bestIndex=self.index
123            return 1
124        else:
125            if self.call_compare_on_1st:
126                cmpr=self.compare(x[0], self.best[0])
127            else:
128                cmpr=self.compare(x, self.best)
129            if cmpr>0:
130                self.best=x
131                self.wins=1
132                self.bestIndex=self.index
133                return 1
134            elif cmpr==0:
135                self.wins=self.wins+1
136                if not self.randomGenerator.randint(0, self.wins-1):
137                    self.best=x
138                    self.bestIndex=self.index
139                    return 1
140        return 0
141
142    def winner(self):
143        """Return (currently) optimal object. This function can be called
144        any number of times, even when the candidates are still coming.
145
146        :rtype: object
147        """
148        return self.best
149
150    def winner_index(self):
151        """Return the index of the optimal object within the sequence of
152        the candidates.
153
154        :rtype: int
155        """
156        if self.best is not None:
157            return self.bestIndex
158        else:
159            return None
160
161BestOnTheFly = deprecated_members({"callCompareOn1st": "call_compare_on_1st",
162                                   "winnerIndex": "winner_index",
163                                   },
164                                   wrap_methods=["__init__"])(BestOnTheFly)
165
166
167@deprecated_keywords({"callCompareOn1st": "call_compare_on_1st"})
168def select_best(x, compare=cmp, seed = 0, call_compare_on_1st = False):
169    """Return the optimal object from list x. The function is used if the candidates
170    are already in the list, so using the more complicated :obj:`BestOnTheFly` directly is
171    not needed.
172
173    To demonstrate the use of :obj:`BestOnTheFly` see the implementation of
174    :obj:`selectBest`::
175
176      def selectBest(x, compare=cmp, seed = 0, call_compare_on_1st = False):
177          bs=BestOnTheFly(compare, seed, call_compare_on_1st)
178          for i in x:
179              bs.candidate(i)
180          return bs.winner()
181
182    :param x: list of existing candidates.
183    :type x: list
184    :param compare: compare function.
185    :param seed: If not given, a random seed of 0 is used to ensure that
186        the same experiment always gives the same results, despite
187        pseudo-randomness.random seed.
188    :type seed: int
189    :param call_compare_on_1st: If set, :obj:`BestOnTheFly` will suppose
190        that the candidates are lists are tuples, and it will call compare
191        with the first element of the tuple.
192    :type call_compare_on_1st: bool
193    :rtype: object
194
195    """
196    bs=BestOnTheFly(compare, seed, call_compare_on_1st)
197    for i in x:
198        bs.candidate(i)
199    return bs.winner()
200
201selectBest = deprecated_function_name(select_best)
202
203
204@deprecated_keywords({"callCompareOn1st": "call_compare_on_1st"})
205def select_best_index(x, compare=cmp, seed = 0, call_compare_on_1st = False):
206    """Similar to :obj:`selectBest` except that it doesn't return the best object
207    but its index in the list x.
208    """
209    bs=BestOnTheFly(compare, seed, call_compare_on_1st)
210    for i in x:
211        bs.candidate(i)
212    return bs.winner_index()
213
214selectBestIndex = deprecated_function_name(select_best_index)
215
216
217def compare_first_bigger(x, y):
218    """Function takes two lists and compares first elements.
219
220    :param x: list of values.
221    :type x: list
222    :param y: list of values.
223    :type y: list
224    :rtype:  cmp(x[0], y[0])
225    """
226    return cmp(x[0], y[0])
227
228def compare_first_smaller(x, y):
229    """Function takes two lists and compares first elements.
230
231    :param x: list of values.
232    :type x: list
233    :param y: list of values.
234    :type y: list
235    :rtype:  -cmp(x[0], y[0])
236    """
237    return -cmp(x[0], y[0])
238
239def compare_last_bigger(x, y):
240    """Function takes two lists and compares last elements.
241
242    :param x: list of values.
243    :type x: list
244    :param y: list of values.
245    :type y: list
246    :rtype:  cmp(x[0], y[0])
247    """
248    return cmp(x[-1], y[-1])
249
250def compare_last_smaller(x, y):
251    """Function takes two lists and compares last elements.
252
253    :param x: list of values.
254    :type x: list
255    :param y: list of values.
256    :type y: list
257    :rtype:  -cmp(x[0], y[0])
258    """
259    return -cmp(x[-1], y[-1])
260
261def compare_bigger(x, y):
262    """Function takes and compares two numbers.
263
264    :param x: value.
265    :type x: int
266    :param y: value.
267    :type y: int
268    :rtype:  cmp(x, y)
269    """
270    return cmp(x, y)
271
272def compare_smaller(x, y):
273    """Function takes and compares two numbers.
274
275    :param x: value.
276    :type x: int
277    :param y: value.
278    :type y: int
279    :rtype: cmp(x, y)
280    """
281    return -cmp(x, y)
Note: See TracBrowser for help on using the repository browser.