# source:orange/Orange/utils/selection.py@10654:cd73789785b5

Revision 10654:cd73789785b5, 8.8 KB checked in by markotoplak, 2 years ago (diff)

Moved selection from misc to utils.

Line
1"""
2=========================
3Selection (``selection``)
4=========================
5
6.. index:: selection
7.. index::
8   single: misc; selection
9
10Many machine learning techniques generate a set of different solutions or
11have to choose, as for instance in classification tree induction, between
12different features. The most trivial solution is to iterate through the
13candidates, compare them and remember the optimal one. The problem occurs,
14however, when there are multiple candidates that are equally good, and the
15naive approaches would select the first or the last one, depending upon the
16formulation of the if-statement.
17
18:class:`Orange.utils.selection` provides a class that makes a random choice
19in such cases. Each new candidate is compared with the currently optimal
20one; it replaces the optimal if it is better, while if they are equal,
21one is chosen by random. The number of competing optimal candidates is stored,
22so in this random choice the probability to select the new candidate (over the
23current one) is 1/w, where w is the current number of equal candidates,
24including the present one. One can easily verify that this gives equal
25chances to all candidates, independent of the order in which they are
26presented.
27
28Example
29--------
30
31The following snippet loads the data set lymphography and prints out the
32feature with the highest information gain.
33
35
36.. literalinclude:: code/misc-selection-bestonthefly.py
37  :lines: 7-16
38
39Our candidates are tuples gain ratios and features, so we set
40:obj:`call_compare_on_1st` to make the compare function compare the first
41element (gain ratios). We could achieve the same by initializing the object
42like this:
43
45
46.. literalinclude:: code/misc-selection-bestonthefly.py
47  :lines: 18-18
48
49
50The other way to do it is through indices.
51
53
54.. literalinclude:: code/misc-selection-bestonthefly.py
55  :lines: 25-
56
57Here we only give gain ratios to :obj:`BestOnTheFly`, so we don't have
58to specify a special compare operator. After checking all features we
59get the index of the  optimal one by calling :obj:`winner_index`.
60
61.. autoclass:: BestOnTheFly
62
63.. autofunction:: select_best
64
65.. autofunction:: select_best_index
66
67.. autofunction:: compare_first_bigger
68
69.. autofunction:: compare_first_smaller
70
71.. autofunction:: compare_last_bigger
72
73.. autofunction:: compare_last_smaller
74
75.. autofunction:: compare_bigger
76
77.. autofunction:: compare_smaller
78
79"""
80
81import random
82
83from Orange.utils import deprecated_members, deprecated_function_name, \
84                        deprecated_keywords
85
86class BestOnTheFly:
87    """
88    Finds the optimal object in a sequence of objects. The class is fed the
89    candidates one by one, and remembers the winner. It can thus be used by
90    methods that generate different solutions to a problem and need to
91    select the optimal one, but do not want to store them all.
92
93    :param compare: compare function.
94    :param seed: If not given, a random seed of 0 is used to ensure that\
95    the same experiment always gives the same results, despite\
96    pseudo-randomness.random seed.
97    :type seed: int
98    :param call_compare_on_1st: If set, :obj:`BestOnTheFly` will suppose\
99    that the candidates are lists are tuples, and it will call compare\
100    with the first element of the tuple.
101    :type call_compare_on_1st: bool
102    """
103
104    def __init__(self, compare=cmp, seed = 0, call_compare_on_1st = False):
105        self.random_generator = random.Random(seed)
106        self.compare = compare
107        self.wins = 0
108        self.best_index, self.index = -1, -1
109        self.best = None
110        self.call_compare_on_1st = call_compare_on_1st
111
112    def candidate(self, x):
114
115        :param x: new candidate.
116        :type x: object
117        """
118        self.index += 1
119        if not self.wins:
120            self.best = x
121            self.wins = 1
122            self.best_index = self.index
123            return 1
124        else:
125            if self.call_compare_on_1st:
126                cmpr = self.compare(x[0], self.best[0])
127            else:
128                cmpr = self.compare(x, self.best)
129            if cmpr > 0:
130                self.best = x
131                self.wins = 1
132                self.best_index = self.index
133                return 1
134            elif cmpr == 0:
135                self.wins = self.wins + 1
136                if not self.random_generator.randint(0, self.wins - 1):
137                    self.best = x
138                    self.best_index = self.index
139                    return 1
140        return 0
141
142    def winner(self):
143        """Return (currently) optimal object. This function can be called
144        any number of times, even when the candidates are still coming.
145
146        :rtype: object
147        """
148        return self.best
149
150    def winner_index(self):
151        """Return the index of the optimal object within the sequence of
152        the candidates.
153
154        :rtype: int
155        """
156        if self.best is not None:
157            return self.best_index
158        else:
159            return None
160
161BestOnTheFly = deprecated_members({"callCompareOn1st": "call_compare_on_1st",
162                                   "winnerIndex": "winner_index",
163                                   "randomGenerator": "random_generator",
164                                   "bestIndex": "best_index"
165                                   },
166                                   wrap_methods=["__init__"])(BestOnTheFly)
167
168
169@deprecated_keywords({"callCompareOn1st": "call_compare_on_1st"})
170def select_best(x, compare=cmp, seed = 0, call_compare_on_1st = False):
171    """Return the optimal object from list x. The function is used if the candidates
172    are already in the list, so using the more complicated :obj:`BestOnTheFly` directly is
173    not needed.
174
175    To demonstrate the use of :obj:`BestOnTheFly` see the implementation of
176    :obj:`selectBest`::
177
178      def selectBest(x, compare=cmp, seed = 0, call_compare_on_1st = False):
179          bs=BestOnTheFly(compare, seed, call_compare_on_1st)
180          for i in x:
181              bs.candidate(i)
182          return bs.winner()
183
184    :param x: list of existing candidates.
185    :type x: list
186    :param compare: compare function.
187    :param seed: If not given, a random seed of 0 is used to ensure that
188        the same experiment always gives the same results, despite
189        pseudo-randomness.random seed.
190    :type seed: int
191    :param call_compare_on_1st: If set, :obj:`BestOnTheFly` will suppose
192        that the candidates are lists are tuples, and it will call compare
193        with the first element of the tuple.
194    :type call_compare_on_1st: bool
195    :rtype: object
196
197    """
198    bs=BestOnTheFly(compare, seed, call_compare_on_1st)
199    for i in x:
200        bs.candidate(i)
201    return bs.winner()
202
203selectBest = deprecated_function_name(select_best)
204
205
206@deprecated_keywords({"callCompareOn1st": "call_compare_on_1st"})
207def select_best_index(x, compare=cmp, seed = 0, call_compare_on_1st = False):
208    """Similar to :obj:`selectBest` except that it doesn't return the best object
209    but its index in the list x.
210    """
211    bs=BestOnTheFly(compare, seed, call_compare_on_1st)
212    for i in x:
213        bs.candidate(i)
214    return bs.winner_index()
215
216selectBestIndex = deprecated_function_name(select_best_index)
217
218
219def compare_first_bigger(x, y):
220    """Function takes two lists and compares first elements.
221
222    :param x: list of values.
223    :type x: list
224    :param y: list of values.
225    :type y: list
226    :rtype:  cmp(x[0], y[0])
227    """
228    return cmp(x[0], y[0])
229
230def compare_first_smaller(x, y):
231    """Function takes two lists and compares first elements.
232
233    :param x: list of values.
234    :type x: list
235    :param y: list of values.
236    :type y: list
237    :rtype:  -cmp(x[0], y[0])
238    """
239    return -cmp(x[0], y[0])
240
241def compare_last_bigger(x, y):
242    """Function takes two lists and compares last elements.
243
244    :param x: list of values.
245    :type x: list
246    :param y: list of values.
247    :type y: list
248    :rtype:  cmp(x[0], y[0])
249    """
250    return cmp(x[-1], y[-1])
251
252def compare_last_smaller(x, y):
253    """Function takes two lists and compares last elements.
254
255    :param x: list of values.
256    :type x: list
257    :param y: list of values.
258    :type y: list
259    :rtype:  -cmp(x[0], y[0])
260    """
261    return -cmp(x[-1], y[-1])
262
263def compare_bigger(x, y):
264    """Function takes and compares two numbers.
265
266    :param x: value.
267    :type x: int
268    :param y: value.
269    :type y: int
270    :rtype:  cmp(x, y)
271    """
272    return cmp(x, y)
273
274def compare_smaller(x, y):
275    """Function takes and compares two numbers.
276
277    :param x: value.
278    :type x: int
279    :param y: value.
280    :type y: int
281    :rtype: cmp(x, y)
282    """
283    return -cmp(x, y)
Note: See TracBrowser for help on using the repository browser.