source: orange/Orange/feature/selection.py @ 10523:6e9843fb4dac

Revision 10523:6e9843fb4dac, 8.6 KB checked in by Ales Erjavec <ales.erjavec@…>, 2 years ago (diff)

Fixed best_n function.

Line 
1__docformat__ = 'restructuredtext'
2
3from operator import itemgetter
4
5import Orange.core as orange
6
7from Orange.feature.scoring import score_all
8
9
def best_n(scores, n):
    """Return the names of the ``n`` top-scoring features from the list.

    Ties keep their original relative order (the sort is stable).

    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select.
    :type n: int
    :rtype: :obj:`list`

    """
    ranked = sorted(scores, key=itemgetter(1), reverse=True)
    return [entry[0] for entry in ranked[:n]]

bestNAtts = best_n
26
27
def above_threshold(scores, threshold=0.0):
    """Return features (without scores) whose score is strictly
    above the specified threshold.

    :param scores: a list such as one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`list`

    """
    selected = []
    for name, score in [(entry[0], entry[1]) for entry in scores]:
        if score > threshold:
            selected.append(name)
    return selected

attsAboveThreshold = above_threshold
44
45
def select_best_n(data, scores, n):
    """Construct and return a new data table that keeps only the class
    variable and the ``n`` best features according to ``scores``.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :rtype: :obj:`Orange.data.Table`
    """
    keep = best_n(scores, n)
    keep.append(data.domain.classVar.name)
    return data.select(keep)

selectBestNAtts = select_best_n
62
63
def select_above_threshold(data, scores, threshold=0.0):
    """Construct and return a new data table that keeps the class
    variable and only the features whose score is strictly above
    the specified threshold.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`Orange.data.Table`
    """
    keep = above_threshold(scores, threshold)
    keep.append(data.domain.classVar.name)
    return data.select(keep)

selectAttsAboveThresh = select_above_threshold
83
84
def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Iteratively remove the worst scored feature until no feature
    has a score below the margin. The filter procedure was originally
    designed for measures such as Relief, which are context dependent,
    i.e., removal of features may change the scores of other remaining
    features. The score is thus recomputed in each iteration.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for removal
    :type margin: float

    """
    scores = score_all(data, measure)
    # NOTE(review): assumes score_all returns the list sorted by
    # decreasing score, so the worst feature sits at index -1 — confirm
    # against Orange.feature.scoring.score_all.
    while len(data.domain.attributes) > 0 and scores[-1][1] < margin:
        # Drop the single worst feature, then re-score the reduced table.
        data = select_best_n(data, scores, len(data.domain.attributes) - 1)
        scores = score_all(data, measure)
    return data

filterRelieff = select_relief
107
108
class FilterAboveThreshold(object):
    """A class wrapper around :obj:`select_above_threshold`; the
    constructor stores the filter parameters that are applied when the
    function is called.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param threshold: threshold for selection. Defaults to 0.
    :type threshold: float

    Some examples of how to use this class::

        filter = Orange.feature.selection.FilterAboveThreshold(threshold=.15)
        new_data = filter(data)
        new_data = Orange.feature.selection.FilterAboveThreshold(data)
        new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1)
        new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1,
                   measure=Orange.feature.scoring.Gini())

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                threshold=0.0):
        # Called without data: act as a plain constructor.  Called with
        # data: build a configured filter and apply it immediately; the
        # returned object is then a filtered table, not a filter instance
        # (so Python skips __init__ on it).
        if data is None:
            return object.__new__(cls)
        return cls(measure=measure, threshold=threshold)(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 threshold=0.0):
        # Stored parameters used by __call__.
        self.measure = measure
        self.threshold = threshold

    def __call__(self, data):
        """Return the data table restricted to features whose score is
        above the stored threshold.

        :param data: data table
        :type data: Orange.data.Table

        """
        scores = score_all(data, self.measure)
        return select_above_threshold(data, scores, self.threshold)

FilterAttsAboveThresh = FilterAboveThreshold
FilterAttsAboveThresh_Class = FilterAboveThreshold
157
158
class FilterBestN(object):
    """A class wrapper around :obj:`select_best_n`; the
    constructor stores the filter parameters that are applied when the
    function is called.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param n: number of features to select
    :type n: int

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                n=5):
        # Called without data: plain construction.  Called with data:
        # build a configured filter and apply it immediately, returning
        # the filtered table (not a FilterBestN instance).
        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, n=n)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 n=5):
        # Stored parameters used by __call__.
        self.measure = measure
        self.n = n

    def __call__(self, data):
        """Score all features of ``data`` and return a table containing
        the class and the best (at most) ``self.n`` features.

        :param data: data table
        :type data: Orange.data.Table

        """
        ma = score_all(data, self.measure)
        # Cap the requested count locally instead of overwriting self.n:
        # the old `self.n = min(self.n, ...)` made the filter remember the
        # width of the narrowest table it had ever seen, silently capping
        # every later call on wider tables.
        n = min(self.n, len(data.domain.attributes))
        return select_best_n(data, ma, n)

FilterBestNAtts = FilterBestN
FilterBestNAtts_Class = FilterBestN
193
194
class FilterRelief(object):
    """A class wrapper around :obj:`select_relief`; the
    constructor stores the filter parameters that are applied when the
    function is called.  (The previous docstring incorrectly referenced
    :obj:`select_best_n`.)

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for Relief scoring
    :type margin: float

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                margin=0):
        # Called without data: plain construction.  Called with data:
        # build a configured filter and apply it immediately, returning
        # the filtered table (not a FilterRelief instance).
        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, margin=margin)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 margin=0):
        # Stored parameters used by __call__.
        self.measure = measure
        self.margin = margin

    def __call__(self, data):
        """Apply :obj:`select_relief` with the stored measure and margin.

        :param data: data table
        :type data: Orange.data.Table

        """
        return select_relief(data, self.measure, self.margin)

FilterRelief_Class = FilterRelief
226
227##############################################################################
228# wrapped learner
229
230
class FilteredLearner(object):
    """A learner that applies the given features selection method and
    then calls the base learner. This learner is needed to properly cross-validate a combination of feature selection and learning.

    Here is an example of how to build a wrapper around naive Bayesian learner
    and use it on a data set::

        nb = Orange.classification.bayes.NaiveBayesLearner()
        learner = Orange.feature.selection.FilteredLearner(nb,
            filter=Orange.feature.selection.FilterBestN(n=5), name='filtered')
        classifier = learner(data)

    """
    def __new__(cls, baseLearner, data=None, weight=0,
                filter=FilterAboveThreshold(), name='filtered'):
        # Called without data: act as a plain constructor.  Called with
        # data: configure the learner and train right away, returning
        # the resulting classifier instead of the learner.
        if data is None:
            return object.__new__(cls)
        return cls(baseLearner, filter=filter, name=name)(data, weight)

    def __init__(self, baseLearner, filter=FilterAboveThreshold(),
                 name='filtered'):
        # Stored configuration used when training.
        self.baseLearner = baseLearner
        self.filter = filter
        self.name = name

    def __call__(self, data, weight=0):
        """Filter the data, train the base learner on the reduced table
        and wrap the trained model in a FilteredClassifier."""
        reduced = self.filter(data)
        trained = self.baseLearner(reduced, weight)
        return FilteredClassifier(classifier=trained, domain=trained.domain)

FilteredLearner_Class = FilteredLearner
267
268
class FilteredClassifier:
    """A classifier returned by FilteredLearner."""

    def __init__(self, **kwargs):
        # Store arbitrary constructor keywords (e.g. classifier, domain)
        # directly as instance attributes.
        for name, value in kwargs.items():
            setattr(self, name, value)

    def __call__(self, example, resultType=orange.GetValue):
        """Delegate classification of ``example`` to the wrapped classifier."""
        return self.classifier(example, resultType)

    def atts(self):
        """Return the features of the stored (filtered) domain."""
        return self.domain.attributes
Note: See TracBrowser for help on using the repository browser.