source: orange/Orange/feature/selection.py @ 11646:94e12073788c

Revision 11646:94e12073788c, 8.2 KB checked in by Ales Erjavec <ales.erjavec@…>, 9 months ago (diff)

Fixed 'above_threshold' test condition.

It now tests True for scores equal to threshold (as documented).

Line 
1__docformat__ = 'restructuredtext'
2
3from operator import itemgetter
4
5import Orange.core as orange
6
7from Orange.feature.scoring import score_all
8
9
def top_rated(scores, n, highest_best=True):
    """Pick the names of the ``n`` best-scored features.

    :param list scores:
        A list of ``(feature, score)`` pairs, such as the one returned
        by :func:`.score_all`.
    :param int n: Number of features to select.
    :param bool highest_best:
        If true, features with higher scores are preferred; otherwise
        the lowest-scored features are returned first.
    :rtype: :obj:`list`

    """
    # Order the pairs by their score (second item), best first.
    ranked = sorted(scores, key=itemgetter(1), reverse=highest_best)
    # Keep only the feature part of the first n pairs.
    return [feature for feature, _ in ranked[:n]]


# Old camel-case name kept as an alias.
bestNAtts = top_rated
25
26
def above_threshold(scores, threshold=0.0):
    """Return the features (without scores) whose score is greater
    than or equal to the given threshold.

    :param list scores:
        A list of ``(feature, score)`` pairs, such as the one returned
        by :func:`.score_all`.
    :param float threshold: Threshold for selection.
    :rtype: :obj:`list`

    """
    selected = []
    for feature, score in scores:
        # A score exactly equal to the threshold is kept.
        if score >= threshold:
            selected.append(feature)
    return selected


# Old camel-case name kept as an alias.
attsAboveThreshold = above_threshold
41
42
def select(data, scores, n):
    """Build a new data table that keeps the class variable and only
    the ``n`` best features according to ``scores``.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :rtype: :obj:`Orange.data.Table`
    """
    # top_rated returns a fresh list, so appending to it is safe.
    columns = top_rated(scores, n)
    # The class variable is always kept, listed after the features.
    columns.append(data.domain.classVar.name)
    return data.select(columns)


# Alternative names kept as aliases.
selectBestNAtts = select
select_best_n = select
60
61
def select_above_threshold(data, scores, threshold=0.0):
    """Build a new data table that keeps the class variable and the
    features whose score in ``scores`` is higher than or equal to the
    given threshold.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`Orange.data.Table`
    """
    # above_threshold returns a fresh list, so appending to it is safe.
    columns = above_threshold(scores, threshold)
    # The class variable is always kept, listed after the features.
    columns.append(data.domain.classVar.name)
    return data.select(columns)


# Old camel-case name kept as an alias.
selectAttsAboveThresh = select_above_threshold
81
82
def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Repeatedly drop the worst-scored feature until no remaining
    feature scores below the margin.

    The procedure was originally designed for measures such as Relief,
    which are context dependent: removing a feature may change the
    scores of the remaining ones. The scores are therefore recomputed
    after every removal.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for removal
    :type margin: float

    """
    ranking = score_all(data, measure)
    while True:
        remaining = len(data.domain.attributes)
        # The last entry of the ranking is treated as the worst feature
        # (assumes score_all returns its list ordered best-first).
        if remaining == 0 or not (ranking[-1][1] < margin):
            break
        # Keep all but one feature, then rescore on the reduced table.
        data = select(data, ranking, remaining - 1)
        ranking = score_all(data, measure)
    return data


# Old camel-case name kept as an alias.
filterRelieff = select_relief
105
106
class FilterAboveThreshold(object):
    """A wrapper around :obj:`select_above_threshold`. The constructor
    stores the parameters of the feature selection procedure; they are
    applied when the instance is called with the actual data.

    If ``data`` is passed to the constructor, the filter is applied
    immediately and the filtered data table is returned instead of a
    filter instance.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param threshold: threshold for selection. Defaults to 0.
    :type threshold: float
    """

    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                threshold=0.0):
        if data is not None:
            # Data given: build a configured filter and apply it right
            # away. The result is not an instance of cls, so __init__
            # is not run on it.
            configured = cls(measure=measure, threshold=threshold)
            return configured(data)
        # No data: create a plain instance; __init__ stores the settings.
        return object.__new__(cls)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 threshold=0.0):
        self.threshold = threshold
        self.measure = measure

    def __call__(self, data):
        """Return a data table with the features whose score is above
        or equal to the stored threshold.

        :param data: data table
        :type data: Orange.data.Table

        """
        scores = score_all(data, self.measure)
        return select_above_threshold(data, scores, self.threshold)


# Old names kept as aliases.
FilterAttsAboveThresh = FilterAboveThreshold
FilterAttsAboveThresh_Class = FilterAboveThreshold
147
148
class FilterBestN(object):
    """A wrapper around :obj:`select`; the constructor stores the
    filter parameters that are applied when the instance is called
    with data.

    If ``data`` is passed to the constructor, the filter is applied
    immediately and the filtered data table is returned instead of a
    filter instance.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param n: number of features to select
    :type n: int

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                n=5):

        if data is None:
            # No data: create a plain instance; __init__ stores the
            # settings.
            self = object.__new__(cls)
            return self
        else:
            # Data given: build a configured filter and apply it right
            # away. The result is not an instance of cls, so __init__
            # is not run on it.
            self = cls(measure=measure, n=n)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 n=5):
        self.measure = measure
        self.n = n

    def __call__(self, data):
        """Return a data table with at most ``self.n`` best-scored
        features (and the class variable).

        :param data: data table
        :type data: Orange.data.Table

        """
        ma = score_all(data, self.measure)
        # Clamp to the number of available features in a local variable.
        # Writing the clamped value back to self.n would permanently
        # shrink the configured n after one call on a narrow table.
        n = min(self.n, len(data.domain.attributes))
        return select(data, ma, n)


# Old names kept as aliases.
FilterBestNAtts = FilterBestN
FilterBestNAtts_Class = FilterBestN
183
184
class FilterRelief(object):
    """A class wrapper around :obj:`select_relief`; the constructor
    stores the filter parameters that are applied when the instance is
    called with data.

    If ``data`` is passed to the constructor, the filter is applied
    immediately and the filtered data table is returned instead of a
    filter instance.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for Relief scoring
    :type margin: float

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                margin=0):
        if data is not None:
            # Data given: build a configured filter and apply it right
            # away. The result is not an instance of cls, so __init__
            # is not run on it.
            configured = cls(measure=measure, margin=margin)
            return configured(data)
        # No data: create a plain instance; __init__ stores the settings.
        return object.__new__(cls)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 margin=0):
        self.margin = margin
        self.measure = measure

    def __call__(self, data):
        """Apply :obj:`select_relief` to ``data`` with the stored
        measure and margin.

        :param data: data table
        :type data: Orange.data.Table

        """
        return select_relief(data, self.measure, self.margin)


# Old name kept as an alias.
FilterRelief_Class = FilterRelief
216
217##############################################################################
218# wrapped learner
219
220
class FilteredLearner(object):
    """A feature selection wrapper around a base learner. When given
    data, this learner first applies the configured feature selection
    filter and then calls the base learner on the filtered data.

    If ``data`` is passed to the constructor, the learner is applied
    immediately and the resulting classifier is returned instead of a
    learner instance.

    Here is an example of how to build a wrapper around naive Bayesian
    learner and use it on a data set::

        nb = Orange.classification.bayes.NaiveBayesLearner()
        learner = Orange.feature.selection.FilteredLearner(nb,
            filter=Orange.feature.selection.FilterBestN(n=5), name='filtered')
        classifier = learner(data)

    """
    def __new__(cls, baseLearner, data=None, weight=0,
                filter=FilterAboveThreshold(), name='filtered'):
        if data is not None:
            # Data given: build a configured learner and train it right
            # away. The result is not an instance of cls, so __init__
            # is not run on it.
            learner = cls(baseLearner, filter=filter, name=name)
            return learner(data, weight)
        # No data: create a plain instance; __init__ stores the settings.
        return object.__new__(cls)

    def __init__(self, baseLearner, filter=FilterAboveThreshold(),
                 name='filtered'):
        self.name = name
        self.filter = filter
        self.baseLearner = baseLearner

    def __call__(self, data, weight=0):
        """Filter ``data`` with the stored filter, train the base
        learner on the result and wrap the model in a
        :obj:`FilteredClassifier`.
        """
        model = self.baseLearner(self.filter(data), weight)
        return FilteredClassifier(classifier=model, domain=model.domain)


# Old name kept as an alias.
FilteredLearner_Class = FilteredLearner
258
259
class FilteredClassifier:
    """A classifier returned by FilteredLearner.

    Every keyword argument given to the constructor becomes an instance
    attribute; ``__call__`` and ``atts`` read the ``classifier`` and
    ``domain`` attributes respectively.
    """
    def __init__(self, **kwds):
        # Store all keyword arguments as instance attributes.
        vars(self).update(kwds)

    def __call__(self, example, resultType=orange.GetValue):
        """Delegate the prediction to the wrapped classifier."""
        wrapped = self.classifier
        return wrapped(example, resultType)

    def atts(self):
        """Return the attributes of the stored domain."""
        domain = self.domain
        return domain.attributes
Note: See TracBrowser for help on using the repository browser.