source: orange/Orange/feature/selection.py @ 10709:de7d66dc0118

Revision 10709:de7d66dc0118, 8.3 KB checked in by blaz <blaz.zupan@…>, 2 years ago

pointer to select_best_n for backward compatibility

__docformat__ = 'restructuredtext'

from operator import itemgetter

import Orange.core as orange

from Orange.feature.scoring import score_all

def top_rated(scores, n, highest_best=True):
    """Return the n top-rated features from the list of scores.

    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :param highest_best: if true, features with higher scores are preferred
    :type highest_best: bool
    :rtype: :obj:`list`
    """
    return [x[0] for x in
            sorted(scores, key=itemgetter(1), reverse=highest_best)[:n]]

bestNAtts = top_rated

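# Illustrative sketch for top_rated (not part of the module): rank the features
# of a data set with score_all and keep the names of the three best ones.
# Assumes the bundled "voting" data set; any discrete-class table would do.
#
#     import Orange
#     data = Orange.data.Table("voting")
#     scores = score_all(data, orange.MeasureAttribute_relief(k=20, m=50))
#     best = top_rated(scores, 3)   # names of the three best-scored features
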
def above_threshold(scores, threshold=0.0):
    """Return features (without their scores) whose scores are above
    a specified threshold.

    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`list`
    """
    return [x[0] for x in scores if x[1] > threshold]


attsAboveThreshold = above_threshold

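# Illustrative sketch for above_threshold (not part of the module); `scores`
# is the hypothetical list from the top_rated sketch above, and 0.01 is an
# arbitrary example threshold:
#
#     kept = above_threshold(scores, threshold=0.01)
#     # `kept` holds the names of the features that scored above 0.01
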
def select(data, scores, n):
    """Construct and return a new data table that includes the class
    and only the n best features from the list of scores.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :rtype: :obj:`Orange.data.Table`
    """
    return data.select(top_rated(scores, n) + [data.domain.classVar.name])

selectBestNAtts = select
select_best_n = select

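# Illustrative sketch for select (not part of the module); `data` and `scores`
# as in the sketches above:
#
#     reduced = select(data, scores, 3)
#     print len(reduced.domain)   # 4: the three kept features plus the class
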
def select_above_threshold(data, scores, threshold=0.0):
    """Construct and return a new data table that includes the class
    and the features from the list returned by
    :obj:`~Orange.feature.scoring.score_all` whose scores are above a
    given threshold.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`Orange.data.Table`
    """
    return data.select(above_threshold(scores, threshold) +
                       [data.domain.classVar.name])

selectAttsAboveThresh = select_above_threshold

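# Illustrative sketch for select_above_threshold (not part of the module);
# `data` and `scores` as above, 0.01 again an arbitrary threshold:
#
#     reduced = select_above_threshold(data, scores, threshold=0.01)
#     # `reduced` keeps the class and every feature scoring above 0.01
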
def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Iteratively remove the worst scored feature until no feature
    has a score below the margin. The filter procedure was originally
    designed for measures such as Relief, which are context dependent,
    i.e., removal of features may change the scores of other remaining
    features. The score is thus recomputed in each iteration.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for removal
    :type margin: float

    """
    measl = score_all(data, measure)
    while len(data.domain.attributes) > 0 and measl[-1][1] < margin:
        data = select(data, measl, len(data.domain.attributes) - 1)
        measl = score_all(data, measure)
    return data

filterRelieff = select_relief

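# Illustrative sketch for select_relief (not part of the module): repeatedly
# drops the worst feature and rescores the rest until every remaining feature
# scores at least the margin. `data` as above, 0.01 is an arbitrary margin:
#
#     reduced = select_relief(data, margin=0.01)
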
class FilterAboveThreshold(object):
    """A wrapper around :obj:`select_above_threshold`; the
    constructor stores the parameters of the feature selection
    procedure that are then applied when the selection
    is called with the actual data.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param threshold: threshold for selection. Defaults to 0.
    :type threshold: float
    """

    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                threshold=0.0):
        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, threshold=threshold)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 threshold=0.0):
        self.measure = measure
        self.threshold = threshold

    def __call__(self, data):
        """Return a data table with the class and the features whose
        scores are above the stored threshold.

        :param data: a data table
        :type data: Orange.data.Table

        """
        ma = score_all(data, self.measure)
        return select_above_threshold(data, ma, self.threshold)

FilterAttsAboveThresh = FilterAboveThreshold
FilterAttsAboveThresh_Class = FilterAboveThreshold

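# Illustrative sketch for FilterAboveThreshold (not part of the module): the
# same filtering as select_above_threshold, but with the parameters stored
# first and the data supplied later. `data` as above:
#
#     flt = FilterAboveThreshold(threshold=0.01)
#     reduced = flt(data)
#
# Because of the __new__ shortcut, passing the data directly is equivalent:
#
#     reduced = FilterAboveThreshold(data, threshold=0.01)
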
class FilterBestN(object):
    """A wrapper around :obj:`select`; the
    constructor stores the filter parameters that are applied when the
    object is called with the actual data.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param n: number of features to select
    :type n: int

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                n=5):

        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, n=n)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 n=5):
        self.measure = measure
        self.n = n

    def __call__(self, data):
        # never request more features than the data provides
        ma = score_all(data, self.measure)
        self.n = min(self.n, len(data.domain.attributes))
        return select(data, ma, self.n)

FilterBestNAtts = FilterBestN
FilterBestNAtts_Class = FilterBestN

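# Illustrative sketch for FilterBestN (not part of the module); keeps the five
# best features of `data` (fewer if the data has fewer attributes):
#
#     flt = FilterBestN(n=5)
#     reduced = flt(data)
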
class FilterRelief(object):
    """A class wrapper around :obj:`select_relief`; the
    constructor stores the filter parameters that are applied when the
    object is called with the actual data.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for Relief scoring
    :type margin: float

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                margin=0):

        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, margin=margin)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 margin=0):
        self.measure = measure
        self.margin = margin

    def __call__(self, data):
        return select_relief(data, self.measure, self.margin)

FilterRelief_Class = FilterRelief

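# Illustrative sketch for FilterRelief (not part of the module); applies
# select_relief with the stored measure and margin. `data` as above:
#
#     flt = FilterRelief(margin=0.01)
#     reduced = flt(data)
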
##############################################################################
# wrapped learner


class FilteredLearner(object):
    """A feature selection wrapper around a base learner. When given data,
    this learner applies the specified feature selection method and then
    calls the base learner on the filtered data.

    Here is an example of how to build a wrapper around the naive Bayesian
    learner and use it on a data set::

        nb = Orange.classification.bayes.NaiveBayesLearner()
        learner = Orange.feature.selection.FilteredLearner(nb,
            filter=Orange.feature.selection.FilterBestN(n=5), name='filtered')
        classifier = learner(data)

    """
    def __new__(cls, baseLearner, data=None, weight=0,
                filter=FilterAboveThreshold(), name='filtered'):

        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(baseLearner, filter=filter, name=name)
            return self(data, weight)

    def __init__(self, baseLearner, filter=FilterAboveThreshold(),
                 name='filtered'):
        self.baseLearner = baseLearner
        self.filter = filter
        self.name = name

    def __call__(self, data, weight=0):
        # filter the data and then learn on the reduced table
        fdata = self.filter(data)
        model = self.baseLearner(fdata, weight)
        return FilteredClassifier(classifier=model, domain=model.domain)

FilteredLearner_Class = FilteredLearner

class FilteredClassifier:
    """A classifier returned by :obj:`FilteredLearner`; it delegates
    classification to the wrapped classifier, which was trained on the
    filtered data."""
    def __init__(self, **kwds):
        self.__dict__.update(kwds)

    def __call__(self, example, resultType=orange.GetValue):
        return self.classifier(example, resultType)

    def atts(self):
        # features that survived the feature selection step
        return self.domain.attributes
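
# Illustrative end-to-end sketch (not part of the module), following the
# FilteredLearner docstring example; assumes the bundled "voting" data set:
#
#     import Orange
#     data = Orange.data.Table("voting")
#     nb = Orange.classification.bayes.NaiveBayesLearner()
#     learner = FilteredLearner(nb, filter=FilterBestN(n=5), name='filtered')
#     classifier = learner(data)
#     print [a.name for a in classifier.atts()]   # the five retained features
#     print classifier(data[0])                   # prediction for one instance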