source: orange/Orange/feature/selection.py @ 10708:43138a3b5624

Revision 10708:43138a3b5624, 8.3 KB checked in by blaz <blaz.zupan@…>, 2 years ago (diff)

Some cosmetics and renaming in Orange.feature.selection

Line 
1__docformat__ = 'restructuredtext'
2
3from operator import itemgetter
4
5import Orange.core as orange
6
7from Orange.feature.scoring import score_all
8
def top_rated(scores, n, highest_best=True):
    """Return n top-rated features from the list of scores.

    :param scores: a list of pairs whose first element identifies the
      feature and whose second element is its score, such as the list
      returned by :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select; if the list holds fewer
      than n entries, all of them are returned
    :type n: int
    :param highest_best: if true, the features that are scored higher
      are preferred; if false, the lowest scored features are returned
    :type highest_best: bool
    :rtype: :obj:`list`
    """
    # The sort direction has to follow highest_best; it used to be
    # hard-coded to descending, which silently ignored the argument.
    ranked = sorted(scores, key=itemgetter(1), reverse=bool(highest_best))
    return [entry[0] for entry in ranked[:n]]

bestNAtts = top_rated
26
def above_threshold(scores, threshold=0.0):
    """Return features (without scores) whose score is strictly
    greater than the specified threshold. Features scored exactly
    at the threshold are not included.

    :param scores: a list of pairs (feature, score), such as the one
      returned by :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`list`

    """
    selected = []
    for entry in scores:
        # entry[0] identifies the feature, entry[1] is its score
        if entry[1] > threshold:
            selected.append(entry[0])
    return selected


attsAboveThreshold = above_threshold
43
44
def select(data, scores, n):
    """Build a new data table that keeps only the n best-scored
    features from the list of scores, plus the class variable.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :rtype: :obj:`Orange.data.Table`
    """
    best_features = top_rated(scores, n)
    # the class variable is appended by name so that it is retained
    # in the resulting table
    class_name = data.domain.classVar.name
    return data.select(best_features + [class_name])

selectBestNAtts = select
61
62
def select_above_threshold(data, scores, threshold=0.0):
    """Build a new data table that keeps the class variable and those
    features from the list returned by
    :obj:`~Orange.feature.scoring.score_all` whose score is strictly
    higher than the given threshold.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection
    :type threshold: float
    :rtype: :obj:`Orange.data.Table`
    """
    kept_features = above_threshold(scores, threshold)
    # the class variable is appended by name so that it is retained
    # in the resulting table
    class_name = data.domain.classVar.name
    return data.select(kept_features + [class_name])

selectAttsAboveThresh = select_above_threshold
82
83
def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Repeatedly drop the worst scored feature for as long as some
    feature scores below the margin. The procedure was designed for
    context-dependent measures such as Relief, where removing one
    feature may change the scores of the remaining ones; the scores
    are therefore recomputed after every removal.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for removal
    :type margin: float
    :returns: the table with the remaining features; the input table
      itself if no feature had to be removed

    """
    while True:
        ranked = score_all(data, measure)
        remaining = len(data.domain.attributes)
        # The last entry is taken to be the worst scored feature.
        # NOTE(review): this relies on score_all returning the list
        # ordered from best to worst -- confirm against its definition.
        if remaining == 0 or not ranked[-1][1] < margin:
            return data
        # keep all features but the last (worst) one
        data = select(data, ranked, remaining - 1)

filterRelieff = select_relief
106
107
class FilterAboveThreshold(object):
    """A wrapper around :obj:`select_above_threshold`; the
    constructor stores the parameters of the feature selection
    procedure, which are then applied when the selection is called
    with the actual data.

    If the constructor is given data, the filter is applied at once
    and the filtered data is returned instead of a filter object.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param threshold: threshold for selection. Defaults to 0.
    :type threshold: float
    """

    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                threshold=0.0):
        if data is not None:
            # Function-like use: configure a temporary filter and
            # return its result on the data right away.
            configured = cls(measure=measure, threshold=threshold)
            return configured(data)
        # No data: return a bare instance; since it is an instance of
        # cls, Python goes on to initialize it with __init__.
        return object.__new__(cls)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 threshold=0.0):
        self.threshold = threshold
        self.measure = measure

    def __call__(self, data):
        """Score all features of the data with the stored measure and
        return a data table with the features that
        :obj:`select_above_threshold` keeps for the stored threshold.

        :param data: data table
        :type data: Orange.data.Table

        """
        scores = score_all(data, self.measure)
        return select_above_threshold(data, scores, self.threshold)

FilterAttsAboveThresh = FilterAboveThreshold
FilterAttsAboveThresh_Class = FilterAboveThreshold
148
149
class FilterBestN(object):
    """A wrapper around :obj:`select`; the
    constructor stores the filter parameters that are applied when the
    function is called.

    If the constructor is given data, the filter is applied at once
    and the filtered data is returned instead of a filter object.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param n: number of features to select
    :type n: int

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                n=5):

        if data is None:
            # No data: return a bare instance; since it is an instance
            # of cls, Python goes on to initialize it with __init__.
            self = object.__new__(cls)
            return self
        else:
            # Function-like use: configure a temporary filter and
            # return its result on the data right away.
            self = cls(measure=measure, n=n)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 n=5):
        self.measure = measure
        self.n = n

    def __call__(self, data):
        """Score all features of the data with the stored measure and
        return a data table with (at most) the n best scored features.

        :param data: data table
        :type data: Orange.data.Table

        """
        ma = score_all(data, self.measure)
        # Clamp in a local variable rather than overwriting self.n:
        # otherwise a single call on a table with fewer than n features
        # would permanently lower n for all later calls of this filter.
        n = min(self.n, len(data.domain.attributes))
        return select(data, ma, n)

FilterBestNAtts = FilterBestN
FilterBestNAtts_Class = FilterBestN
184
185
class FilterRelief(object):
    """A class wrapper around :obj:`select_relief`; the
    constructor stores the filter parameters that are applied when the
    function is called.

    If the constructor is given data, the filter is applied at once
    and the filtered data is returned instead of a filter object.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for Relief scoring
    :type margin: float

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                margin=0):
        if data is not None:
            # Function-like use: configure a temporary filter and
            # return its result on the data right away.
            configured = cls(measure=measure, margin=margin)
            return configured(data)
        # No data: return a bare instance; since it is an instance of
        # cls, Python goes on to initialize it with __init__.
        return object.__new__(cls)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 margin=0):
        self.margin = margin
        self.measure = measure

    def __call__(self, data):
        """Apply :obj:`select_relief` to the data using the stored
        measure and margin, and return its result.
        """
        scorer, margin = self.measure, self.margin
        return select_relief(data, scorer, margin)

FilterRelief_Class = FilterRelief
217
218##############################################################################
219# wrapped learner
220
221
class FilteredLearner(object):
    """A feature selection wrapper around a base learner. When given
    data, this learner first applies the feature selection method and
    then calls the base learner on the filtered data.

    If the constructor is given data, learning is performed at once
    and the resulting classifier is returned instead of a learner.

    Here is an example of how to build a wrapper around naive Bayesian learner
    and use it on a data set::

        nb = Orange.classification.bayes.NaiveBayesLearner()
        learner = Orange.feature.selection.FilteredLearner(nb,
            filter=Orange.feature.selection.FilterBestN(n=5), name='filtered')
        classifier = learner(data)

    """
    def __new__(cls, baseLearner, data=None, weight=0,
                filter=FilterAboveThreshold(), name='filtered'):
        if data is not None:
            # Function-like use: configure a temporary learner and
            # return the classifier it induces from the data.
            learner = cls(baseLearner, filter=filter, name=name)
            return learner(data, weight)
        # No data: return a bare instance; since it is an instance of
        # cls, Python goes on to initialize it with __init__.
        return object.__new__(cls)

    def __init__(self, baseLearner, filter=FilterAboveThreshold(),
                 name='filtered'):
        self.name = name
        self.filter = filter
        self.baseLearner = baseLearner

    def __call__(self, data, weight=0):
        """Filter the data with the stored filter, train the base
        learner on the result and return the model wrapped in a
        :obj:`FilteredClassifier`.
        """
        # select the features first, then learn on what remains
        reduced = self.filter(data)
        classifier = self.baseLearner(reduced, weight)
        return FilteredClassifier(classifier=classifier,
                                  domain=classifier.domain)

FilteredLearner_Class = FilteredLearner
259
260
class FilteredClassifier:
    """A classifier returned by FilteredLearner.

    Every keyword argument given to the constructor is stored as an
    instance attribute; the methods below read ``classifier`` and
    ``domain``.
    """
    def __init__(self, **kwds):
        # vars(self) is the instance's attribute dictionary
        vars(self).update(kwds)

    def __call__(self, example, resultType=orange.GetValue):
        """Pass the example and result type on to the wrapped
        classifier and return its answer.
        """
        wrapped = self.classifier
        return wrapped(example, resultType)

    def atts(self):
        """Return the attributes of the stored domain."""
        domain = self.domain
        return domain.attributes
Note: See TracBrowser for help on using the repository browser.