source: orange/Orange/feature/selection.py @ 10172:2ab492979b00

Revision 10172:2ab492979b00, 8.6 KB checked in by janezd <janez.demsar@…>, 2 years ago (diff)

Documentation for Orange.feature.scoring: more cleaning up

Line 
1__docformat__ = 'restructuredtext'
2
3import Orange.core as orange
4
5from Orange.feature.scoring import score_all
6
7
def best_n(scores, n):
    """Return the names of the ``n`` highest-scored features.

    :param scores: a list of (feature name, score) tuples such as the one
      returned by :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select.
    :type n: int
    :rtype: :obj:`list`

    """
    # Sort by score, descending.  A plain sorted(scores) would order the
    # (name, score) tuples lexicographically by feature name and therefore
    # return the alphabetically-first n features instead of the best-scored.
    return [x[0] for x in
            sorted(scores, key=lambda x: x[1], reverse=True)[:n]]

bestNAtts = best_n
22
23
def above_threshold(scores, threshold=0.0):
    """Return features (without scores) whose scores are strictly
    above a specified threshold.

    :param scores: a list such as one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection; only features scored
      strictly above it are kept
    :type threshold: float
    :rtype: :obj:`list`

    """
    # Strict comparison: a feature whose score equals the threshold is
    # NOT selected.
    return [x[0] for x in scores if x[1] > threshold]


attsAboveThreshold = above_threshold
40
41
def select_best_n(data, scores, n):
    """Construct and return a new data table that keeps the class
    variable and only the ``n`` best-scored features.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param n: number of features to select
    :type n: int
    :rtype: :obj:`Orange.data.Table`
    """
    keep = best_n(scores, n)
    # The class variable must always survive the projection.
    keep.append(data.domain.classVar.name)
    return data.select(keep)

selectBestNAtts = select_best_n
58
59
def select_above_threshold(data, scores, threshold=0.0):
    """Construct and return a new data table that includes the class and
    the features from the list returned by
    :obj:`~Orange.feature.scoring.score_all` whose score is strictly
    above the specified threshold.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param scores: a list such as the one returned by
      :obj:`~Orange.feature.scoring.score_all`
    :type scores: list
    :param threshold: threshold for selection; only features scored
      strictly above it are kept (see :obj:`above_threshold`)
    :type threshold: float
    :rtype: :obj:`Orange.data.Table`
    """
    return data.select(above_threshold(scores, threshold) + \
                       [data.domain.classVar.name])

selectAttsAboveThresh = select_above_threshold
79
80
def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0):
    """Iteratively remove the worst-scored feature until no feature
    scores below the margin. This filtering procedure was originally
    designed for context-dependent measures such as Relief, where
    removing one feature can change the scores of the remaining ones,
    so the scores are recomputed after every removal.

    :param data: a data table
    :type data: :obj:`Orange.data.Table`
    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for removal
    :type margin: float

    """
    scores = score_all(data, measure)
    # NOTE(review): assumes score_all returns scores sorted best-first,
    # so scores[-1] is the worst-scored feature — confirm in scoring module.
    while len(data.domain.attributes) > 0 and scores[-1][1] < margin:
        # Drop exactly one feature (the worst), then rescore the rest.
        data = select_best_n(data, scores, len(data.domain.attributes) - 1)
        scores = score_all(data, measure)
    return data

filterRelieff = select_relief
103
104
class FilterAboveThreshold(object):
    """A class wrapper around :obj:`select_above_threshold`; the
    constructor stores the filter parameters, which are applied whenever
    the instance is called with a data table.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param threshold: threshold for selection. Defaults to 0.
    :type threshold: float

    Some examples of how to use this class::

        filter = Orange.feature.selection.FilterAboveThreshold(threshold=.15)
        new_data = filter(data)
        new_data = Orange.feature.selection.FilterAboveThreshold(data)
        new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1)
        new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1,
                   measure=Orange.feature.scoring.Gini())

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                threshold=0.0):
        # When data is passed directly, build a configured instance and
        # apply it immediately, returning the filtered table instead of
        # an instance (so __init__ is skipped on the returned object).
        if data is not None:
            return cls(measure=measure, threshold=threshold)(data)
        return object.__new__(cls)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), \
                 threshold=0.0):
        # Stored parameters used by __call__.
        self.measure = measure
        self.threshold = threshold

    def __call__(self, data):
        """Return a data table keeping only the features whose scores
        are above the stored threshold.

        :param data: data table
        :type data: Orange.data.Table

        """
        scores = score_all(data, self.measure)
        return select_above_threshold(data, scores, self.threshold)

FilterAttsAboveThresh = FilterAboveThreshold
FilterAttsAboveThresh_Class = FilterAboveThreshold
153
154
class FilterBestN(object):
    """A class wrapper around :obj:`select_best_n`; the
    constructor stores the filter parameters that are applied when the
    instance is called with a data table.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param n: number of features to select
    :type n: int

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                n=5):
        # Shortcut form: FilterBestN(data, ...) filters immediately and
        # returns the resulting table instead of an instance.
        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, n=n)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 n=5):
        self.measure = measure
        self.n = n

    def __call__(self, data):
        """Return a data table with the ``n`` best-scored features.

        :param data: data table
        :type data: Orange.data.Table

        """
        scores = score_all(data, self.measure)
        # Clamp locally instead of overwriting self.n: previously a call
        # on a table with fewer than n features permanently shrank the
        # filter's configured parameter for all subsequent calls.
        n = min(self.n, len(data.domain.attributes))
        return select_best_n(data, scores, n)

FilterBestNAtts = FilterBestN
FilterBestNAtts_Class = FilterBestN
189
190
class FilterRelief(object):
    """A class wrapper around :obj:`select_relief`; the
    constructor stores the filter parameters that are applied when the
    instance is called with a data table.

    :param measure: a feature scorer
    :type measure: :obj:`Orange.feature.scoring.Score`
    :param margin: margin for Relief scoring
    :type margin: float

    """
    def __new__(cls, data=None,
                measure=orange.MeasureAttribute_relief(k=20, m=50),
                margin=0):
        # Shortcut form: FilterRelief(data, ...) filters immediately and
        # returns the resulting table instead of an instance.
        if data is None:
            self = object.__new__(cls)
            return self
        else:
            self = cls(measure=measure, margin=margin)
            return self(data)

    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50),
                 margin=0):
        self.measure = measure
        self.margin = margin

    def __call__(self, data):
        """Apply :obj:`select_relief` with the stored parameters.

        :param data: data table
        :type data: Orange.data.Table

        """
        return select_relief(data, self.measure, self.margin)

FilterRelief_Class = FilterRelief
222
223##############################################################################
224# wrapped learner
225
226
class FilteredLearner(object):
    """A learner that applies the given features selection method and
    then calls the base learner. This learner is needed to properly cross-validate a combination of feature selection and learning.

    Here is an example of how to build a wrapper around naive Bayesian learner
    and use it on a data set::

        nb = Orange.classification.bayes.NaiveBayesLearner()
        learner = Orange.feature.selection.FilteredLearner(nb,
            filter=Orange.feature.selection.FilterBestN(n=5), name='filtered')
        classifier = learner(data)

    """
    def __new__(cls, baseLearner, data=None, weight=0,
                filter=FilterAboveThreshold(), name='filtered'):
        # Shortcut form: passing data trains immediately and returns the
        # resulting classifier instead of a learner instance.
        if data is not None:
            learner = cls(baseLearner, filter=filter, name=name)
            return learner(data, weight)
        return object.__new__(cls)

    def __init__(self, baseLearner, filter=FilterAboveThreshold(),
                 name='filtered'):
        # Stored configuration applied at training time.
        self.baseLearner = baseLearner
        self.filter = filter
        self.name = name

    def __call__(self, data, weight=0):
        """Filter the data with the stored filter, train the base
        learner on the reduced table and wrap the result in a
        :obj:`FilteredClassifier`.
        """
        filtered = self.filter(data)
        model = self.baseLearner(filtered, weight)
        return FilteredClassifier(classifier=model, domain=model.domain)

FilteredLearner_Class = FilteredLearner
263
264
class FilteredClassifier:
    """A classifier returned by :obj:`FilteredLearner`; delegates
    classification to the wrapped classifier trained on the filtered
    domain."""
    def __init__(self, **kwds):
        # Adopt whatever attributes the learner supplies (classifier, domain).
        for name, value in kwds.items():
            setattr(self, name, value)

    def __call__(self, example, resultType=orange.GetValue):
        # Delegate to the wrapped classifier.
        return self.classifier(example, resultType)

    def atts(self):
        # Features that survived the filtering step.
        return self.domain.attributes
Note: See TracBrowser for help on using the repository browser.