Changeset 10708:43138a3b5624 in orange


Ignore:
Timestamp:
04/03/12 07:25:08 (2 years ago)
Author:
blaz <blaz.zupan@…>
Branch:
default
Message:

Some cosmetics and renaming in Orange.feature.selection

Files:
4 edited

Legend:

Unmodified
Added
Removed
  • Orange/feature/selection.py

    r10523 r10708  
    77from Orange.feature.scoring import score_all 
    88 
    9  
    10 def best_n(scores, n): 
    11     """Return the best features (without scores) from the list 
     9def top_rated(scores, n, highest_best=True): 
     10    """Return n top-rated features from the list of scores. 
    1211 
    1312    :param scores: a list such as the one returned by 
     
    1716    :type n: int 
    1817    :rtype: :obj:`list` 
    19  
     18    :param highest_best: if true, the features that are scored higher are preferred 
     19    :type highest_best: bool 
    2020    """ 
    2121    return [x[0] for x in \ 
     
    2323            ] 
    2424 
    25 bestNAtts = best_n 
    26  
     25bestNAtts = top_rated 
    2726 
    2827def above_threshold(scores, threshold=0.0): 
     
    4443 
    4544 
    46 def select_best_n(data, scores, n): 
     45def select(data, scores, n): 
    4746    """Construct and return a new data table that includes a 
    4847    class and only the best features from a list scores. 
     
    5756    :rtype: :obj:`Orange.data.Table` 
    5857    """ 
    59     return data.select(best_n(scores, n) + [data.domain.classVar.name]) 
    60  
    61 selectBestNAtts = select_best_n 
     58    return data.select(top_rated(scores, n) + [data.domain.classVar.name]) 
     59 
     60selectBestNAtts = select 
    6261 
    6362 
     
    6564    """Construct and return a new data table that includes a class and 
    6665    features from the list returned by 
    67     :obj:`~Orange.feature.scoring.score_all` that have the score above or 
    68     equal to a specified threshold. 
     66    :obj:`~Orange.feature.scoring.score_all` with higher or equal score 
     67    to a given threshold. 
    6968 
    7069    :param data: a data table 
     
    10099    measl = score_all(data, measure) 
    101100    while len(data.domain.attributes) > 0 and measl[-1][1] < margin: 
    102         data = select_best_n(data, measl, len(data.domain.attributes) - 1) 
     101        data = select(data, measl, len(data.domain.attributes) - 1) 
    103102        measl = score_all(data, measure) 
    104103    return data 
     
    108107 
    109108class FilterAboveThreshold(object): 
    110     """A class wrapper around :obj:`select_above_threshold`; the 
    111     constructor stores the filter parameters that are applied when the 
    112     function is called. 
     109    """A wrapper around :obj:`select_above_threshold`; the 
     110    constructor stores the parameters of the feature selection 
     111    procedure that are then applied when the selection 
     112    is called with the actual data. 
    113113 
    114114    :param measure: a feature scorer 
     
    116116    :param threshold: threshold for selection. Defaults to 0. 
    117117    :type threshold: float 
    118  
    119     Some examples of how to use this class:: 
    120  
    121         filter = Orange.feature.selection.FilterAboveThreshold(threshold=.15) 
    122         new_data = filter(data) 
    123         new_data = Orange.feature.selection.FilterAboveThreshold(data) 
    124         new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1) 
    125         new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1, 
    126                    measure=Orange.feature.scoring.Gini()) 
    127  
    128     """ 
     118    """ 
     119 
    129120    def __new__(cls, data=None, 
    130121                measure=orange.MeasureAttribute_relief(k=20, m=50), 
     
    158149 
    159150class FilterBestN(object): 
    160     """A class wrapper around :obj:`select_best_n`; the 
     151    """A wrapper around :obj:`select`; the 
    161152    constructor stores the filter parameters that are applied when the 
    162153    function is called. 
     
    187178        ma = score_all(data, self.measure) 
    188179        self.n = min(self.n, len(data.domain.attributes)) 
    189         return select_best_n(data, ma, self.n) 
     180        return select(data, ma, self.n) 
    190181 
    191182FilterBestNAtts = FilterBestN 
     
    230221 
    231222class FilteredLearner(object): 
    232     """A learner that applies the given features selection method and 
    233     then calls the base learner. This learner is needed to properly cross-validate a combination of feature selection and learning. 
     223    """A feature selection wrapper around a base learner. When provided data, 
     224    this learner applies a given feature selection method and then calls 
     225    the base learner. 
    234226 
    235227    Here is an example of how to build a wrapper around naive Bayesian learner 
  • Orange/testing/unit/tests/test_feature_selection.py

    r10655 r10708  
    2727         
    2828        # test best_n function 
    29         self.assertEqual(selection.best_n(self.scores, 3), best_3) 
     29        self.assertEqual(selection.top_rated(self.scores, 3), best_3) 
    3030         
    31         self.assertTrue(len(selection.best_n(self.scores, 3)) == 3) 
     31        self.assertTrue(len(selection.top_rated(self.scores, 3)) == 3) 
    3232         
    3333        # all returned values should be strings. 
    3434        self.assertTrue(all(isinstance(item, basestring) for item in \ 
    35                             selection.best_n(self.scores, 3))) 
     35                            selection.top_rated(self.scores, 3))) 
    3636         
    37         new_data = selection.select_best_n(self.data, self.scores, 3) 
     37        new_data = selection.select(self.data, self.scores, 3) 
    3838        self.assertEqual(best_3, [a.name for a in new_data.domain.attributes]) 
    3939        self.assertEqual(new_data.domain.class_var, self.data.domain.class_var) 
  • docs/reference/rst/Orange.feature.selection.rst

    r10172 r10708  
    1010   single: feature; feature selection 
    1111 
    12 Feature selection module contains several functions for selecting features based on they scores. A typical example is the function :obj:`select_best_n` that returns the best n features: 
     12Feature selection module contains several utility functions for selecting features based on their scores, normally 
     13obtained in classification or regression problems. A typical example is the function :obj:`select` 
     14that returns a subset of the highest-scored features: 
    1315 
    14     .. literalinclude:: code/selection-best3.py 
    15         :lines: 7- 
     16.. literalinclude:: code/selection-best3.py 
     17    :lines: 7- 
    1618 
    17     The script outputs:: 
     19The script outputs:: 
    1820 
    19         Best 3 features: 
    20         physician-fee-freeze 
    21         el-salvador-aid 
    22         synfuels-corporation-cutback 
     21    Best 3 features: 
     22    physician-fee-freeze 
     23    el-salvador-aid 
     24    synfuels-corporation-cutback 
    2325 
    2426The module also includes a learner that incorporates feature subset 
     
    2931-------------------------------------- 
    3032 
    31 .. automethod:: Orange.feature.selection.best_n 
     33.. automethod:: Orange.feature.selection.top_rated 
    3234 
    3335.. automethod:: Orange.feature.selection.above_threshold 
    3436 
    35 .. automethod:: Orange.feature.selection.select_best_n 
     37.. automethod:: Orange.feature.selection.select 
    3638 
    3739.. automethod:: Orange.feature.selection.select_above_threshold 
     
    5658.. autoclass:: Orange.feature.selection.FilterAboveThreshold(data=None, measure=Orange.feature.scoring.Relief(k=20, m=50), threshold=0.0) 
    5759   :members: 
     60 
     61Below are a few examples of how to use this class:: 
     62 
     63    >>> filter = Orange.feature.selection.FilterAboveThreshold(threshold=.15) 
     64    >>> new_data = filter(data) 
     65    >>> new_data = Orange.feature.selection.FilterAboveThreshold(data) 
     66    >>> new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1) 
     67    >>> new_data = Orange.feature.selection.FilterAboveThreshold(data, threshold=.1, \ 
     68        measure=Orange.feature.scoring.Gini()) 
    5869 
    5970.. autoclass:: Orange.feature.selection.FilterBestN(data=None, measure=Orange.feature.scoring.Relief(k=20, m=50), n=5) 
  • docs/reference/rst/code/selection-best3.py

    r10171 r10708  
    1010n = 3 
    1111ma = Orange.feature.scoring.score_all(voting) 
    12 best = Orange.feature.selection.best_n(ma, n) 
     12best = Orange.feature.selection.top_rated(ma, n) 
    1313print 'Best %d features:' % n 
    1414for s in best: 
Note: See TracChangeset for help on using the changeset viewer.