Changeset 9653:652ca8b091ed in orange


Ignore:
Timestamp:
02/06/12 01:18:17 (2 years ago)
Author:
Miha Stajdohar <miha.stajdohar@…>
Branch:
default
rebase_source:
29620a1890ea959bf5b4b850ef5c0f4f4019c03c
Message:

Fixes some bugs.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/feature/selection.py

    r9645 r9653  
    157157from Orange.feature.scoring import score_all 
    158158 
     159 
    159160def best_n(scores, N): 
    160161    """Return the best N features (without scores) from the list returned 
    161162    by :obj:`Orange.feature.scoring.score_all`. 
    162      
    163     :param scores: a list such as returned by  
     163 
     164    :param scores: a list such as returned by 
    164165      :obj:`Orange.feature.scoring.score_all` 
    165166    :type scores: list 
    166     :param N: number of best features to select.  
     167    :param N: number of best features to select. 
    167168    :type N: int 
    168169    :rtype: :obj:`list` 
     
    172173 
    173174bestNAtts = best_n 
     175 
    174176 
    175177def above_threshold(scores, threshold=0.0): 
     
    177179    :obj:`Orange.feature.scoring.score_all` with score above or 
    178180    equal to a specified threshold. 
    179      
     181 
    180182    :param scores: a list such as one returned by 
    181183      :obj:`Orange.feature.scoring.score_all` 
     
    187189    """ 
    188190    pairs = filter(lambda x, t=threshold: x[1] > t, scores) 
    189     return map(lambda x:x[0], pairs) 
     191    return map(lambda x: x[0], pairs) 
    190192 
    191193attsAboveThreshold = above_threshold 
     
    195197    """Construct and return a new set of examples that includes a 
    196198    class and only N best features from a list scores. 
    197      
     199 
    198200    :param data: an example table 
    199201    :type data: Orange.data.table 
    200     :param scores: a list such as one returned by  
     202    :param scores: a list such as one returned by 
    201203      :obj:`Orange.feature.scoring.score_all` 
    202204    :type scores: list 
     
    212214 
    213215def select_above_threshold(data, scores, threshold=0.0): 
    214     """Construct and return a new set of examples that includes a class and  
    215     features from the list returned by  
    216     :obj:`Orange.feature.scoring.score_all` that have the score above or  
     216    """Construct and return a new set of examples that includes a class and 
     217    features from the list returned by 
     218    :obj:`Orange.feature.scoring.score_all` that have the score above or 
    217219    equal to a specified threshold. 
    218      
     220 
    219221    :param data: an example table 
    220222    :type data: Orange.data.table 
    221223    :param scores: a list such as one returned by 
    222       :obj:`Orange.feature.scoring.score_all`     
     224      :obj:`Orange.feature.scoring.score_all` 
    223225    :type scores: list 
    224226    :param threshold: score threshold for attribute selection. Defaults to 0. 
    225227    :type threshold: float 
    226228    :rtype: :obj:`list` first N features (without measures) 
    227    
    228     """ 
    229     return data.select(above_threshold(scores, threshold) + [data.domain.classVar.name]) 
     229 
     230    """ 
     231    return data.select(above_threshold(scores, threshold) + \ 
     232                       [data.domain.classVar.name]) 
    230233 
    231234selectAttsAboveThresh = select_above_threshold 
     
    233236 
    234237def select_relief(data, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0): 
    235     """Take the data set and use an attribute measure to remove the worst  
     238    """Take the data set and use an attribute measure to remove the worst 
    236239    scored attribute (those below the margin). Repeats, until no attribute has 
    237240    negative or zero score. 
    238      
     241 
    239242    .. note:: Notice that this filter procedure was originally designed for \ 
    240243    measures such as Relief, which are context dependent, i.e., removal of \ 
     
    244247    :param data: a data table 
    245248    :type data: Orange.data.table 
    246     :param measure: an attribute measure (derived from  
    247       :obj:`Orange.feature.scoring.Measure`). Defaults to  
     249    :param measure: an attribute measure (derived from 
     250      :obj:`Orange.feature.scoring.Measure`). Defaults to 
    248251      :obj:`Orange.feature.scoring.Relief` for k=20 and m=50. 
    249252    :param margin: if score is higher than margin, attribute is not removed. 
    250253      Defaults to 0. 
    251254    :type margin: float 
    252      
     255 
    253256    """ 
    254257    measl = score_all(data, measure) 
    255258    while len(data.domain.attributes) > 0 and measl[-1][1] < margin: 
    256         data = (data, measl, len(data.domain.attributes) - 1) 
     259        data = select_best_n(data, measl, len(data.domain.attributes) - 1) 
    257260#        print 'remaining ', len(data.domain.attributes) 
    258261        measl = score_all(data, measure) 
    259262    return data 
    260263 
    261 select_relief = filterRelieff 
     264filterRelieff = select_relief 
    262265 
    263266 
     
    337340            return self(data) 
    338341 
    339     def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), n=5): 
     342    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), 
     343                 n=5): 
    340344        self.measure = measure 
    341345        self.n = n 
     
    344348        ma = score_all(data, self.measure) 
    345349        self.n = min(self.n, len(data.domain.attributes)) 
    346         return (data, ma, self.n) 
     350        return select_best_n(data, ma, self.n) 
    347351 
    348352FilterBestNAtts = FilterBestN 
    349353FilterBestNAtts_Class = FilterBestN 
    350354 
     355 
    351356class FilterRelief(object): 
    352     """Similarly to :obj:`FilterBestNAtts`, wrap around class  
     357    """Similarly to :obj:`FilterBestNAtts`, wrap around class 
    353358    :obj:`FilterRelief_Class`. 
    354      
    355     :param measure: an attribute measure (derived from  
    356       :obj:`Orange.feature.scoring.Measure`). Defaults to  
    357       :obj:`Orange.feature.scoring.Relief` for k=20 and m=50.   
     359 
     360    :param measure: an attribute measure (derived from 
     361      :obj:`Orange.feature.scoring.Measure`). Defaults to 
     362      :obj:`Orange.feature.scoring.Relief` for k=20 and m=50. 
    358363    :param margin: margin for Relief scoring. Defaults to 0. 
    359364    :type margin: float 
     
    371376            return self(data) 
    372377 
    373     def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), margin=0): 
     378    def __init__(self, measure=orange.MeasureAttribute_relief(k=20, m=50), 
     379                 margin=0): 
    374380        self.measure = measure 
    375381        self.margin = margin 
     
    384390 
    385391 
    386 def FilteredLearner(baseLearner, examples=None, weight=None, **kwds): 
    387     """Return the corresponding learner that wraps  
    388     :obj:`Orange.classification.baseLearner` and a data selection method.  
    389      
    390     When such learner is presented a data table, data is first filtered and  
    391     then passed to :obj:`Orange.classification.baseLearner`. This comes handy  
     392class FilteredLearner(object): 
     393    """Return the corresponding learner that wraps 
     394    :obj:`Orange.classification.baseLearner` and a data selection method. 
     395 
     396    When such learner is presented a data table, data is first filtered and 
     397    then passed to :obj:`Orange.classification.baseLearner`. This comes handy 
    392398    when one wants to test the schema of feature-subset-selection-and-learning 
    393     by some repetitive evaluation method, e.g., cross validation.  
    394      
     399    by some repetitive evaluation method, e.g., cross validation. 
     400 
    395401    :param filter: defaults to 
    396       :obj:`Orange.feature.selection.FilterAttsAboveThresh` 
    397     :type filter: Orange.feature.selection.FilterAttsAboveThresh 
     402      :obj:`Orange.feature.selection.FilterAboveThreshold` 
     403    :type filter: Orange.feature.selection.FilterAboveThreshold 
    398404 
    399405    Here is an example of how to build a wrapper around naive Bayesian learner 
     
    401407 
    402408        nb = Orange.classification.bayes.NaiveBayesLearner() 
    403         learner = Orange.feature.selection.FilteredLearner(nb,  
     409        learner = Orange.feature.selection.FilteredLearner(nb, 
    404410                  filter=Orange.feature.selection.FilterBestNAtts(n=5), name='filtered') 
    405411        classifier = learner(data) 
    406412 
    407413    """ 
    408     learner = apply(FilteredLearner_Class, [baseLearner], kwds) 
    409     if examples: 
    410         return learner(examples, weight) 
    411     else: 
    412         return learner 
    413  
    414 class FilteredLearner_Class: 
    415     def __init__(self, baseLearner, filter=FilterAttsAboveThresh(), name='filtered'): 
     414    def __new__(cls, baseLearner, data=None, weight=0, 
     415                filter=FilterAboveThreshold(), name='filtered'): 
     416 
     417        if data is None: 
     418            self = object.__new__(cls, baseLearner, filter=filter, name=name) 
     419            return self 
     420        else: 
     421            self = cls(baseLearner, filter=filter, name=name) 
     422            return self(data, weight) 
     423 
     424    def __init__(self, baseLearner, filter=FilterAboveThreshold(), 
     425                 name='filtered'): 
    416426        self.baseLearner = baseLearner 
    417427        self.filter = filter 
    418428        self.name = name 
     429 
    419430    def __call__(self, data, weight=0): 
    420431        # filter the data and then learn 
     
    423434        return FilteredClassifier(classifier=model, domain=model.domain) 
    424435 
     436FilteredLearner_Class = FilteredLearner 
     437 
     438 
    425439class FilteredClassifier: 
    426440    def __init__(self, **kwds): 
    427441        self.__dict__.update(kwds) 
     442 
    428443    def __call__(self, example, resultType=orange.GetValue): 
    429444        return self.classifier(example, resultType) 
     445 
    430446    def atts(self): 
    431447        return self.domain.attributes 
Note: See TracChangeset for help on using the changeset viewer.