Ignore:
Timestamp:
03/15/12 15:14:54 (2 years ago)
Author:
anzeh <anze.staric@…>
Branch:
default
Message:

Moved preprocess from Orange to Orange.data.

File:
1 moved

Legend:

Unmodified
Added
Removed
  • Orange/data/preprocess/__init__.py

    r10393 r10542  
    1 """ 
    2 .. autoclass:: Preprocessor_discretizeEntropy(method=Orange.feature.discretization.Entropy()) 
    3  
    4 .. autoclass:: Preprocessor_removeContinuous 
    5  
    6 .. autoclass:: Preprocessor_continuize 
    7  
    8 .. autoclass:: Preprocessor_removeDiscrete 
    9  
    10 .. autoclass:: Preprocessor_impute 
    11  
    12 .. autoclass:: Preprocessor_featureSelection(measure=Orange.feature.scoring.Relief(), filter=None, limit=10) 
    13  
    14 .. autofunction:: bestP 
    15  
    16 .. autofunction:: bestN 
    17  
    18 .. autofunction:: selectNRandom 
    19  
    20 .. autofunction:: selectPRandom 
    21  
    22 .. autoclass:: Preprocessor_RFE 
    23  
    24 .. autoclass:: Preprocessor_sample 
    25  
    26 .. autoclass:: Preprocessor_preprocessorList 
    27  
    28 .. class:: RemoveUnusedValues(variable, data, remove_one_valued=False) 
    29  
    30     Removes unused values and reduces the variable, if a variable 
    31     declares values that do not appear in the data. 
    32  
    33     :param variable: :class:`Orange.feature.Descriptor` 
    34     :param data: :class:`Orange.data.Table` 
    35     :param remove_one_valued: Decides whether to remove or to retain 
    36         the attributes with only one value defined (default: False). 
    37      
    38     Example: 
    39      
    40     .. literalinclude:: code/unusedValues.py     
    41  
    42     There are four possible outcomes: 
    43      
    44     1. The variable does not have any used values in the data - value 
    45     of this variable is undefined for all examples. The variable is 
    46     thus useless and the class returns None. 
    47  
    48     2. The variable has only one used value (or, possibly, only one 
    49     value at all). Such a variable is in fact useless, and can 
    50     probably be removed without harm. Nevertheless, its fate is 
    51     decided by the flag remove_one_valued which is False by default, 
    52     so such variables are retained unless explicitly specified 
    53     otherwise. 
    54  
    55     3. All of the variable's values occur in the data (and the variable has more 
    56     than one value; otherwise the above case applies). The original variable 
    57     is returned. 
    58  
    59     4. There are some unused values. A new variable is constructed and the 
    60     unused values are omitted. The value of the new variable is computed 
    61     automatically from the value of the original variable; 
    62     :class:`Orange.classification.lookup.ClassifierByLookupTable` is used 
    63     for mapping. 
    64      
    65     Results of example: 
    66      
    67     .. literalinclude:: code/unusedValues.res 
    68      
    69     Variables a and y are OK and are left alone. In b, value 1 is not used 
    70     and is removed (not in the original variable, of course; a new variable 
    71     is created). c is useless and is removed altogether. d is retained since 
    72     remove_one_valued was left at False; if we set it to True, this variable 
    73     would be removed as well. 
    74  
    75 """ 
    76  
    77 from orange import \ 
    78      DomainContinuizer, \ 
    79     VariableFilterMap, \ 
    80     ValueFilter, \ 
    81          ValueFilter_continuous, \ 
    82          ValueFilter_discrete, \ 
    83          ValueFilter_string, \ 
    84          ValueFilter_stringList, \ 
    85     ValueFilterList, \ 
    86     TransformValue, \ 
    87          Discrete2Continuous, \ 
    88          Discretizer, \ 
    89               BiModalDiscretizer, \ 
    90               EquiDistDiscretizer, \ 
    91               IntervalDiscretizer, \ 
    92               ThresholdDiscretizer, \ 
    93          MapIntValue, \ 
    94          NormalizeContinuous, \ 
    95          Ordinal2Continuous, \ 
    96          TransformValue_IsDefined, \ 
    97     TableAverager, \ 
    98     Preprocessor, \ 
    99          Preprocessor_addCensorWeight, \ 
    100          Preprocessor_addClassNoise, \ 
    101          Preprocessor_addClassWeight, \ 
    102          Preprocessor_addGaussianClassNoise, \ 
    103          Preprocessor_addGaussianNoise, \ 
    104          Preprocessor_addMissing, \ 
    105          Preprocessor_addMissingClasses, \ 
    106          Preprocessor_addNoise, \ 
    107          Preprocessor_discretize, \ 
    108          Preprocessor_drop, \ 
    109          Preprocessor_dropMissing, \ 
    110          Preprocessor_dropMissingClasses, \ 
    111          Preprocessor_filter, \ 
    112          Preprocessor_ignore, \ 
    113          Preprocessor_imputeByLearner, \ 
    114          Preprocessor_removeDuplicates, \ 
    115          Preprocessor_select, \ 
    116          Preprocessor_shuffle, \ 
    117          Preprocessor_take, \ 
    118          Preprocessor_takeMissing, \ 
    119          Preprocessor_takeMissingClasses, \ 
    120     Imputer, \ 
    121          Imputer_asValue, \ 
    122          Imputer_defaults, \ 
    123          Imputer_model, \ 
    124          Imputer_random, \ 
    125     ImputerConstructor, \ 
    126          ImputerConstructor_asValue, \ 
    127          ImputerConstructor_average, \ 
    128          ImputerConstructor_maximal, \ 
    129          ImputerConstructor_minimal, \ 
    130          ImputerConstructor_model, \ 
    131          ImputerConstructor_random, \ 
    132     FilterList, \ 
    133     Filter, \ 
    134          Filter_conjunction, \ 
    135          Filter_disjunction, \ 
    136          Filter_hasClassValue, \ 
    137          Filter_hasMeta, \ 
    138          Filter_hasSpecial, \ 
    139          Filter_isDefined, \ 
    140          Filter_random, \ 
    141          Filter_sameValue, \ 
    142          Filter_values, \ 
    143     Discretization, \ 
    144          BiModalDiscretization, \ 
    145          EntropyDiscretization, \ 
    146          EquiDistDiscretization, \ 
    147          EquiNDiscretization, \ 
    148     DomainTransformerConstructor, \ 
    149     RemoveRedundant, \ 
    150          RemoveRedundantByInduction, \ 
    151          RemoveRedundantByQuality, \ 
    152          RemoveRedundantOneValue, \ 
    153     RemoveUnusedValues 
     1from Orange import core 
     2 
     3DomainContinuizer = core.DomainContinuizer 
     4VariableFilterMap = core.VariableFilterMap 
     5 
     6ValueFilter = core.ValueFilter 
     7ValueFilter_continuous = core.ValueFilter_continuous 
     8ValueFilter_discrete = core.ValueFilter_discrete 
     9ValueFilter_string = core.ValueFilter_string 
     10ValueFilter_stringList = core.ValueFilter_stringList 
     11ValueFilterList = core.ValueFilterList 
     12 
     13TransformValue = core.TransformValue 
     14Discrete2Continuous = core.Discrete2Continuous 
     15Discretizer = core.Discretizer 
     16BiModalDiscretizer = core.BiModalDiscretizer 
     17EquiDistDiscretizer = core.EquiDistDiscretizer 
     18IntervalDiscretizer = core.IntervalDiscretizer 
     19ThresholdDiscretizer = core.ThresholdDiscretizer 
     20MapIntValue = core.MapIntValue 
     21NormalizeContinuous = core.NormalizeContinuous 
     22Ordinal2Continuous = core.Ordinal2Continuous 
     23TransformValue_IsDefined = core.TransformValue_IsDefined 
     24 
     25TableAverager = core.TableAverager 
     26 
     27Preprocessor = core.Preprocessor 
     28AddCensorWeight = core.Preprocessor_addCensorWeight 
     29AddClassNoise = core.Preprocessor_addClassNoise 
     30AddClassWeight = core.Preprocessor_addClassWeight 
     31AddGaussianClassNoise = core.Preprocessor_addGaussianClassNoise 
     32AddGaussianNoise = core.Preprocessor_addGaussianNoise 
     33AddMissing = core.Preprocessor_addMissing 
     34AddMissingClasses = core.Preprocessor_addMissingClasses 
     35AddNoise = core.Preprocessor_addNoise 
     36 
     37Discretize = core.Preprocessor_discretize 
     38Drop = core.Preprocessor_drop 
     39DropMissing = core.Preprocessor_dropMissing 
     40DropMissingClasses = core.Preprocessor_dropMissingClasses 
     41Filter = core.Preprocessor_filter 
     42Ignore = core.Preprocessor_ignore 
     43ImputeByLearner = core.Preprocessor_imputeByLearner 
     44RemoveDuplicates = core.Preprocessor_removeDuplicates 
     45Select = core.Preprocessor_select 
     46Shuffle = core.Preprocessor_shuffle 
     47Take = core.Preprocessor_take 
     48TakeMissing = core.Preprocessor_takeMissing 
     49TakeMissingClasses = core.Preprocessor_takeMissingClasses 
     50 
     51Imputer = core.Imputer 
     52Imputer_asValue = core.Imputer_asValue 
     53Imputer_defaults = core.Imputer_defaults 
     54Imputer_model = core.Imputer_model 
     55Imputer_random = core.Imputer_random 
     56 
     57ImputerConstructor = core.ImputerConstructor 
     58ImputerConstructor_asValue = core.ImputerConstructor_asValue 
     59ImputerConstructor_average = core.ImputerConstructor_average 
     60ImputerConstructor_maximal = core.ImputerConstructor_maximal 
     61ImputerConstructor_minimal = core.ImputerConstructor_minimal 
     62ImputerConstructor_model = core.ImputerConstructor_model 
     63ImputerConstructor_random = core.ImputerConstructor_random 
     64 
     65FilterList = core.FilterList 
     66Filter = core.Filter 
     67Filter_conjunction = core.Filter_conjunction 
     68Filter_disjunction = core.Filter_disjunction 
     69Filter_hasClassValue = core.Filter_hasClassValue 
     70Filter_hasMeta = core.Filter_hasMeta 
     71Filter_hasSpecial = core.Filter_hasSpecial 
     72Filter_isDefined = core.Filter_isDefined 
     73Filter_random = core.Filter_random 
     74Filter_sameValue = core.Filter_sameValue 
     75Filter_values = core.Filter_values 
     76 
     77Discretization = core.Discretization 
     78BiModalDiscretization = core.BiModalDiscretization 
     79EntropyDiscretization = core.EntropyDiscretization 
     80EquiDistDiscretization = core.EquiDistDiscretization 
     81EquiNDiscretization = core.EquiNDiscretization 
     82 
     83DomainTransformerConstructor = core.DomainTransformerConstructor 
     84 
     85RemoveRedundant = core.RemoveRedundant 
     86RemoveRedundantByInduction = core.RemoveRedundantByInduction 
     87RemoveRedundantByQuality = core.RemoveRedundantByQuality 
     88RemoveRedundantOneValue = core.RemoveRedundantOneValue 
     89RemoveUnusedValues = core.RemoveUnusedValues 
    15490 
    15591import math 
    15692 
    157 import orange 
    15893import Orange 
     94import Orange.classification.majority 
     95import Orange.data 
     96import Orange.feature 
     97import Orange.feature.discretization 
     98import Orange.feature.scoring 
    15999from Orange.misc import _orange__new__, _orange__reduce__ 
    160100 
    161 class Preprocessor_discretizeEntropy(Preprocessor_discretize): 
     101class DiscretizeEntropy(Discretize): 
    162102    """ A discretizer that uses orange.EntropyDiscretization method but, 
    163103    unlike Preprocessor_discretize class, also removes unused attributes 
    164104    from the domain. 
    165      
    166     """ 
    167      
    168     __new__ = _orange__new__(Preprocessor_discretize) 
     105    """ 
     106     
     107    __new__ = _orange__new__(Discretize) 
    169108    __reduce__ = _orange__reduce__ 
    170109     
     
    173112        assert(isinstance(method, Orange.feature.discretization.Entropy)) 
    174113         
    175     def __call__(self, data, wightId=0): 
     114    def __call__(self, data, weightId=0): 
    176115        newattr_list = [] 
    177116        for attr in data.domain.attributes: 
    178             if attr.varType == orange.VarTypes.Continuous: 
     117            if attr.varType == Orange.feature.Type.Continuous: 
    179118                newattr = self.method(attr, data) 
    180119                if newattr.getValueFrom.transformer.points: 
     
    182121            else: 
    183122                newattr_list.append(attr) 
    184         newdomain = orange.Domain(newattr_list, data.domain.classVar) 
     123        newdomain = Orange.data.Domain(newattr_list, data.domain.classVar) 
    185124        newdomain.addmetas(data.domain.getmetas()) 
    186         return orange.ExampleTable(newdomain, data) 
    187      
    188 class Preprocessor_removeContinuous(Preprocessor_discretize): 
     125        return Orange.data.Table(newdomain, data) 
     126     
     127class RemoveContinuous(Discretize): 
    189128    """ A preprocessor that removes all continuous features. 
    190129    """ 
    191     __new__ = _orange__new__(Preprocessor_discretize) 
    192     __reduce__ = _orange__reduce__ 
    193      
    194     def __call__(self, data, weightId=None): 
    195         attrs = [attr for attr in data.domain.attributes if attr.varType == orange.VarTypes.Discrete] 
    196         domain = orange.Domain(attrs, data.domain.classVar) 
     130    __new__ = _orange__new__(Discretize) 
     131    __reduce__ = _orange__reduce__ 
     132     
     133    def __call__(self, data, weightId=None): 
     134        attrs = [attr for attr in data.domain.attributes if attr.varType == Orange.feature.Type.Discrete] 
     135        domain = Orange.data.Domain(attrs, data.domain.classVar) 
    197136        domain.addmetas(data.domain.getmetas()) 
    198         return orange.ExampleTable(domain, data) 
     137        return Orange.data.Table(domain, data) 
    199138                 
    200 class Preprocessor_continuize(orange.Preprocessor): 
     139class Continuize(Preprocessor): 
    201140    """ A preprocessor that continuizes a discrete domain (and optionally normalizes it). 
    202141    See :obj:`Orange.data.continuization.DomainContinuizer` for list of 
     
    204143     
    205144    """ 
    206     __new__ = _orange__new__(orange.Preprocessor) 
    207     __reduce__ = _orange__reduce__ 
    208      
    209     def __init__(self, zeroBased=True, multinomialTreatment=orange.DomainContinuizer.NValues, 
    210                  continuousTreatment=orange.DomainContinuizer.Leave, 
    211                  classTreatment=orange.DomainContinuizer.Ignore, 
     145    __new__ = _orange__new__(Preprocessor) 
     146    __reduce__ = _orange__reduce__ 
     147     
     148    def __init__(self, zeroBased=True, multinomialTreatment=DomainContinuizer.NValues, 
     149                 continuousTreatment=DomainContinuizer.Leave, 
     150                 classTreatment=DomainContinuizer.Ignore, 
    212151                 **kwargs): 
    213152        self.zeroBased = zeroBased 
     
    217156             
    218157    def __call__(self, data, weightId=0): 
    219         continuizer = orange.DomainContinuizer(zeroBased=self.zeroBased, 
    220                                                multinomialTreatment=self.multinomialTreatment, 
    221                                                continuousTreatment=self.continuousTreatment, 
    222                                                classTreatment=self.classTreatment) 
     158        continuizer = DomainContinuizer(zeroBased=self.zeroBased, 
     159                                        multinomialTreatment=self.multinomialTreatment, 
     160                                        continuousTreatment=self.continuousTreatment, 
     161                                        classTreatment=self.classTreatment) 
    223162        c_domain = continuizer(data, weightId) 
    224163        return data.translate(c_domain) 
    225164     
    226 class Preprocessor_removeDiscrete(Preprocessor_continuize): 
     165class RemoveDiscrete(Continuize): 
    227166    """ A Preprocessor that removes all discrete attributes from the domain. 
    228167    """ 
    229     __new__ = _orange__new__(Preprocessor_continuize) 
    230      
    231     def __call__(self, data, weightId=None): 
    232         attrs = [attr for attr in data.domain.attributes if attr.varType == orange.VarTypes.Continuous] 
    233         domain = orange.Domain(attrs, data.domain.classVar) 
     168    __new__ = _orange__new__(Continuize) 
     169     
     170    def __call__(self, data, weightId=None): 
     171        attrs = [attr for attr in data.domain.attributes if attr.varType == Orange.feature.Type.Continuous] 
     172        domain = Orange.data.Domain(attrs, data.domain.classVar) 
    234173        domain.addmetas(data.domain.getmetas()) 
    235         return orange.ExampleTable(domain, data) 
     174        return Orange.data.Table(domain, data) 
    236175          
    237 class Preprocessor_impute(orange.Preprocessor): 
     176class Impute(Preprocessor): 
    238177    """ A preprocessor that imputes unknown values using a learner. 
    239178     
     
    241180     
    242181    """ 
    243     __new__ = _orange__new__(orange.Preprocessor) 
     182    __new__ = _orange__new__(Preprocessor) 
    244183    __reduce__ = _orange__reduce__ 
    245184     
    246185    def __init__(self, model=None, **kwargs): 
    247         self.model = orange.MajorityLearner() if model is None else model 
     186        self.model = Orange.classification.majority.MajorityLearner() if model is None else model 
    248187         
    249188    def __call__(self, data, weightId=0): 
    250         return orange.Preprocessor_imputeByLearner(data, learner=self.model) 
     189        return ImputeByLearner(data, learner=self.model) 
    251190 
    252191def bestN(attrMeasures, N=10): 
     
    254193    """ 
    255194    return attrMeasures[-N:] 
     195_bestN = bestN 
    256196 
    257197def bestP(attrMeasures, P=10): 
     
    260200    count = len(attrMeasures) 
    261201    return  attrMeasures[-max(int(math.ceil(count * P / 100.0)), 1):] 
    262  
    263 class Preprocessor_featureSelection(orange.Preprocessor): 
     202_bestP = bestP 
     203 
     204class FeatureSelection(Preprocessor): 
    264205    """ A preprocessor that runs feature selection using a feature scoring function. 
    265206     
     
    269210         
    270211    """ 
    271     __new__ = _orange__new__(orange.Preprocessor) 
    272     __reduce__ = _orange__reduce__ 
    273      
    274     bestN = staticmethod(bestN) 
    275     bestP = staticmethod(bestP) 
     212    __new__ = _orange__new__(Preprocessor) 
     213    __reduce__ = _orange__reduce__ 
     214     
     215    bestN = staticmethod(_bestN) 
     216    bestP = staticmethod(_bestP) 
    276217     
    277218    def __init__(self, measure=Orange.feature.scoring.Relief(), filter=None, limit=10): 
     
    289230        measures = self.attrScores(data) 
    290231        attrs = [attr for _, attr in self.filter(measures, self.limit)] 
    291         domain = orange.Domain(attrs, data.domain.classVar) 
     232        domain = Orange.data.Domain(attrs, data.domain.classVar) 
    292233        domain.addmetas(data.domain.getmetas()) 
    293         return orange.ExampleTable(domain, data) 
    294      
    295 class Preprocessor_RFE(Preprocessor_featureSelection): 
     234        return Orange.data.Table(domain, data) 
     235     
     236class RFE(FeatureSelection): 
    296237    """ A preprocessor that runs RFE (Recursive Feature Elimination) using 
    297238    linear SVM derived attribute weights. 
     
    302243         
    303244    """ 
    304     __new__ = _orange__new__(Preprocessor_featureSelection) 
    305     __reduce__ = _orange__reduce__ 
     245    __new__ = _orange__new__(FeatureSelection) 
     246    __reduce__ = _orange__reduce__ 
     247 
    306248    def __init__(self, filter=None, limit=10): 
    307         self.limit = limit 
    308         self.filter = filter if filter is not None else self.bestN 
     249        super(RFE, self).__init__(filter=filter, limit=limit) 
    309250         
    310251    def __call__(self, data, weightId=None): 
     
    319260    import random 
    320261    return random.sample(examples, N) 
     262_selectNRandom = selectNRandom 
    321263 
    322264def selectPRandom(examples, P=10): 
     
    326268    count = len(examples) 
    327269    return random.sample(examples, max(int(math.ceil(count * P / 100.0)), 1)) 
    328  
    329 class Preprocessor_sample(orange.Preprocessor): 
     270_selectPRandom = selectPRandom 
     271 
     272class Sample(Preprocessor): 
    330273    """ A preprocessor that samples a subset of the data. 
    331274     
     
    335278     
    336279    """ 
    337     __new__ = _orange__new__(orange.Preprocessor) 
    338     __reduce__ = _orange__reduce__ 
    339  
    340     selectNRandom = staticmethod(selectNRandom) 
    341     selectPRandom = staticmethod(selectPRandom) 
     280    __new__ = _orange__new__(Preprocessor) 
     281    __reduce__ = _orange__reduce__ 
     282 
     283    selectNRandom = staticmethod(_selectNRandom) 
     284    selectPRandom = staticmethod(_selectPRandom) 
    342285     
    343286    def __init__(self, filter=None, limit=10): 
     
    346289         
    347290    def __call__(self, data, weightId=None): 
    348         return orange.ExampleTable(data.domain, self.filter(data, self.limit)) 
    349      
    350  
    351 class Preprocessor_preprocessorList(orange.Preprocessor): 
     291        return Orange.data.Table(data.domain, self.filter(data, self.limit)) 
     292     
     293 
     294class PreprocessorList(Preprocessor): 
    352295    """ A preprocessor wrapping a sequence of other preprocessors. 
    353296     
     
    356299    """ 
    357300     
    358     __new__ = _orange__new__(orange.Preprocessor) 
    359     __reduce__ = _orange__reduce__ 
    360      
    361     def __init__(self, preprocessors=[]): 
     301    __new__ = _orange__new__(Preprocessor) 
     302    __reduce__ = _orange__reduce__ 
     303     
     304    def __init__(self, preprocessors=()): 
    362305        self.preprocessors = preprocessors 
    363306         
    364307    def __call__(self, data, weightId=None): 
    365         import orange 
    366308        hadWeight = hasWeight = weightId is not None 
    367309        for preprocessor in self.preprocessors: 
Note: See TracChangeset for help on using the changeset viewer.