Ignore:
Files:
5 added
3 deleted
30 edited

Legend:

Unmodified
Added
Removed
  • .hgignore

    r10159 r10545  
    2020MANIFEST 
    2121Orange.egg-info 
     22Orange/version.py 
    2223 
    2324# Ignore dot files. 
  • Orange/OrangeWidgets/Data/OWPreprocess.py

    r9671 r10542  
    1717import math 
    1818 
    19 from Orange.preprocess import * 
     19from Orange.data import preprocess 
    2020 
    2121def _gettype(obj): 
     
    7676    def getDiscretizer(self): 
    7777        if self.discInd == 0: 
    78             preprocessor = Preprocessor_discretizeEntropy(method=orange.EntropyDiscretization()) 
     78            preprocessor = preprocess.DiscretizeEntropy(method=orange.EntropyDiscretization()) 
    7979        elif self.discInd in [1, 2]: 
    8080            name, disc, kwds = self.DISCRETIZERS[self.discInd] 
    81             preprocessor = Preprocessor_discretize(method=disc(**dict([(key, getattr(self, key, val)) for key, val in kwds.items()]))) 
     81            preprocessor = preprocess.Discretize(method=disc(**dict([(key, getattr(self, key, val)) for key, 
     82                                                                                              val in kwds.items()]))) 
    8283        elif self.discInd == 3: 
    83             preprocessor = Preprocessor_removeContinuous() 
     84            preprocessor = preprocess.RemoveContinuous() 
    8485        return preprocessor 
    8586     
     
    120121    def getContinuizer(self): 
    121122        if self.contInd in [0, 1, 2, 4, 5]: 
    122             preprocessor = Preprocessor_continuize(multinomialTreatment=self.CONTINUIZERS[self.contInd][1]) 
     123            preprocessor = preprocess.Continuize(multinomialTreatment=self.CONTINUIZERS[self.contInd][1]) 
    123124        elif self.contInd == 3: 
    124             preprocessor = Preprocessor_removeDiscrete() 
     125            preprocessor = preprocess.RemoveDiscrete() 
    125126        return preprocessor 
    126127     
    127128    def setContinuizer(self, continuizer): 
    128         if isinstance(continuizer, Preprocessor_removeDiscrete): 
     129        if isinstance(continuizer, preprocess.RemoveDiscrete): 
    129130            self.contInd = 3 #Ignore all discrete 
    130         elif isinstance(continuizer,Preprocessor_continuize): 
     131        elif isinstance(continuizer,preprocess.Continuize): 
    131132            self.contInd = self.TREATMENT_TO_IND.get(continuizer.multinomialTreatment, 3) 
    132133     
    133     data = _pyqtProperty(Preprocessor_continuize, 
     134    data = _pyqtProperty(preprocess.Continuize, 
    134135                        fget=getContinuizer, 
    135136                        fset=setContinuizer, 
     
    155156        if self.methodInd in [0, 1, 2]: 
    156157            learner = self.IMPUTERS[self.methodInd][1]() 
    157             imputer = Preprocessor_imputeByLearner(learner=learner) 
     158            imputer = preprocess.ImputeByLearner(learner=learner) 
    158159        elif self.methodInd == 3: 
    159             imputer = orange.Preprocessor_dropMissing() 
     160            imputer = preprocess.DropMissing() 
    160161        return imputer 
    161162             
     
    163164    def setImputer(self, imputer): 
    164165        self.methodInd = 0 
    165         if isinstance(imputer, Preprocessor_imputeByLearner): 
     166        if isinstance(imputer, preprocess.ImputeByLearner): 
    166167            learner = imputer.learner 
    167168            dd = dict([(t, i) for i, (_, t) in enumerate(self.IMPUTERS)]) 
    168169            self.methodInd = dd.get(_gettype(learner), 0) 
    169         elif isinstance(imputer, orange.Preprocessor_dropMissing): 
     170        elif isinstance(imputer, preprocess.DropMissing): 
    170171            self.methodInd = 3 
    171172             
    172     data = _pyqtProperty(Preprocessor_imputeByLearner, 
     173    data = _pyqtProperty(preprocess.ImputeByLearner, 
    173174                        fget=getImputer, 
    174175                        fset=setImputer, 
     
    183184                ("Linear SVM weights", orngSVM.MeasureAttribute_SVMWeights)] 
    184185     
    185     FILTERS = [Preprocessor_featureSelection.bestN, 
    186                Preprocessor_featureSelection.bestP] 
     186    FILTERS = [preprocess.FeatureSelection.bestN, 
     187               preprocess.FeatureSelection.bestP] 
    187188     
    188189    def __init__(self, parent=None): 
     
    235236     
    236237    def getFeatureSelection(self): 
    237         return Preprocessor_featureSelection(measure=self.MEASURES[self.measureInd][1], 
     238        return preprocess.FeatureSelection(measure=self.MEASURES[self.measureInd][1], 
    238239                                             filter=self.FILTERS[self.selectBy], 
    239240                                             limit=self.bestP if self.selectBy  else self.bestN) 
    240241     
    241     data = _pyqtProperty(Preprocessor_featureSelection, 
     242    data = _pyqtProperty(preprocess.FeatureSelection, 
    242243                        fget=getFeatureSelection, 
    243244                        fset=setFeatureSelection, 
     
    245246         
    246247class SampleEditor(BaseEditor): 
    247     FILTERS = [Preprocessor_sample.selectNRandom, 
    248                Preprocessor_sample.selectPRandom] 
     248    FILTERS = [preprocess.Sample.selectNRandom, 
     249               preprocess.Sample.selectPRandom] 
    249250    def __init__(self, parent=None): 
    250251        BaseEditor.__init__(self, parent) 
     
    278279         
    279280    def getSampler(self): 
    280         return Preprocessor_sample(filter=self.FILTERS[self.methodInd], 
     281        return preprocess.Sample(filter=self.FILTERS[self.methodInd], 
    281282                                   limit=self.sampleN if self.methodInd == 0 else self.sampleP) 
    282283     
     
    291292        self.updateSpinStates() 
    292293             
    293     data = _pyqtProperty(Preprocessor_sample, 
     294    data = _pyqtProperty(preprocess.Sample, 
    294295                        fget=getSampler, 
    295296                        fset=setSampler, 
     
    302303         
    303304    #Preprocessor name replacement rules 
    304     REPLACE = {Preprocessor_discretize: "Discretize ({0.method})", 
    305                Preprocessor_discretizeEntropy: "Discretize (entropy)", 
    306                Preprocessor_removeContinuous: "Discretize (remove continuous)", 
    307                Preprocessor_continuize: "Continuize ({0.multinomialTreatment})", 
    308                Preprocessor_removeDiscrete: "Continuize (remove discrete)", 
    309                Preprocessor_impute: "Impute ({0.model})", 
    310                Preprocessor_imputeByLearner: "Impute ({0.learner})", 
    311                Preprocessor_dropMissing: "Remove missing", 
    312                Preprocessor_featureSelection: "Feature selection ({0.measure}, {0.filter}, {0.limit})", 
    313                Preprocessor_sample: "Sample ({0.filter}, {0.limit})", 
     305    REPLACE = {preprocess.Discretize: "Discretize ({0.method})", 
     306               preprocess.DiscretizeEntropy: "Discretize (entropy)", 
     307               preprocess.RemoveContinuous: "Discretize (remove continuous)", 
     308               preprocess.Continuize: "Continuize ({0.multinomialTreatment})", 
     309               preprocess.RemoveDiscrete: "Continuize (remove discrete)", 
     310               preprocess.Impute: "Impute ({0.model})", 
     311               preprocess.ImputeByLearner: "Impute ({0.learner})", 
     312               preprocess.DropMissing: "Remove missing", 
     313               preprocess.FeatureSelection: "Feature selection ({0.measure}, {0.filter}, {0.limit})", 
     314               preprocess.Sample: "Sample ({0.filter}, {0.limit})", 
    314315               orange.EntropyDiscretization: "entropy", 
    315316               orange.EquiNDiscretization: "freq, {0.numberOfIntervals}", 
     
    447448     
    448449    # Default preprocessors 
    449     preprocessors =[("Discretize", Preprocessor_discretizeEntropy, {}), 
    450                     ("Continuize", Preprocessor_continuize, {}), 
    451                     ("Impute", Preprocessor_impute, {}), 
    452                     ("Feature selection", Preprocessor_featureSelection, {}), 
    453                     ("Sample", Preprocessor_sample, {})] 
     450    preprocessors =[("Discretize", preprocess.DiscretizeEntropy, {}), 
     451                    ("Continuize", preprocess.Continuize, {}), 
     452                    ("Impute", preprocess.Impute, {}), 
     453                    ("Feature selection", preprocess.FeatureSelection, {}), 
     454                    ("Sample", preprocess.Sample, {})] 
    454455     
    455456    # Editor widgets for preprocessors 
    456     EDITORS = {Preprocessor_discretize: DiscretizeEditor, 
    457                Preprocessor_discretizeEntropy: DiscretizeEditor, 
    458                Preprocessor_removeContinuous: DiscretizeEditor, 
    459                Preprocessor_continuize: ContinuizeEditor, 
    460                Preprocessor_removeDiscrete: ContinuizeEditor, 
    461                Preprocessor_impute: ImputeEditor, 
    462                Preprocessor_imputeByLearner: ImputeEditor, 
    463                Preprocessor_dropMissing: ImputeEditor, 
    464                Preprocessor_featureSelection: FeatureSelectEditor, 
    465                Preprocessor_sample: SampleEditor, 
     457    EDITORS = {preprocess.Discretize: DiscretizeEditor, 
     458               preprocess.DiscretizeEntropy: DiscretizeEditor, 
     459               preprocess.RemoveContinuous: DiscretizeEditor, 
     460               preprocess.Continuize: ContinuizeEditor, 
     461               preprocess.RemoveDiscrete: ContinuizeEditor, 
     462               preprocess.Impute: ImputeEditor, 
     463               preprocess.ImputeByLearner: ImputeEditor, 
     464               preprocess.DropMissing: ImputeEditor, 
     465               preprocess.FeatureSelection: FeatureSelectEditor, 
     466               preprocess.Sample: SampleEditor, 
    466467               type(None): QWidget} 
    467468     
     
    476477         
    477478#        self.allSchemas = [PreprocessorSchema("Default" , [Preprocessor_discretize(method=orange.EntropyDiscretization()), Preprocessor_dropMissing()])] 
    478         self.allSchemas = [("Default" , [Preprocessor_discretizeEntropy(method=orange.EntropyDiscretization()), Preprocessor_dropMissing()], 0)] 
     479        self.allSchemas = [("Default" , [preprocess.DiscretizeEntropy(method=orange.EntropyDiscretization()), 
     480                                         preprocess.DropMissing()], 0)] 
    479481         
    480482        self.lastSelectedSchemaIndex = 0 
  • Orange/OrangeWidgets/Visualize Qt/OWLinProj3DPlot.py

    r9671 r10542  
    55from plot import OWPoint 
    66 
    7 from Orange.preprocess.scaling import ScaleLinProjData3D, get_variable_values_sorted 
     7from Orange.data.preprocess.scaling import ScaleLinProjData3D, get_variable_values_sorted 
    88import orange 
    99Discrete = orange.VarTypes.Discrete 
  • Orange/OrangeWidgets/Visualize Qt/OWScatterPlot3D.py

    r9671 r10542  
    1616Continuous = orange.VarTypes.Continuous 
    1717 
    18 from Orange.preprocess.scaling import get_variable_values_sorted 
     18from Orange.data.preprocess.scaling import get_variable_values_sorted 
    1919 
    2020import OWGUI 
  • Orange/OrangeWidgets/plot/owtools.py

    r9671 r10542  
    4141from owpalette import OWPalette 
    4242 
    43 from Orange.preprocess.scaling import get_variable_values_sorted 
     43from Orange.data.preprocess.scaling import get_variable_values_sorted 
    4444import orangeom 
    4545import ColorPalette 
  • Orange/__init__.py

    r10491 r10549  
    3131_import("data.sample") 
    3232_import("data.outliers") 
     33_import("data.preprocess") 
     34_import("data.preprocess.scaling") 
    3335_import("data.utils") 
    3436_import("data.discretization") 
     
    8082_import("ensemble.boosting") 
    8183_import("ensemble.forest") 
     84_import("ensemble.stacking") 
    8285 
    8386_import("regression") 
     
    102105 
    103106_import("associate") 
    104  
    105 _import("preprocess") 
    106 _import("preprocess.scaling") 
    107107 
    108108_import("distance") 
  • Orange/classification/logreg.py

    r10387 r10542  
    11import Orange 
    22from Orange.misc import deprecated_keywords, deprecated_members 
     3from Orange.data import preprocess 
    34import math 
     5 
     6 
    47from numpy import dot, array, identity, reshape, diagonal, \ 
    58    transpose, concatenate, sqrt, sign 
     
    746749        examples = self.imputer(examples)(examples) 
    747750    if getattr(self, "removeMissing", 0): 
    748         examples = Orange.core.Preprocessor_dropMissing(examples) 
    749     continuizer = Orange.preprocess.DomainContinuizer(zeroBased=1, 
    750         continuousTreatment=Orange.preprocess.DomainContinuizer.Leave, 
    751                                            multinomialTreatment = Orange.preprocess.DomainContinuizer.FrequentIsBase, 
    752                                            classTreatment = Orange.preprocess.DomainContinuizer.Ignore) 
     751        examples = preprocess.DropMissing(examples) 
     752    continuizer = preprocess.DomainContinuizer(zeroBased=1, 
     753        continuousTreatment=preprocess.DomainContinuizer.Leave, 
     754                                           multinomialTreatment = preprocess.DomainContinuizer.FrequentIsBase, 
     755                                           classTreatment = preprocess.DomainContinuizer.Ignore) 
    753756    attr = [] 
    754757    remain_attr = examples.domain.features[:] 
  • Orange/classification/svm/__init__.py

    r10369 r10542  
    1818                        SVMClassifierSparse 
    1919 
    20 from Orange.preprocess import Preprocessor_impute, \ 
    21                               Preprocessor_continuize, \ 
    22                               Preprocessor_preprocessorList, \ 
    23                               DomainContinuizer 
     20from Orange.data import preprocess 
    2421 
    2522from Orange import feature as variable 
     
    243240 
    244241    def _normalize(self, data): 
    245         dc = Orange.core.DomainContinuizer() 
    246         dc.class_treatment = Orange.core.DomainContinuizer.Ignore 
    247         dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan 
    248         dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues 
     242        dc = preprocess.DomainContinuizer() 
     243        dc.class_treatment = preprocess.DomainContinuizer.Ignore 
     244        dc.continuous_treatment = preprocess.DomainContinuizer.NormalizeBySpan 
     245        dc.multinomial_treatment = preprocess.DomainContinuizer.NValues 
    249246        newdomain = dc(data) 
    250247        return data.translate(newdomain) 
     
    308305    def _normalize(self, data): 
    309306        if self.use_non_meta: 
    310             dc = Orange.core.DomainContinuizer() 
    311             dc.class_treatment = Orange.core.DomainContinuizer.Ignore 
    312             dc.continuous_treatment = Orange.core.DomainContinuizer.NormalizeBySpan 
    313             dc.multinomial_treatment = Orange.core.DomainContinuizer.NValues 
     307            dc = preprocess.DomainContinuizer() 
     308            dc.class_treatment = preprocess.DomainContinuizer.Ignore 
     309            dc.continuous_treatment = preprocess.DomainContinuizer.NormalizeBySpan 
     310            dc.multinomial_treatment = preprocess.DomainContinuizer.NValues 
    314311            newdomain = dc(data) 
    315312            data = data.translate(newdomain) 
     
    332329 
    333330    def learn_classifier(self, data): 
    334         transformer = Orange.core.DomainContinuizer() 
    335         transformer.multinomialTreatment = Orange.core.DomainContinuizer.NValues 
     331        transformer = preprocess.DomainContinuizer() 
     332        transformer.multinomialTreatment = preprocess.DomainContinuizer.NValues 
    336333        transformer.continuousTreatment = \ 
    337             Orange.core.DomainContinuizer.NormalizeBySpan 
    338         transformer.classTreatment = Orange.core.DomainContinuizer.Ignore 
     334            preprocess.DomainContinuizer.NormalizeBySpan 
     335        transformer.classTreatment = preprocess.DomainContinuizer.Ignore 
    339336        newdomain = transformer(data) 
    340337        newexamples = data.translate(newdomain) 
     
    371368    # Construct and return a default preprocessor for use by 
    372369    # Orange.core.LinearLearner learner. 
    373     impute = Preprocessor_impute() 
    374     cont = Preprocessor_continuize(multinomialTreatment= 
    375                                    DomainContinuizer.AsOrdinal) 
    376     preproc = Preprocessor_preprocessorList(preprocessors= 
     370    impute = preprocess.Impute() 
     371    cont = preprocess.Continuize(multinomialTreatment= 
     372                                   preprocess.DomainContinuizer.AsOrdinal) 
     373    preproc = preprocess.PreprocessorList(preprocessors= 
    377374                                            [impute, cont]) 
    378375    return preproc 
  • Orange/clustering/mixture.py

    r9976 r10542  
    290290         
    291291    def __call__(self, data, weight_id=None): 
    292         from Orange.preprocess import Preprocessor_impute, DomainContinuizer 
     292        from Orange.data import preprocess 
     293        #import Preprocessor_impute, DomainContinuizer 
    293294#        data = Preprocessor_impute(data) 
    294         dc = DomainContinuizer() 
    295         dc.multinomial_treatment = DomainContinuizer.AsOrdinal 
    296         dc.continuous_treatment = DomainContinuizer.NormalizeByVariance 
    297         dc.class_treatment = DomainContinuizer.Ignore 
     295        dc = preprocess.DomainContinuizer() 
     296        dc.multinomial_treatment = preprocess.DomainContinuizer.AsOrdinal 
     297        dc.continuous_treatment = preprocess.DomainContinuizer.NormalizeByVariance 
     298        dc.class_treatment = preprocess.DomainContinuizer.Ignore 
    298299        domain = dc(data) 
    299300        data = data.translate(domain) 
     
    308309#        array /= std.reshape((1, -1)) 
    309310#        means /= std.reshape((1, -1)) 
    310         solver = EMSolver(array, numpy.ones((self.n)) / self.n, 
     311        solver = EMSolver(array, numpy.ones(self.n) / self.n, 
    311312                          means, correlations) 
    312313        solver.run() 
  • Orange/doc/extend-widgets/owplot_example.py

    r9671 r10542  
    55import random 
    66import orange 
    7 from Orange.preprocess.scaling import get_variable_values_sorted 
     7from Orange.data.preprocess.scaling import get_variable_values_sorted 
    88 
    99class BasicPlot(OWPlot): 
  • Orange/ensemble/__init__.py

    r9994 r10540  
    1 """ 
    2  
    3 .. index:: ensemble 
    4  
    5 Module Orange.ensemble implements Breiman's bagging and Random Forest,  
    6 and Freund and Schapire's boosting algorithms. 
    7  
    8  
    9 ******* 
    10 Bagging 
    11 ******* 
    12  
    13 .. index:: bagging 
    14 .. index:: 
    15    single: ensemble; ensemble 
    16  
    17 .. autoclass:: Orange.ensemble.bagging.BaggedLearner 
    18    :members: 
    19    :show-inheritance: 
    20  
    21 .. autoclass:: Orange.ensemble.bagging.BaggedClassifier 
    22    :members: 
    23    :show-inheritance: 
    24  
    25 ******** 
    26 Boosting 
    27 ******** 
    28  
    29 .. index:: boosting 
    30 .. index:: 
    31    single: ensemble; boosting 
    32  
    33  
    34 .. autoclass:: Orange.ensemble.boosting.BoostedLearner 
    35   :members: 
    36   :show-inheritance: 
    37  
    38 .. autoclass:: Orange.ensemble.boosting.BoostedClassifier 
    39    :members: 
    40    :show-inheritance: 
    41  
    42 Example 
    43 ======= 
    44 Let us try boosting and bagging on Lymphography data set and use TreeLearner 
    45 with post-pruning as a base learner. For testing, we use 10-fold cross 
    46 validation and observe classification accuracy. 
    47  
    48 :download:`ensemble.py <code/ensemble.py>` 
    49  
    50 .. literalinclude:: code/ensemble.py 
    51   :lines: 7- 
    52  
    53 Running this script, we may get something like:: 
    54  
    55     Classification Accuracy: 
    56                tree: 0.764 
    57        boosted tree: 0.770 
    58         bagged tree: 0.790 
    59  
    60  
    61 ************* 
    62 Random Forest 
    63 ************* 
    64  
    65 .. index:: random forest 
    66 .. index:: 
    67    single: ensemble; random forest 
    68     
    69 .. autoclass:: Orange.ensemble.forest.RandomForestLearner 
    70   :members: 
    71   :show-inheritance: 
    72  
    73 .. autoclass:: Orange.ensemble.forest.RandomForestClassifier 
    74   :members: 
    75   :show-inheritance: 
    76  
    77  
    78 Example 
    79 ======== 
    80  
    81 The following script assembles a random forest learner and compares it 
    82 to a tree learner on a liver disorder (bupa) and housing data sets. 
    83  
    84 :download:`ensemble-forest.py <code/ensemble-forest.py>` 
    85  
    86 .. literalinclude:: code/ensemble-forest.py 
    87   :lines: 7- 
    88  
    89 Notice that our forest contains 50 trees. Learners are compared through  
    90 3-fold cross validation:: 
    91  
    92     Classification: bupa.tab 
    93     Learner  CA     Brier  AUC 
    94     tree     0.586  0.829  0.575 
    95     forest   0.710  0.392  0.752 
    96     Regression: housing.tab 
    97     Learner  MSE    RSE    R2 
    98     tree     23.708  0.281  0.719 
    99     forest   11.988  0.142  0.858 
    100  
    101 Perhaps the sole purpose of the following example is to show how to 
    102 access the individual classifiers once they are assembled into the 
    103 forest, and to show how we can assemble a tree learner to be used in 
    104 random forests. In the following example the best feature for decision 
    105 nodes is selected among three randomly chosen features, and maxDepth 
    106 and minExamples are both set to 5. 
    107  
    108 :download:`ensemble-forest2.py <code/ensemble-forest2.py>` 
    109  
    110 .. literalinclude:: code/ensemble-forest2.py 
    111   :lines: 7- 
    112  
    113 Running the above code would report on sizes (number of nodes) of the tree 
    114 in a constructed random forest. 
    115  
    116      
    117 Score Feature 
    118 ============= 
    119  
    120 L. Breiman (2001) suggested the possibility of using random forests as a 
    121 non-myopic measure of feature importance. 
    122  
    123 The assessment of feature relevance with random forests is based on the 
    124 idea that randomly changing the value of an important feature greatly 
    125 affects instance's classification, while changing the value of an 
    126 unimportant feature does not affect it much. Implemented algorithm 
    127 accumulates feature scores over given number of trees. Importance of 
    128 all features for a single tree are computed as: correctly classified  
    129 OOB instances minus correctly classified OOB instances when the feature is 
    130 randomly shuffled. The accumulated feature scores are divided by the 
    131 number of used trees and multiplied by 100 before they are returned. 
    132  
    133 .. autoclass:: Orange.ensemble.forest.ScoreFeature 
    134   :members: 
    135  
    136 Computation of feature importance with random forests is rather slow and 
    137 importances for all features need to be computed simultaneously. When it  
    138 is called to compute the quality of a certain feature, it computes qualities 
    139 for all features in the dataset. When called again, it uses the stored  
    140 results if the domain is still the same and the data table has not 
    141 changed (this is done by checking the data table's version and is 
    142 not foolproof; it will not detect if you change values of existing instances, 
    143 but will notice adding and removing instances; see the page on  
    144 :class:`Orange.data.Table` for details). 
    145  
    146 :download:`ensemble-forest-measure.py <code/ensemble-forest-measure.py>` 
    147  
    148 .. literalinclude:: code/ensemble-forest-measure.py 
    149   :lines: 7- 
    150  
    151 Corresponding output:: 
    152  
    153     DATA:iris.tab 
    154  
    155     first: 3.91, second: 0.38 
    156  
    157     different random seed 
    158     first: 3.39, second: 0.46 
    159  
    160     All importances: 
    161        sepal length:   3.39 
    162         sepal width:   0.46 
    163        petal length:  30.15 
    164         petal width:  31.98 
    165  
    166 References 
    167 ----------- 
    168 * L Breiman. Bagging Predictors. `Technical report No. 421 \ 
    169     <http://www.stat.berkeley.edu/tech-reports/421.ps.Z>`_. University of \ 
    170     California, Berkeley, 1994. 
    171 * Y Freund, RE Schapire. `Experiments with a New Boosting Algorithm \ 
    172     <http://citeseer.ist.psu.edu/freund96experiments.html>`_. Machine \ 
    173     Learning: Proceedings of the Thirteenth International Conference (ICML'96), 1996. 
    174 * JR Quinlan. `Boosting, bagging, and C4.5 \ 
    175     <http://www.rulequest.com/Personal/q.aaai96.ps>`_ . In Proc. of 13th \ 
    176     National Conference on Artificial Intelligence (AAAI'96). pp. 725-730, 1996.  
    177 * L Breiman. `Random Forests \ 
    178     <http://www.springerlink.com/content/u0p06167n6173512/>`_.\ 
    179     Machine Learning, 45, 5-32, 2001.  
    180 * M Robnik-Sikonja. `Improving Random Forests \ 
    181     <http://lkm.fri.uni-lj.si/rmarko/papers/robnik04-ecml.pdf>`_. In \ 
    182     Proc. of European Conference on Machine Learning (ECML 2004),\ 
    183     pp. 359-370, 2004. 
    184 """ 
    185  
    186 __all__ = ["bagging", "boosting", "forest"] 
     1__all__ = ["bagging", "boosting", "forest", "stacking"] 
    1872__docformat__ = 'restructuredtext' 
    188 import Orange.core as orange 
  • Orange/ensemble/forest.py

    r10530 r10540  
    9494            of completion of the learning progress. 
    9595 
    96     :param name: name of the learner. 
     96    :param name: learner name. 
    9797    :type name: string 
    9898 
  • Orange/evaluation/scoring.py

    r10429 r10548  
    18161816                                "classIndex2": "class_index2"})( 
    18171817           deprecated_function_name(AUC_for_pair_of_classes))) 
    1818 AUC_matrix = replace_use_weights(deprecated_function_name(AUC_matrix)) 
     1818AUC_matrix = replace_use_weights(AUC_matrix) 
    18191819 
    18201820 
  • Orange/feature/discretization.py

    r9944 r10544  
    3131    """ 
    3232    orange.setrandseed(0) 
    33     data_new = orange.Preprocessor_discretize(data, method=Entropy()) 
     33    data_new = Orange.data.preprocess.Discretize(data, method=Entropy()) 
    3434     
    3535    attrlist = [] 
     
    109109    def __call__(self, data, weight=None): 
    110110        # filter the data and then learn 
    111         from Orange.preprocess import Preprocessor_discretize 
    112         ddata = Preprocessor_discretize(data, method=self.discretizer) 
     111        from Orange.data.preprocess import Discretize 
     112        ddata = Discretize(data, method=self.discretizer) 
    113113        if weight<>None: 
    114114            model = self.baseLearner(ddata, weight) 
  • Orange/feature/scoring.py

    r10524 r10543  
    1 import Orange.core as orange 
    2 import Orange.misc 
    3  
    4 from orange import MeasureAttribute as Score 
    5 from orange import MeasureAttributeFromProbabilities as ScoreFromProbabilities 
    6 from orange import MeasureAttribute_info as InfoGain 
    7 from orange import MeasureAttribute_gainRatio as GainRatio 
    8 from orange import MeasureAttribute_gini as Gini 
    9 from orange import MeasureAttribute_relevance as Relevance  
    10 from orange import MeasureAttribute_cost as Cost 
    11 from orange import MeasureAttribute_relief as Relief 
    12 from orange import MeasureAttribute_MSE as MSE 
     1from Orange import core, feature 
     2from Orange.statistics import contingency, distribution 
     3 
     4from Orange.misc import deprecated_keywords, deprecated_members 
     5 
     6Score = core.MeasureAttribute 
     7ScoreFromProbabilities = core.MeasureAttributeFromProbabilities 
     8InfoGain = core.MeasureAttribute_info 
     9GainRatio = core.MeasureAttribute_gainRatio 
     10Gini = core.MeasureAttribute_gini 
     11Relevance = core.MeasureAttribute_relevance 
     12Cost = core.MeasureAttribute_cost 
     13Relief = core.MeasureAttribute_relief 
     14MSE = core.MeasureAttribute_MSE 
    1315 
    1416###### 
     
    3133 
    3234        :param data: a data table used to score features 
    33         :type data: Orange.data.Table 
     35        :type data: :obj:`~Orange.data.Table` 
    3436 
    3537        :param weight: meta attribute that stores weights of instances 
    36         :type weight: Orange.feature.Descriptor 
     38        :type weight: :obj:`~Orange.feature.Descriptor` 
    3739 
    3840        """ 
     
    4648        return [x[0] for x in measured] 
    4749 
    48 OrderAttributes = Orange.misc.deprecated_members({ 
     50OrderAttributes = deprecated_members({ 
    4951          "measure": "score", 
    5052}, wrap_methods=[])(OrderAttributes) 
     
    5961    """ 
    6062 
    61     @Orange.misc.deprecated_keywords({"aprioriDist": "apriori_dist"}) 
     63    @deprecated_keywords({"aprioriDist": "apriori_dist"}) 
    6264    def __new__(cls, attr=None, data=None, apriori_dist=None, weightID=None): 
    6365        self = Score.__new__(cls) 
    64         if attr != None and data != None: 
     66        if attr is not None and data is not None: 
    6567            #self.__init__(**argkw) 
    6668            return self.__call__(attr, data, apriori_dist, weightID) 
     
    6870            return self 
    6971 
    70     @Orange.misc.deprecated_keywords({"aprioriDist": "apriori_dist"}) 
     72    @deprecated_keywords({"aprioriDist": "apriori_dist"}) 
    7173    def __call__(self, attr, data, apriori_dist=None, weightID=None): 
    7274        """Score the given feature. 
    7375 
    7476        :param attr: feature to score 
    75         :type attr: Orange.feature.Descriptor 
     77        :type attr: :obj:`~Orange.feature.Descriptor` 
    7678 
    7779        :param data: a data table used to score features 
    78         :type data: Orange.data.table 
     80        :type data: :obj:`~Orange.data.Table` 
    7981 
    8082        :param apriori_dist:  
     
    8284         
    8385        :param weightID: meta feature used to weight individual data instances 
    84         :type weightID: Orange.feature.Descriptor 
     86        :type weightID: :obj:`~Orange.feature.Descriptor` 
    8587 
    8688        """ 
    8789        import numpy 
    88         from orngContingency import Entropy 
     90        from orngContingency import Entropy #TODO: Move to new hierarchy 
    8991        if attr in data.domain:  # if we receive attr as string we have to convert to variable 
    9092            attr = data.domain[attr] 
    91         attrClassCont = orange.ContingencyAttrClass(attr, data) 
     93        attrClassCont = contingency.VarClass(attr, data) 
    9294        dist = [] 
    9395        for vals in attrClassCont.values(): 
     
    116118    """ 
    117119 
    118     @Orange.misc.deprecated_keywords({"aprioriDist": "apriori_dist"}) 
     120    @deprecated_keywords({"aprioriDist": "apriori_dist"}) 
    119121    def __new__(cls, attr=None, data=None, apriori_dist=None, weightID=None): 
    120122        self = Score.__new__(cls) 
    121         if attr != None and data != None: 
     123        if attr is not None and data is not None: 
    122124            #self.__init__(**argkw) 
    123125            return self.__call__(attr, data, apriori_dist, weightID) 
     
    125127            return self 
    126128 
    127     @Orange.misc.deprecated_keywords({"aprioriDist": "apriori_dist"}) 
     129    @deprecated_keywords({"aprioriDist": "apriori_dist"}) 
    128130    def __call__(self, attr, data, apriori_dist=None, weightID=None): 
    129131        """Score the given feature. 
    130132 
    131133        :param attr: feature to score 
    132         :type attr: Orange.feature.Descriptor 
     134        :type attr: :obj:`~Orange.feature.Descriptor` 
    133135 
    134136        :param data: a data table used to score the feature 
    135         :type data: Orange.data.table 
     137        :type data: :obj:`~Orange.data.Table` 
    136138 
    137139        :param apriori_dist:  
     
    139141         
    140142        :param weightID: meta feature used to weight individual data instances 
    141         :type weightID: Orange.feature.Descriptor 
     143        :type weightID: :obj:`~Orange.feature.Descriptor` 
    142144 
    143145        """ 
    144         attrClassCont = orange.ContingencyAttrClass(attr, data) 
    145         classDist = orange.Distribution(data.domain.classVar, data).values() 
     146        attrClassCont = contingency.VarClass(attr, data) 
     147        classDist = distribution.Distribution(data.domain.classVar, data).values() 
    146148        nCls = len(classDist) 
    147149        nEx = len(data) 
     
    177179 
    178180 
    179 @Orange.misc.deprecated_keywords({"attrList": "attr_list", "attrMeasure": "attr_score", "removeUnusedValues": "remove_unused_values"}) 
     181@deprecated_keywords({"attrList": "attr_list", "attrMeasure": "attr_score", "removeUnusedValues": "remove_unused_values"}) 
    180182def merge_values(data, attr_list, attr_score, remove_unused_values = 1): 
    181183    import orngCI 
     
    183185    newData = data.select(attr_list + [data.domain.class_var]) 
    184186    newAttr = orngCI.FeatureByCartesianProduct(newData, attr_list)[0] 
    185     dist = orange.Distribution(newAttr, newData) 
     187    dist = distribution.Distribution(newAttr, newData) 
    186188    activeValues = [] 
    187189    for i in range(len(newAttr.values)): 
     
    213215        return newAttr 
    214216 
    215     reducedAttr = orange.EnumVariable(newAttr.name, values = [newAttr.values[i] for i in activeValues]) 
     217    reducedAttr = feature.Discrete.EnumVariable(newAttr.name, values = [newAttr.values[i] for i in activeValues]) 
    216218    reducedAttr.get_value_from = newAttr.get_value_from 
    217219    reducedAttr.get_value_from.class_var = reducedAttr 
     
    220222###### 
    221223# from orngFSS 
    222 @Orange.misc.deprecated_keywords({"measure": "score"}) 
     224@deprecated_keywords({"measure": "score"}) 
    223225def score_all(data, score=Relief(k=20, m=50)): 
    224226    """Assess the quality of features using the given measure and return 
     
    226228 
    227229    :param data: data table should include a discrete class. 
    228     :type data: :obj:`Orange.data.Table` 
     230    :type data: :obj:`~Orange.data.Table` 
    229231    :param score:  feature scoring function. Derived from 
    230232      :obj:`~Orange.feature.scoring.Score`. Defaults to  
    231233      :obj:`~Orange.feature.scoring.Relief` with k=20 and m=50. 
    232     :type measure: :obj:`~Orange.feature.scoring.Score`  
    233     :rtype: :obj:`list`; a sorted (by descending score) list of 
    234       tuples (feature name, score) 
     234    :type score: :obj:`~Orange.feature.scoring.Score` 
     235    :rtype: :obj:`list`; a sorted list of tuples (feature name, score) 
    235236 
    236237    """ 
  • Orange/fixes/fix_changed_names.py

    r10378 r10542  
    472472           "orngLR.zprob":"Orange.classification.logreg.zprob", 
    473473 
    474            "orange.Preprocessor": "Orange.preprocess.Preprocessor", 
    475            "orange.Preprocessor_addCensorWeight": "Orange.preprocess.Preprocessor_addCensorWeight", 
    476            "orange.Preprocessor_addClassNoise": "Orange.preprocess.Preprocessor_addClassNoise", 
    477            "orange.Preprocessor_addClassWeight": "Orange.preprocess.Preprocessor_addClassWeight", 
    478            "orange.Preprocessor_addGaussianClassNoise": "Orange.preprocess.Preprocessor_addGaussianClassNoise", 
    479            "orange.Preprocessor_addGaussianNoise": "Orange.preprocess.Preprocessor_addGaussianNoise", 
    480            "orange.Preprocessor_addMissing": "Orange.preprocess.Preprocessor_addMissing", 
    481            "orange.Preprocessor_addMissingClasses": "Orange.preprocess.Preprocessor_addMissingClasses", 
    482            "orange.Preprocessor_addNoise": "Orange.preprocess.Preprocessor_addNoise", 
    483            "orange.Preprocessor_discretize": "Orange.preprocess.Preprocessor_discretize", 
    484            "orange.Preprocessor_drop": "Orange.preprocess.Preprocessor_drop", 
    485            "orange.Preprocessor_dropMissing": "Orange.preprocess.Preprocessor_dropMissing", 
    486            "orange.Preprocessor_dropMissingClasses": "Orange.preprocess.Preprocessor_dropMissingClasses", 
    487            "orange.Preprocessor_filter": "Orange.preprocess.Preprocessor_filter", 
    488            "orange.Preprocessor_ignore": "Orange.preprocess.Preprocessor_ignore", 
    489            "orange.Preprocessor_imputeByLearner": "Orange.preprocess.Preprocessor_imputeByLearner", 
    490            "orange.Preprocessor_removeDuplicates": "Orange.preprocess.Preprocessor_removeDuplicates", 
    491            "orange.Preprocessor_select": "Orange.preprocess.Preprocessor_select", 
    492            "orange.Preprocessor_shuffle": "Orange.preprocess.Preprocessor_shuffle", 
    493            "orange.Preprocessor_take": "Orange.preprocess.Preprocessor_take", 
    494            "orange.Preprocessor_takeMissing": "Orange.preprocess.Preprocessor_takeMissing", 
    495            "orange.Preprocessor_takeMissingClasses": "Orange.preprocess.Preprocessor_takeMissingClasses", 
     474           "orange.Preprocessor": "Orange.data.preprocess.Preprocessor", 
     475           "orange.Preprocessor_addCensorWeight": "Orange.data.preprocess.AddCensorWeight", 
     476           "orange.Preprocessor_addClassNoise": "Orange.data.preprocess.AddClassNoise", 
     477           "orange.Preprocessor_addClassWeight": "Orange.data.preprocess.AddClassWeight", 
     478           "orange.Preprocessor_addGaussianClassNoise": "Orange.data.preprocess.AddGaussianClassNoise", 
     479           "orange.Preprocessor_addGaussianNoise": "Orange.data.preprocess.AddGaussianNoise", 
     480           "orange.Preprocessor_addMissing": "Orange.data.preprocess.AddMissing", 
     481           "orange.Preprocessor_addMissingClasses": "Orange.data.preprocess.AddMissingClasses", 
     482           "orange.Preprocessor_addNoise": "Orange.data.preprocess.AddNoise", 
     483           "orange.Preprocessor_discretize": "Orange.data.preprocess.Discretize", 
     484           "orange.Preprocessor_drop": "Orange.data.preprocess.Drop", 
     485           "orange.Preprocessor_dropMissing": "Orange.data.preprocess.DropMissing", 
     486           "orange.Preprocessor_dropMissingClasses": "Orange.data.preprocess.DropMissingClasses", 
     487           "orange.Preprocessor_filter": "Orange.data.preprocess.Filter", 
     488           "orange.Preprocessor_ignore": "Orange.data.preprocess.Ignore", 
     489           "orange.Preprocessor_imputeByLearner": "Orange.data.preprocess.ImputeByLearner", 
     490           "orange.Preprocessor_removeDuplicates": "Orange.data.preprocess.RemoveDuplicates", 
     491           "orange.Preprocessor_select": "Orange.data.preprocess.Select", 
     492           "orange.Preprocessor_shuffle": "Orange.data.preprocess.Shuffle", 
     493           "orange.Preprocessor_take": "Orange.data.preprocess.Take", 
     494           "orange.Preprocessor_takeMissing": "Orange.data.preprocess.TakeMissing", 
     495           "orange.Preprocessor_takeMissingClasses": "Orange.data.preprocess.TakeMissingClasses", 
    496496 
    497497           "orange.Discretizer": "Orange.feature.discretization.Discretizer", 
     
    573573           "orngEnviron.addOrangeDirectoriesToPath": "Orange.misc.environ.add_orange_directories_to_path", 
    574574 
    575            "orngScaleData.getVariableValuesSorted": "Orange.preprocess.scaling.get_variable_values_sorted", 
    576            "orngScaleData.getVariableValueIndices": "Orange.preprocess.scaling.get_variable_value_indices", 
    577            "orngScaleData.discretizeDomain": "Orange.preprocess.scaling.discretize_domain", 
    578            "orngScaleData.orngScaleData": "Orange.preprocess.scaling.ScaleData", 
    579            "orngScaleLinProjData.orngScaleLinProjData": "Orange.preprocess.scaling.ScaleLinProjData", 
    580            "orngScalePolyvizData.orngScalePolyvizData": "Orange.preprocess.scaling.ScalePolyvizData", 
    581            "orngScaleScatterPlotData.orngScaleScatterPlotData": "Orange.preprocess.scaling.ScaleScatterPlotData", 
     575           "orngScaleData.getVariableValuesSorted": "Orange.data.preprocess.scaling.get_variable_values_sorted", 
     576           "orngScaleData.getVariableValueIndices": "Orange.data.preprocess.scaling.get_variable_value_indices", 
     577           "orngScaleData.discretizeDomain": "Orange.data.preprocess.scaling.discretize_domain", 
     578           "orngScaleData.orngScaleData": "Orange.data.preprocess.scaling.ScaleData", 
     579           "orngScaleLinProjData.orngScaleLinProjData": "Orange.data.preprocess.scaling.ScaleLinProjData", 
     580           "orngScalePolyvizData.orngScalePolyvizData": "Orange.data.preprocess.scaling.ScalePolyvizData", 
     581           "orngScaleScatterPlotData.orngScaleScatterPlotData": "Orange.data.preprocess.scaling.ScaleScatterPlotData", 
    582582 
    583583           "orngEvalAttr.mergeAttrValues": "Orange.feature.scoring.merge_values", 
  • Orange/misc/testing.py

    r10305 r10542  
    8686 
    8787import orange 
    88 from Orange.preprocess import Preprocessor_discretize, Preprocessor_continuize 
     88from Orange.data import preprocess 
    8989 
    9090TEST_CLASSIFICATION = 1 
     
    108108    dataset = orange.ExampleTable(os.path.join(datasetsdir, name)) 
    109109    if flags & CONTINUIZE_DOMAIN: 
    110         preprocessor = Preprocessor_continuize() 
     110        preprocessor = preprocess.Continuize() 
    111111        dataset = preprocessor(dataset) 
    112112    elif flags & DISCRETIZE_DOMAIN: 
    113         preprocessor = Preprocessor_discretize(method=orange.EquiNDiscretization(), 
     113        preprocessor = preprocess.Discretize(method=orange.EquiNDiscretization(), 
    114114                                               discretize_class=False) 
    115115        dataset = preprocessor(dataset) 
  • Orange/orng/orngScaleData.py

    r9671 r10542  
    1 from Orange.preprocess.scaling import get_variable_values_sorted as getVariableValuesSorted 
    2 from Orange.preprocess.scaling import get_variable_value_indices as getVariableValueIndices 
    3 from Orange.preprocess.scaling import discretize_domain as discretizeDomain 
    4 from Orange.preprocess.scaling import ScaleData as orngScaleData 
     1from Orange.data.preprocess.scaling import get_variable_values_sorted as getVariableValuesSorted 
     2from Orange.data.preprocess.scaling import get_variable_value_indices as getVariableValueIndices 
     3from Orange.data.preprocess.scaling import discretize_domain as discretizeDomain 
     4from Orange.data.preprocess.scaling import ScaleData as orngScaleData 
  • Orange/orng/orngScaleLinProjData.py

    r10475 r10542  
    11from orngScaleData import * 
    2 from Orange.preprocess.scaling import ScaleLinProjData as orngScaleLinProjData 
    3 from Orange.preprocess.scaling import graph_deprecator 
     2from Orange.data.preprocess.scaling import ScaleLinProjData as orngScaleLinProjData 
     3from Orange.data.preprocess.scaling import graph_deprecator 
  • Orange/orng/orngScalePolyvizData.py

    r9671 r10542  
    11from orngScaleLinProjData import * 
    2 from Orange.preprocess.scaling import ScalePolyvizData as orngScalePolyvizData 
     2from Orange.data.preprocess.scaling import ScalePolyvizData as orngScalePolyvizData 
  • Orange/orng/orngScaleScatterPlotData.py

    r9671 r10542  
    11from orngScaleData import * 
    2 from Orange.preprocess.scaling import ScaleScatterPlotData as orngScaleScatterPlotData 
     2from Orange.data.preprocess.scaling import ScaleScatterPlotData as orngScaleScatterPlotData 
  • Orange/orng/orngVizRank.py

    r9671 r10542  
    114114                graph.normalize_examples = 1 
    115115            elif visualizationMethod == SCATTERPLOT3D: 
    116                 from Orange.preprocess.scaling import ScaleScatterPlotData3D 
     116                from Orange.data.preprocess.scaling import ScaleScatterPlotData3D 
    117117                graph = ScaleScatterPlotData3D() 
    118118            elif visualizationMethod == SPHEREVIZ3D: 
    119                 from Orange.preprocess.scaling import ScaleLinProjData3D 
     119                from Orange.data.preprocess.scaling import ScaleLinProjData3D 
    120120                graph = ScaleLinProjData3D() 
    121121                graph.normalize_examples = 1 
    122122            elif visualizationMethod == LINEAR_PROJECTION3D: 
    123                 from Orange.preprocess.scaling import ScaleLinProjData3D 
     123                from Orange.data.preprocess.scaling import ScaleLinProjData3D 
    124124                graph = ScaleLinProjData3D() 
    125125                graph.normalize_examples = 0 
  • Orange/projection/linear.py

    r10490 r10542  
    88import numpy 
    99 
    10 from Orange.preprocess.scaling import ScaleLinProjData 
     10from Orange.data.preprocess.scaling import ScaleLinProjData 
    1111from Orange.orng import orngVisFuncts as visfuncts 
    1212from Orange.misc import deprecated_keywords 
  • Orange/regression/earth.py

    r10420 r10542  
    5555from Orange.feature import Discrete, Continuous 
    5656from Orange.data import Table, Domain 
    57 from Orange.preprocess import Preprocessor_continuize, \ 
    58                               Preprocessor_impute, \ 
    59                               Preprocessor_preprocessorList, \ 
     57from Orange.data.preprocess import Continuize as Preprocessor_continuize, \ 
     58                              Impute as Preprocessor_impute, \ 
     59                              PreprocessorList as Preprocessor_preprocessorList, \ 
    6060                              DomainContinuizer 
    6161 
  • Orange/regression/lasso.py

    r10314 r10535  
    1 """\ 
    2 ############################ 
    3 Lasso regression (``lasso``) 
    4 ############################ 
    5  
    6 .. index:: regression 
    7  
    8 .. _`Lasso regression. Regression shrinkage and selection via the lasso`: 
    9     http://www-stat.stanford.edu/~tibs/lasso/lasso.pdf 
    10  
    11  
    12 `The Lasso <http://www-stat.stanford.edu/~tibs/lasso/lasso.pdf>`_ is a shrinkage 
    13 and selection method for linear regression. It minimizes the usual sum of squared 
    14 errors, with a bound on the sum of the absolute values of the coefficients.  
    15  
    16 To fit the regression parameters on housing data set use the following code: 
    17  
    18 .. literalinclude:: code/lasso-example.py 
    19    :lines: 7,9,10,11 
    20  
    21 .. autoclass:: LassoRegressionLearner 
    22     :members: 
    23  
    24 .. autoclass:: LassoRegression 
    25     :members: 
    26  
    27  
    28 .. autoclass:: LassoRegressionLearner 
    29     :members: 
    30  
    31 .. autoclass:: LassoRegression 
    32     :members: 
    33  
    34 Utility functions 
    35 ----------------- 
    36  
    37 .. autofunction:: center 
    38  
    39 .. autofunction:: get_bootstrap_sample 
    40  
    41 .. autofunction:: permute_responses 
    42  
    43  
    44 ======== 
    45 Examples 
    46 ======== 
    47  
    48 To predict values of the response for the first five instances 
    49 use the code 
    50  
    51 .. literalinclude:: code/lasso-example.py 
    52    :lines: 14,15 
    53  
    54 Output 
    55  
    56 :: 
    57  
    58     Actual: 24.00, predicted: 24.58  
    59     Actual: 21.60, predicted: 23.30  
    60     Actual: 34.70, predicted: 24.98  
    61     Actual: 33.40, predicted: 24.78  
    62     Actual: 36.20, predicted: 24.66  
    63  
    64 To see the fitted regression coefficients, print the model 
    65  
    66 .. literalinclude:: code/lasso-example.py 
    67    :lines: 17 
    68  
    69 The output 
    70  
    71 :: 
    72  
    73     Variable  Coeff Est  Std Error          p 
    74      Intercept     22.533 
    75           CRIM     -0.000      0.023      0.480       
    76          INDUS     -0.010      0.023      0.300       
    77             RM      1.303      0.994      0.000   *** 
    78            AGE     -0.002      0.000      0.320       
    79        PTRATIO     -0.191      0.209      0.050     . 
    80          LSTAT     -0.126      0.105      0.000   *** 
    81     Signif. codes:  0 *** 0.001 ** 0.01 * 0.05 . 0.1 empty 1 
    82  
    83  
    84     For 7 variables the regression coefficient equals 0:  
    85     ZN 
    86     CHAS 
    87     NOX 
    88     DIS 
    89     RAD 
    90     TAX 
    91     B 
    92  
    93 shows that some of the regression coefficients are equal to 0.     
    94  
    95  
    96  
    97  
    98  
    99 """ 
    100  
    1011import Orange 
    1022import numpy 
  • Orange/testing/unit/tests/test_preprocessors.py

    r10278 r10542  
    44    import unittest 
    55 
    6 from Orange.preprocess import (Preprocessor_addCensorWeight, 
    7          Preprocessor_addClassNoise, 
    8          Preprocessor_addClassWeight, 
    9          Preprocessor_addGaussianClassNoise, 
    10          Preprocessor_addGaussianNoise, 
    11          Preprocessor_addMissing, 
    12          Preprocessor_addMissingClasses, 
    13          Preprocessor_addNoise, 
    14          Preprocessor_discretize, 
    15          Preprocessor_drop, 
    16          Preprocessor_dropMissing, 
    17          Preprocessor_dropMissingClasses, 
    18          Preprocessor_filter, 
    19          Preprocessor_ignore, 
    20          Preprocessor_imputeByLearner, 
    21          Preprocessor_removeDuplicates, 
    22          Preprocessor_select, 
    23          Preprocessor_shuffle, 
    24          Preprocessor_take, 
    25          Preprocessor_takeMissing, 
    26          Preprocessor_takeMissingClasses, 
    27          Preprocessor_discretizeEntropy, 
    28          Preprocessor_removeContinuous, 
    29          Preprocessor_removeDiscrete, 
    30          Preprocessor_continuize, 
    31          Preprocessor_impute, 
    32          Preprocessor_featureSelection, 
    33          Preprocessor_RFE, 
    34          Preprocessor_sample, 
    35          Preprocessor_preprocessorList, 
     6from Orange.data.preprocess import (AddCensorWeight as Preprocessor_addCensorWeight, 
     7         AddClassNoise as  Preprocessor_addClassNoise, 
     8         AddClassWeight as Preprocessor_addClassWeight, 
     9         AddGaussianClassNoise as  Preprocessor_addGaussianClassNoise, 
     10         AddGaussianNoise as Preprocessor_addGaussianNoise, 
     11         AddMissing as Preprocessor_addMissing, 
     12         AddMissingClasses as Preprocessor_addMissingClasses, 
     13         AddNoise as Preprocessor_addNoise, 
     14         Discretize as Preprocessor_discretize, 
     15         Drop as Preprocessor_drop, 
     16         DropMissing as Preprocessor_dropMissing, 
     17         DropMissingClasses as Preprocessor_dropMissingClasses, 
     18         Filter as Preprocessor_filter, 
     19         Ignore as Preprocessor_ignore, 
     20         ImputeByLearner as Preprocessor_imputeByLearner, 
     21         RemoveDuplicates as Preprocessor_removeDuplicates, 
     22         Select as Preprocessor_select, 
     23         Shuffle as Preprocessor_shuffle, 
     24         Take as Preprocessor_take, 
     25         TakeMissing as Preprocessor_takeMissing, 
     26         TakeMissingClasses as Preprocessor_takeMissingClasses, 
     27         DiscretizeEntropy as Preprocessor_discretizeEntropy, 
     28         RemoveContinuous as Preprocessor_removeContinuous, 
     29         RemoveDiscrete as Preprocessor_removeDiscrete, 
     30         Continuize as Preprocessor_continuize, 
     31         Impute as Preprocessor_impute, 
     32         FeatureSelection as Preprocessor_featureSelection, 
     33         RFE as Preprocessor_RFE, 
     34         Sample as Preprocessor_sample, 
     35         PreprocessorList as Preprocessor_preprocessorList, 
    3636         ) 
    3737 
  • docs/reference/rst/Orange.ensemble.rst

    r9372 r10540  
    33################################## 
    44 
     5.. index:: ensemble 
     6 
     7`Ensembles <http://en.wikipedia.org/wiki/Ensemble_learning>`_ use 
     8multiple models to improve prediction performance. The module 
     9implements a number of popular approaches, including bagging, 
     10boosting, stacking and forest trees. Most of these are available both 
     11for classification and regression with exception of stacking, which 
     12with present implementation supports classification only. 
     13 
     14******* 
     15Bagging 
     16******* 
     17 
     18.. index:: bagging 
     19.. index:: 
     20   single: ensemble; ensemble 
     21 
     22.. autoclass:: Orange.ensemble.bagging.BaggedLearner 
     23   :members: 
     24   :show-inheritance: 
     25 
     26.. autoclass:: Orange.ensemble.bagging.BaggedClassifier 
     27   :members: 
     28   :show-inheritance: 
     29 
     30******** 
     31Boosting 
     32******** 
     33 
     34.. index:: boosting 
     35.. index:: 
     36   single: ensemble; boosting 
     37 
     38 
     39.. autoclass:: Orange.ensemble.boosting.BoostedLearner 
     40  :members: 
     41  :show-inheritance: 
     42 
     43.. autoclass:: Orange.ensemble.boosting.BoostedClassifier 
     44   :members: 
     45   :show-inheritance: 
     46 
     47Example 
     48======= 
     49 
     50The following script fits classification models by boosting and 
     51bagging on Lymphography data set with TreeLearner and post-pruning as 
     52a base learner. Classification accuracy of the methods is estimated by 
     5310-fold cross validation (:download:`ensemble.py <code/ensemble.py>`): 
     54 
     55.. literalinclude:: code/ensemble.py 
     56  :lines: 7- 
     57 
     58Running this script demonstrates some benefit of boosting and bagging 
     59over the baseline learner:: 
     60 
     61    Classification Accuracy: 
     62               tree: 0.764 
     63       boosted tree: 0.770 
     64        bagged tree: 0.790 
     65 
     66******** 
     67Stacking 
     68******** 
     69 
     70.. index:: stacking 
     71.. index:: 
     72   single: ensemble; stacking 
     73 
     74 
     75.. autoclass:: Orange.ensemble.stacking.StackedClassificationLearner 
     76  :members: 
     77  :show-inheritance: 
     78 
     79.. autoclass:: Orange.ensemble.stacking.StackedClassifier 
     80   :members: 
     81   :show-inheritance: 
     82 
     83Example 
     84======= 
     85 
     86Stacking often produces classifiers that are more predictive than 
     87individual classifiers in the ensemble. This effect is illustrated by 
     88a script that combines four different classification 
     89algorithms (:download:`ensemble-stacking.py <code/ensemble-stacking.py>`): 
     90 
     91.. literalinclude:: code/ensemble-stacking.py 
     92  :lines: 3- 
     93 
     94The benefits of stacking on this particular data set are 
     95substantial (numbers show classification accuracy):: 
     96 
     97   stacking: 0.934 
     98      bayes: 0.858 
     99       tree: 0.688 
     100         lr: 0.764 
     101        knn: 0.830 
     102 
     103************* 
     104Random Forest 
     105************* 
     106 
     107.. index:: random forest 
     108.. index:: 
     109   single: ensemble; random forest 
     110    
     111.. autoclass:: Orange.ensemble.forest.RandomForestLearner 
     112  :members: 
     113  :show-inheritance: 
     114 
     115.. autoclass:: Orange.ensemble.forest.RandomForestClassifier 
     116  :members: 
     117  :show-inheritance: 
     118 
     119 
     120Example 
     121======== 
     122 
     123The following script assembles a random forest learner and compares it 
     124to a tree learner on a liver disorder (bupa) and housing data sets. 
     125 
     126:download:`ensemble-forest.py <code/ensemble-forest.py>` 
     127 
     128.. literalinclude:: code/ensemble-forest.py 
     129  :lines: 7- 
     130 
     131Notice that our forest contains 50 trees. Learners are compared through  
     1323-fold cross validation:: 
     133 
     134    Classification: bupa.tab 
     135    Learner  CA     Brier  AUC 
     136    tree     0.586  0.829  0.575 
     137    forest   0.710  0.392  0.752 
     138    Regression: housing.tab 
     139    Learner  MSE    RSE    R2 
     140    tree     23.708  0.281  0.719 
     141    forest   11.988  0.142  0.858 
     142 
     143Perhaps the sole purpose of the following example is to show how to 
     144access the individual classifiers once they are assembled into the 
     145forest, and to show how we can assemble a tree learner to be used in 
     146random forests. In the following example the best feature for decision 
     147nodes is selected among three randomly chosen features, and maxDepth 
     148and minExamples are both set to 5. 
     149 
     150:download:`ensemble-forest2.py <code/ensemble-forest2.py>` 
     151 
     152.. literalinclude:: code/ensemble-forest2.py 
     153  :lines: 7- 
     154 
     155Running the above code would report on sizes (number of nodes) of the tree 
     156in a constructed random forest. 
     157 
     158     
     159Feature scoring 
     160=============== 
     161 
     162L. Breiman (2001) suggested the possibility of using random forests as a 
     163non-myopic measure of feature importance. 
     164 
     165The assessment of feature relevance with random forests is based on the 
     166idea that randomly changing the value of an important feature greatly 
     167affects instance's classification, while changing the value of an 
     168unimportant feature does not affect it much. Implemented algorithm 
     169accumulates feature scores over given number of trees. Importance of 
     170all features for a single tree are computed as: correctly classified  
     171OOB instances minus correctly classified OOB instances when the feature is 
     172randomly shuffled. The accumulated feature scores are divided by the 
     173number of used trees and multiplied by 100 before they are returned. 
     174 
     175.. autoclass:: Orange.ensemble.forest.ScoreFeature 
     176  :members: 
     177 
     178Computation of feature importance with random forests is rather slow 
     179and importances for all features need to be computed 
     180simultaneously. When it is called to compute the quality of a certain 
     181feature, it computes qualities for all features in the dataset. When 
     182called again, it uses the stored results if the domain is still the 
     183same and the data table has not changed (this is done by checking the 
     184data table's version and is not foolproof; it will not detect if you 
     185change values of existing instances, but will notice adding and 
     186removing instances; see the page on :class:`Orange.data.Table` for 
     187details). 
     188 
     189:download:`ensemble-forest-measure.py <code/ensemble-forest-measure.py>` 
     190 
     191.. literalinclude:: code/ensemble-forest-measure.py 
     192  :lines: 7- 
     193 
     194The output of the above script is:: 
     195 
     196    DATA:iris.tab 
     197 
     198    first: 3.91, second: 0.38 
     199 
     200    different random seed 
     201    first: 3.39, second: 0.46 
     202 
     203    All importances: 
     204       sepal length:   3.39 
     205        sepal width:   0.46 
     206       petal length:  30.15 
     207        petal width:  31.98 
     208 
     209References 
     210---------- 
     211 
     212* L Breiman. Bagging Predictors. `Technical report No. 421 
     213  <http://www.stat.berkeley.edu/tech-reports/421.ps.Z>`_. University 
     214  of California, Berkeley, 1994. 
     215* Y Freund, RE Schapire. `Experiments with a New Boosting Algorithm 
     216  <http://citeseer.ist.psu.edu/freund96experiments.html>`_. Machine 
     217  Learning: Proceedings of the Thirteenth International Conference 
     218  (ICML'96), 1996.  
     219* JR Quinlan. `Boosting, bagging, and C4.5 
     220  <http://www.rulequest.com/Personal/q.aaai96.ps>`_ . In Proc. of 13th 
     221  National Conference on Artificial Intelligence 
     222  (AAAI'96). pp. 725-730, 1996. 
     223* L Breiman. `Random Forests 
     224  <http://www.springerlink.com/content/u0p06167n6173512/>`_. Machine 
     225  Learning, 45, 5-32, 2001. 
     226* M Robnik-Sikonja. `Improving Random Forests 
     227  <http://lkm.fri.uni-lj.si/rmarko/papers/robnik04-ecml.pdf>`_. In 
     228  Proc. of European Conference on Machine Learning (ECML 2004), 
     229  pp. 359-370, 2004. 
     230 
    5231.. automodule:: Orange.ensemble 
    6232 
  • docs/reference/rst/Orange.regression.lasso.rst

    r9372 r10536  
     1############################ 
     2Lasso regression (``lasso``) 
     3############################ 
     4 
    15.. automodule:: Orange.regression.lasso 
     6 
     7.. index:: regression 
     8 
     9.. _`Lasso regression. Regression shrinkage and selection via the lasso`: 
     10    http://www-stat.stanford.edu/~tibs/lasso/lasso.pdf 
     11 
     12 
     13`The Lasso <http://www-stat.stanford.edu/~tibs/lasso/lasso.pdf>`_ is a shrinkage 
     14and selection method for linear regression. It minimizes the usual sum of squared 
     15errors, with a bound on the sum of the absolute values of the coefficients.  
     16 
     17To fit the regression parameters on housing data set use the following code: 
     18 
     19.. literalinclude:: code/lasso-example.py 
     20   :lines: 9,10,11 
     21 
     22.. autoclass:: LassoRegressionLearner 
     23    :members: 
     24 
     25.. autoclass:: LassoRegression 
     26    :members: 
     27 
     28 
     29.. autoclass:: LassoRegressionLearner 
     30    :members: 
     31 
     32.. autoclass:: LassoRegression 
     33    :members: 
     34 
     35Utility functions 
     36----------------- 
     37 
     38.. autofunction:: center 
     39 
     40.. autofunction:: get_bootstrap_sample 
     41 
     42.. autofunction:: permute_responses 
     43 
     44 
     45======== 
     46Examples 
     47======== 
     48 
     49To predict values of the response for the first five instances 
     50use the code 
     51 
     52.. literalinclude:: code/lasso-example.py 
     53   :lines: 14,15 
     54 
     55Output 
     56 
     57:: 
     58 
     59    Actual: 24.00, predicted: 24.58  
     60    Actual: 21.60, predicted: 23.30  
     61    Actual: 34.70, predicted: 24.98  
     62    Actual: 33.40, predicted: 24.78  
     63    Actual: 36.20, predicted: 24.66  
     64 
     65To see the fitted regression coefficients, print the model 
     66 
     67.. literalinclude:: code/lasso-example.py 
     68   :lines: 17 
     69 
     70The output 
     71 
     72:: 
     73 
     74    Variable  Coeff Est  Std Error          p 
     75     Intercept     22.533 
     76          CRIM     -0.000      0.023      0.480       
     77         INDUS     -0.010      0.023      0.300       
     78            RM      1.303      0.994      0.000   *** 
     79           AGE     -0.002      0.000      0.320       
     80       PTRATIO     -0.191      0.209      0.050     . 
     81         LSTAT     -0.126      0.105      0.000   *** 
     82    Signif. codes:  0 *** 0.001 ** 0.01 * 0.05 . 0.1 empty 1 
     83 
     84 
     85    For 7 variables the regression coefficient equals 0:  
     86    ZN 
     87    CHAS 
     88    NOX 
     89    DIS 
     90    RAD 
     91    TAX 
     92    B 
     93 
     94shows that some of the regression coefficients are equal to 0.     
     95 
  • docs/reference/rst/Orange.regression.rst

    r10396 r10537  
    33########################### 
    44 
    5 Orange uses the term `classification` to also denote the 
    6 regression. For instance, the dependent variable is called a `class 
    7 variable` even when it is continuous, and models are generally called 
    8 classifiers. A part of the reason is that classification and 
    9 regression rely on the same set of basic classes. 
    10  
    11 Please see the documentation on :doc:`Orange.classification` for 
    12 information on how to fit models in general. 
    13  
    14 Orange contains a number of regression models which are listed below. 
     5Orange implements a set of methods for regression modeling, that is, 
      6where the outcome — the dependent variable — is real-valued: 
    157 
    168.. toctree:: 
    179   :maxdepth: 1 
    1810 
    19    Orange.regression.mean 
    2011   Orange.regression.linear 
    2112   Orange.regression.lasso 
     
    2314   Orange.regression.earth 
    2415   Orange.regression.tree 
     16   Orange.regression.mean 
     17 
      18Notice that in this documentation and in the implementation the dependent 
      19variable is referred to as the `class variable`. See also the documentation 
     20on :doc:`Orange.classification` for information on how to fit models 
     21and use them for prediction. 
     22 
     23************************* 
     24Base class for regression 
     25************************* 
     26 
      27All regression learners inherit from `BaseRegressionLearner`. 
    2528 
    2629.. automodule:: Orange.regression.base 
  • docs/reference/rst/code/unusedValues.py

    r10149 r10547  
    22data = Orange.data.Table("unusedValues.tab") 
    33 
    4 new_variables = [Orange.preprocess.RemoveUnusedValues(var, data) for var in data.domain.variables] 
     4new_variables = [Orange.data.preprocess.RemoveUnusedValues(var, data) for var in data.domain.variables] 
    55 
    66print 
Note: See TracChangeset for help on using the changeset viewer.