Files: 35 added, 20 deleted, 125 edited

Legend: unchanged context lines are prefixed with a space, added lines with +, removed lines with -.
  • .hgignore

    r9586 → r9881

    @@ -33 +33 @@
     # Built documentation.
     docs/reference/html
    +
    +# Images generated by tests.
    +Orange/doc/modules/*.png
    +docs/reference/rst/code/*.png
  • Orange/OrangeCanvas/orngSignalManager.py

    r9671 → r9873

    @@ -9 +9 @@
     
     import orange
    -import orngDebugging
    +from Orange.misc import debugging
     
     Single = 2

    @@ -124 +124 @@
         def __init__(self, *args):
             self.debugFile = None
    -        self.verbosity = orngDebugging.orngVerbosity
    +        self.verbosity = debugging.orngVerbosity
             self.stderr = sys.stderr
             self._seenExceptions = {}

    @@ -130 +130 @@
             self.asyncProcessingEnabled = False
     
    -        import orngEnviron
    +        from Orange.misc import environ
             if not hasattr(self, "log"):
                 SignalManager.log = logging.getLogger("SignalManager")
    -            self.logFileName = os.path.join(orngEnviron.canvasSettingsDir, "signalManager.log")
    +            self.logFileName = os.path.join(environ.canvas_settings_dir,
    +                "signalManager.log")
                 try:
                     self.log.addHandler(logging.handlers.RotatingFileHandler(self.logFileName, maxBytes=2**20, backupCount=2))

    @@ -151 +152 @@
             self.myerr = err()
     
    -        if orngDebugging.orngDebuggingEnabled:
    -            self.debugHandler = logging.FileHandler(orngDebugging.orngDebuggingFileName, mode="wb")
    +        if debugging.orngDebuggingEnabled:
    +            self.debugHandler = logging.FileHandler(debugging.orngDebuggingFileName, mode="wb")
                 self.log.addHandler(self.debugHandler)
    -            self.log.setLevel(logging.DEBUG if orngDebugging.orngVerbosity > 0 else logging.INFO)
    +            self.log.setLevel(logging.DEBUG if debugging.orngVerbosity > 0
    +            else logging.INFO)
                 sys.excepthook = self.exceptionHandler
                 sys.stderr = self.myerr
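
    Note: the hunks above only swap orngDebugging/orngEnviron for their
    Orange.misc counterparts; the logging setup itself is unchanged. For
    reference, a minimal standalone sketch of the same rotating-log pattern
    (the settings directory below is a made-up stand-in for
    environ.canvas_settings_dir)::

        import logging
        import logging.handlers
        import os

        settings_dir = os.path.expanduser("~/.orange")  # hypothetical path
        if not os.path.isdir(settings_dir):
            os.makedirs(settings_dir)

        log = logging.getLogger("SignalManager")
        # Keep the log under ~1 MiB with two rotated backups, as above.
        log.addHandler(logging.handlers.RotatingFileHandler(
            os.path.join(settings_dir, "signalManager.log"),
            maxBytes=2**20, backupCount=2))
        log.setLevel(logging.DEBUG)
        log.debug("signal manager initialized")
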
  • Orange/OrangeWidgets/OWBaseWidget.py

    r9671 → r9873

    @@ -4 +4 @@
     # A General Orange Widget, from which all the Orange Widgets are derived
     #
    -import orngEnviron
    +from Orange.misc import environ
    +from Orange.orng.orngEnviron import directoryNames as old_directory_names
     from PyQt4.QtCore import *
     from PyQt4.QtGui import *

    @@ -20 +21 @@
     
     from OWContexts import *
    -import sys, time, random, user, os, os.path, cPickle, copy, orngMisc
    +import sys, time, random, user, os, os.path, cPickle, copy
     import orange
    -import orngDebugging
    +from Orange import misc
    +from Orange.misc import debugging
     from string import *
     from orngSignalManager import *

    @@ -147 +149 @@
     
             # directories are better defined this way, otherwise .ini files get written in many places
    -        self.__dict__.update(orngEnviron.directoryNames)
    +        self.__dict__.update(old_directory_names)
             try:
                 self.__dict__["thisWidgetDir"] = os.path.dirname(sys.modules[self.__class__.__module__].__file__)

    @@ -848 +850 @@
         def getWidgetStateIcons(cls):
             if not hasattr(cls, "_cached__widget_state_icons"):
    -            iconsDir = os.path.join(orngEnviron.canvasDir, "icons")
    -            QDir.addSearchPath("canvasIcons",os.path.join(orngEnviron.canvasDir, "icons/"))
    +            iconsDir = os.path.join(environ.canvas_install_dir, "icons")
    +            QDir.addSearchPath("canvasIcons",os.path.join(environ.canvas_install_dir,
    +                "icons/"))
                 info = QPixmap("canvasIcons:information.png")
                 warning = QPixmap("canvasIcons:warning.png")
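
    Note: old_directory_names keeps the legacy camelCase attributes alive by
    merging a name-to-path mapping into the widget's __dict__. A sketch of the
    pattern (the mapping keys and paths are made-up examples, not the actual
    Orange set)::

        # Hypothetical stand-in for Orange.orng.orngEnviron.directoryNames.
        old_directory_names = {"canvasDir": "/opt/orange/OrangeCanvas",
                               "widgetDir": "/opt/orange/OrangeWidgets"}

        class Widget(object):
            def __init__(self):
                # Each key becomes an instance attribute, so old code that
                # reads self.canvasDir keeps working after the rename.
                self.__dict__.update(old_directory_names)

        print Widget().canvasDir   # -> /opt/orange/OrangeCanvas
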
  • Orange/OrangeWidgets/OWGUI.py

    r9671 → r9873

    @@ -2083 +2083 @@
             self.widget.progressBarFinished()
     
    -from orngMisc import progressBarMilestones
    +from Orange.misc import progress_bar_milestones as progressBarMilestones
     
     ##############################################################################
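
    Note: the import above renames progress_bar_milestones back to its old
    camelCase name so existing widget code keeps working. Presumably the helper
    computes the iteration indices at which a progress bar is worth redrawing;
    a sketch of that idea (not necessarily Orange.misc's actual
    implementation)::

        def progress_bar_milestones(count, iterations=100):
            # Indices at which a loop over `count` items should refresh
            # the bar, so it redraws at most `iterations` times.
            return set(int(i * count / float(iterations))
                       for i in range(iterations))

        milestones = progress_bar_milestones(2500)
        for i in range(2500):
            if i in milestones:
                pass  # e.g. widget.progressBarSet(100.0 * i / 2500)
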
  • Orange/OrangeWidgets/OWReport.py

    r9671 → r9873

    @@ -23 +23 @@
     from PyQt4.QtWebKit import *
     
    +from Orange.misc import environ
    +
     import os, time, tempfile, shutil, re, shutil, pickle, binascii
     import xml.dom.minidom

    @@ -48 +50 @@
     
     class ReportWindow(OWWidget):
    -    indexfile = os.path.join(orngEnviron.widgetDir, "report", "index.html")
    +    indexfile = os.path.join(environ.widget_install_dir, "report", "index.html")
     
         def __init__(self):
  • Orange/classification/logreg.py

    r9671 → r9878

    @@ -1 +1 @@
    -"""
    -.. index: logistic regression
    -.. index:
    -   single: classification; logistic regression
    -
    -********************************
    -Logistic regression (``logreg``)
    -********************************
    -
    -Implements `logistic regression
    -<http://en.wikipedia.org/wiki/Logistic_regression>`_ with an extension for
    -proper treatment of discrete features.  The algorithm can handle various
    -anomalies in features, such as constant variables and singularities, that
    -could make fitting of logistic regression almost impossible. Stepwise
    -logistic regression, which iteratively selects the most informative
    -features, is also supported.
    -
    -Logistic regression is a popular classification method that comes
    -from statistics. The model is described by a linear combination of
    -coefficients,
    -
    -.. math::
    -
    -    F = \\beta_0 + \\beta_1*X_1 + \\beta_2*X_2 + ... + \\beta_k*X_k
    -
    -and the probability (p) of a class value is  computed as:
    -
    -.. math::
    -
    -    p = \\frac{\exp(F)}{1 + \exp(F)}
    -
    -
    -.. class :: LogRegClassifier
    -
    -    :obj:`LogRegClassifier` stores estimated values of regression
    -    coefficients and their significances, and uses them to predict
    -    classes and class probabilities using the equations described above.
    -
    -    .. attribute :: beta
    -
    -        Estimated regression coefficients.
    -
    -    .. attribute :: beta_se
    -
    -        Estimated standard errors for regression coefficients.
    -
    -    .. attribute :: wald_Z
    -
    -        Wald Z statistics for beta coefficients. Wald Z is computed
    -        as beta/beta_se.
    -
    -    .. attribute :: P
    -
    -        List of P-values for beta coefficients, that is, the probability
    -        that beta coefficients differ from 0.0. The probability is
    -        computed from squared Wald Z statistics that is distributed with
    -        Chi-Square distribution.
    -
    -    .. attribute :: likelihood
    -
    -        The probability of the sample (ie. learning examples) observed on
    -        the basis of the derived model, as a function of the regression
    -        parameters.
    -
    -    .. attribute :: fitStatus
    -
    -        Tells how the model fitting ended - either regularly
    -        (:obj:`LogRegFitter.OK`), or it was interrupted due to one of beta
    -        coefficients escaping towards infinity (:obj:`LogRegFitter.Infinity`)
    -        or since the values didn't converge (:obj:`LogRegFitter.Divergence`). The
    -        value tells about the classifier's "reliability"; the classifier
    -        itself is useful in either case.
    -
    -.. autoclass:: LogRegLearner
    -
    -.. class:: LogRegFitter
    -
    -    :obj:`LogRegFitter` is the abstract base class for logistic fitters. It
    -    defines the form of call operator and the constants denoting its
    -    (un)success:
    -
    -    .. attribute:: OK
    -
    -        Fitter succeeded to converge to the optimal fit.
    -
    -    .. attribute:: Infinity
    -
    -        Fitter failed due to one or more beta coefficients escaping towards infinity.
    -
    -    .. attribute:: Divergence
    -
    -        Beta coefficients failed to converge, but none of beta coefficients escaped.
    -
    -    .. attribute:: Constant
    -
    -        There is a constant attribute that causes the matrix to be singular.
    -
    -    .. attribute:: Singularity
    -
    -        The matrix is singular.
    -
    -
    -    .. method:: __call__(examples, weightID)
    -
    -        Performs the fitting. There can be two different cases: either
    -        the fitting succeeded to find a set of beta coefficients (although
    -        possibly with difficulties) or the fitting failed altogether. The
    -        two cases return different results.
    -
    -        `(status, beta, beta_se, likelihood)`
    -            The fitter managed to fit the model. The first element of
    -            the tuple, result, tells about the problems occurred; it can
    -            be either :obj:`OK`, :obj:`Infinity` or :obj:`Divergence`. In
    -            the latter cases, returned values may still be useful for
    -            making predictions, but it's recommended that you inspect
    -            the coefficients and their errors and make your decision
    -            whether to use the model or not.
    -
    -        `(status, attribute)`
    -            The fitter failed and the returned attribute is responsible
    -            for it. The type of failure is reported in status, which
    -            can be either :obj:`Constant` or :obj:`Singularity`.
    -
    -        The proper way of calling the fitter is to expect and handle all
    -        the situations described. For instance, if fitter is an instance
    -        of some fitter and examples contain a set of suitable examples,
    -        a script should look like this::
    -
    -            res = fitter(examples)
    -            if res[0] in [fitter.OK, fitter.Infinity, fitter.Divergence]:
    -               status, beta, beta_se, likelihood = res
    -               < proceed by doing something with what you got >
    -            else:
    -               status, attr = res
    -               < remove the attribute or complain to the user or ... >
    -
    -
    -.. class :: LogRegFitter_Cholesky
    -
    -    :obj:`LogRegFitter_Cholesky` is the sole fitter available at the
    -    moment. It is a C++ translation of `Alan Miller's logistic regression
    -    code <http://users.bigpond.net.au/amiller/>`_. It uses Newton-Raphson
    -    algorithm to iteratively minimize least squares error computed from
    -    learning examples.
    -
    -
    -.. autoclass:: StepWiseFSS
    -.. autofunction:: dump
    -
    -
    -
    -Examples
    ---------
    -
    -The first example shows a very simple induction of a logistic regression
    -classifier (:download:`logreg-run.py <code/logreg-run.py>`, uses :download:`titanic.tab <code/titanic.tab>`).
    -
    -.. literalinclude:: code/logreg-run.py
    -
    -Result::
    -
    -    Classification accuracy: 0.778282598819
    -
    -    class attribute = survived
    -    class values = <no, yes>
    -
    -        Attribute       beta  st. error     wald Z          P OR=exp(beta)
    -
    -        Intercept      -1.23       0.08     -15.15      -0.00
    -     status=first       0.86       0.16       5.39       0.00       2.36
    -    status=second      -0.16       0.18      -0.91       0.36       0.85
    -     status=third      -0.92       0.15      -6.12       0.00       0.40
    -        age=child       1.06       0.25       4.30       0.00       2.89
    -       sex=female       2.42       0.14      17.04       0.00      11.25
    -
    -The next examples shows how to handle singularities in data sets
    -(:download:`logreg-singularities.py <code/logreg-singularities.py>`, uses :download:`adult_sample.tab <code/adult_sample.tab>`).
    -
    -.. literalinclude:: code/logreg-singularities.py
    -
    -The first few lines of the output of this script are::
    -
    -    <=50K <=50K
    -    <=50K <=50K
    -    <=50K <=50K
    -    >50K >50K
    -    <=50K >50K
    -
    -    class attribute = y
    -    class values = <>50K, <=50K>
    -
    -                               Attribute       beta  st. error     wald Z          P OR=exp(beta)
    -
    -                               Intercept       6.62      -0.00       -inf       0.00
    -                                     age      -0.04       0.00       -inf       0.00       0.96
    -                                  fnlwgt      -0.00       0.00       -inf       0.00       1.00
    -                           education-num      -0.28       0.00       -inf       0.00       0.76
    -                 marital-status=Divorced       4.29       0.00        inf       0.00      72.62
    -            marital-status=Never-married       3.79       0.00        inf       0.00      44.45
    -                marital-status=Separated       3.46       0.00        inf       0.00      31.95
    -                  marital-status=Widowed       3.85       0.00        inf       0.00      46.96
    -    marital-status=Married-spouse-absent       3.98       0.00        inf       0.00      53.63
    -        marital-status=Married-AF-spouse       4.01       0.00        inf       0.00      55.19
    -                 occupation=Tech-support      -0.32       0.00       -inf       0.00       0.72
    -
    -If :obj:`removeSingular` is set to 0, inducing a logistic regression
    -classifier would return an error::
    -
    -    Traceback (most recent call last):
    -      File "logreg-singularities.py", line 4, in <module>
    -        lr = classification.logreg.LogRegLearner(table, removeSingular=0)
    -      File "/home/jure/devel/orange/Orange/classification/logreg.py", line 255, in LogRegLearner
    -        return lr(examples, weightID)
    -      File "/home/jure/devel/orange/Orange/classification/logreg.py", line 291, in __call__
    -        lr = learner(examples, weight)
    -    orange.KernelException: 'orange.LogRegLearner': singularity in workclass=Never-worked
    -
    -We can see that the attribute workclass is causing a singularity.
    -
    -The example below shows, how the use of stepwise logistic regression can help to
    -gain in classification performance (:download:`logreg-stepwise.py <code/logreg-stepwise.py>`, uses :download:`ionosphere.tab <code/ionosphere.tab>`):
    -
    -.. literalinclude:: code/logreg-stepwise.py
    -
    -The output of this script is::
    -
    -    Learner      CA
    -    logistic     0.841
    -    filtered     0.846
    -
    -    Number of times attributes were used in cross-validation:
    -     1 x a21
    -    10 x a22
    -     8 x a23
    -     7 x a24
    -     1 x a25
    -    10 x a26
    -    10 x a27
    -     3 x a28
    -     7 x a29
    -     9 x a31
    -     2 x a16
    -     7 x a12
    -     1 x a32
    -     8 x a15
    -    10 x a14
    -     4 x a17
    -     7 x a30
    -    10 x a11
    -     1 x a10
    -     1 x a13
    -    10 x a34
    -     2 x a19
    -     1 x a18
    -    10 x a3
    -    10 x a5
    -     4 x a4
    -     4 x a7
    -     8 x a6
    -    10 x a9
    -    10 x a8
    -
    -"""
    -
    -from Orange.core import LogRegLearner, LogRegClassifier, LogRegFitter, LogRegFitter_Cholesky
    -
     import Orange
    -import math, os
    -import warnings
    -from numpy import *
    -from numpy.linalg import *
    -
    -
    -##########################################################################
    -## Print out methods
    +from Orange.misc import deprecated_keywords, deprecated_members
    +import math
    +from numpy import dot, array, identity, reshape, diagonal, \
    +    transpose, concatenate, sqrt, sign
    +from numpy.linalg import inv
    +from Orange.core import LogRegClassifier, LogRegFitter, LogRegFitter_Cholesky
     
     def dump(classifier):
    -    """ Formatted string of all major features in logistic
    -    regression classifier. 
    -
    -    :param classifier: logistic regression classifier
    +    """ Return a formatted string of all major features in logistic regression
    +    classifier.
    +
    +    :param classifier: logistic regression classifier.
         """
     
         # print out class values
         out = ['']
    -    out.append("class attribute = " + classifier.domain.classVar.name)
    -    out.append("class values = " + str(classifier.domain.classVar.values))
    +    out.append("class attribute = " + classifier.domain.class_var.name)
    +    out.append("class values = " + str(classifier.domain.class_var.values))
         out.append('')
     
         # get the longest attribute name
         longest=0
    -    for at in classifier.continuizedDomain.attributes:
    +    for at in classifier.continuized_domain.features:
             if len(at.name)>longest:
    -            longest=len(at.name);
    +            longest=len(at.name)
     
         # print out the head

    @@ -301 +33 @@
         out.append(formatstr % ("Intercept", classifier.beta[0], classifier.beta_se[0], classifier.wald_Z[0], classifier.P[0]))
         formatstr = "%"+str(longest)+"s %10.2f %10.2f %10.2f %10.2f %10.2f"
    -    for i in range(len(classifier.continuizedDomain.attributes)):
    -        out.append(formatstr % (classifier.continuizedDomain.attributes[i].name, classifier.beta[i+1], classifier.beta_se[i+1], classifier.wald_Z[i+1], abs(classifier.P[i+1]), math.exp(classifier.beta[i+1])))
    +    for i in range(len(classifier.continuized_domain.features)):
    +        out.append(formatstr % (classifier.continuized_domain.features[i].name, classifier.beta[i+1], classifier.beta_se[i+1], classifier.wald_Z[i+1], abs(classifier.P[i+1]), math.exp(classifier.beta[i+1])))
     
         return '\n'.join(out)
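
    Note: the module docstring removed above (its reference documentation
    apparently moves under docs/reference/rst, judging by the .hgignore hunk)
    defines the model as F = beta_0 + beta_1*X_1 + ... + beta_k*X_k with
    p = exp(F)/(1 + exp(F)). A quick numeric illustration of that formula,
    with made-up coefficients in the spirit of the titanic output::

        import math

        def logistic_p(betas, x):
            # p = exp(F) / (1 + exp(F)),  F = beta_0 + sum(beta_i * x_i)
            F = betas[0] + sum(b * xi for b, xi in zip(betas[1:], x))
            return math.exp(F) / (1 + math.exp(F))

        # intercept, status=first, sex=female (indicator features)
        print logistic_p([-1.23, 0.86, 2.42], [1, 1])   # ~0.89
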
     
    @@ -308 +40 @@
     
     def has_discrete_values(domain):
    -    for at in domain.attributes:
    -        if at.varType == Orange.core.VarTypes.Discrete:
    -            return 1
    -    return 0
    +    """
    +    Return 1 if the given domain contains any discrete features, else 0.
    +
    +    :param domain: domain.
    +    :type domain: :class:`Orange.data.Domain`
    +    """
    +    return any(at.var_type == Orange.data.Type.Discrete
    +               for at in domain.features)
    +
     
     class LogRegLearner(Orange.classification.Learner):
         """ Logistic regression learner.
     
    -    Implements logistic regression. If data instances are provided to
    +    If data instances are provided to
         the constructor, the learning algorithm is called and the resulting
         classifier is returned instead of the learner.
     
    -    :param table: data table with either discrete or continuous features
    -    :type table: Orange.data.Table
    -    :param weightID: the ID of the weight meta attribute
    -    :type weightID: int
    -    :param removeSingular: set to 1 if you want automatic removal of disturbing features, such as constants and singularities
    -    :type removeSingular: bool
    -    :param fitter: the fitting algorithm (by default the Newton-Raphson fitting algorithm is used)
    -    :param stepwiseLR: set to 1 if you wish to use stepwise logistic regression
    -    :type stepwiseLR: bool
    -    :param addCrit: parameter for stepwise feature selection
    -    :type addCrit: float
    -    :param deleteCrit: parameter for stepwise feature selection
    -    :type deleteCrit: float
    -    :param numFeatures: parameter for stepwise feature selection
    -    :type numFeatures: int
    +    :param instances: data table with either discrete or continuous features
    +    :type instances: Orange.data.Table
    +    :param weight_id: the ID of the weight meta attribute
    +    :type weight_id: int
    +    :param remove_singular: set to 1 if you want automatic removal of
    +        disturbing features, such as constants and singularities
    +    :type remove_singular: bool
    +    :param fitter: the fitting algorithm (by default the Newton-Raphson
    +        fitting algorithm is used)
    +    :param stepwise_lr: set to 1 if you wish to use stepwise logistic
    +        regression
    +    :type stepwise_lr: bool
    +    :param add_crit: parameter for stepwise feature selection
    +    :type add_crit: float
    +    :param delete_crit: parameter for stepwise feature selection
    +    :type delete_crit: float
    +    :param num_features: parameter for stepwise feature selection
    +    :type num_features: int
         :rtype: :obj:`LogRegLearner` or :obj:`LogRegClassifier`
     
         """
    -    def __new__(cls, instances=None, weightID=0, **argkw):
    +
    +    @deprecated_keywords({"weightID": "weight_id"})
    +    def __new__(cls, instances=None, weight_id=0, **argkw):
             self = Orange.classification.Learner.__new__(cls, **argkw)
             if instances:
                 self.__init__(**argkw)
    -            return self.__call__(instances, weightID)
    +            return self.__call__(instances, weight_id)
             else:
                 return self
     
    -    def __init__(self, removeSingular=0, fitter = None, **kwds):
    +    @deprecated_keywords({"removeSingular": "remove_singular"})
    +    def __init__(self, remove_singular=0, fitter = None, **kwds):
             self.__dict__.update(kwds)
    -        self.removeSingular = removeSingular
    +        self.remove_singular = remove_singular
             self.fitter = None
     
    -    def __call__(self, examples, weight=0):
    +    @deprecated_keywords({"examples": "instances"})
    +    def __call__(self, instances, weight=0):
    +        """Learn from the given table of data instances.
    +
    +        :param instances: Data instances to learn from.
    +        :type instances: :class:`~Orange.data.Table`
    +        :param weight: Id of meta attribute with weights of instances
    +        :type weight: int
    +        :rtype: :class:`~Orange.classification.logreg.LogRegClassifier`
    +        """
             imputer = getattr(self, "imputer", None) or None
    -        if getattr(self, "removeMissing", 0):
    -            examples = Orange.core.Preprocessor_dropMissing(examples)
    +        if getattr(self, "remove_missing", 0):
    +            instances = Orange.core.Preprocessor_dropMissing(instances)
     ##        if hasDiscreteValues(examples.domain):
     ##            examples = createNoDiscTable(examples)
    -        if not len(examples):
    +        if not len(instances):
                 return None
    -        if getattr(self, "stepwiseLR", 0):
    -            addCrit = getattr(self, "addCrit", 0.2)
    -            removeCrit = getattr(self, "removeCrit", 0.3)
    -            numFeatures = getattr(self, "numFeatures", -1)
    -            attributes = StepWiseFSS(examples, addCrit = addCrit, deleteCrit = removeCrit, imputer = imputer, numFeatures = numFeatures)
    -            tmpDomain = Orange.core.Domain(attributes, examples.domain.classVar)
    -            tmpDomain.addmetas(examples.domain.getmetas())
    -            examples = examples.select(tmpDomain)
    -        learner = Orange.core.LogRegLearner()
    -        learner.imputerConstructor = imputer
    +        if getattr(self, "stepwise_lr", 0):
    +            add_crit = getattr(self, "add_crit", 0.2)
    +            delete_crit = getattr(self, "delete_crit", 0.3)
    +            num_features = getattr(self, "num_features", -1)
    +            attributes = StepWiseFSS(instances, add_crit= add_crit,
    +                delete_crit=delete_crit, imputer = imputer, num_features= num_features)
    +            tmp_domain = Orange.data.Domain(attributes,
    +                instances.domain.class_var)
    +            tmp_domain.addmetas(instances.domain.getmetas())
    +            instances = instances.select(tmp_domain)
    +        learner = Orange.core.LogRegLearner() # Yes, it has to be from core.
    +        learner.imputer_constructor = imputer
             if imputer:
    -            examples = self.imputer(examples)(examples)
    -        examples = Orange.core.Preprocessor_dropMissing(examples)
    +            instances = self.imputer(instances)(instances)
    +        instances = Orange.core.Preprocessor_dropMissing(instances)
             if self.fitter:
                 learner.fitter = self.fitter
    -        if self.removeSingular:
    -            lr = learner.fitModel(examples, weight)
    +        if self.remove_singular:
    +            lr = learner.fit_model(instances, weight)
             else:
    -            lr = learner(examples, weight)
    -        while isinstance(lr, Orange.core.Variable):
    +            lr = learner(instances, weight)
    +        while isinstance(lr, Orange.data.variable.Variable):
                 if isinstance(lr.getValueFrom, Orange.core.ClassifierFromVar) and isinstance(lr.getValueFrom.transformer, Orange.core.Discrete2Continuous):
                     lr = lr.getValueFrom.variable
    -            attributes = examples.domain.attributes[:]
    +            attributes = instances.domain.features[:]
                 if lr in attributes:
                     attributes.remove(lr)
                 else:
                     attributes.remove(lr.getValueFrom.variable)
    -            newDomain = Orange.core.Domain(attributes, examples.domain.classVar)
    -            newDomain.addmetas(examples.domain.getmetas())
    -            examples = examples.select(newDomain)
    -            lr = learner.fitModel(examples, weight)
    +            new_domain = Orange.data.Domain(attributes,
    +                instances.domain.class_var)
    +            new_domain.addmetas(instances.domain.getmetas())
    +            instances = instances.select(new_domain)
    +            lr = learner.fit_model(instances, weight)
             return lr
     
    -
    +LogRegLearner = deprecated_members({"removeSingular": "remove_singular",
    +                                    "weightID": "weight_id",
    +                                    "stepwiseLR": "stepwise_lr",
    +                                    "addCrit": "add_crit",
    +                                    "deleteCrit": "delete_crit",
    +                                    "numFeatures": "num_features",
    +                                    "removeMissing": "remove_missing"
    +                                    })(LogRegLearner)
     
     class UnivariateLogRegLearner(Orange.classification.Learner):
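
    Note: the renames in this file all go through deprecated_keywords /
    deprecated_members from Orange.misc. A minimal sketch of how such a
    keyword-renaming decorator can work (an illustration of the idea, not
    Orange.misc's actual code)::

        import functools
        import warnings

        def deprecated_keywords(renames):
            # Map legacy keyword-argument names to their new names,
            # warning on each use of an old name.
            def decorator(func):
                @functools.wraps(func)
                def wrapper(*args, **kwargs):
                    for old, new in renames.items():
                        if old in kwargs:
                            warnings.warn("%s is deprecated, use %s" % (old, new),
                                          DeprecationWarning, stacklevel=2)
                            kwargs[new] = kwargs.pop(old)
                    return func(*args, **kwargs)
                return wrapper
            return decorator

        @deprecated_keywords({"removeSingular": "remove_singular"})
        def make_learner(remove_singular=0):
            return remove_singular

        print make_learner(removeSingular=1)   # warns, then prints 1
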
     
    @@ -406 +168 @@
             self.__dict__.update(kwds)
     
    -    def __call__(self, examples):
    -        examples = createFullNoDiscTable(examples)
    -        classifiers = map(lambda x: LogRegLearner(Orange.core.Preprocessor_dropMissing(examples.select(Orange.core.Domain(x, examples.domain.classVar)))), examples.domain.attributes)
    -        maj_classifier = LogRegLearner(Orange.core.Preprocessor_dropMissing(examples.select(Orange.core.Domain(examples.domain.classVar))))
    +    @deprecated_keywords({"examples": "instances"})
    +    def __call__(self, instances):
    +        instances = createFullNoDiscTable(instances)
    +        classifiers = map(lambda x: LogRegLearner(Orange.core.Preprocessor_dropMissing(
    +            instances.select(Orange.data.Domain(x,
    +            instances.domain.class_var)))), instances.domain.features)
    +        maj_classifier = LogRegLearner(Orange.core.Preprocessor_dropMissing
    +            (instances.select(Orange.data.Domain(instances.domain.class_var))))
             beta = [maj_classifier.beta[0]] + [x.beta[1] for x in classifiers]
             beta_se = [maj_classifier.beta_se[0]] + [x.beta_se[1] for x in classifiers]
             P = [maj_classifier.P[0]] + [x.P[1] for x in classifiers]
             wald_Z = [maj_classifier.wald_Z[0]] + [x.wald_Z[1] for x in classifiers]
    -        domain = examples.domain
    +        domain = instances.domain
     
             return Univariate_LogRegClassifier(beta = beta, beta_se = beta_se, P = P, wald_Z = wald_Z, domain = domain)
     
    -class UnivariateLogRegClassifier(Orange.core.Classifier):
    +class UnivariateLogRegClassifier(Orange.classification.Classifier):
         def __init__(self, **kwds):
             self.__dict__.update(kwds)
     
    -    def __call__(self, example, resultType = Orange.core.GetValue):
    +    def __call__(self, instance, resultType = Orange.classification.Classifier.GetValue):
             # classification not implemented yet. For now its use is only to provide regression coefficients and its statistics
             pass
     
    @@ -429 +195 @@
     class LogRegLearnerGetPriors(object):
         def __new__(cls, instances=None, weightID=0, **argkw):
    -        self = object.__new__(cls, **argkw)
    +        self = object.__new__(cls)
             if instances:
                 self.__init__(**argkw)

    @@ -436 +202 @@
                 return self
     
    -    def __init__(self, removeSingular=0, **kwds):
    +    @deprecated_keywords({"removeSingular": "remove_singular"})
    +    def __init__(self, remove_singular=0, **kwds):
             self.__dict__.update(kwds)
    -        self.removeSingular = removeSingular
    -    def __call__(self, examples, weight=0):
    +        self.remove_singular = remove_singular
    +
    +    @deprecated_keywords({"examples": "instances"})
    +    def __call__(self, instances, weight=0):
             # next function changes data set to a extended with unknown values
    -        def createLogRegExampleTable(data, weightID):
    -            setsOfData = []
    -            for at in data.domain.attributes:
    -                # za vsak atribut kreiraj nov newExampleTable newData
    -                # v dataOrig, dataFinal in newData dodaj nov atribut -- continuous variable
    -                if at.varType == Orange.core.VarTypes.Continuous:
    -                    atDisc = Orange.core.FloatVariable(at.name + "Disc")
    -                    newDomain = Orange.core.Domain(data.domain.attributes+[atDisc,data.domain.classVar])
    -                    newDomain.addmetas(data.domain.getmetas())
    -                    newData = Orange.core.ExampleTable(newDomain,data)
    -                    altData = Orange.core.ExampleTable(newDomain,data)
    -                    for i,d in enumerate(newData):
    -                        d[atDisc] = 0
    -                        d[weightID] = 1*data[i][weightID]
    -                    for i,d in enumerate(altData):
    -                        d[atDisc] = 1
    +        def createLogRegExampleTable(data, weight_id):
    +            sets_of_data = []
    +            for at in data.domain.features:
    +                # za vsak atribut kreiraj nov newExampleTable new_data
    +                # v dataOrig, dataFinal in new_data dodaj nov atribut -- continuous variable
    +                if at.var_type == Orange.data.Type.Continuous:
    +                    at_disc = Orange.data.variable.Continuous(at.name+ "Disc")
    +                    new_domain = Orange.data.Domain(data.domain.features+[at_disc,data.domain.class_var])
    +                    new_domain.addmetas(data.domain.getmetas())
    +                    new_data = Orange.data.Table(new_domain,data)
    +                    alt_data = Orange.data.Table(new_domain,data)
    +                    for i,d in enumerate(new_data):
    +                        d[at_disc] = 0
    +                        d[weight_id] = 1*data[i][weight_id]
    +                    for i,d in enumerate(alt_data):
    +                        d[at_disc] = 1
                             d[at] = 0
    -                        d[weightID] = 0.000001*data[i][weightID]
    -                elif at.varType == Orange.core.VarTypes.Discrete:
    -                # v dataOrig, dataFinal in newData atributu "at" dodaj ee  eno  vreednost, ki ima vrednost kar  ime atributa +  "X"
    -                    atNew = Orange.core.EnumVariable(at.name, values = at.values + [at.name+"X"])
    -                    newDomain = Orange.core.Domain(filter(lambda x: x!=at, data.domain.attributes)+[atNew,data.domain.classVar])
    -                    newDomain.addmetas(data.domain.getmetas())
    -                    newData = Orange.core.ExampleTable(newDomain,data)
    -                    altData = Orange.core.ExampleTable(newDomain,data)
    -                    for i,d in enumerate(newData):
    -                        d[atNew] = data[i][at]
    -                        d[weightID] = 1*data[i][weightID]
    -                    for i,d in enumerate(altData):
    -                        d[atNew] = at.name+"X"
    -                        d[weightID] = 0.000001*data[i][weightID]
    -                newData.extend(altData)
    -                setsOfData.append(newData)
    -            return setsOfData
    +                        d[weight_id] = 0.000001*data[i][weight_id]
    +                elif at.var_type == Orange.data.Type.Discrete:
    +                # v dataOrig, dataFinal in new_data atributu "at" dodaj ee  eno  vreednost, ki ima vrednost kar  ime atributa +  "X"
    +                    at_new = Orange.data.variable.Discrete(at.name, values = at.values + [at.name+"X"])
    +                    new_domain = Orange.data.Domain(filter(lambda x: x!=at, data.domain.features)+[at_new,data.domain.class_var])
    +                    new_domain.addmetas(data.domain.getmetas())
    +                    new_data = Orange.data.Table(new_domain,data)
    +                    alt_data = Orange.data.Table(new_domain,data)
    +                    for i,d in enumerate(new_data):
    +                        d[at_new] = data[i][at]
    +                        d[weight_id] = 1*data[i][weight_id]
    +                    for i,d in enumerate(alt_data):
    +                        d[at_new] = at.name+"X"
    +                        d[weight_id] = 0.000001*data[i][weight_id]
    +                new_data.extend(alt_data)
    +                sets_of_data.append(new_data)
    +            return sets_of_data
     
    -        learner = LogRegLearner(imputer = Orange.core.ImputerConstructor_average(), removeSingular = self.removeSingular)
    +        learner = LogRegLearner(imputer=Orange.feature.imputation.ImputerConstructor_average(),
    +            remove_singular = self.remove_singular)
             # get Original Model
    -        orig_model = learner(examples,weight)
    +        orig_model = learner(instances,weight)
             if orig_model.fit_status:
                 print "Warning: model did not converge"

    @@ -485 +255 @@
             if weight == 0:
                 weight = Orange.data.new_meta_id()
    -            examples.addMetaAttribute(weight, 1.0)
    -        extended_set_of_examples = createLogRegExampleTable(examples, weight)
    +            instances.addMetaAttribute(weight, 1.0)
    +        extended_set_of_examples = createLogRegExampleTable(instances, weight)
             extended_models = [learner(extended_examples, weight) \
                                for extended_examples in extended_set_of_examples]

    @@ -494 +264 @@
     ##        print orig_model.domain
     ##        print orig_model.beta
    -##        print orig_model.beta[orig_model.continuizedDomain.attributes[-1]]
    +##        print orig_model.beta[orig_model.continuized_domain.features[-1]]
     ##        for i,m in enumerate(extended_models):
    -##            print examples.domain.attributes[i]
    +##            print examples.domain.features[i]
     ##            printOUT(m)
     

    @@ -505 +275 @@
             betas_ap = []
             for m in extended_models:
    -            beta_add = m.beta[m.continuizedDomain.attributes[-1]]
    +            beta_add = m.beta[m.continuized_domain.features[-1]]
                 betas_ap.append(beta_add)
                 beta = beta + beta_add

    @@ -514 +284 @@
     
             # compare it to bayes prior
    -        bayes = Orange.core.BayesLearner(examples)
    +        bayes = Orange.classification.bayes.NaiveLearner(instances)
             bayes_prior = math.log(bayes.distribution[1]/bayes.distribution[0])

    @@ -521 +291 @@
     ##        print "lr", orig_model.beta[0]
     ##        print "lr2", logistic_prior
    -##        print "dist", Orange.core.Distribution(examples.domain.classVar,examples)
    +##        print "dist", Orange.statistics.distribution.Distribution(examples.domain.class_var,examples)
     ##        print "prej", betas_ap

    @@ -544 +314 @@
             # vrni originalni model in pripadajoce apriorne niclele
             return (orig_model, betas_ap)
    -        #return (bayes_prior,orig_model.beta[examples.domain.classVar],logistic_prior)
    +        #return (bayes_prior,orig_model.beta[examples.domain.class_var],logistic_prior)
    +
    +LogRegLearnerGetPriors = deprecated_members({"removeSingular":
    +                                                 "remove_singular"}
    +)(LogRegLearnerGetPriors)
     
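
    Note: both prior-extraction classes compare the fitted intercept with a
    naive Bayes prior expressed as log odds,
    bayes_prior = log(P(class1)/P(class0)). Standalone, that quantity is
    simply::

        import math

        def log_odds(n0, n1):
            # Log odds of class 1 vs class 0 from class counts.
            return math.log(float(n1) / n0)

        # e.g. roughly the titanic class balance from the docs above:
        print log_odds(1490, 711)   # ~ -0.74
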
    @@ -548 +322 @@
     class LogRegLearnerGetPriorsOneTable:
    -    def __init__(self, removeSingular=0, **kwds):
    +    @deprecated_keywords({"removeSingular": "remove_singular"})
    +    def __init__(self, remove_singular=0, **kwds):
             self.__dict__.update(kwds)
    -        self.removeSingular = removeSingular
    -    def __call__(self, examples, weight=0):
    +        self.remove_singular = remove_singular
    +
    +    @deprecated_keywords({"examples": "instances"})
    +    def __call__(self, instances, weight=0):
             # next function changes data set to a extended with unknown values
             def createLogRegExampleTable(data, weightID):
    -            finalData = Orange.core.ExampleTable(data)
    -            origData = Orange.core.ExampleTable(data)
    -            for at in data.domain.attributes:
    +            finalData = Orange.data.Table(data)
    +            orig_data = Orange.data.Table(data)
    +            for at in data.domain.features:
                     # za vsak atribut kreiraj nov newExampleTable newData
                     # v dataOrig, dataFinal in newData dodaj nov atribut -- continuous variable
    -                if at.varType == Orange.core.VarTypes.Continuous:
    -                    atDisc = Orange.core.FloatVariable(at.name + "Disc")
    -                    newDomain = Orange.core.Domain(origData.domain.attributes+[atDisc,data.domain.classVar])
    +                if at.var_type == Orange.data.Type.Continuous:
    +                    atDisc = Orange.data.variable.Continuous(at.name + "Disc")
    +                    newDomain = Orange.data.Domain(orig_data.domain.features+[atDisc,data.domain.class_var])
                         newDomain.addmetas(newData.domain.getmetas())
    -                    finalData = Orange.core.ExampleTable(newDomain,finalData)
    -                    newData = Orange.core.ExampleTable(newDomain,origData)
    -                    origData = Orange.core.ExampleTable(newDomain,origData)
    -                    for d in origData:
    +                    finalData = Orange.data.Table(newDomain,finalData)
    +                    newData = Orange.data.Table(newDomain,orig_data)
    +                    orig_data = Orange.data.Table(newDomain,orig_data)
    +                    for d in orig_data:
                             d[atDisc] = 0
                         for d in finalData:

    @@ -574 +351 @@
                             d[weightID] = 100*data[i][weightID]
     
    -                elif at.varType == Orange.core.VarTypes.Discrete:
    +                elif at.var_type == Orange.data.Type.Discrete:
                     # v dataOrig, dataFinal in newData atributu "at" dodaj ee  eno  vreednost, ki ima vrednost kar  ime atributa +  "X"
    -                    atNew = Orange.core.EnumVariable(at.name, values = at.values + [at.name+"X"])
    -                    newDomain = Orange.core.Domain(filter(lambda x: x!=at, origData.domain.attributes)+[atNew,origData.domain.classVar])
    -                    newDomain.addmetas(origData.domain.getmetas())
    -                    temp_finalData = Orange.core.ExampleTable(finalData)
    -                    finalData = Orange.core.ExampleTable(newDomain,finalData)
    -                    newData = Orange.core.ExampleTable(newDomain,origData)
    -                    temp_origData = Orange.core.ExampleTable(origData)
    -                    origData = Orange.core.ExampleTable(newDomain,origData)
    -                    for i,d in enumerate(origData):
    -                        d[atNew] = temp_origData[i][at]
    +                    at_new = Orange.data.variable.Discrete(at.name, values = at.values + [at.name+"X"])
    +                    newDomain = Orange.data.Domain(filter(lambda x: x!=at, orig_data.domain.features)+[at_new,orig_data.domain.class_var])
    +                    newDomain.addmetas(orig_data.domain.getmetas())
    +                    temp_finalData = Orange.data.Table(finalData)
    +                    finalData = Orange.data.Table(newDomain,finalData)
    +                    newData = Orange.data.Table(newDomain,orig_data)
    +                    temp_origData = Orange.data.Table(orig_data)
    +                    orig_data = Orange.data.Table(newDomain,orig_data)
    +                    for i,d in enumerate(orig_data):
    +                        d[at_new] = temp_origData[i][at]
                         for i,d in enumerate(finalData):
    -                        d[atNew] = temp_finalData[i][at]
    +                        d[at_new] = temp_finalData[i][at]
                         for i,d in enumerate(newData):
    -                        d[atNew] = at.name+"X"
    +                        d[at_new] = at.name+"X"
                             d[weightID] = 10*data[i][weightID]
                     finalData.extend(newData)
                 return finalData
     
    -        learner = LogRegLearner(imputer = Orange.core.ImputerConstructor_average(), removeSingular = self.removeSingular)
    +        learner = LogRegLearner(imputer = Orange.feature.imputation.ImputerConstructor_average(), removeSingular = self.remove_singular)
             # get Original Model
    -        orig_model = learner(examples,weight)
    +        orig_model = learner(instances,weight)
     
             # get extended Model (you should not change data)
             if weight == 0:
                 weight = Orange.data.new_meta_id()
    -            examples.addMetaAttribute(weight, 1.0)
    -        extended_examples = createLogRegExampleTable(examples, weight)
    +            instances.addMetaAttribute(weight, 1.0)
    +        extended_examples = createLogRegExampleTable(instances, weight)
             extended_model = learner(extended_examples, weight)
     

    @@ -616 +393 @@
             betas_ap = []
             for m in extended_models:
    -            beta_add = m.beta[m.continuizedDomain.attributes[-1]]
    +            beta_add = m.beta[m.continuized_domain.features[-1]]
                 betas_ap.append(beta_add)
                 beta = beta + beta_add

    @@ -625 +402 @@
     
             # compare it to bayes prior
    -        bayes = Orange.core.BayesLearner(examples)
    +        bayes = Orange.classification.bayes.NaiveLearner(instances)
             bayes_prior = math.log(bayes.distribution[1]/bayes.distribution[0])
     

    @@ -632 +409 @@
             #print "lr", orig_model.beta[0]
             #print "lr2", logistic_prior
    -        #print "dist", Orange.core.Distribution(examples.domain.classVar,examples)
    +        #print "dist", Orange.statistics.distribution.Distribution(examples.domain.class_var,examples)
             k = (bayes_prior-orig_model.beta[0])/(logistic_prior-orig_model.beta[0])
             #print "prej", betas_ap

    @@ -640 +417 @@
             # vrni originalni model in pripadajoce apriorne niclele
             return (orig_model, betas_ap)
    -        #return (bayes_prior,orig_model.beta[data.domain.classVar],logistic_prior)
    +        #return (bayes_prior,orig_model.beta[data.domain.class_var],logistic_prior)
    +
    +LogRegLearnerGetPriorsOneTable = deprecated_members({"removeSingular":
    +                                                         "remove_singular"}
    +)(LogRegLearnerGetPriorsOneTable)
     
     
     
    @@ -655 +436 @@
         for i,x_i in enumerate(x):
             pr = pr(x_i,betas)
    -        llh += y[i]*log(max(pr,1e-6)) + (1-y[i])*log(max(1-pr,1e-6))
    +        llh += y[i]*math.log(max(pr,1e-6)) + (1-y[i])*log(max(1-pr,1e-6))
         return llh
     
     
     def diag(vector):
    -    mat = identity(len(vector), Float)
    +    mat = identity(len(vector))
         for i,v in enumerate(vector):
             mat[i][i] = v
         return mat
     
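
    Note: the replacement line in lh() above converts only the first log call
    to math.log; the second one is still the bare name, which will raise a
    NameError now that `from numpy import *` is gone. Presumably it should
    read::

        llh += y[i]*math.log(max(pr,1e-6)) + (1-y[i])*math.log(max(1-pr,1e-6))
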
    @@ -667 +448 @@
    -class SimpleFitter(Orange.core.LogRegFitter):
    +class SimpleFitter(LogRegFitter):
         def __init__(self, penalty=0, se_penalty = False):
             self.penalty = penalty
             self.se_penalty = se_penalty
    +
         def __call__(self, data, weight=0):
             ml = data.native(0)
    -        for i in range(len(data.domain.attributes)):
    -          a = data.domain.attributes[i]
    -          if a.varType == Orange.core.VarTypes.Discrete:
    +        for i in range(len(data.domain.features)):
    +          a = data.domain.features[i]
    +          if a.var_type == Orange.data.Type.Discrete:
                 for m in ml:
                   m[i] = a.values.index(m[i])
             for m in ml:
    -          m[-1] = data.domain.classVar.values.index(m[-1])
    +          m[-1] = data.domain.class_var.values.index(m[-1])
             Xtmp = array(ml)
             y = Xtmp[:,-1]   # true probabilities (1's or 0's)

    @@ -683 +465 @@
             X=concatenate((one, Xtmp[:,:-1]),1)  # intercept first, then data
     
    -        betas = array([0.0] * (len(data.domain.attributes)+1))
    -        oldBetas = array([1.0] * (len(data.domain.attributes)+1))
    +        betas = array([0.0] * (len(data.domain.features)+1))
    +        oldBetas = array([1.0] * (len(data.domain.features)+1))
             N = len(data)
     
    -        pen_matrix = array([self.penalty] * (len(data.domain.attributes)+1))
    +        pen_matrix = array([self.penalty] * (len(data.domain.features)+1))
             if self.se_penalty:
                 p = array([pr(X[i], betas) for i in range(len(data))])
    -            W = identity(len(data), Float)
    +            W = identity(len(data))
                 pp = p * (1.0-p)
                 for i in range(N):
                     W[i,i] = pp[i]
    -            se = sqrt(diagonal(inverse(matrixmultiply(transpose(X), matrixmultiply(W, X)))))
    +            se = sqrt(diagonal(inv(dot(transpose(X), dot(W, X)))))
                 for i,p in enumerate(pen_matrix):
                     pen_matrix[i] *= se[i]

    @@ -706 +488 @@
                 p = array([pr(X[i], betas) for i in range(len(data))])
     
    -            W = identity(len(data), Float)
    +            W = identity(len(data))
                 pp = p * (1.0-p)
                 for i in range(N):
                     W[i,i] = pp[i]
     
    -            WI = inverse(W)
    -            z = matrixmultiply(X, betas) + matrixmultiply(WI, y - p)
    -
    -            tmpA = inverse(matrixmultiply(transpose(X), matrixmultiply(W, X))+diag(pen_matrix))
    -            tmpB = matrixmultiply(transpose(X), y-p)
    -            betas = oldBetas + matrixmultiply(tmpA,tmpB)
    -#            betaTemp = matrixmultiply(matrixmultiply(matrixmultiply(matrixmultiply(tmpA,transpose(X)),W),X),oldBetas)
    +            WI = inv(W)
    +            z = dot(X, betas) + dot(WI, y - p)
    +
    +            tmpA = inv(dot(transpose(X), dot(W, X))+diag(pen_matrix))
    +            tmpB = dot(transpose(X), y-p)
    +            betas = oldBetas + dot(tmpA,tmpB)
    +#            betaTemp = dot(dot(dot(dot(tmpA,transpose(X)),W),X),oldBetas)
     #            print betaTemp
    -#            tmpB = matrixmultiply(transpose(X), matrixmultiply(W, z))
    -#            betas = matrixmultiply(tmpA, tmpB)
    +#            tmpB = dot(transpose(X), dot(W, z))
    +#            betas = dot(tmpA, tmpB)
                 likelihood_new = lh(X,y,betas)-self.penalty*sum([b*b for b in betas])
                 print likelihood_new

    @@ -726 +508 @@
     
     
    -##        XX = sqrt(diagonal(inverse(matrixmultiply(transpose(X),X))))
    +##        XX = sqrt(diagonal(inv(dot(transpose(X),X))))
     ##        yhat = array([pr(X[i], betas) for i in range(len(data))])
    -##        ss = sum((y - yhat) ** 2) / (N - len(data.domain.attributes) - 1)
    +##        ss = sum((y - yhat) ** 2) / (N - len(data.domain.features) - 1)
     ##        sigma = math.sqrt(ss)
             p = array([pr(X[i], betas) for i in range(len(data))])
    -        W = identity(len(data), Float)
    +        W = identity(len(data))
             pp = p * (1.0-p)
             for i in range(N):
                 W[i,i] = pp[i]
    -        diXWX = sqrt(diagonal(inverse(matrixmultiply(transpose(X), matrixmultiply(W, X)))))
    -        xTemp = matrixmultiply(matrixmultiply(inverse(matrixmultiply(transpose(X), matrixmultiply(W, X))),transpose(X)),y)
    +        diXWX = sqrt(diagonal(inv(dot(transpose(X), dot(W, X)))))
    +        xTemp = dot(dot(inv(dot(transpose(X), dot(W, X))),transpose(X)),y)
             beta = []
             beta_se = []
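
    Note: the loop above is an iteratively reweighted least squares
    (Newton-Raphson) update, beta <- beta + (X'WX)^-1 X'(y - p) with
    W = diag(p(1-p)), now spelled with numpy's dot/inv instead of the old
    Numeric names. A compact sketch of one such step on toy data::

        import numpy as np

        def irls_step(X, y, betas):
            # One Newton-Raphson update, mirroring tmpA/tmpB above.
            p = 1.0 / (1.0 + np.exp(-X.dot(betas)))
            W = np.diag(p * (1.0 - p))
            tmpA = np.linalg.inv(X.T.dot(W).dot(X))
            tmpB = X.T.dot(y - p)
            return betas + tmpA.dot(tmpB)

        # Intercept column plus one feature; the noisy labels keep the
        # optimum finite.
        X = np.array([[1., 0.], [1., 1.], [1., 2.], [1., 3.]])
        y = np.array([0., 1., 0., 1.])
        betas = np.zeros(2)
        for _ in range(5):
            betas = irls_step(X, y, betas)
        print betas
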
     
    @@ -752 +534 @@
         return exp(bx)/(1+exp(bx))
     
    -class BayesianFitter(Orange.core.LogRegFitter):
    +class BayesianFitter(LogRegFitter):
         def __init__(self, penalty=0, anch_examples=[], tau = 0):
             self.penalty = penalty

    @@ -763 +545 @@
             # convert data to numeric
             ml = data.native(0)
    -        for i,a in enumerate(data.domain.attributes):
    -          if a.varType == Orange.core.VarTypes.Discrete:
    +        for i,a in enumerate(data.domain.features):
    +          if a.var_type == Orange.data.Type.Discrete:
                 for m in ml:
                   m[i] = a.values.index(m[i])
             for m in ml:
    -          m[-1] = data.domain.classVar.values.index(m[-1])
    +          m[-1] = data.domain.class_var.values.index(m[-1])
             Xtmp = array(ml)
             y = Xtmp[:,-1]   # true probabilities (1's or 0's)

    @@ -778 +560 @@
             (X,y)=self.create_array_data(data)
     
    -        exTable = Orange.core.ExampleTable(data.domain)
    +        exTable = Orange.data.Table(data.domain)
             for id,ex in self.anch_examples:
    -            exTable.extend(Orange.core.ExampleTable(ex,data.domain))
    +            exTable.extend(Orange.data.Table(ex,data.domain))
             (X_anch,y_anch)=self.create_array_data(exTable)
     
    -        betas = array([0.0] * (len(data.domain.attributes)+1))
    +        betas = array([0.0] * (len(data.domain.features)+1))
     
             likelihood,betas = self.estimate_beta(X,y,betas,[0]*(len(betas)),X_anch,y_anch)
     
             # get attribute groups atGroup = [(startIndex, number of values), ...)
    -        ats = data.domain.attributes
    +        ats = data.domain.features
             atVec=reduce(lambda x,y: x+[(y,not y==x[-1][0])], [a.getValueFrom and a.getValueFrom.whichVar or a for a in ats],[(ats[0].getValueFrom and ats[0].getValueFrom.whichVar or ats[0],0)])[1:]
             atGroup=[[0,0]]

    @@ -808 +590 @@
                 print "betas", betas[0], betas_temp[0]
                 sumB += betas[0]-betas_temp[0]
    -        apriori = Orange.core.Distribution(data.domain.classVar, data)
    +        apriori = Orange.statistics.distribution.Distribution(data.domain.class_var, data)
             aprioriProb = apriori[0]/apriori.abs
     

    @@ -839 +621 @@
                 for j in range(len(betas)):
                     if const_betas[j]: continue
    -                dl = matrixmultiply(X[:,j],transpose(y-p))
    +                dl = dot(X[:,j], transpose(y-p))
                     for xi,x in enumerate(X_anch):
                         dl += self.penalty*x[j]*(y_anch[xi] - pr_bx(r_anch[xi]*self.penalty))
     
    -                ddl = matrixmultiply(X_sq[:,j],transpose(p*(1-p)))
    +                ddl = dot(X_sq[:,j], transpose(p*(1-p)))
                     for xi,x in enumerate(X_anch):
                         ddl += self.penalty*x[j]*pr_bx(r[xi]*self.penalty)*(1-pr_bx(r[xi]*self.penalty))
     
    887669#  Feature subset selection for logistic regression 
    888670 
    889 def get_likelihood(fitter, examples): 
    890     res = fitter(examples) 
     671@deprecated_keywords({"examples": "instances"}) 
     672def get_likelihood(fitter, instances): 
     673    res = fitter(instances) 
    891674    if res[0] in [fitter.OK]: #, fitter.Infinity, fitter.Divergence]: 
    892675       status, beta, beta_se, likelihood = res 
    893676       if sum([abs(b) for b in beta])<sum([abs(b) for b in beta_se]): 
    894            return -100*len(examples) 
     677           return -100*len(instances) 
    895678       return likelihood 
    896679    else: 
    897        return -100*len(examples) 
     680       return -100*len(instances) 
    898681         
    899682 
    900683 
    901684class StepWiseFSS(Orange.classification.Learner): 
    902   """Implementation of algorithm described in [Hosmer and Lemeshow, Applied Logistic Regression, 2000]. 
     685  """ 
     686  Algorithm described in Hosmer and Lemeshow, 
     687  Applied Logistic Regression, 2000. 
    903688 
    904689  Perform stepwise logistic regression and return a list of the 
     
    907692  chosen feature is tested for a significant contribution to the overall 
    908693  model. If the worst among all tested features has higher significance 
    909   than is specified in :obj:`deleteCrit`, the feature is removed from 
     694  than is specified in :obj:`delete_crit`, the feature is removed from 
    910695  the model. The second step is forward selection, which is similar to 
    911696  backward elimination. It loops through all the features that are not 
    912697  in the model and tests whether they contribute to the common model 
    913   with significance lower that :obj:`addCrit`. The algorithm stops when 
     698  with significance lower than :obj:`add_crit`. The algorithm stops when
    914699  no feature in the model is to be removed and no feature not in the 
    915   model is to be added. By setting :obj:`numFeatures` larger than -1, 
     700  model is to be added. By setting :obj:`num_features` larger than -1, 
    916701  the algorithm will stop its execution when the number of features in model 
    917702  exceeds that number. 
     
    923708  If :obj:`table` is specified, stepwise logistic regression implemented 
    924709  in :obj:`StepWiseFSS` is performed and a list of chosen features 
    925   is returned. If :obj:`table` is not specified an instance of 
    926   :obj:`StepWiseFSS` with all parameters set is returned. 
    927  
    928   :param table: data set 
     710  is returned. If :obj:`table` is not specified, an instance of 
     711  :obj:`StepWiseFSS` with all parameters set is returned and can be called 
     712  with data later. 
     713 
     714  :param table: data set. 
    929715  :type table: Orange.data.Table 
    930716 
    931   :param addCrit: "Alpha" level to judge if variable has enough importance to be added in the new set. (e.g. if addCrit is 0.2, then features is added if its P is lower than 0.2) 
    932   :type addCrit: float 
    933  
    934   :param deleteCrit: Similar to addCrit, just that it is used at backward elimination. It should be higher than addCrit! 
    935   :type deleteCrit: float 
    936  
    937   :param numFeatures: maximum number of selected features, use -1 for infinity. 
    938   :type numFeatures: int 
     717  :param add_crit: "Alpha" level used to judge whether a variable is
     718       important enough to be added to the new set (e.g. if add_crit is 0.2,
     719       then a feature is added if its P is lower than 0.2).
     720  :type add_crit: float 
     721 
     722  :param delete_crit: Similar to add_crit, except that it is used in
     723      backward elimination. It should be higher than add_crit!
     724  :type delete_crit: float 
     725 
     726  :param num_features: maximum number of selected features, 
     727      use -1 for infinity. 
     728  :type num_features: int 
    939729  :rtype: :obj:`StepWiseFSS` or list of features 
    940730 
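
A hedged usage sketch of the renamed class (assumed Python 2 era API; ``heart_disease.tab`` is a hypothetical data file, and the parameter values are the documented defaults)::

    import Orange
    data = Orange.data.Table("heart_disease.tab")
    # without data, StepWiseFSS returns a configured instance ...
    fss = StepWiseFSS(add_crit=0.2, delete_crit=0.3, num_features=-1)
    # ... which can later be called with data to get the chosen features
    chosen = fss(data)
    print [a.name for a in chosen]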
     
    949739          return self 
    950740 
    951  
    952   def __init__(self, addCrit=0.2, deleteCrit=0.3, numFeatures = -1, **kwds): 
     741  @deprecated_keywords({"addCrit": "add_crit", "deleteCrit": "delete_crit", 
     742                        "numFeatures": "num_features"}) 
     743  def __init__(self, add_crit=0.2, delete_crit=0.3, num_features = -1, **kwds): 
    953744    self.__dict__.update(kwds) 
    954     self.addCrit = addCrit 
    955     self.deleteCrit = deleteCrit 
    956     self.numFeatures = numFeatures 
     745    self.add_crit = add_crit 
     746    self.delete_crit = delete_crit 
     747    self.num_features = num_features 
     748 
    957749  def __call__(self, examples): 
    958750    if getattr(self, "imputer", 0): 
     
    960752    if getattr(self, "removeMissing", 0): 
    961753        examples = Orange.core.Preprocessor_dropMissing(examples) 
    962     continuizer = Orange.core.DomainContinuizer(zeroBased=1,continuousTreatment=Orange.core.DomainContinuizer.Leave, 
    963                                            multinomialTreatment = Orange.core.DomainContinuizer.FrequentIsBase, 
    964                                            classTreatment = Orange.core.DomainContinuizer.Ignore) 
     754    continuizer = Orange.preprocess.DomainContinuizer(zeroBased=1, 
     755        continuousTreatment=Orange.preprocess.DomainContinuizer.Leave, 
     756                                           multinomialTreatment = Orange.preprocess.DomainContinuizer.FrequentIsBase, 
     757                                           classTreatment = Orange.preprocess.DomainContinuizer.Ignore) 
    965758    attr = [] 
    966     remain_attr = examples.domain.attributes[:] 
     759    remain_attr = examples.domain.features[:] 
    967760 
    968761    # get LL for Majority Learner  
    969     tempDomain = Orange.core.Domain(attr,examples.domain.classVar) 
     762    tempDomain = Orange.data.Domain(attr,examples.domain.class_var) 
    970763    #tempData  = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain)) 
    971764    tempData  = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain)) 
    972765 
    973     ll_Old = get_likelihood(Orange.core.LogRegFitter_Cholesky(), tempData) 
     766    ll_Old = get_likelihood(LogRegFitter_Cholesky(), tempData) 
    974767    ll_Best = -1000000 
    975768    length_Old = float(len(tempData)) 
     
    989782 
    990783                tempAttr = filter(lambda x: x!=at, attr) 
    991                 tempDomain = Orange.core.Domain(tempAttr,examples.domain.classVar) 
     784                tempDomain = Orange.data.Domain(tempAttr,examples.domain.class_var) 
    992785                tempDomain.addmetas(examples.domain.getmetas()) 
    993786                # domain, calculate P for LL improvement. 
     
    995788                tempData = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain)) 
    996789 
    997                 ll_Delete = get_likelihood(Orange.core.LogRegFitter_Cholesky(), tempData) 
     790                ll_Delete = get_likelihood(LogRegFitter_Cholesky(), tempData) 
    998791                length_Delete = float(len(tempData)) 
    999792                length_Avg = (length_Delete + length_Old)/2.0 
     
    1001794                G=-2*length_Avg*(ll_Delete/length_Delete-ll_Old/length_Old) 
    1002795 
    1003                 # set new worst attribute                 
     796                # set new worst attribute 
    1004797                if G<minG: 
    1005798                    worstAt = at 
     
    1008801                    length_Best = length_Delete 
    1009802            # deletion of attribute 
    1010              
    1011             if worstAt.varType==Orange.core.VarTypes.Continuous: 
     803 
     804            if worstAt.var_type==Orange.data.Type.Continuous: 
    1012805                P=lchisqprob(minG,1); 
    1013806            else: 
    1014807                P=lchisqprob(minG,len(worstAt.values)-1); 
    1015             if P>=self.deleteCrit: 
     808            if P>=self.delete_crit: 
    1016809                attr.remove(worstAt) 
    1017810                remain_attr.append(worstAt) 
     
    1024817            nodeletion = 1 
    1025818            # END OF DELETION PART 
    1026              
     819 
    1027820        # if enough attributes have been chosen, stop the procedure
    1028         if self.numFeatures>-1 and len(attr)>=self.numFeatures: 
     821        if self.num_features>-1 and len(attr)>=self.num_features: 
    1029822            remain_attr=[] 
    1030           
     823 
    1031824        # for each attribute in the remaining 
    1032825        maxG=-1 
     
    1036829        for at in remain_attr: 
    1037830            tempAttr = attr + [at] 
    1038             tempDomain = Orange.core.Domain(tempAttr,examples.domain.classVar) 
     831            tempDomain = Orange.data.Domain(tempAttr,examples.domain.class_var) 
    1039832            tempDomain.addmetas(examples.domain.getmetas()) 
    1040833            # domain, calculate P for LL improvement. 
    1041834            tempDomain  = continuizer(Orange.core.Preprocessor_dropMissing(examples.select(tempDomain))) 
    1042835            tempData = Orange.core.Preprocessor_dropMissing(examples.select(tempDomain)) 
    1043             ll_New = get_likelihood(Orange.core.LogRegFitter_Cholesky(), tempData) 
     836            ll_New = get_likelihood(LogRegFitter_Cholesky(), tempData) 
    1044837 
    1045838            length_New = float(len(tempData)) # get number of examples in tempData to normalize likelihood 
     
    1056849            stop = 1 
    1057850            continue 
    1058          
    1059         if bestAt.varType==Orange.core.VarTypes.Continuous: 
     851 
     852        if bestAt.var_type==Orange.data.Type.Continuous: 
    1060853            P=lchisqprob(maxG,1); 
    1061854        else: 
    1062855            P=lchisqprob(maxG,len(bestAt.values)-1); 
    1063856        # Add attribute with smallest P to attributes(attr) 
    1064         if P<=self.addCrit: 
     857        if P<=self.add_crit: 
    1065858            attr.append(bestAt) 
    1066859            remain_attr.remove(bestAt) 
     
    1068861            length_Old = length_Best 
    1069862 
    1070         if (P>self.addCrit and nodeletion) or (bestAt == worstAt): 
     863        if (P>self.add_crit and nodeletion) or (bestAt == worstAt): 
    1071864            stop = 1 
    1072865 
    1073866    return attr 
     867 
     868StepWiseFSS = deprecated_members({"addCrit": "add_crit", 
     869                                   "deleteCrit": "delete_crit", 
     870                                   "numFeatures": "num_features"})(StepWiseFSS) 
    1074871 
    1075872 
    1076873class StepWiseFSSFilter(object): 
    1077874    def __new__(cls, instances=None, **argkw): 
    1078         self = object.__new__(cls, **argkw) 
     875        self = object.__new__(cls) 
    1079876        if instances: 
    1080877            self.__init__(**argkw) 
     
    1082879        else: 
    1083880            return self 
    1084      
    1085     def __init__(self, addCrit=0.2, deleteCrit=0.3, numFeatures = -1): 
    1086         self.addCrit = addCrit 
    1087         self.deleteCrit = deleteCrit 
    1088         self.numFeatures = numFeatures 
    1089  
    1090     def __call__(self, examples): 
    1091         attr = StepWiseFSS(examples, addCrit=self.addCrit, deleteCrit = self.deleteCrit, numFeatures = self.numFeatures) 
    1092         return examples.select(Orange.core.Domain(attr, examples.domain.classVar)) 
    1093                  
     881 
     882    @deprecated_keywords({"addCrit": "add_crit", "deleteCrit": "delete_crit", 
     883                          "numFeatures": "num_features"}) 
     884    def __init__(self, add_crit=0.2, delete_crit=0.3, num_features = -1): 
     885        self.add_crit = add_crit 
     886        self.delete_crit = delete_crit 
     887        self.num_features = num_features 
     888 
     889    @deprecated_keywords({"examples": "instances"}) 
     890    def __call__(self, instances): 
     891        attr = StepWiseFSS(instances, add_crit=self.add_crit, 
     892            delete_crit= self.delete_crit, num_features= self.num_features) 
     893        return instances.select(Orange.data.Domain(attr, instances.domain.class_var)) 
     894 
     895StepWiseFSSFilter = deprecated_members({"addCrit": "add_crit", 
     896                                        "deleteCrit": "delete_crit", 
     897                                        "numFeatures": "num_features"})\ 
     898    (StepWiseFSSFilter) 
     899 
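
A hedged sketch of the filter defined above in use (``data`` stands for any class-labeled ``Orange.data.Table``)::

    fss_filter = StepWiseFSSFilter(add_crit=0.2, delete_crit=0.3)
    reduced = fss_filter(data)  # table restricted to the selected features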
    1094900 
    1095901#################################### 
  • Orange/classification/rules.py

    r9738 r9857  
    11491149        self.rule_finder.rule_stoppingValidator = RuleValidator_LRS(alpha=1.0, min_quality=0., max_rule_complexity=max_rule_complexity - 1, min_coverage=min_coverage) 
    11501150        self.refiner = RuleBeamRefiner_Selector() 
    1151         self.refiner_arguments = SelectorAdder(discretizer=Orange.feature.discretization.EntropyDiscretization(forceAttribute=1, 
     1151        self.refiner_arguments = SelectorAdder(discretizer=Orange.feature.discretization.Entropy(forceAttribute=1, 
    11521152                                                                                           maxNumberOfIntervals=2)) 
    11531153        self.prune_arguments = prune_arguments 
     
    22212221    """ 
    22222222    def __init__(self, example=None, not_allowed_selectors=[], argument_id=None, 
    2223                  discretizer=Orange.core.EntropyDiscretization(forceAttribute=True)): 
     2223                 discretizer=Orange.feature.discretization.Entropy(forceAttribute=True)): 
    22242224        # required values - needed values of attributes 
    22252225        self.example = example 
  • Orange/doc/modules/fss3.py

    r9671 r9878  
    99class BayesFSS(object): 
    1010  def __new__(cls, examples=None, **kwds): 
    11     learner = object.__new__(cls, **kwds) 
     11    learner = object.__new__(cls) 
    1212    if examples: 
    1313      return learner(examples) 
  • Orange/doc/ofb/bayes.py

    r9671 r9878  
    77class Learner(object): 
    88    def __new__(cls, examples=None, **kwds): 
    9         learner = object.__new__(cls, **kwds) 
     9        learner = object.__new__(cls) 
    1010        if examples: 
    1111            learner.__init__(**kwds) 
  • Orange/doc/ofb/c_nb.htm

    r9671 r9878  
    7070<xmp class="code">class Learner(object): 
    7171    def __new__(cls, examples=None, **kwds): 
    72         learner = object.__new__(cls, **kwds) 
     72        learner = object.__new__(cls) 
    7373        if examples: 
    7474            learner.__init__(**kwds) 
  • Orange/doc/ofb/c_nb_disc.htm

    r9671 r9878  
    2525<xmp class="code">class Learner(object): 
    2626    def __new__(cls, examples=None, name='discretized bayes', **kwds): 
    27         learner = object.__new__(cls, **kwds) 
     27        learner = object.__new__(cls) 
    2828        if examples: 
    2929            learner.__init__(name) # force init 
  • Orange/doc/ofb/c_pythonlearner.htm

    r9671 r9878  
    9494<xmp class="code">class Learner(object): 
    9595    def __new__(cls, examples=None, name='discretized bayes', **kwds): 
    96         learner = object.__new__(cls, **kwds) 
     96        learner = object.__new__(cls) 
    9797        if examples: 
    9898            learner.__init__(name) # force init 
  • Orange/doc/ofb/nbdisc.py

    r9671 r9878  
    77class Learner(object): 
    88    def __new__(cls, examples=None, name='discretized bayes', **kwds): 
    9         learner = object.__new__(cls, **kwds) 
     9        learner = object.__new__(cls) 
    1010        if examples: 
    1111            learner.__init__(name) # force init 
  • Orange/evaluation/reliability.py

    r9725 r9816  
    763763    :obj:`Orange.classification.Classifier.GetBoth` is passed) contain an 
    764764    additional attribute :obj:`reliability_estimate`, which is an instance of 
    765      :class:`~Orange.evaluation.reliability.Estimate`. 
     765    :class:`~Orange.evaluation.reliability.Estimate`. 
    766766 
    767767    """ 
  • Orange/feature/discretization.py

    r9671 r9878  
    1 """ 
    2 ################################### 
    3 Discretization (``discretization``) 
    4 ################################### 
    5  
    6 .. index:: discretization 
    7  
    8 .. index::  
    9    single: feature; discretization 
    10  
    11  
    12 Example-based automatic discretization is in essence similar to learning: 
    13 given a set of examples, a discretization method proposes a list of suitable
    14 intervals to cut the attribute's values into. For this reason, Orange 
    15 structures for discretization resemble its structures for learning. Objects 
    16 derived from ``orange.Discretization`` play the role of a "learner" that,
    17 upon observing the examples, construct an ``orange.Discretizer`` whose role 
    18 is to convert continuous values into discrete according to the rule found by 
    19 ``Discretization``. 
    20  
    21 Orange supports several methods of discretization; here's a 
    22 list of methods with their corresponding classes.
    23  
    24 * Equi-distant discretization (:class:`EquiDistDiscretization`,
    25   :class:`EquiDistDiscretizer`). The range of the attribute's values is
    26   split into a prescribed number of equal-sized intervals.
    27 * Quantile-based discretization (:class:`EquiNDiscretization`,
    28   :class:`IntervalDiscretizer`). The range is split into intervals
    29   containing an equal number of examples.
    30 * Entropy-based discretization (:class:`EntropyDiscretization`, 
    31   :class:`IntervalDiscretizer`). Developed by Fayyad and Irani, 
    32   this method balances between entropy in intervals and MDL of discretization. 
    33 * Bi-modal discretization (:class:`BiModalDiscretization`, 
    34   :class:`BiModalDiscretizer`/:class:`IntervalDiscretizer`). 
    35   Two cut-off points set to optimize the difference of the distribution in 
    36   the middle interval and the distributions outside it. 
    37 * Fixed discretization (:class:`IntervalDiscretizer`). Discretization with  
    38   user-prescribed cut-off points. 
    39  
    40 Classes derived from :class:`Discretization` define a single method:
    41 the call operator. The object can also be called through the
    42 constructor.
    43  
    44 .. class:: Discretization 
    45  
    46     .. method:: __call__(attribute, examples[, weightID]) 
    47  
    48         Given a continuous ``attribute``, ``examples`` and, optionally, the id of
    49         the attribute with example weights, this function returns a discretized
    50         attribute. Argument ``attribute`` can be a descriptor, index or 
    51         name of the attribute. 
    52  
    53 Here's an example. Part of :download:`discretization.py <code/discretization.py>`: 
    54  
    55 .. literalinclude:: code/discretization.py 
    56     :lines: 7-15 
    57  
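The included lines are not reproduced in this changeset; judging from the description below, a hedged minimal equivalent (using the old module-level API this docstring documents) would be::

    import orange
    data = orange.ExampleTable("iris")
    # construct-and-call shorthand: the call arguments are passed
    # directly to the constructor
    sep_w = orange.EntropyDiscretization("sepal width", data)
    data_d = data.select(["sepal width", sep_w, data.domain.classVar])
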
    58 The discretized attribute ``sep_w`` is constructed with a call to 
    59 :class:`EntropyDiscretization` (instead of constructing it and calling 
    60 it afterwards, we passed the arguments for calling to the constructor, as 
    61 is often allowed in Orange). We then constructed a new  
    62 :class:`Orange.data.Table` with attributes "sepal width" (the original  
    63 continuous attribute), ``sep_w`` and the class attribute. Script output is:: 
    64  
    65     Entropy discretization, first 10 examples 
    66     [3.5, '>3.30', 'Iris-setosa'] 
    67     [3.0, '(2.90, 3.30]', 'Iris-setosa'] 
    68     [3.2, '(2.90, 3.30]', 'Iris-setosa'] 
    69     [3.1, '(2.90, 3.30]', 'Iris-setosa'] 
    70     [3.6, '>3.30', 'Iris-setosa'] 
    71     [3.9, '>3.30', 'Iris-setosa'] 
    72     [3.4, '>3.30', 'Iris-setosa'] 
    73     [3.4, '>3.30', 'Iris-setosa'] 
    74     [2.9, '<=2.90', 'Iris-setosa'] 
    75     [3.1, '(2.90, 3.30]', 'Iris-setosa'] 
    76  
    77 :class:`EntropyDiscretization` named the new attribute's values by the 
    78 interval range (it also named the attribute as "D_sepal width"). The new 
    79 attribute's values get computed automatically when they are needed. 
    80  
    81 As those that have read about :class:`Orange.data.variable.Variable` know, 
    82 the answer to  
    83 "How this works?" is hidden in the field  
    84 :obj:`~Orange.data.variable.Variable.get_value_from`. 
    85 This little dialog reveals the secret. 
    86  
    87 :: 
    88  
    89     >>> sep_w 
    90     EnumVariable 'D_sepal width' 
    91     >>> sep_w.get_value_from 
    92     <ClassifierFromVar instance at 0x01BA7DC0> 
    93     >>> sep_w.get_value_from.whichVar 
    94     FloatVariable 'sepal width' 
    95     >>> sep_w.get_value_from.transformer 
    96     <IntervalDiscretizer instance at 0x01BA2100> 
    97     >>> sep_w.get_value_from.transformer.points 
    98     <2.90000009537, 3.29999995232> 
    99  
    100 So, the ``select`` statement in the above example converted all examples 
    101 from ``data`` to the new domain. Since the new domain includes the attribute 
    102 ``sep_w`` that is not present in the original, ``sep_w``'s values are 
    103 computed on the fly. For each example in ``data``, ``sep_w.get_value_from``  
    104 is called to compute ``sep_w``'s value (if you ever need to call 
    105 ``get_value_from``, you shouldn't call ``get_value_from`` directly but call 
    106 ``compute_value`` instead). ``sep_w.get_value_from`` looks for value of 
    107 "sepal width" in the original example. The original, continuous sepal width 
    108 is passed to the ``transformer`` that determines the interval by its field 
    109 ``points``. Transformer returns the discrete value which is in turn returned 
    110 by ``get_value_from`` and stored in the new example. 
    111  
    112 You don't need to understand this mechanism exactly. It's important to know 
    113 that there are two classes of objects for discretization. Those derived from 
    114 :obj:`Discretizer` (such as :obj:`IntervalDiscretizer` that we've seen above) 
    115 are used as transformers that translate continuous value into discrete. 
    116 Discretization algorithms are derived from :obj:`Discretization`. Their  
    117 job is to construct a :obj:`Discretizer` and return a new variable 
    118 with the discretizer stored in ``get_value_from.transformer``. 
    119  
    120 Discretizers 
    121 ============ 
    122  
    123 Different discretizers support different methods for conversion of 
    124 continuous values into discrete. The most general is  
    125 :class:`IntervalDiscretizer` that is also used by most discretization 
    126 methods. Two other discretizers, :class:`EquiDistDiscretizer` and  
    127 :class:`ThresholdDiscretizer` could easily be replaced by
    128 :class:`IntervalDiscretizer` but are used for speed and simplicity. 
    129 The fourth discretizer, :class:`BiModalDiscretizer` is specialized 
    130 for discretizations induced by :class:`BiModalDiscretization`. 
    131  
    132 .. class:: Discretizer 
    133  
    134     All discretizers support a handy method for construction of a new 
    135     attribute from an existing one. 
    136  
    137     .. method:: construct_variable(attribute) 
    138  
    139         Constructs a new attribute descriptor; the new attribute is discretized 
    140         ``attribute``. The new attribute's name equals ``attribute.name``
    141         prefixed by "D\_", and its symbolic values are discretizer specific.
    142         The above example shows what comes out from :class:`IntervalDiscretizer`.
    143         Discretization algorithms actually first construct a discretizer and 
    144         then call its :class:`construct_variable` to construct an attribute 
    145         descriptor. 
    146  
    147 .. class:: IntervalDiscretizer 
    148  
    149     The most common discretizer.  
    150  
    151     .. attribute:: points 
    152  
    153         Cut-off points. All values below or equal to the first point belong 
    154         to the first interval, those between the first and the second 
    155         (including those equal to the second) go to the second interval and 
    156         so forth to the last interval which covers all values greater than 
    157         the last element in ``points``. The number of intervals is thus  
    158         ``len(points)+1``. 
    159  
    160 Let us manually construct an interval discretizer with cut-off points at 3.0 
    161 and 5.0. We shall use the discretizer to construct a discretized sepal length  
    162 (part of :download:`discretization.py <code/discretization.py>`): 
    163  
    164 .. literalinclude:: code/discretization.py 
    165     :lines: 22-26 
    166  
    167 That's all. First five examples of ``data2`` are now 
    168  
    169 :: 
    170  
    171     [5.1, '>5.00', 'Iris-setosa'] 
    172     [4.9, '(3.00, 5.00]', 'Iris-setosa'] 
    173     [4.7, '(3.00, 5.00]', 'Iris-setosa'] 
    174     [4.6, '(3.00, 5.00]', 'Iris-setosa'] 
    175     [5.0, '(3.00, 5.00]', 'Iris-setosa'] 
    176  
    177 Can you use the same discretizer for more than one attribute? Yes, as long 
    178 as they have the same cut-off points, of course. Simply call ``construct_variable`` for each
    179 continuous attribute (part of :download:`discretization.py <code/discretization.py>`): 
    180  
    181 .. literalinclude:: code/discretization.py 
    182     :lines: 30-34 
    183  
    184 Each attribute now has its own (FIXME) ClassifierFromVar in its  
    185 ``get_value_from``, but all use the same :class:`IntervalDiscretizer`,  
    186 ``idisc``. Changing an element of its ``points`` affects all attributes.
    187  
    188 Do not change the length of :obj:`~IntervalDiscretizer.points` if the 
    189 discretizer is used by any attribute. The length of 
    190 :obj:`~IntervalDiscretizer.points` should always match the number of values 
    191 of the attribute, which is determined by the length of the attribute's field 
    192 ``values``. Therefore, if ``attr`` is a discretized 
    193 attribute, then ``len(attr.values)`` must equal
    194 ``len(attr.get_value_from.transformer.points)+1``. It always 
    195 does, unless you deliberately change it. If the sizes don't match, 
    196 Orange will probably crash, and it will be entirely your fault. 
    197  
    198  
    199  
    200 .. class:: EquiDistDiscretizer 
    201  
    202     More rigid than :obj:`IntervalDiscretizer`:  
    203     it uses intervals of fixed width. 
    204  
    205     .. attribute:: first_cut 
    206          
    207         The first cut-off point. 
    208      
    209     .. attribute:: step 
    210  
    211         Width of intervals. 
    212  
    213     .. attribute:: number_of_intervals 
    214          
    215         Number of intervals. 
    216  
    217     .. attribute:: points (read-only) 
    218          
    219         The cut-off points; this is not a real attribute although it behaves 
    220         as one. Reading it constructs a list of cut-off points and returns it, 
    221         but changing the list doesn't affect the discretizer - it's a separate 
    222         list. This attribute is here only to give the
    223         :obj:`EquiDistDiscretizer` the same interface as that of  
    224         :obj:`IntervalDiscretizer`. 
    225  
    226 All values below :obj:`~EquiDistDiscretizer.first_cut` belong to the first
    227 interval (including possible values smaller than ``first_cut``). Otherwise,
    228 value ``val``'s interval is ``floor((val-first_cut)/step)``. If this turns
    229 out to be greater than or equal to :obj:`~EquiDistDiscretizer.number_of_intervals`,
    230 it is decreased to ``number_of_intervals-1``.
    231  
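A small hypothetical sketch of the mapping just described (the helper name and the parameter values, taken from the iris example later in this section, are assumptions)::

    from math import floor

    def interval_index(val, first_cut=4.9, step=0.6, n=6):
        # values below first_cut fall into the first interval
        if val < first_cut:
            return 0
        # otherwise floor((val - first_cut) / step), clamped to the last interval
        return min(int(floor((val - first_cut) / step)), n - 1)
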
    232 This discretizer is returned by :class:`EquiDistDiscretization`; you can 
    233 see an example in the corresponding section. You can also construct it  
    234 manually and call its ``construct_variable``, just as shown for the 
    235 :obj:`IntervalDiscretizer`. 
    236  
    237  
    238 .. class:: ThresholdDiscretizer 
    239  
    240     Threshold discretizer converts continuous values into binary by comparing 
    241     them with a threshold. This discretizer is actually not used by any 
    242     discretization method, but you can use it for manual discretization. 
    243     Orange needs this discretizer for binarization of continuous attributes 
    244     in decision trees. 
    245  
    246     .. attribute:: threshold 
    247  
    248         Threshold; values below or equal to the threshold belong to the first 
    249         interval and those that are greater go to the second. 
    250  
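A hedged manual-use sketch, mirroring the ``construct_variable`` pattern shown above (assumed API)::

    tdisc = orange.ThresholdDiscretizer(threshold=3.0)
    sw_binary = tdisc.construct_variable(data.domain["sepal width"])
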
    251 .. class:: BiModalDiscretizer 
    252  
    253     This discretizer is the first discretizer that couldn't be replaced by 
    254     :class:`IntervalDiscretizer`. It has two cut-off points and values are
    255     discretized according to whether they belong to the middle region 
    256     (which includes the lower but not the upper boundary) or not. The 
    257     discretizer is returned by :class:`BiModalDiscretization` if its 
    258     field :obj:`~BiModalDiscretization.split_in_two` is true (the default). 
    259  
    260     .. attribute:: low 
    261          
    262         Lower boundary of the interval (included in the interval).
    263  
    264     .. attribute:: high 
    265  
    266         Upper boundary of the interval (not included in the interval). 
    267  
    268  
    269 Discretization Algorithms 
    270 ========================= 
    271  
    272 .. class:: EquiDistDiscretization  
    273  
    274     Discretizes the attribute by cutting it into the prescribed number 
    275     of intervals of equal width. The examples are needed to determine the  
    276     span of attribute values. The interval between the smallest and the 
    277     largest is then cut into equal parts. 
    278  
    279     .. attribute:: number_of_intervals 
    280  
    281         Number of intervals into which the attribute is to be discretized.  
    282         Default value is 4. 
    283  
    284 For an example, we shall discretize all attributes of the Iris dataset into 6
    285 intervals. We shall construct an :class:`Orange.data.Table` with discretized 
    286 attributes and print description of the attributes (part 
    287 of :download:`discretization.py <code/discretization.py>`): 
    288  
    289 .. literalinclude:: code/discretization.py 
    290     :lines: 38-43 
    291  
    292 The script's output is
    293  
    294 :: 
    295  
    296     D_sepal length: <<4.90, [4.90, 5.50), [5.50, 6.10), [6.10, 6.70), [6.70, 7.30), >7.30> 
    297     D_sepal width: <<2.40, [2.40, 2.80), [2.80, 3.20), [3.20, 3.60), [3.60, 4.00), >4.00> 
    298     D_petal length: <<1.98, [1.98, 2.96), [2.96, 3.94), [3.94, 4.92), [4.92, 5.90), >5.90> 
    299     D_petal width: <<0.50, [0.50, 0.90), [0.90, 1.30), [1.30, 1.70), [1.70, 2.10), >2.10> 
    300  
    301 Is there a more decent way for a script to find the interval boundaries than
    302 by parsing the symbolic values? Sure: they are hidden in the discretizer,
    303 which is, as usual, stored in ``attr.get_value_from.transformer``. 
    304  
    305 Compare the following with the values above. 
    306  
    307 :: 
    308  
    309     >>> for attr in newattrs: 
    310     ...    print "%s: first interval at %5.3f, step %5.3f" % \ 
    311     ...    (attr.name, attr.get_value_from.transformer.first_cut, \ 
    312     ...    attr.get_value_from.transformer.step) 
    313     D_sepal length: first interval at 4.900, step 0.600 
    314     D_sepal width: first interval at 2.400, step 0.400 
    315     D_petal length: first interval at 1.980, step 0.980 
    316     D_petal width: first interval at 0.500, step 0.400 
    317  
    318 Like all discretizers, :class:`EquiDistDiscretizer` also has the method
    319 ``construct_variable`` (part of :download:`discretization.py <code/discretization.py>`): 
    320  
    321 .. literalinclude:: code/discretization.py 
    322     :lines: 69-73 
    323  
    324  
    325 .. class:: EquiNDiscretization 
    326  
    327     Discretization with Intervals Containing (Approximately) Equal Number 
    328     of Examples. 
    329  
    330     Discretizes the attribute by cutting it into the prescribed number of 
    331     intervals so that each of them contains equal number of examples. The 
    332     examples are obviously needed for this discretization, too. 
    333  
    334     .. attribute:: number_of_intervals 
    335  
    336         Number of intervals into which the attribute is to be discretized. 
    337         Default value is 4. 
    338  
    339 The use of this discretization is the same as the use of  
    340 :class:`EquiDistDiscretization`. The resulting discretizer is  
    341 :class:`IntervalDiscretizer`, hence it has ``points`` instead of ``first_cut``/ 
    342 ``step``/``number_of_intervals``. 
    343  
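A hedged sketch (assumed call style and keyword spelling, following :class:`EquiDistDiscretization` above)::

    equin = orange.EquiNDiscretization(number_of_intervals=4)
    pw = equin("petal width", data)
    # the resulting IntervalDiscretizer exposes its cut-off points
    print pw.get_value_from.transformer.points
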
    344 .. class:: EntropyDiscretization 
    345  
    346     Entropy-based Discretization (Fayyad-Irani). 
    347  
    348     Fayyad-Irani's discretization method works without a predefined number of 
    349     intervals. Instead, it recursively splits intervals at the cut-off point 
    350     that minimizes the entropy, until the entropy decrease is smaller than the 
    351     increase of MDL induced by the new point. 
    352  
    353     An interesting thing about this discretization technique is that an 
    354     attribute can be discretized into a single interval, if no suitable 
    355     cut-off points are found. If this is the case, the attribute is rendered 
    356     useless and can be removed. This discretization can therefore also serve 
    357     for feature subset selection. 
    358  
    359     .. attribute:: force_attribute 
    360  
    361         Forces the algorithm to induce at least one cut-off point, even when 
    362         its information gain is lower than MDL (default: false). 
    363  
    364 Part of :download:`discretization.py <code/discretization.py>`: 
    365  
    366 .. literalinclude:: code/discretization.py 
    367     :lines: 77-80 
    368  
    369 The output shows that all attributes are discretized into three intervals::
    370  
    371     sepal length: <5.5, 6.09999990463> 
    372     sepal width: <2.90000009537, 3.29999995232> 
    373     petal length: <1.89999997616, 4.69999980927> 
    374     petal width: <0.600000023842, 1.0000004768> 
    375  
    376 .. class:: BiModalDiscretization 
    377  
    378     Bi-Modal Discretization 
    379  
    380     Sets two cut-off points so that the class distribution of examples in 
    381     between is as different from the overall distribution as possible. The 
    382     difference is measured by chi-square statistics. All possible cut-off
    383     points are tried, thus the discretization runs in O(n^2). 
    384  
    385     This discretization method is especially suitable for the attributes in 
    386     which the middle region corresponds to normal and the outer regions to 
    387     abnormal values of the attribute. Depending on the nature of the 
    388     attribute, we can treat the lower and higher values separately, thus 
    389     discretizing the attribute into three intervals, or together, in a 
    390     binary attribute whose values correspond to normal and abnormal. 
    391  
    392     .. attribute:: split_in_two 
    393          
    394         Decides whether the resulting attribute should have three or two values.
    395         If true (default), we have three intervals and the discretizer is
    396         of type :class:`BiModalDiscretizer`. If false, the result is the
    397         ordinary :class:`IntervalDiscretizer`. 
    398  
    399 The Iris dataset has a three-valued class attribute; the classes are setosa,
    400 virginica and versicolor. As the picture below shows, sepal lengths of
    401 versicolors are between those of setosas and virginicas (the picture itself
    402 is drawn using LOESS probability estimation).
    403  
    404 .. image:: files/bayes-iris.gif 
    405  
    406 If we merge classes setosa and virginica into one, we can observe whether 
    407 the bi-modal discretization would correctly recognize the interval in 
    408 which versicolors dominate. 
    409  
    410 .. literalinclude:: code/discretization.py 
    411     :lines: 84-87 
    412  
    413 In this script, we have constructed a new class attribute which tells whether 
    414 an iris is versicolor or not. We have specified, with a simple lambda
    415 function, how this attribute's value is computed from the original class value.
    416 Finally, we have constructed a new domain and converted the examples. 
    417 Now for discretization. 
    418  
    419 .. literalinclude:: code/discretization.py 
    420     :lines: 97-100 
    421  
    422 The script prints out the middle intervals:: 
    423  
    424     sepal length: (5.400, 6.200] 
    425     sepal width: (2.000, 2.900] 
    426     petal length: (1.900, 4.700] 
    427     petal width: (0.600, 1.600] 
    428  
    429 Judging by the graph, the cut-off points for "sepal length" make sense. 
    430  
    431 Additional functions 
    432 ==================== 
    433  
    434 This section describes some functions and classes that can be used for
    435 categorization of continuous features. Besides several general classes that
    436 can help in this task, we also provide a function that may help in 
    437 entropy-based discretization (Fayyad & Irani), and a wrapper around classes for 
    438 categorization that can be used for learning. 
    439  
    440 .. automethod:: Orange.feature.discretization.entropyDiscretization_wrapper 
    441  
    442 .. autoclass:: Orange.feature.discretization.EntropyDiscretization_wrapper 
    443  
    444 .. autoclass:: Orange.feature.discretization.DiscretizedLearner_Class 
    445  
    446 .. rubric:: Example 
    447  
    448 FIXME. A chapter on `feature subset selection <../ofb/o_fss.htm>`_ in Orange 
    449 for Beginners tutorial shows the use of DiscretizedLearner. Other 
    450 discretization classes from core Orange are listed in chapter on 
    451 `categorization <../ofb/o_categorization.htm>`_ of the same tutorial. 
    452  
    453 ========== 
    454 References 
    455 ========== 
    456  
    457 * UM Fayyad and KB Irani. Multi-interval discretization of continuous valued 
    458   attributes for classification learning. In Proceedings of the 13th 
    459   International Joint Conference on Artificial Intelligence, pages 
    460   1022--1029, Chambery, France, 1993. 
    461  
    462 """ 
    463  
     1import Orange 
    4642import Orange.core as orange 
    4653 
     
    4675    Discrete2Continuous, \ 
    4686    Discretizer, \ 
    469         BiModalDiscretizer, \ 
    470         EquiDistDiscretizer, \ 
    471         IntervalDiscretizer, \ 
    472         ThresholdDiscretizer, \ 
    473         EntropyDiscretization, \ 
    474         EquiDistDiscretization, \ 
    475         EquiNDiscretization, \ 
    476         BiModalDiscretization, \ 
    477         Discretization 
     7    BiModalDiscretizer, \ 
     8    EquiDistDiscretizer as EqualWidthDiscretizer, \ 
     9    IntervalDiscretizer, \ 
     10    ThresholdDiscretizer,\ 
     11    EntropyDiscretization as Entropy, \ 
     12    EquiDistDiscretization as EqualWidth, \ 
     13    EquiNDiscretization as EqualFreq, \ 
     14    BiModalDiscretization as BiModal, \ 
     15    Discretization, \ 
     16    Preprocessor_discretize 
    47817 
    479 ###### 
    480 # from orngDics.py 
    481 def entropyDiscretization_wrapper(table): 
    482     """Take the classified table set (table) and categorize all continuous 
    483     features using the entropy based discretization 
    484     :obj:`EntropyDiscretization`. 
     18 
     19 
     20def entropyDiscretization_wrapper(data): 
     21    """Discretize all continuous features in class-labeled data set with the entropy-based discretization 
     22    :obj:`Entropy`. 
    48523     
    486     :param table: data to discretize. 
    487     :type table: Orange.data.Table 
     24    :param data: data to discretize. 
     25    :type data: Orange.data.Table 
    48826    :rtype: :obj:`Orange.data.Table` includes all categorical and discretized\ 
    48927    continuous features from the original data table. 
     
    49533    """ 
    49634    orange.setrandseed(0) 
    497     tablen=orange.Preprocessor_discretize(table, method=EntropyDiscretization()) 
     35    data_new = orange.Preprocessor_discretize(data, method=Entropy()) 
    49836     
    499     attrlist=[] 
    500     nrem=0 
    501     for i in tablen.domain.attributes: 
     37    attrlist = [] 
     38    nrem = 0 
     39    for i in data_new.domain.attributes: 
    50240        if (len(i.values)>1): 
    50341            attrlist.append(i) 
    50442        else: 
    50543            nrem=nrem+1 
    506     attrlist.append(tablen.domain.classVar) 
    507     return tablen.select(attrlist) 
     44    attrlist.append(data_new.domain.classVar) 
     45    return data_new.select(attrlist) 
    50846 
    50947 
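A hedged usage sketch of the wrapper defined above (the iris data set is an assumption)::

    import Orange
    data = Orange.data.Table("iris")
    # all continuous features are discretized with Entropy; features that
    # end up with a single interval are dropped, as described above
    disc_data = entropyDiscretization_wrapper(data)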
     
    51957    discretized data set:: 
    52058 
    521         discretizer = Orange.feature.dicretization.EntropyDiscretization() 
     59        discretizer = Orange.feature.discretization.Entropy()
    52260        disc_data = discretizer(table) 
    523         another_disc_data = Orange.feature.dicretization.EntropyDiscretization(table) 
     61        another_disc_data = Orange.feature.discretization.Entropy(table)
    52462 
    52563    """ 
     
    54785    :param discretizer: a discretizer that converts continuous values into 
    54886      discrete. Defaults to 
    549       :obj:`Orange.feature.discretization.EntropyDiscretization`. 
     87      :obj:`Orange.feature.discretization.Entropy`. 
    55088    :type discretizer: Orange.feature.discretization.Discretization 
    55189     
     
    565103 
    566104    """ 
    567     def __init__(self, baseLearner, discretizer=EntropyDiscretization(), **kwds): 
     105    def __init__(self, baseLearner, discretizer=Entropy(), **kwds): 
    568106        self.baseLearner = baseLearner 
    569107        if hasattr(baseLearner, "name"): 
     
    591129  def __call__(self, example, resultType = orange.GetValue): 
    592130    return self.classifier(example, resultType) 
     131 
     132class DiscretizeTable(object): 
     133    """Discretizes all continuous features of the data table. 
     134 
     135    :param data: data to discretize. 
     136    :type data: :class:`Orange.data.Table` 
     137 
     138    :param features: data features to discretize. None (default) to discretize all features. 
     139    :type features: list of :class:`Orange.data.variable.Variable` 
     140 
     141    :param method: feature discretization method. 
     142    :type method: :class:`Discretization` 
     143    """ 
     144    def __new__(cls, data=None, features=None, discretize_class=False, method=EqualFreq(n=3)): 
     145        if data is None: 
     146            self = object.__new__(cls) 
     147            return self 
     148        else: 
     149            self = cls(features=features, discretize_class=discretize_class, method=method) 
     150            return self(data) 
     151 
     152    def __init__(self, features=None, discretize_class=False, method=EqualFreq(n=3)): 
     153        self.features = features 
     154        self.discretize_class = discretize_class 
     155        self.method = method 
     156 
     157    def __call__(self, data): 
     158        pp = Preprocessor_discretize(attributes=self.features, discretizeClass=self.discretize_class) 
     159        pp.method = self.method 
     160        return pp(data) 
     161 
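A hedged usage sketch of ``DiscretizeTable`` as defined above (the iris data set is an assumption; ``EqualFreq(n=3)`` is the documented default)::

    import Orange
    data = Orange.data.Table("iris")
    # construct-and-call in one step: returns the discretized table
    disc_data = DiscretizeTable(data, method=EqualFreq(n=3))
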
  • Orange/feature/imputation.py

    r9671 r9806  
    1 """ 
    2 ########################### 
    3 Imputation (``imputation``) 
    4 ########################### 
    5  
    6 .. index:: imputation 
    7  
    8 .. index::  
    9    single: feature; value imputation 
    10  
    11  
    12 Imputation is a procedure that replaces missing feature values with
    13 appropriate values. It is needed by methods (learning
    14 algorithms and others) that cannot handle unknown values, for
    15 instance logistic regression.
    16  
    17 Missing values sometimes have a special meaning, so they need to be replaced 
    18 by a designated value. Sometimes we know what to replace the missing value 
    19 with; for instance, in a medical problem, some laboratory tests might not be 
    20 done when it is known what their results would be. In that case, we impute
    21 a certain fixed value instead of the missing one. In the most complex case, we assign
    22 values that are computed based on some model; we can, for instance, impute the
    23 average or majority value or even a value which is computed from the values of
    24 other, known features, using a classifier.
    25  
    26 In a learning/classification process, imputation is needed on two occasions. 
    27 Before learning, the imputer needs to process the training examples. 
    28 Afterwards, the imputer is called for each example to be classified. 
    29  
    30 In general, the imputer itself needs to be trained. This is, of course, not needed
    31 when the imputer imputes a certain fixed value. However, when it imputes the
    32 average or majority value, it needs to compute the statistics on the training
    33 examples and use them afterwards for imputation of training and testing
    34 examples. 
    35  
    36 While reading this document, bear in mind that imputation is a part of the 
    37 learning process. If we fit the imputation model, for instance, by learning 
    38 how to predict the feature's value from other features, or even if we  
    39 simply compute the average or the minimal value for the feature and use it 
    40 in imputation, this should only be done on learning data. If cross validation 
    41 is used for sampling, imputation should be done on training folds only. Orange 
    42 provides simple means for doing that. 
    43  
    44 This page will first explain how to construct various imputers. Then follow 
    45 the examples for `proper use of imputers <#using-imputers>`_. Finally, quite 
    46 often you will want to use imputation with special requests, such as certain 
    47 features' missing values getting replaced by constants and other by values 
    48 computed using models induced from specified other features. For instance, 
    49 in one of the studies we worked on, the patient's pulse rate needed to be 
    50 estimated using regression trees that included the scope of the patient's 
    51 injuries, sex and age, some attributes' values were replaced by the most 
    52 pessimistic ones and others were computed with regression trees based on 
    53 values of all features. If you are using learners that need the imputer as a 
    54 component, you will need to `write your own imputer constructor  
    55 <#write-your-own-imputer-constructor>`_. This is trivial and is explained at 
    56 the end of this page. 
    57  
    58 Wrapper for learning algorithms 
    59 =============================== 
    60  
    61 This wrapper can be used with learning algorithms that cannot handle missing 
    62 values: it will impute the missing values using the imputer, call the
    63 learning algorithm and, if the imputation is also needed by the classifier, wrap the
    64 resulting classifier into another wrapper that will impute the missing values 
    65 in examples to be classified. 
    66  
    67 Even so, the module is somewhat redundant, as all learners that cannot handle  
    68 missing values should, in principle, provide the slots for imputer constructor. 
    69 For instance, :obj:`Orange.classification.logreg.LogRegLearner` has an attribute  
    70 :obj:`Orange.classification.logreg.LogRegLearner.imputerConstructor`, and even 
    71 if you don't set it, it will do some imputation by default. 
    72  
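A hedged sketch of the wrapper in use (names as documented on this page; the ``imputer_constructor`` keyword spelling is an assumption, and the voting data set is the one used in the example further down)::

    import Orange
    data = Orange.data.Table("voting")
    imba = Orange.feature.imputation.ImputeLearner(
        base_learner=Orange.classification.bayes.NaiveLearner(),
        imputer_constructor=Orange.feature.imputation.ImputerConstructor_minimal())
    classifier = imba(data)   # trains the imputer, imputes, learns, wraps
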
    73 .. class:: ImputeLearner 
    74  
    75     Wraps a learner and performs data imputation before learning.
    76  
    77     Most of Orange's learning algorithms do not use imputers because they can 
    78     appropriately handle the missing values. Bayesian classifier, for instance, 
    79     simply skips the corresponding attributes in the formula, while 
    80     classification/regression trees have components for handling the missing 
    81     values in various ways. 
    82  
    83     If for any reason you want to use these algorithms to run on imputed data, 
    84     you can use this wrapper. The class description is a matter of a separate 
    85     page, but we shall show its code here as another demonstration of how to 
    86     use the imputers - logistic regression is implemented essentially the same 
    87     as the below classes. 
    88  
    89     This is basically a learner, so the constructor will return either an 
    90     instance of :obj:`ImputeLearner` or, if called with examples, an instance
    91     of some classifier. There are a few attributes that need to be set, though. 
    92  
    93     .. attribute:: base_learner  
    94      
    95     A wrapped learner. 
    96  
    97     .. attribute:: imputer_constructor 
    98      
    99     An instance of a class derived from :obj:`ImputerConstructor` (or a class 
    100     with the same call operator). 
    101  
    102     .. attribute:: dont_impute_classifier 
    103  
    104     If given and set (this attribute is optional), the classifier will not be 
    105     wrapped into an imputer. Do this if the classifier doesn't mind if the 
    106     examples it is given have missing values. 
    107  
    108     The learner is best illustrated by its code - here's its complete 
    109     :obj:`__call__` method:: 
    110  
    111         def __call__(self, data, weight=0): 
    112             trained_imputer = self.imputer_constructor(data, weight) 
    113             imputed_data = trained_imputer(data, weight) 
    114             base_classifier = self.base_learner(imputed_data, weight) 
    115             if self.dont_impute_classifier: 
    116                 return base_classifier 
    117             else: 
    118                 return ImputeClassifier(base_classifier, trained_imputer) 
    119  
    120     So "learning" goes like this. :obj:`ImputeLearner` will first construct 
    121     the imputer (that is, call :obj:`self.imputer_constructor`) to get a (trained)
    122     imputer. Then it will use the imputer to impute the data, and call the
    123     given :obj:`base_learner` to construct a classifier. For instance,
    124     :obj:`base_learner` could be a learner for logistic regression and the
    125     result would be a logistic regression model. If the classifier can handle
    126     unknown values (that is, if :obj:`dont_impute_classifier` is set), we return it as
    127     it is; otherwise we wrap it into :obj:`ImputeClassifier`, which is given
    128     the base classifier and the imputer which it can use to impute the missing 
    129     values in (testing) examples. 
    130  
    131 .. class:: ImputeClassifier 
    132  
    133     Objects of this class are returned by :obj:`ImputeLearner` when given data. 
    134  
    135     .. attribute:: base_classifier
    136      
    137     A wrapped classifier. 
    138  
    139     .. attribute:: imputer 
    140      
    141     An imputer for imputation of unknown values. 
    142  
    143     .. method:: __call__  
    144      
    145     This class is even more trivial than the learner. Its constructor accepts  
    146     two arguments, the classifier and the imputer, which are stored into the 
    147     corresponding attributes. The call operator which does the classification 
    148     then looks like this:: 
    149  
    150         def __call__(self, ex, what=orange.GetValue): 
    151             return self.base_classifier(self.imputer(ex), what) 
    152  
    153     It imputes the missing values by calling the :obj:`imputer` and passes the
    154     imputed example to the base classifier.
    155  
    156 .. note::  
    157    In this setup the imputer is trained on the training data - even if you do 
    158    cross validation, the imputer will be trained on the right data. In the 
    159    classification phase we again use the imputer, which was trained on the
    160    training data only. 
    161  
    162 .. rubric:: Code of ImputeLearner and ImputeClassifier  
    163  
    164 :obj:`Orange.feature.imputation.ImputeLearner` puts the keyword arguments into 
    165 the instance's dictionary. You are expected to call it like
    166 :obj:`ImputeLearner(base_learner=<someLearner>,
    167 imputer_constructor=<someImputerConstructor>)`. When the learner is called with examples, it
    168 trains the imputer, imputes the data, induces a :obj:`base_classifier` by the
    169 :obj:`base_learner` and constructs :obj:`ImputeClassifier` that stores the
    170 :obj:`base_classifier` and the :obj:`imputer`. For classification, the missing 
    171 values are imputed and the classifier's prediction is returned. 
    172  
    173 Note that this code is slightly simplified, although the omitted details handle 
    174 non-essential technical issues that are unrelated to imputation:: 
    175  
    176     class ImputeLearner(orange.Learner): 
    177         def __new__(cls, examples = None, weightID = 0, **keyw): 
    178             self = orange.Learner.__new__(cls, **keyw) 
    179             self.__dict__.update(keyw) 
    180             if examples: 
    181                 return self.__call__(examples, weightID) 
    182             else: 
    183                 return self 
    184      
    185         def __call__(self, data, weight=0): 
    186             trained_imputer = self.imputer_constructor(data, weight) 
    187             imputed_data = trained_imputer(data, weight) 
    188             base_classifier = self.base_learner(imputed_data, weight) 
    189             return ImputeClassifier(base_classifier, trained_imputer) 
    190      
    191     class ImputeClassifier(orange.Classifier): 
    192         def __init__(self, base_classifier, imputer): 
    193             self.base_classifier = base_classifier 
    194             self.imputer = imputer 
    195      
    196         def __call__(self, ex, what=orange.GetValue): 
    197             return self.base_classifier(self.imputer(ex), what) 
    198  
    199 .. rubric:: Example 
    200  
    201 Although most of Orange's learning algorithms will take care of imputation
    202 internally, if needed, it can sometimes happen that an expert will be able to
    203 tell you exactly what to put in the data instead of the missing values. In this 
    204 example we shall suppose that we want to impute the minimal value of each 
    205 feature. We will try to determine whether the naive Bayesian classifier with 
    206 its  implicit internal imputation works better than one that uses imputation by  
    207 minimal values. 
    208  
    209 :download:`imputation-minimal-imputer.py <code/imputation-minimal-imputer.py>` (uses :download:`voting.tab <code/voting.tab>`): 
    210  
    211 .. literalinclude:: code/imputation-minimal-imputer.py 
    212     :lines: 7- 
    213      
    214 This should output::
    215  
    216     Without imputation: 0.903 
    217     With imputation: 0.899 
    218  
    219 .. note::  
    220    Note that we constructed just one instance of \ 
    221    :obj:`Orange.classification.bayes.NaiveLearner`, but this same instance is 
    222    used twice in each fold: once it is given the examples as they are (and
    223    returns an instance of :obj:`Orange.classification.bayes.NaiveClassifier`).
    224    The second time it is called by :obj:`imba` and the \
    225    :obj:`Orange.classification.bayes.NaiveClassifier` it returns is wrapped
    226    into :obj:`Orange.feature.imputation.Classifier`. We thus have only one
    227    learner, which produces two different classifiers in each round of
    228    testing. 
    229  
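In outline, the comparison could be set up like this - a sketch only, assuming
the keyword names from the class code above and the standard
:obj:`Orange.evaluation` helpers; the downloadable script is authoritative::

    import Orange

    data = Orange.data.Table("voting")
    ba = Orange.classification.bayes.NaiveLearner()
    imba = Orange.feature.imputation.ImputeLearner(
        base_learner=ba,
        imputer_constructor=Orange.feature.imputation.ImputerConstructor_minimal)
    res = Orange.evaluation.testing.cross_validation([ba, imba], data)
    cas = Orange.evaluation.scoring.CA(res)
    print "Without imputation: %.3f" % cas[0]
    print "With imputation: %.3f" % cas[1]
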
    230 Abstract imputers 
    231 ================= 
    232  
    233 As is common in Orange, imputation is done by a pair of classes: one that does
    234 the work and another that constructs it. :obj:`ImputerConstructor` is the
    235 abstract root of a hierarchy of classes that get the training data (with an
    236 optional id for the weight) and construct an instance of a class derived from
    237 :obj:`Imputer`. An :obj:`Imputer` can be called with an
    238 :obj:`Orange.data.Instance` and it will return a new example with the missing
    239 values imputed (it will leave the original example intact!). If an imputer is
    240 called with an :obj:`Orange.data.Table`, it will return a new example table
    241 with imputed examples.
    242  
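For illustration - a minimal sketch, using the bridges data from the examples
below and the :obj:`ImputerConstructor_average` described in the next
section - both call forms look like this::

    import Orange

    data = Orange.data.Table("bridges")
    imputer = Orange.feature.imputation.ImputerConstructor_average(data)
    imputed_example = imputer(data[0])   # new example; data[0] stays intact
    imputed_table = imputer(data)        # new table with all examples imputed
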
    243 .. class:: ImputerConstructor 
    244  
    245     .. attribute:: imputeClass 
    246      
    247     Tells whether to impute the class value (default) or not.
    248  
    249 Simple imputation 
    250 ================= 
    251  
    252 The simplest imputers always impute the same value for a particular attribute, 
    253 disregarding the values of other attributes. They all use the same imputer 
    254 class, :obj:`Imputer_defaults`. 
    255      
    256 .. class:: Imputer_defaults 
    257  
    258     .. attribute::  defaults 
    259      
    260     An example with the default values to be imputed instead of the missing ones.
    261     Examples to be imputed must be from the same domain as :obj:`defaults`. 
    262  
    263     Instances of this class can be constructed by  
    264     :obj:`Orange.feature.imputation.ImputerConstructor_minimal`,  
    265     :obj:`Orange.feature.imputation.ImputerConstructor_maximal` or
    266     :obj:`Orange.feature.imputation.ImputerConstructor_average`.
    267  
    268     For continuous features, they will impute the smallest, largest or the
    269     average value encountered in the training examples. For discrete features,
    270     they will impute the lowest (the one with index 0, e.g. ``attr.values[0]``),
    271     the highest (``attr.values[-1]``), or the most common value encountered in
    272     the data. The first two imputers will mostly be used when the discrete values
    273     are ordered according to their impact on the class (for instance, possible 
    274     values for symptoms of some disease can be ordered according to their 
    275     seriousness). The minimal and maximal imputers will then represent 
    276     optimistic and pessimistic imputations. 
    277  
    278     The following code will load the bridges data, and first impute the values
    279     in a single example and then in the whole table.
    280  
    281 :download:`imputation-complex.py <code/imputation-complex.py>` (uses :download:`bridges.tab <code/bridges.tab>`): 
    282  
    283 .. literalinclude:: code/imputation-complex.py 
    284     :lines: 9-23 
    285  
    286 This example shows what the imputer does, not how it is meant to be used. Do
    287 not impute all the data and then use it for cross-validation. As warned at the
    288 top of this page, see the instructions for the actual `use of
    289 imputers <#using-imputers>`_.
    290  
    291 .. note:: The :obj:`ImputerConstructor` classes are another example of a
    292   dual-purpose constructor: if you give the constructor the data, it will return an \
    293   :obj:`Imputer` - the above call is equivalent to calling \
    294   :obj:`Orange.feature.imputation.ImputerConstructor_minimal()(data)`.
    295  
    296 You can also construct an :obj:`Orange.feature.imputation.Imputer_defaults`
    297 yourself and specify your own defaults, or leave some values unspecified, in
    298 which case the imputer won't impute them, as in the following example. Here,
    299 the only attribute whose values will get imputed is "LENGTH"; the imputed value
    300 will be 1234.
    301  
    302 .. literalinclude:: code/imputation-complex.py 
    303     :lines: 56-69 
    304  
    305 :obj:`Orange.feature.imputation.Imputer_defaults`'s constructor will accept an
    306 argument of type :obj:`Orange.data.Domain` (in which case it will construct an
    307 empty instance for :obj:`defaults`) or an example. (Be careful with this:
    308 :obj:`Orange.feature.imputation.Imputer_defaults` will keep a reference to the
    309 instance, not a copy. You can make a copy yourself to avoid problems:
    310 instead of `Imputer_defaults(data[0])` you may want to write
    311 `Imputer_defaults(Orange.data.Instance(data[0]))`.)
    312  
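A sketch of that idea (the authoritative code is in the file referenced above;
the lines below merely follow the description)::

    import Orange

    data = Orange.data.Table("bridges")
    imputer = Orange.feature.imputation.Imputer_defaults(data.domain)
    imputer.defaults["LENGTH"] = 1234    # all other values remain unknown
    print imputer(data[19])              # impute a single example
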
    313 Random imputation 
    314 ================= 
    315  
    316 .. class:: Imputer_Random 
    317  
    318     Imputes random values. The corresponding constructor is 
    319     :obj:`ImputerConstructor_Random`. 
    320  
    321     .. attribute:: impute_class 
    322      
    323     Tells whether to impute the class values or not. Defaults to True. 
    324  
    325     .. attribute:: deterministic 
    326  
    327     If true (the default is False), the random generator is initialized for
    328     each example, using the example's hash value as a seed. As a result, equal
    329     examples are always imputed the same values.
    330      
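A quick sketch, assuming the constructor name given above::

    import Orange

    data = Orange.data.Table("bridges")
    imputer = Orange.feature.imputation.ImputerConstructor_Random(data)
    imputer.deterministic = True   # equal examples get equal imputed values
    print imputer(data[19])
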
    331 Model-based imputation 
    332 ====================== 
    333  
    334 .. class:: ImputerConstructor_model 
    335  
    336     Model-based imputers learn to predict the attribute's value from the values
    337     of other attributes. :obj:`ImputerConstructor_model` is given a learning
    338     algorithm (two, actually - one for discrete and one for continuous
    339     attributes) and constructs a classifier for each attribute. The
    340     constructed imputer :obj:`Imputer_model` stores a list of classifiers which
    341     are used when needed.
    342  
    343     .. attribute:: learner_discrete, learner_continuous 
    344      
    345     Learners for discrete and for continuous attributes. If either is missing,
    346     attributes of the corresponding type will not get imputed.
    347  
    348     .. attribute:: use_class 
    349      
    350     Tells whether the imputer is allowed to use the class value. As this is
    351     most often undesired, this option is by default set to False. It can,
    352     however, be useful in a more complex design in which one imputer is used
    353     for learning examples (this one would use the class value) and another for
    354     testing examples (which would not use the class value, as it is
    355     unavailable at that point).
    356  
    357 .. class:: Imputer_model 
    358  
    359     .. attribute:: models
    360  
    361     A list of classifiers, each corresponding to one attribute of the examples
    362     whose values are to be imputed. The :obj:`classVar` of each model should
    363     equal the corresponding attribute. If any classifier is missing (that is,
    364     the corresponding element of the list is :obj:`None`), the corresponding
    365     attribute's values will not be imputed.
    366  
    367 .. rubric:: Examples 
    368  
    369 The following imputer predicts the missing attribute values using
    370 classification and regression trees with a minimum of 20 examples in a leaf.
    371 Part of :download:`imputation-complex.py <code/imputation-complex.py>` (uses :download:`bridges.tab <code/bridges.tab>`): 
    372  
    373 .. literalinclude:: code/imputation-complex.py 
    374     :lines: 74-76 
    375  
    376 We could even use the same learner for discrete and continuous attributes, 
    377 as :class:`Orange.classification.tree.TreeLearner` checks the class type 
    378 and constructs regression or classification trees accordingly. The  
    379 common parameters, such as the minimal number of 
    380 examples in leaves, are used in both cases. 
    381  
    382 You can also use different learning algorithms for discrete and
    383 continuous attributes. A common setup would be to use
    384 :class:`Orange.classification.bayes.NaiveLearner` for discrete and
    385 :class:`Orange.regression.mean.MeanLearner` (which
    386 just remembers the average) for continuous attributes. Part of
    387 :download:`imputation-complex.py <code/imputation-complex.py>` (uses :download:`bridges.tab <code/bridges.tab>`): 
    388  
    389 .. literalinclude:: code/imputation-complex.py 
    390     :lines: 91-94 
    391  
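As a sketch of that setup (assuming the attribute names documented above and
the learners named in this section; the referenced script may differ in
detail)::

    import Orange

    data = Orange.data.Table("bridges")
    imputer = Orange.feature.imputation.ImputerConstructor_model()
    imputer.learner_discrete = Orange.classification.bayes.NaiveLearner()
    imputer.learner_continuous = Orange.regression.mean.MeanLearner()
    imputer = imputer(data)    # train one model per attribute
    print imputer(data[19])
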
    392 You can also construct an :class:`Imputer_model` yourself. You will do  
    393 this if different attributes need different treatment. Brace yourself for an
    394 example that is a bit more complex. First we shall construct an
    395 :class:`Imputer_model` and initialize an empty list of models.  
    396 The following code snippets are from 
    397 :download:`imputation-complex.py <code/imputation-complex.py>` (uses :download:`bridges.tab <code/bridges.tab>`): 
    398  
    399 .. literalinclude:: code/imputation-complex.py 
    400     :lines: 108-109 
    401  
    402 Attributes "LANES" and "T-OR-D" will always be imputed the values 2 and
    403 "THROUGH". Since "LANES" is continuous, it suffices to construct a
    404 :obj:`DefaultClassifier` with the default value 2.0 (don't forget the
    405 decimal part, or else Orange will think you are talking about the index of a
    406 discrete value - how could it tell?). For the discrete attribute "T-OR-D", we
    407 could construct a :class:`Orange.classification.ConstantClassifier` and give the index of the value
    408 "THROUGH" as an argument. But we shall do it more elegantly, by constructing a
    409 :class:`Orange.data.Value`. Both classifiers will be stored at the appropriate places
    410 in :obj:`imputer.models`.
    411  
    412 .. literalinclude:: code/imputation-complex.py 
    413     :lines: 110-112 
    414  
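A sketch of those two assignments (hypothetical variable names; the referenced
script is authoritative)::

    # assuming imputer.models was initialized as [None] * len(data.domain)
    imputer.models[data.domain.index("LANES")] = \
        Orange.classification.ConstantClassifier(2.0)
    imputer.models[data.domain.index("T-OR-D")] = \
        Orange.classification.ConstantClassifier(
            Orange.data.Value(data.domain["T-OR-D"], "THROUGH"))
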
    415  
    416 "LENGTH" will be computed with a regression tree induced from "MATERIAL",  
    417 "SPAN" and "ERECTED" (together with "LENGTH" as the class attribute, of 
    418 course). Note that we initialized the domain by simply giving a list with 
    419 the names of the attributes, with the domain as an additional argument 
    420 in which Orange will look for the named attributes. 
    421  
    422 .. literalinclude:: code/imputation-complex.py 
    423     :lines: 114-119 
    424  
    425 We printed the tree just to see what it looks like. 
    426  
    427 :: 
    428  
    429     SPAN=SHORT: 1158
    430     SPAN=LONG: 1907
    431     SPAN=MEDIUM
    432     |    ERECTED<1908.500: 1325
    433     |    ERECTED>=1908.500: 1528
    435  
    436 Small and nice. Now for the "SPAN". Wooden bridges and walkways are short,
    437 while the others are mostly medium. This could be done with a
    438 :class:`Orange.classification.lookup.ClassifierByLookupTable` - that would be
    439 faster than what we plan here. See the corresponding documentation on lookup
    440 classifiers. Here we are going to do it with a Python function.
    441  
    442 .. literalinclude:: code/imputation-complex.py 
    443     :lines: 121-128 
    444  
    445 :obj:`compute_span` could also be written as a class, if you'd prefer
    446 that. It is important that it behaves like a classifier, that is, it gets an
    447 example and returns a value. The second argument tells, as usual, what the
    448 caller expects the classifier to return - a value, a distribution or both.
    449 Since the caller, :obj:`Imputer_model`, always wants values, we shall ignore
    450 the argument (at the risk of having problems in the future, when imputers
    451 might handle distributions as well).
    452  
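The function could look roughly like this (a sketch under the assumptions from
the text above; :obj:`span_var` stands for the "SPAN" attribute and the value
names follow the bridges data)::

    def compute_span(example, return_what=orange.GetValue):
        # span_var = data.domain["SPAN"], defined beforehand
        if example["TYPE"] == "WOOD" or example["PURPOSE"] == "WALK":
            return Orange.data.Value(span_var, "SHORT")
        else:
            return Orange.data.Value(span_var, "MEDIUM")

    imputer.models[data.domain.index("SPAN")] = compute_span
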
    453 Missing values as special values 
    454 ================================ 
    455  
    456 Missing values sometimes have a special meaning. The fact that something was
    457 not measured can sometimes tell a lot. Be cautious, however, when using such
    458 values in decision models; if the decision not to measure something (for
    459 instance, not to perform a laboratory test on a patient) is based on the
    460 expert's knowledge of the class value, such unknown values clearly should not
    461 be used in models.
    462  
    463 .. class:: ImputerConstructor_asValue 
    464  
    465     Constructs a new domain in which each 
    466     discrete attribute is replaced with a new attribute that has one value more: 
    467     "NA". The new attribute will compute its values on the fly from the old one, 
    468     copying the normal values and replacing the unknowns with "NA". 
    469  
    470     For continuous attributes, it will 
    471     construct a two-valued discrete attribute with values "def" and "undef", 
    472     telling whether the continuous attribute was defined or not. The attribute's 
    473     name will equal the original's with "_def" appended. The original continuous 
    474     attribute will remain in the domain and its unknowns will be replaced by 
    475     averages. 
    476  
    477     :class:`ImputerConstructor_asValue` has no specific attributes. 
    478  
    479     It constructs :class:`Imputer_asValue` (I bet you 
    480     wouldn't guess). It converts the example into the new domain, which imputes  
    481     the values for discrete attributes. If continuous attributes are present, it  
    482     will also replace their values by the averages. 
    483  
    484 .. class:: Imputer_asValue 
    485  
    486     .. attribute:: domain 
    487  
    488         The domain with the new attributes constructed by  
    489         :class:`ImputerConstructor_asValue`. 
    490  
    491     .. attribute:: defaults 
    492  
    493         Default values for continuous attributes. Present only if there are any. 
    494  
    495 The following code shows what this imputer actually does to the domain. 
    496 Part of :download:`imputation-complex.py <code/imputation-complex.py>` (uses :download:`bridges.tab <code/bridges.tab>`): 
    497  
    498 .. literalinclude:: code/imputation-complex.py 
    499     :lines: 137-151 
    500  
    501 The script's output looks like this:: 
    502  
    503     [RIVER, ERECTED, PURPOSE, LENGTH, LANES, CLEAR-G, T-OR-D, MATERIAL, SPAN, REL-L, TYPE] 
    504  
    505     [RIVER, ERECTED_def, ERECTED, PURPOSE, LENGTH_def, LENGTH, LANES_def, LANES, CLEAR-G, T-OR-D, MATERIAL, SPAN, REL-L, TYPE] 
    506  
    507     RIVER: M -> M 
    508     ERECTED: 1874 -> 1874 (def) 
    509     PURPOSE: RR -> RR 
    510     LENGTH: ? -> 1567 (undef) 
    511     LANES: 2 -> 2 (def) 
    512     CLEAR-G: ? -> NA 
    513     T-OR-D: THROUGH -> THROUGH 
    514     MATERIAL: IRON -> IRON 
    515     SPAN: ? -> NA 
    516     REL-L: ? -> NA 
    517     TYPE: SIMPLE-T -> SIMPLE-T 
    518  
    519 Seemingly, the two examples have the same attributes (with
    520 :samp:`imputed` having a few additional ones). But if you check whether
    521 :samp:`original.domain[0] == imputed.domain[0]`, you will see that this
    522 first impression is false. The attributes only have the same names;
    523 they are different attributes. If you are reading this page (which is already
    524 a bit advanced), you probably know that Orange does not really care about
    525 attribute names.
    526  
    527 Therefore, if we wrote :samp:`imputed[i]` the program would fail,
    528 since :samp:`imputed` has no attribute :samp:`i`. But it has an
    529 attribute with the same name (which usually even has the same value). We
    530 therefore use :samp:`i.name` to index the attributes of
    531 :samp:`imputed`. (Using names for indexing is not fast, though; if you do
    532 it a lot, compute the integer index with
    533 :samp:`imputed.domain.index(i.name)`.)
    534  
    535 For continuous attributes, there is an additional attribute with "_def" 
    536 appended; we get it by :samp:`i.name+"_def"`. 
    537  
    538 The first continuous attribute, "ERECTED", is defined. Its value remains 1874
    539 and the additional attribute "ERECTED_def" has value "def". Not so for 
    540 "LENGTH". Its undefined value is replaced by the average (1567) and the new 
    541 attribute has value "undef". The undefined discrete attribute "CLEAR-G" (and 
    542 all other undefined discrete attributes) is assigned the value "NA". 
    543  
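Putting this together, a loop like the one that produced the printout above
could be written as follows (a sketch; :samp:`original` and :samp:`imputed`
are the two examples from the referenced script)::

    for i in original.domain:
        print "%s: %s -> %s" % (i.name, original[i], imputed[i.name]),
        if i.varType == orange.VarTypes.Continuous:
            print "(%s)" % imputed[i.name + "_def"]
        else:
            print
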
    544 Using imputers 
    545 ============== 
    546  
    547 To use the imputation classes properly in the learning process, they must be
    548 trained on the training examples only. Imputing the missing values first and
    549 then using the data set in cross-validation will give overly optimistic results.
    550  
    551 Learners with imputer as a component 
    552 ------------------------------------ 
    553  
    554 Orange learners that cannot handle missing values will generally provide a slot 
    555 for the imputer component. An example of such a class is 
    556 :obj:`Orange.classification.logreg.LogRegLearner` with an attribute called 
    557 :obj:`Orange.classification.logreg.LogRegLearner.imputerConstructor`. To it you 
    558 can assign an imputer constructor - one of the above constructors or a specific 
    559 constructor you wrote yourself. When given learning examples, 
    560 :obj:`Orange.classification.logreg.LogRegLearner` will pass them to 
    561 :obj:`Orange.classification.logreg.LogRegLearner.imputerConstructor` to get an 
    562 imputer (again some of the above or a specific imputer you programmed). It will 
    563 immediately use the imputer to impute the missing values in the learning data 
    564 set, so it can be used by the actual learning algorithm. Besides, when the 
    565 classifier :obj:`Orange.classification.logreg.LogRegClassifier` is constructed, 
    566 the imputer will be stored in its attribute 
    567 :obj:`Orange.classification.logreg.LogRegClassifier.imputer`. At
    568 classification time, the imputer is used to impute the missing values in
    569 the (testing) examples.
    570  
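For example (a sketch, assuming the attribute name given above)::

    import Orange

    data = Orange.data.Table("voting")
    logreg = Orange.classification.logreg.LogRegLearner()
    logreg.imputerConstructor = \
        Orange.feature.imputation.ImputerConstructor_average()
    classifier = logreg(data)   # learning data is imputed before induction
    print classifier.imputer    # the trained imputer travels with the classifier
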
    571 Although details may vary from algorithm to algorithm, this is how the 
    572 imputation is generally used in Orange's learners. Also, if you write your own 
    573 learners, it is recommended that you use imputation according to the described 
    574 procedure. 
    575  
    576 Write your own imputer 
    577 ====================== 
    578  
    579 Imputation classes provide the Python-callback functionality (not all Orange
    580 classes do so; refer to the documentation on `subtyping the Orange classes
    581 in Python <callbacks.htm>`_ for a list). If you want to write your own
    582 imputation constructor or imputer, you simply need to program a Python
    583 function that behaves like the built-in Orange classes (even less is needed
    584 for an imputer: you only need to write a function that gets an example as an
    585 argument; imputation of example tables will then use that function).
    586  
    587 You will most often write an imputation constructor when you have a special
    588 imputation procedure or separate procedures for various attributes, as we
    589 demonstrated in the description of
    590 :obj:`Orange.feature.imputation.ImputerConstructor_model`. You basically only
    591 need to pack everything we've written there into an imputer constructor that
    592 accepts a data set and the id of the weight meta-attribute (ignore it if
    593 you will, but you must accept two arguments) and returns the imputer (probably
    594 an :obj:`Orange.feature.imputation.Imputer_model`). The benefit of implementing
    595 an imputer constructor, as opposed to what we did above, is that you can use
    596 such a constructor as a component for Orange learners (like logistic
    597 regression) or for wrappers from the module orngImpute, and in that way use it
    598 properly in classifier testing procedures.
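
A minimal sketch of such a constructor (the function name is hypothetical; any
callable that accepts the two arguments and returns an imputer will do)::

    def minimal_imputer_constructor(data, weight_id=0):
        # ignore the weight and delegate to a built-in constructor
        return Orange.feature.imputation.ImputerConstructor_minimal(data)

    logreg = Orange.classification.logreg.LogRegLearner()
    logreg.imputerConstructor = minimal_imputer_constructor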
    599  
    600 """ 
    601  
    6021import Orange.core as orange 
    6032from orange import ImputerConstructor_minimal  
  • Orange/feature/scoring.py

    r9671 r9813  
    206206    Assesses features' ability to distinguish between very similar 
    207207    instances from different classes. This scoring method was first 
    208     developed by Kira and Rendell and then improved by Kononenko. The 
     208    developed by Kira and Rendell and then improved by  Kononenko. The 
    209209    class :obj:`Relief` works on discrete and continuous classes and 
    210210    thus implements ReliefF and RReliefF. 
  • Orange/feature/selection.py

    r9676 r9878  
    290290 
    291291        if data is None: 
    292             self = object.__new__(cls, measure=measure, threshold=threshold) 
     292            self = object.__new__(cls) 
    293293            return self 
    294294        else: 
     
    332332 
    333333        if data is None: 
    334             self = object.__new__(cls, measure=measure, n=n) 
     334            self = object.__new__(cls) 
    335335            return self 
    336336        else: 
     
    368368 
    369369        if data is None: 
    370             self = object.__new__(cls, measure=measure, margin=margin) 
     370            self = object.__new__(cls) 
    371371            return self 
    372372        else: 
     
    414414 
    415415        if data is None: 
    416             self = object.__new__(cls, baseLearner, filter=filter, name=name) 
     416            self = object.__new__(cls) 
    417417            return self 
    418418        else: 
  • Orange/fixes/fix_changed_names.py

    r9722 r9860  
    2020""" 
    2121from lib2to3 import fixer_base 
    22 from lib2to3 import fixer_util 
    2322from lib2to3 import pytree 
    24 from lib2to3.fixer_util import Name, Dot, Node, attr_chain, touch_import 
     23from lib2to3.fixer_util import Name, Dot, touch_import 
    2524 
    2625# Keys must be in the form of 'orange.name' not name or orange.bla.name  
     
    7675           "orange.MeasureAttribute_MSE": "Orange.feature.scoring.MSE", 
    7776 
     77           "orange.EntropyDiscretization": "Orange.feature.discretization.Entropy", 
     78           "orange.EquiDistDiscretization": "Orange.feature.discretization.EqualWidth", 
     79           "orange.EquiNDiscretization": "Orange.feature.discretization.EqualFreq", 
     80           "orange.BiModalDiscretization": "Orange.feature.discretization.BiModal", 
     81 
    7882           "orngFSS.attMeasure": "Orange.feature.scoring.score_all", 
    7983           "orngFSS.bestNAtts": "Orange.feature.selection.best_n", 
     
    102106           "orange.ImputerConstructor_average": "Orange.feature.imputation.ImputerConstructor_average", 
    103107 
    104            "orange.ExampleDistance": "Orange.distance.Distance", 
     108           "orange.ExamplesDistance": "Orange.distance.Distance", 
    105109           "orange.ExamplesDistance_Normalized": "Orange.distance.DistanceNormalized", 
    106110           "orange.ExamplesDistanceConstructor": "Orange.distance.DistanceConstructor", 
     
    179183           "orange.GraphAsTree":"Orange.network.GraphAsTree", 
    180184 
    181            "orngEnsemble.MeasureAttribute_randomForests":"Orange.ensemble.forest.ScoreFeature", 
    182  
    183185           "orange.TreeLearner": "Orange.classification.tree.TreeLearner", 
    184186           "orange.TreeClassifier": "Orange.classification.tree.TreeClassifier", 
     
    186188           "orange.C45Classifier": "Orange.classification.tree.C45Classifier", 
    187189           "orange.C45TreeNode": "Orange.classification.tree.C45Node", 
    188            "orange.C45TreeNodeList": "C45NodeList", 
     190           "orange.C45TreeNodeList": "Orange.classification.tree.C45NodeList", 
    189191           "orange.TreeDescender": "Orange.classification.tree.Descender", 
    190192           "orange.TreeDescender_UnknownMergeAsBranchSizes": "Orange.classification.tree.Descender_UnknownMergeAsBranchSizes", 
     
    276278#           "orngCN2.median": "Orange.classification.rules.median", 
    277279#           "orngCN2.perc": "Orange.classification.rules.perc", 
    278            "orngCN2.createRandomDataSet": "Orange.classification.rules.createRandomDataSet", 
    279            "orngCN2.compParameters": "Orange.classification.rules.compParameters", 
    280            "orngCN2.computeDists": "Orange.classification.rules.computeDists", 
    281            "orngCN2.createEVDistList": "Orange.classification.rules.createEVDistList", 
     280#           "orngCN2.createRandomDataSet": "Orange.classification.rules.createRandomDataSet", 
     281#           "orngCN2.compParameters": "Orange.classification.rules.compParameters", 
     282#           "orngCN2.computeDists": "Orange.classification.rules.computeDists", 
     283#           "orngCN2.createEVDistList": "Orange.classification.rules.createEVDistList", 
    282284           "orngCN2.CovererAndRemover_Prob": "Orange.classification.rules.CovererAndRemover_Prob", 
    283285           "orngCN2.add_sub_rules": "Orange.classification.rules.add_sub_rules", 
     
    396398           "orngMisc.selectBestIndex":"Orange.misc.selection.select_best_index", 
    397399           "orngMisc.compare2_firstBigger":"Orange.misc.selection.compare_first_bigger", 
    398            "orngMisc.compare2_firstBigger":"Orange.misc.selection.compare_first_bigger", 
    399400           "orngMisc.compare2_firstSmaller":"Orange.misc.selection.compare_first_smaller", 
    400401           "orngMisc.compare2_lastBigger":"Orange.misc.selection.compare_last_bigger", 
     
    485486           "orange.IntervalDiscretizer": "Orange.feature.discretization.IntervalDiscretizer", 
    486487           "orange.ThresholdDiscretizer": "Orange.feature.discretization.ThresholdDiscretizer", 
    487            "orange.EntropyDiscretization": "Orange.feature.discretization.EntropyDiscretization", 
    488488           "orange.Discrete2Continuous": "Orange.feature.discretization.Discrete2Continuous", 
    489489 
     
    519519           "orngDisc.EntropyDiscretization": "Orange.feature.discretization.EntropyDiscretization_wrapper", 
    520520 
    521            "orange.ProbabilityEstimator": "Orange.statistics.estimate.ProbabilityEstimator", 
    522            "orange.ProbabilityEstimator_FromDistribution": "Orange.statistics.estimate.ProbabilityEstimator_FromDistribution", 
    523            "orange.ProbabilityEstimatorConstructor": "Orange.statistics.estimate.ProbabilityEstimatorConstructor", 
    524            "orange.ProbabilityEstimatorConstructor_Laplace": "Orange.statistics.estimate.ProbabilityEstimatorConstructor_Laplace", 
    525            "orange.ProbabilityEstimatorConstructor_kernel": "Orange.statistics.estimate.ProbabilityEstimatorConstructor_kernel", 
    526            "orange.ProbabilityEstimatorConstructor_loess": "Orange.statistics.estimate.ProbabilityEstimatorConstructor_loess", 
    527            "orange.ProbabilityEstimatorConstructor_m": "Orange.statistics.estimate.ProbabilityEstimatorConstructor_m", 
    528            "orange.ProbabilityEstimatorConstructor_relative": "Orange.statistics.estimate.ProbabilityEstimatorConstructor_relative", 
    529521           "orange.ProbabilityEstimatorList": "Orange.statistics.estimate.ProbabilityEstimatorList", 
    530522 
     
    585577           "orange.ProbabilityEstimatorConstructor_m": "Orange.statistics.estimate.M", 
    586578           "orange.ProbabilityEstimatorConstructor_relative": "Orange.statistics.estimate.RelativeFrequency", 
    587            "orange.onditionalProbabilityEstimator": "Orange.statistics.estimate.ConditionalEstimator", 
     579           "orange.ConditionalProbabilityEstimator": "Orange.statistics.estimate.ConditionalEstimator", 
    588580           "orange.ConditionalProbabilityEstimator_FromDistribution": "Orange.statistics.estimate.ConditionalEstimatorFromDistribution", 
    589581           "orange.ConditionalProbabilityEstimator_ByRows": "Orange.statistics.estimate.ConditionalEstimatorByRows", 
     
    594586 
    595587           } 
    596  
    597 for k, v in MAPPING.items(): 
    598     if k.startswith('orngTest.'): 
    599         orig = k.split('.')[-1] 
    600         new = v.split('.')[-1] 
    601         if orig != new: 
    602             MAPPING['Orange.evaluation.testing.' + orig] = 'Orange.evaluation.testing.' + new 
    603  
    604 for k, v in MAPPING.items(): 
    605     if k.startswith('orngStat.'): 
    606         orig = k.split('.')[-1] 
    607         new = v.split('.')[-1] 
    608         if orig != new: 
    609             MAPPING['Orange.evaluation.scoring.' + orig] = 'Orange.evaluation.scoring.' + new 
    610588 
    611589def build_pattern(mapping=MAPPING): 
     
    636614        """ 
    637615        path = package.split('.') 
    638         nodes = [] 
    639         nodes.append(Name(path[0])) 
     616        nodes = [Name(path[0])] 
    640617        for name in path[1:]: 
    641618            new = pytree.Node(self.syms.trailer, [Dot(), Name(name)]) 
  • Orange/fixes/fix_orange_imports.py

    r9671 r9818  
    4242           "orngSOM": "Orange.projection.som", 
    4343           "orngBayes":"Orange.classification.bayes", 
     44           "orngLR":"Orange.classification.logreg", 
    4445           "orngNetwork":"Orange.network", 
    4546           "orngMisc":"Orange.misc", 
  • Orange/misc/counters.py

    r9671 r9871  
    66.. index:: misc 
    77.. index:: 
    8    single: misc; counters 
     8     single: misc; counters 
     9 
     10:class:`Orange.misc.counters` contains a bunch of classes that generate sequences of various kinds. 
     11 
    912""" 
    1013 
    1114class BooleanCounter: 
    12   def __init__(self, bits): 
    13     self.bits = bits 
    14     self.state = None 
    15  
    16   def __iter__(self): 
    17     if self.state: 
    18         return self 
    19     else: 
    20         return BooleanCounter(self.bits) 
    21      
    22   def next(self): 
    23     if self.state: 
    24       for bit in range(self.bits-1, -1, -1): 
    25         self.state[bit] = (self.state[bit]+1) % 2 
    26         if self.state[bit]: 
    27           break 
    28       else: 
     15    """ 
     16    A boolean counter. The constructor is given the number of bits, and during
     17    iteration the counter returns lists of that length consisting of 0's and 1's.
     18 
     19    One way to use the counter is within a for-loop: 
     20 
     21    >>> for r in Orange.misc.counters.BooleanCounter(3): 
     22    ...    print r 
     23    [0, 0, 0] 
     24    [0, 0, 1] 
     25    [0, 1, 0] 
     26    [0, 1, 1] 
     27    [1, 0, 0] 
     28    [1, 0, 1] 
     29    [1, 1, 0] 
     30    [1, 1, 1] 
     31 
     32    You can also call it manually. 
     33 
     34    >>> r = Orange.misc.counters.BooleanCounter(3) 
     35    >>> r.next() 
     36    [0, 0, 0] 
     37    >>> r.next() 
     38    [0, 0, 1] 
     39    >>> r.next() 
     40    [0, 1, 0] 
     41 
     42    .. attribute:: state 
     43     
     44        The current counter state (the last result of a call to next) is also stored in this attribute.
     45 
     46    """ 
     47     
     48    def __init__(self, bits): 
     49        """ 
     50            :param bits: Number of bits. 
     51            :type bits: int 
     52        """ 
     53        self.bits = bits 
    2954        self.state = None 
    30     else: 
    31       self.state = [0]*self.bits 
    32  
    33     if not self.state: 
    34         raise StopIteration, "BooleanCounter: counting finished" 
    35  
    36     return self.state 
    37  
     55 
     56    def __iter__(self): 
     57        if self.state: 
     58            return self 
     59        else: 
     60            return BooleanCounter(self.bits) 
     61         
     62    def next(self): 
     63        """Return the next state of the counter.""" 
     64        if self.state: 
     65            for bit in range(self.bits-1, -1, -1): 
     66                self.state[bit] = (self.state[bit]+1) % 2 
     67                if self.state[bit]: 
     68                    break 
     69            else: 
     70                self.state = None 
     71        else: 
     72            self.state = [0]*self.bits 
     73        if not self.state: 
     74            raise StopIteration, "BooleanCounter: counting finished" 
     75        return self.state 
    3876 
    3977class LimitedCounter: 
    40   def __init__(self, limits): 
    41     self.limits = limits 
    42     self.state = None 
    43      
    44   def __iter__(self): 
    45     if self.state: 
    46         return self 
    47     else: 
    48         return LimitedCounter(self.limits) 
    49  
    50   def next(self): 
    51     if self.state: 
    52       i = len(self.limits)-1 
    53       while (i>=0) and (self.state[i]==self.limits[i]-1): 
    54         self.state[i] = 0 
    55         i -= 1 
    56       if i==-1: 
     78    """ 
     79    This class is similar to :class:`~Orange.misc.counters.BooleanCounter` except that the digits do not count 
     80    from 0 to 1, but to the limits that are specified individually for each digit. 
     81 
     82    >>> for t in Orange.misc.counters.LimitedCounter([3, 5, 2]): 
     83    ...     print t 
     84    [0, 0, 0] 
     85    [0, 0, 1] 
     86    [0, 1, 0] 
     87    [0, 1, 1] 
     88    [0, 2, 0] 
     89    [0, 2, 1] 
     90    [0, 3, 0] 
     91    [0, 3, 1] 
     92    [0, 4, 0] 
     93    [0, 4, 1] 
     94    [1, 0, 0] 
     95    [1, 0, 1] 
     96    [1, 1, 0] 
     97    [1, 1, 1] 
     98 
     99    .. attribute:: state 
     100 
     101        The current counter state (the last result of a call to next) is also stored in this attribute.
     102    """ 
     103     
     104    def __init__(self, limits): 
     105        """ 
     106            :param limits: Upper limit (exclusive) for each digit position.
     107            :type limits: list 
     108        """ 
     109        self.limits = limits 
    57110        self.state = None 
    58       else: 
    59         self.state[i] += 1 
    60     else: 
    61       self.state = [0]*len(self.limits) 
    62    
    63     if not self.state: 
    64       raise StopIteration, "LimitedCounter: counting finished" 
    65  
    66     return self.state 
    67  
     111         
     112    def __iter__(self): 
     113        if self.state: 
     114            return self 
     115        else: 
     116            return LimitedCounter(self.limits) 
     117 
     118    def next(self): 
     119        """Return the next state of the counter.""" 
     120        if self.state: 
     121            i = len(self.limits)-1 
     122            while (i>=0) and (self.state[i]==self.limits[i]-1): 
     123                self.state[i] = 0 
     124                i -= 1 
     125            if i==-1: 
     126                self.state = None 
     127            else: 
     128                self.state[i] += 1 
     129        else: 
     130            self.state = [0]*len(self.limits) 
     131     
     132        if not self.state: 
     133            raise StopIteration, "LimitedCounter: counting finished" 
     134 
     135        return self.state 
    68136 
    69137class MofNCounter: 
     138    """ 
     139    The counter returns all consecutive sublists of length ``m`` of the sequence 0, 1, ..., ``n``-1, where ``m`` <= ``n``.
     140 
     141    >>> for t in Orange.misc.counters.MofNCounter(3,7): 
     142    ...     print t 
     143    ... 
     144    [0, 1, 2] 
     145    [1, 2, 3] 
     146    [2, 3, 4] 
     147    [3, 4, 5] 
     148    [4, 5, 6] 
     149 
     150    .. attribute:: state 
     151 
     152        The current counter state (the last result of a call to next) is also stored in this attribute.
     153    """ 
     154     
    70155    def __init__(self, m, n): 
     156        """ 
     157        :param m: Length of subset list. 
     158        :type m: int 
     159 
     160        :param n: Total length. 
     161        :type n: int 
     162        """ 
    71163        if m > n: 
    72164            raise TypeError, "Number of selected items exceeds the number of items" 
    73          
     165                 
    74166        self.state = None 
    75167        self.m = m 
    76168        self.n = n 
    77          
     169                 
    78170    def __iter__(self): 
    79171        if self.state: 
     
    81173        else: 
    82174            return MofNCounter(self.m, self.n) 
    83          
    84     def next(self): 
     175                 
     176    def next(self): 
     177        """Return the next state of the counter.""" 
    85178        if self.state: 
    86179            m, n, state = self.m, self.n, self.state 
     
    90183                    for place in range(place+1, m): 
    91184                        state[place] = state[place-1] + 1 
    92                     break 
     185                        break 
     186                else: 
     187                    self.state = None 
     188                    raise StopIteration, "MofNCounter: counting finished" 
     189        else: 
     190            self.state = range(self.m) 
     191        return self.state[:] 
     192                          
     193class NondecreasingCounter: 
     194    """ 
     195    A nondecreasing counter generates all non-decreasing integer sequences in which no numbers are skipped,
     196    that is, if ``n`` is in the sequence, the sequence also includes all numbers between 0 and ``n``. For instance,
     197    [0, 0, 1, 0] is illegal since it decreases, and [0, 0, 2, 2] is illegal since it has 2 without having 1 first.
     198    Or, with an example:
     204 
     205    >>> for t in Orange.misc.counters.NondecreasingCounter(4): 
     206    ...     print t 
     207    ... 
     208    [0, 0, 0, 0] 
     209    [0, 0, 0, 1] 
     210    [0, 0, 1, 1] 
     211    [0, 0, 1, 2] 
     212    [0, 1, 1, 1] 
     213    [0, 1, 1, 2] 
     214    [0, 1, 2, 2] 
     215    [0, 1, 2, 3] 
     216 
     217    .. attribute:: state 
     218 
     219        The current counter state (the last result of a call to next) is also stored in this attribute.
     220    """ 
     221    def __init__(self, places): 
     222        """ 
     223            :param places: Number of places. 
     224            :type places: int 
     225        """ 
     226        self.state=None 
     227        self.subcounter=None 
     228        self.places=places 
     229 
     230    def __iter__(self): 
     231        if self.state: 
     232            return self 
     233        else: 
     234            return NondecreasingCounter(self.places) 
     235 
     236    def next(self): 
     237        """Return the next state of the counter.""" 
     238        if not self.subcounter: 
     239            self.subcounter=BooleanCounter(self.places-1) 
     240        if self.subcounter.next(): 
     241            self.state=[0] 
     242            for add_one in self.subcounter.state: 
     243                self.state.append(self.state[-1]+add_one) 
     244        else: 
     245            self.state=None 
     246        if not self.state: 
     247            raise StopIteration, "NondecreasingCounter: counting finished" 
     248        return self.state 
     249 
     250 
     251class CanonicFuncCounter: 
     252    """ 
     253    Returns all sequences of a given length in which no numbers are skipped (see below) and no two of
     254    the generated sequences are equal up to a renaming of the labels. For instance, [0, 2, 2, 1]
     255    and [1, 0, 0, 2] are considered equivalent: if we take the former and replace 0 by 1, 2
     256    by 0 and 1 by 2, we get the second list.
     257
     258    The generated sequences correspond to all possible functions from a set whose cardinality equals the sequence length, up to a renaming of the functions' values.
     259 
     260    >>> for t in Orange.misc.counters.CanonicFuncCounter(4): 
     261    ...     print t 
     262    ... 
     263    [0, 0, 0, 0] 
     264    [0, 0, 0, 1] 
     265    [0, 0, 1, 0] 
     266    [0, 0, 1, 1] 
     267    [0, 0, 1, 2] 
     268    [0, 1, 0, 0] 
     269    [0, 1, 0, 1] 
     270    [0, 1, 0, 2] 
     271    [0, 1, 1, 0] 
     272    [0, 1, 1, 1] 
     273    [0, 1, 1, 2] 
     274    [0, 1, 2, 0] 
     275    [0, 1, 2, 1] 
     276    [0, 1, 2, 2] 
     277    [0, 1, 2, 3] 
     278 
     279    .. attribute:: state 
     280 
     281        The current counter state (the last result of a call to next) is also stored in this attribute.
     282    """ 
     283    def __init__(self, places): 
     284        """ 
     285            :param places: Number of places. 
     286            :type places: int 
     287        """ 
     288        self.places = places 
     289        self.state = None 
     290 
     291    def __iter__(self): 
     292        if self.state: 
     293            return self 
     294        else: 
     295            return CanonicFuncCounter(self.places) 
     296 
     297    def next(self): 
     298        """Return the next state of the counter.""" 
     299        if self.state: 
     300            i = self.places-1 
     301            while (i>0) and (self.state[i]==max(self.state[:i])+1): 
     302                self.state[i] = 0 
     303                i -= 1 
     304            if i: 
     305                self.state[i] += 1 
    93306            else: 
    94                 self.state = None 
    95                 raise StopIteration, "MofNCounter: counting finished" 
    96         else: 
    97             self.state = range(self.m) 
    98              
    99         return self.state[:] 
    100               
    101 class NondecreasingCounter: 
    102   def __init__(self, places): 
    103     self.state=None 
    104     self.subcounter=None 
    105     self.places=places 
    106  
    107   def __iter__(self): 
    108     if self.state: 
    109         return self 
    110     else: 
    111         return NondecreasingCounter(self.places) 
    112  
    113   def next(self): 
    114     if not self.subcounter: 
    115       self.subcounter=BooleanCounter(self.places-1) 
    116     if self.subcounter.next(): 
    117       self.state=[0] 
    118       for add_one in self.subcounter.state: 
    119         self.state.append(self.state[-1]+add_one) 
    120     else: 
    121       self.state=None 
    122    
    123     if not self.state: 
    124       raise StopIteration, "NondecreasingCounter: counting finished" 
    125  
    126     return self.state 
    127  
    128  
    129 class CanonicFuncCounter: 
    130   def __init__(self, places): 
    131     self.places = places 
    132     self.state = None 
    133  
    134   def __iter__(self): 
    135     if self.state: 
    136         return self 
    137     else: 
    138         return CanonicFuncCounter(self.places) 
    139  
    140   def next(self): 
    141     if self.state: 
    142       i = self.places-1 
    143       while (i>0) and (self.state[i]==max(self.state[:i])+1): 
    144         self.state[i] = 0 
    145         i -= 1 
    146       if i: 
    147         self.state[i] += 1 
    148       else: 
    149         self.state=None 
    150     else: 
    151       self.state = [0]*self.places 
    152  
    153     if not self.state: 
    154       raise StopIteration, "CanonicFuncCounter: counting finished" 
    155      
    156     return self.state 
     307                self.state=None 
     308        else: 
     309            self.state = [0]*self.places 
     310        if not self.state: 
     311            raise StopIteration, "CanonicFuncCounter: counting finished" 
     312        return self.state 
  • Orange/orng/orngCA.py

    r9671 r9817  
     1# This has to be seriously outdated, as it uses matrixmultiply, which is not 
     2# present in numpy since, like, 2006.     Matija Polajnar, 2012 a.d. 
     3 
    14""" 
    25Correspondence analysis is a descriptive/exploratory technique designed to analyze simple two-way and  
  • Orange/orng/orngLR.py

    r9671 r9861  
    66    UnivariateLogRegLearner as Univariate_LogRegLearner,\ 
    77    UnivariateLogRegLearner as Univariate_LogRegLearner_Class,\ 
    8     UnivariateLogRegClassifier as UnivariateLogRegClassifier,\ 
     8    UnivariateLogRegClassifier as Univariate_LogRegClassifier,\ 
    99    LogRegLearnerGetPriors as LogRegLearner_getPriors,\ 
    1010    LogRegLearnerGetPriors as LogRegLearnerClass_getPriors,\ 
  • Orange/orng/orngMCPrediction.py

    r9671 r9878  
    55class MultiClassPredictionLearner(object): 
    66    def __new__(self, data=None, name='PLS regression', **kwds): 
    7         learner = object.__new__(self, **kwds) 
     7        learner = object.__new__(self) 
    88        if data: 
    99            learner.__init__(name) # force init 
  • Orange/orng/orngMultivariatePrediction.py

    r9671 r9878  
    55    """(self, data, y, x=None)""" 
    66    def __new__(self, data=None, name='multivar pred', **kwds): 
    7         learner = object.__new__(self, **kwds) 
     7        learner = object.__new__(self) 
    88        if data: 
    99            learner.__init__(name) # force init 
  • Orange/orng/orngRegression.py

    r9671 r9878  
    1111class LinearRegressionLearner(object): 
    1212    def __new__(self, data=None, name='linear regression', **kwds): 
    13         learner = object.__new__(self, **kwds) 
     13        learner = object.__new__(self) 
    1414        if data: 
    1515            learner.__init__(name,**kwds) # force init 
     
    236236    """PLSRegressionLearner(data, y, x=None, nc=None)""" 
    237237    def __new__(self, data=None, **kwds): 
    238         learner = object.__new__(self, **kwds) 
     238        learner = object.__new__(self) 
    239239        if data: 
    240240            learner.__init__(**kwds) # force init 
  • Orange/projection/linear.py

    r9725 r9880  
    7171from numpy.linalg import inv, pinv, eig      # matrix inverse and eigenvectors 
    7272from Orange.preprocess.scaling import ScaleLinProjData 
    73 from Orange.misc import visfuncts 
     73from Orange.orng import orngVisFuncts as visfuncts 
    7474from Orange.misc import deprecated_keywords 
    7575from Orange.misc import deprecated_members 
  • Orange/testing/regression/results_modules/logreg2.py.txt

    r9689 r9876  
    1818            marital-status=Separated       3.46       0.00        inf       0.00      31.95 
    1919              marital-status=Widowed       3.85       0.00        inf       0.00      46.96 
    20 marital-status=Married-spouse-absent       3.98       0.00        inf       0.00      53.63 
     20marital-status=Married-spouse-absent       3.98        nan        nan        nan      53.63 
    2121    marital-status=Married-AF-spouse       4.01       0.00        inf       0.00      55.19 
    2222             occupation=Tech-support      -0.32       0.00       -inf       0.00       0.72 
     
    2424            occupation=Other-service       2.68       0.00        inf       0.00      14.61 
    2525                    occupation=Sales       0.22       0.00        inf       0.00       1.24 
    26            occupation=Prof-specialty       0.18       0.00        inf       0.00       1.19 
     26           occupation=Prof-specialty       0.18        nan        nan        nan       1.19 
    2727        occupation=Handlers-cleaners       1.29       0.00        inf       0.00       3.64 
    2828        occupation=Machine-op-inspct       0.86       0.00        inf       0.00       2.37 
    29              occupation=Adm-clerical       0.30       0.00        inf       0.00       1.35 
     29             occupation=Adm-clerical       0.30        nan        nan        nan       1.35 
    3030          occupation=Farming-fishing       1.12       0.00        inf       0.00       3.06 
    3131         occupation=Transport-moving       0.62       0.00        inf       0.00       1.85 
    32           occupation=Priv-house-serv       3.46       0.00        inf       0.00      31.87 
     32          occupation=Priv-house-serv       3.46        nan        nan        nan      31.87 
    3333          occupation=Protective-serv       0.11       0.00        inf       0.00       1.12 
    3434             occupation=Armed-Forces       0.59       0.00        inf       0.00       1.81 
    35                    relationship=Wife      -1.06       0.00       -inf       0.00       0.35 
     35                   relationship=Wife      -1.06        nan        nan        nan       0.35 
    3636              relationship=Own-child      -1.04       0.00       -inf       0.00       0.35 
    3737          relationship=Not-in-family      -1.94       0.00       -inf       0.00       0.14 
    38          relationship=Other-relative      -2.42       0.00       -inf       0.00       0.09 
     38         relationship=Other-relative      -2.42        nan        nan        nan       0.09 
    3939              relationship=Unmarried      -1.92       0.00       -inf       0.00       0.15 
    4040             race=Asian-Pac-Islander      -0.19       0.00       -inf       0.00       0.83 
    41              race=Amer-Indian-Eskimo       2.88       0.00        inf       0.00      17.78 
     41             race=Amer-Indian-Eskimo       2.88        nan        nan        nan      17.78 
    4242                          race=Other       3.93       0.00        inf       0.00      51.07 
    4343                          race=Black       0.11       0.00        inf       0.00       1.12 
    44                           sex=Female       0.30       0.00        inf       0.00       1.36 
     44                          sex=Female       0.30        nan        nan        nan       1.36 
    4545                        capital-gain      -0.00       0.00       -inf       0.00       1.00 
    4646                        capital-loss      -0.00       0.00       -inf       0.00       1.00 
    47                       hours-per-week      -0.04       0.00       -inf       0.00       0.96 
     47                      hours-per-week      -0.04        nan        nan        nan       0.96 
  • Orange/testing/regression/results_modules/statExamples.py.txt

    r9790 r9825  
    11 
    22method  CA  AP  Brier   IS 
    3 bayes   0.903   0.902   0.175    0.759 
    4 tree    0.846   0.845   0.286    0.641 
     3bayes   0.903   0.902   0.176    0.758 
     4tree    0.825   0.824   0.326    0.599 
    55majrty  0.614   0.526   0.474   -0.000 
    66 
    77method  CA  AP  Brier   IS 
    8 bayes   0.903+-0.019    0.902+-0.019    0.175+-0.036     0.759+-0.039 
    9 tree    0.846+-0.016    0.845+-0.015    0.286+-0.030     0.641+-0.032 
     8bayes   0.903+-0.008    0.902+-0.008    0.176+-0.016     0.758+-0.017 
     9tree    0.825+-0.016    0.824+-0.016    0.326+-0.033     0.599+-0.034 
    1010majrty  0.614+-0.003    0.526+-0.001    0.474+-0.001    -0.000+-0.000 
    1111 
     
    1414 
    1515Confusion matrix for naive Bayes: 
    16 TP: 239, FP: 18, FN: 28.0, TN: 150 
     16TP: 240, FP: 18, FN: 27.0, TN: 150 
    1717 
    1818Confusion matrix for naive Bayes for 'van': 
    19 TP: 189, FP: 241, FN: 10.0, TN: 406 
     19TP: 192, FP: 151, FN: 7.0, TN: 496 
    2020 
    2121Confusion matrix for naive Bayes for 'opel': 
    22 TP: 86, FP: 112, FN: 126.0, TN: 522 
     22TP: 79, FP: 75, FN: 133.0, TN: 559 
    2323 
    2424    bus van saab    opel 
    25 bus 56  95  21  46 
    26 van 6   189 4   0 
    27 saab    3   75  73  66 
    28 opel    4   71  51  86 
     25bus 156 19  17  26 
     26van 4   192 2   1 
     27saab    8   68  93  48 
     28opel    8   64  61  79 
    2929 
    3030Sensitivity and specificity for 'voting' 
    3131method  sens    spec 
    3232bayes   0.891   0.923 
    33 tree    0.816   0.893 
     33tree    0.801   0.863 
    3434majrty  1.000   0.000 
    3535 
    3636Sensitivity and specificity for 'vehicle=van' 
    3737method  sens    spec 
    38 bayes   0.950   0.628 
    39 tree    0.809   0.966 
     38bayes   0.965   0.767 
     39tree    0.834   0.966 
    4040majrty  0.000   1.000 
    4141 
    4242AUC (voting) 
    4343     bayes: 0.974 
    44       tree: 0.930 
     44      tree: 0.926 
    4545    majrty: 0.500 
    4646 
    4747AUC for vehicle using weighted single-out method 
    4848bayes   tree    majority 
    49 0.783   0.800   0.500 
     490.840   0.816   0.500 
    5050 
    5151AUC for vehicle, using different methods 
    5252                            bayes   tree    majority 
    53        by pairs, weighted:  0.789   0.870   0.500 
    54                  by pairs:  0.791   0.871   0.500 
    55     one vs. all, weighted:  0.783   0.800   0.500 
    56               one vs. all:  0.783   0.800   0.500 
     53       by pairs, weighted:  0.861   0.883   0.500 
     54                 by pairs:  0.863   0.884   0.500 
     55    one vs. all, weighted:  0.840   0.816   0.500 
     56              one vs. all:  0.840   0.816   0.500 
    5757 
    5858AUC for detecting class 'van' in 'vehicle' 
    59 0.858   0.888   0.500 
     590.923   0.900   0.500 
    6060 
    6161AUCs for detecting various classes in 'vehicle' 
    62 bus (218.000) vs others:    0.894   0.932   0.500 
    63 van (199.000) vs others:    0.858   0.888   0.500 
    64 saab (217.000) vs others:   0.699   0.687   0.500 
    65 opel (212.000) vs others:   0.682   0.694   0.500 
     62bus (218.000) vs others:    0.952   0.936   0.500 
     63van (199.000) vs others:    0.923   0.900   0.500 
     64saab (217.000) vs others:   0.737   0.707   0.500 
     65opel (212.000) vs others:   0.749   0.718   0.500 
    6666 
    6767    bus van saab 
    68 van 0.933 
    69 saab    0.820   0.828 
    70 opel    0.822   0.825   0.519 
     68van 0.987 
     69saab    0.927   0.860 
     70opel    0.921   0.894   0.587 
    7171 
    7272AUCs for detecting various pairs of classes in 'vehicle' 
    73 van vs bus:     0.933   0.978   0.500 
    74 saab vs bus:    0.820   0.938   0.500 
    75 saab vs van:    0.828   0.879   0.500 
    76 opel vs bus:    0.822   0.932   0.500 
    77 opel vs van:    0.825   0.903   0.500 
    78 opel vs saab:   0.519   0.599   0.500 
     73van vs bus:     0.987   0.976   0.500 
     74saab vs bus:    0.927   0.936   0.500 
     75saab vs van:    0.860   0.906   0.500 
     76opel vs bus:    0.921   0.951   0.500 
     77opel vs van:    0.894   0.915   0.500 
     78opel vs saab:   0.587   0.622   0.500 
    7979 
    8080AUC and SE for voting 
    81 bayes: 0.968+-0.015 
    82 tree: 0.924+-0.022 
     81bayes: 0.982+-0.008 
     82tree: 0.888+-0.025 
    8383majrty: 0.500+-0.045 
    8484 
    85 Difference between naive Bayes and tree: 0.014+-0.062 
     85Difference between naive Bayes and tree: 0.065+-0.066 
    8686 
    8787ROC (first 20 points) for bayes on 'voting' 
    88881.000   1.000 
    89890.970   1.000 
     900.940   1.000 
    90910.910   1.000 
     920.896   1.000 
    91930.881   1.000 
     940.836   1.000 
    92950.821   1.000 
    93960.806   1.000 
    94 0.791   1.000 
    95970.761   1.000 
    96980.746   1.000 
     
    991010.687   1.000 
    1001020.672   1.000 
    101 0.672   0.991 
    102 0.657   0.991 
    103 0.642   0.991 
    104 0.552   0.991 
    105 0.537   0.991 
    106 0.522   0.991 
    107 0.507   0.991 
     1030.627   1.000 
     1040.612   1.000 
     1050.597   1.000 
     1060.582   1.000 
     1070.567   1.000 
  • Orange/testing/regression/results_modules/tree8.py.txt

    r9689 r9834  
    11m = 0.000: 239 nodes, 134 leaves 
    2 m = 0.000: 228 nodes, 128 leaves 
     2m = 0.000: 183 nodes, 104 leaves 
    33m = 0.100: 173 nodes, 99 leaves 
    44m = 0.500: 179 nodes, 102 leaves 
  • Orange/testing/regression/results_modules/tuning1.py.txt

    r9689 r9834  
    1 *** optimization  1: [0.97058829757682885]: 
    2 *** optimization  2: [0.97642948164590782]: 
    3 *** optimization  3: [0.98338801755375926]: 
    4 *** optimization  4: [0.98788177744892502]: 
    5 *** optimization  5: [0.98894238973780235]: 
    6 *** optimization  10: [0.98692031923626455]: 
    7 *** optimization  15: [0.98842239138206578]: 
    8 *** optimization  20: [0.97804067310171638]: 
     1*** optimization  1: [0.9706992853681718]: 
     2*** optimization  2: [0.9743207136103917]: 
     3*** optimization  3: [0.9833880175537593]: 
     4*** optimization  4: [0.987881777448925]: 
     5*** optimization  5: [0.9889423897378024]: 
     6*** optimization  10: [0.9869203192362646]: 
     7*** optimization  15: [0.9884223913820658]: 
     8*** optimization  20: [0.9780406731017164]: 
    99*** Optimal parameter: minSubset = 5 
    1010Optimal setting:  5 
    11 *** optimization  1: [0.98321908602150532]: 
    12 *** optimization  2: [0.97819892473118264]: 
     11*** optimization  1: [0.9832190860215053]: 
     12*** optimization  2: [0.9781989247311826]: 
    1313*** optimization  3: [0.9912679211469535]: 
    1414*** optimization  4: [0.9937656810035842]: 
    15 *** optimization  5: [0.99075044802867385]: 
     15*** optimization  5: [0.9907504480286738]: 
    1616*** optimization  10: [0.9872647849462366]: 
    17 *** optimization  15: [0.98976926523297493]: 
    18 *** optimization  20: [0.99105062724014337]: 
     17*** optimization  15: [0.9897692652329749]: 
     18*** optimization  20: [0.9910506272401434]: 
    1919*** Optimal parameter: minSubset = 4 
    20 *** optimization  1: [0.97296370967741941]: 
    21 *** optimization  2: [0.97278673835125462]: 
    22 *** optimization  3: [0.98086245519713267]: 
    23 *** optimization  4: [0.98209901433691749]: 
    24 *** optimization  5: [0.98543682795698928]: 
    25 *** optimization  10: [0.98856854838709673]: 
    26 *** optimization  15: [0.99162634408602157]: 
    27 *** optimization  20: [0.98600806451612899]: 
     20*** optimization  1: [0.9729637096774194]: 
     21*** optimization  2: [0.9727867383512546]: 
     22*** optimization  3: [0.9808624551971327]: 
     23*** optimization  4: [0.9820990143369175]: 
     24*** optimization  5: [0.9854368279569893]: 
     25*** optimization  10: [0.9885685483870967]: 
     26*** optimization  15: [0.9916263440860216]: 
     27*** optimization  20: [0.986008064516129]: 
    2828*** Optimal parameter: minSubset = 15 
    29 *** optimization  1: [0.98023073476702494]: 
    30 *** optimization  2: [0.98306899641577061]: 
    31 *** optimization  3: [0.98245295698924728]: 
     29*** optimization  1: [0.9802307347670249]: 
     30*** optimization  2: [0.9830689964157706]: 
     31*** optimization  3: [0.9824529569892473]: 
    3232*** optimization  4: [0.9896012544802868]: 
    33 *** optimization  5: [0.98472670250896055]: 
    34 *** optimization  10: [0.98965277777777783]: 
    35 *** optimization  15: [0.98743503584229386]: 
    36 *** optimization  20: [0.97437948028673826]: 
     33*** optimization  5: [0.9847267025089605]: 
     34*** optimization  10: [0.9896527777777778]: 
     35*** optimization  15: [0.9874350358422939]: 
     36*** optimization  20: [0.9743794802867383]: 
    3737*** Optimal parameter: minSubset = 10 
    38 *** optimization  1: [0.96825044802867388]: 
    39 *** optimization  2: [0.97539202508960576]: 
    40 *** optimization  3: [0.97483422939068098]: 
    41 *** optimization  4: [0.98026657706093201]: 
    42 *** optimization  5: [0.97956765232974918]: 
     38*** optimization  1: [0.9682504480286739]: 
     39*** optimization  2: [0.9763328853046596]: 
     40*** optimization  3: [0.974834229390681]: 
     41*** optimization  4: [0.980266577060932]: 
     42*** optimization  5: [0.9795676523297492]: 
    4343*** optimization  10: [0.9769332437275986]: 
    44 *** optimization  15: [0.97734543010752684]: 
     44*** optimization  15: [0.9773454301075268]: 
    4545*** optimization  20: [0.9740815412186381]: 
    4646*** Optimal parameter: minSubset = 4 
    47 *** optimization  1: [0.96364247311827955]: 
    48 *** optimization  2: [0.974209229390681]: 
    49 *** optimization  3: [0.97841621863799277]: 
    50 *** optimization  4: [0.98721102150537632]: 
    51 *** optimization  5: [0.98688396057347672]: 
    52 *** optimization  10: [0.98780689964157697]: 
    53 *** optimization  15: [0.98020833333333335]: 
    54 *** optimization  20: [0.97671370967741944]: 
     47*** optimization  1: [0.9640591397849462]: 
     48*** optimization  2: [0.9741397849462365]: 
     49*** optimization  3: [0.9783467741935483]: 
     50*** optimization  4: [0.9872110215053763]: 
     51*** optimization  5: [0.9868839605734767]: 
     52*** optimization  10: [0.987806899641577]: 
     53*** optimization  15: [0.9802083333333333]: 
     54*** optimization  20: [0.9767137096774194]: 
    5555*** Optimal parameter: minSubset = 10 
    56 *** optimization  1: [0.97435707885304657]: 
    57 *** optimization  2: [0.97433705471435883]: 
    58 *** optimization  3: [0.97782107197717805]: 
    59 *** optimization  4: [0.97559295223465736]: 
    60 *** optimization  5: [0.98345644978421476]: 
    61 *** optimization  10: [0.98451827774120404]: 
    62 *** optimization  15: [0.98160682283666145]: 
     56*** optimization  1: [0.9743570788530466]: 
     57*** optimization  2: [0.9743370547143588]: 
     58*** optimization  3: [0.977821071977178]: 
     59*** optimization  4: [0.9755929522346574]: 
     60*** optimization  5: [0.9834564497842148]: 
     61*** optimization  10: [0.984518277741204]: 
     62*** optimization  15: [0.9816068228366615]: 
    6363*** optimization  20: [0.9802781892326824]: 
    6464*** Optimal parameter: minSubset = 10 
    6565*** optimization  1: [0.9764701740911419]: 
    66 *** optimization  2: [0.98519868700168234]: 
    67 *** optimization  3: [0.98775030173359668]: 
    68 *** optimization  4: [0.98942574610489364]: 
    69 *** optimization  5: [0.98909228476336764]: 
    70 *** optimization  10: [0.98262768817204293]: 
    71 *** optimization  15: [0.98151337685611872]: 
    72 *** optimization  20: [0.98251312083973374]: 
     66*** optimization  2: [0.9851986870016823]: 
     67*** optimization  3: [0.9877503017335967]: 
     68*** optimization  4: [0.9894257461048936]: 
     69*** optimization  5: [0.9890922847633676]: 
     70*** optimization  10: [0.9826276881720429]: 
     71*** optimization  15: [0.9815133768561187]: 
     72*** optimization  20: [0.9825131208397337]: 
    7373*** Optimal parameter: minSubset = 4 
    74 *** optimization  1: [0.98090168056469895]: 
    75 *** optimization  2: [0.99166556945358786]: 
    76 *** optimization  3: [0.98834444261575594]: 
    77 *** optimization  4: [0.98837159863945589]: 
    78 *** optimization  5: [0.99087895911052581]: 
    79 *** optimization  10: [0.98963769109794453]: 
    80 *** optimization  15: [0.98902969790066564]: 
     74*** optimization  1: [0.980901680564699]: 
     75*** optimization  2: [0.9916655694535879]: 
     76*** optimization  3: [0.9883444426157559]: 
     77*** optimization  4: [0.9884388029405311]: 
     78*** optimization  5: [0.9908789591105258]: 
     79*** optimization  10: [0.9896376910979445]: 
     80*** optimization  15: [0.9890296979006656]: 
    8181*** optimization  20: [0.9841350760734402]: 
    8282*** Optimal parameter: minSubset = 2 
    83 *** optimization  1: [0.97177195340501799]: 
    84 *** optimization  2: [0.97940412186379944]: 
    85 *** optimization  3: [0.98024641577060934]: 
    86 *** optimization  4: [0.98027105734767028]: 
    87 *** optimization  5: [0.98310707885304649]: 
    88 *** optimization  10: [0.98413978494623655]: 
    89 *** optimization  15: [0.98651209677419371]: 
    90 *** optimization  20: [0.97995519713261658]: 
     83*** optimization  1: [0.971771953405018]: 
     84*** optimization  2: [0.9794041218637994]: 
     85*** optimization  3: [0.9802464157706093]: 
     86*** optimization  4: [0.9802710573476703]: 
     87*** optimization  5: [0.9831070788530465]: 
     88*** optimization  10: [0.9841397849462366]: 
     89*** optimization  15: [0.9865120967741937]: 
     90*** optimization  20: [0.9799551971326166]: 
    9191*** Optimal parameter: minSubset = 15 
    92 *** optimization  1: [0.97084229390681009]: 
    93 *** optimization  2: [0.97555107526881724]: 
    94 *** optimization  3: [0.98384184587813628]: 
    95 *** optimization  4: [0.97722222222222244]: 
    96 *** optimization  5: [0.98428539426523298]: 
     92*** optimization  1: [0.9708422939068101]: 
     93*** optimization  2: [0.9755510752688172]: 
     94*** optimization  3: [0.9838418458781363]: 
     95*** optimization  4: [0.9772222222222224]: 
     96*** optimization  5: [0.984285394265233]: 
    9797*** optimization  10: [0.989247311827957]: 
    9898*** optimization  15: [0.987780017921147]: 
    99 *** optimization  20: [0.98074148745519696]: 
     99*** optimization  20: [0.980741487455197]: 
    100100*** Optimal parameter: minSubset = 10 
    101 Untuned tree: 0.925 
     101Untuned tree: 0.926 
    102102Tuned tree: 0.983 
    103 *** optimization  1: [0.97058829757682885]: 
    104 *** optimization  2: [0.97642948164590782]: 
    105 *** optimization  3: [0.98338801755375926]: 
    106 *** optimization  4: [0.98788177744892502]: 
    107 *** optimization  5: [0.98894238973780235]: 
    108 *** optimization  10: [0.98692031923626455]: 
    109 *** optimization  15: [0.98842239138206578]: 
    110 *** optimization  20: [0.97804067310171638]: 
     103*** optimization  1: [0.9706992853681718]: 
     104*** optimization  2: [0.9743207136103917]: 
     105*** optimization  3: [0.9833880175537593]: 
     106*** optimization  4: [0.987881777448925]: 
     107*** optimization  5: [0.9889423897378024]: 
     108*** optimization  10: [0.9869203192362646]: 
     109*** optimization  15: [0.9884223913820658]: 
     110*** optimization  20: [0.9780406731017164]: 
    111111*** Optimal parameter: ['split.continuousSplitConstructor.minSubset', 'split.discreteSplitConstructor.minSubset'] = 5 
    112112Optimal setting:  5.0 
  • Orange/testing/regression/results_ofb/bagging_test.py.txt

    r9689 r9843  
    1 tree: 0.796 
    2 bagged classifier: 0.799 
     1tree: 0.795 
     2bagged classifier: 0.802 
  • Orange/testing/regression/results_ofb/data_characteristics4.py.txt

    r9689 r9844  
    11Average values and mean square errors: 
    22age, mean=38.91 +-  0.44 
    3 fnlwgt, mean=187280.29 +- 3464.37 
     3fnlwgt, mean=187280.30 +- 3464.37 
    44education-num, mean= 9.98 +-  0.08 
    55capital-gain, mean=1041.83 +- 218.78 
  • Orange/testing/regression/results_orange25/simple_tree_random_forest.py.txt

    r9802 r9868  
    44 
    55Runtimes: 
    6 for_gain 0.0934960842133 
    7 for_simp 0.0251078605652 
     6for_gain 0.0942988395691 
     7for_simp 0.0253610610962 
  • Orange/testing/regression/results_orange25/svm-linear-weights.py.txt

    r9804 r9870  
    1 defaultdict(<type 'float'>, {FloatVariable 'Elu 30': 0.4786304184632838, FloatVariable 'spo 0': 0.13024372486415015, FloatVariable 'Elu 60': 0.36698713537474476, FloatVariable 'spo 2': 0.7078062232168753, FloatVariable 'Elu 90': 0.3834942300173535, FloatVariable 'spo 5': 1.0683498605674933, FloatVariable 'alpha 7': 0.06557659077448137, FloatVariable 'Elu 120': 0.5939764872379445, FloatVariable 'spo 7': 0.8081568079276176, FloatVariable 'diau d': 0.44932601596409105, FloatVariable 'Elu 150': 0.5965599387054419, FloatVariable 'alpha 119': 0.1699365258012951, FloatVariable 'spo 9': 0.2498282401589412, FloatVariable 'Elu 180': 0.42425268429856355, FloatVariable 'spo 11': 0.20615575833508282, FloatVariable 'alpha 70': 0.26459268873021585, FloatVariable 'Elu 210': 0.12396520046361383, FloatVariable 'spo5 2': 0.40417556232809326, FloatVariable 'Elu 240': 0.2093390824178926, FloatVariable 'spo5 7': 0.26780459416067937, FloatVariable 'Elu 270': 0.33471969574325466, FloatVariable 'alpha 84': 0.1308234316119738, FloatVariable 'spo5 11': 1.200079459496442, FloatVariable 'diau e': 0.864767371223623, FloatVariable 'Elu 300': 0.15913983311663107, FloatVariable 'spo- early': 1.9466556509082333, FloatVariable 'Elu 330': 0.11474308886955724, FloatVariable 'alpha 42': 0.13641650791763626, FloatVariable 'spo- mid': 3.2086605964825132, FloatVariable 'Elu 360': 0.16577258493775038, FloatVariable 'alpha 14': 0.18310901108005986, FloatVariable 'Elu 390': 0.1820761083768519, FloatVariable 'alpha 21': 0.030539018557891578, FloatVariable 'cdc15 10': 0.11428450056762224, FloatVariable 'alpha 28': 0.04645238275160125, FloatVariable 'alpha 91': 0.1905674816738207, FloatVariable 'cdc15 30': 0.18270335600911874, FloatVariable 'alpha 35': 0.21379911334384863, FloatVariable 'heat 0': 0.19091815990337407, FloatVariable 'cdc15 50': 0.24968263583989325, FloatVariable 'cdc15 170': 0.44789694844623534, FloatVariable 'heat 80': 0.38909905042009185, FloatVariable 'diau f': 1.4452997087693935, FloatVariable 'cdc15 70': 0.13876265882583333, FloatVariable 'alpha 49': 0.16085715739160683, FloatVariable 'cdc15 90': 0.32729758144823035, FloatVariable 'alpha 56': 0.3367416107117914, FloatVariable 'cold 0': 0.27980454530046744, FloatVariable 'cdc15 110': 0.564756618474929, FloatVariable 'Elu 0': 0.8466167037587657, FloatVariable 'alpha 63': 0.18433878873311124, FloatVariable 'cdc15 130': 0.3658301477295572, FloatVariable 'dtt 60': 0.5951914850021424, FloatVariable 'alpha 105': 0.14088060621674625, FloatVariable 'cdc15 150': 0.693249161777514, FloatVariable 'dtt 120': 0.55305024494988, FloatVariable 'alpha 112': 0.19329749923741518, FloatVariable 'diau g': 2.248793727194904, FloatVariable 'heat 10': 1.000320207469925, FloatVariable 'cdc15 190': 0.16956982427965123, FloatVariable 'heat 160': 0.3192185000510415, FloatVariable 'dtt 15': 0.49451797411099035, FloatVariable 'cold 20': 0.4097605043285248, FloatVariable 'alpha 0': 0.19198054903386352, FloatVariable 'cdc15 210': 0.15183429673463036, FloatVariable 'cold 40': 0.3092287272528724, FloatVariable 'alpha 98': 0.21754881357923167, FloatVariable 'cdc15 230': 0.5474715182870784, FloatVariable 'cold 160': 0.6947037871090163, FloatVariable 'heat 40': 0.4580618143377812, FloatVariable 'cdc15 250': 0.3573777070361102, FloatVariable 'dtt 30': 0.5838556086306895, FloatVariable 'diau a': 0.14935761521655416, FloatVariable 'alpha 77': 0.20088381949723239, FloatVariable 'cdc15 270': 0.21951931922594184, FloatVariable 'diau b': 0.23473821977067888, FloatVariable 'heat 20': 0.9867456006798212, 
FloatVariable 'cdc15 290': 0.24965577080121784, FloatVariable 'diau c': 0.13741762346585432}) 
     1defaultdict(<type 'float'>, {FloatVariable 'alpha 0': 0.19198054903386352, FloatVariable 'Elu 300': 0.15913983311663107, FloatVariable 'spo- mid': 3.2086605964825132, FloatVariable 'Elu 330': 0.11474308886955724, FloatVariable 'alpha 14': 0.18310901108005986, FloatVariable 'alpha 98': 0.21754881357923167, FloatVariable 'Elu 360': 0.16577258493775038, FloatVariable 'Elu 180': 0.42425268429856355, FloatVariable 'alpha 21': 0.030539018557891578, FloatVariable 'Elu 30': 0.4786304184632838, FloatVariable 'Elu 390': 0.1820761083768519, FloatVariable 'spo- early': 1.9466556509082333, FloatVariable 'alpha 28': 0.04645238275160125, FloatVariable 'cdc15 10': 0.11428450056762224, FloatVariable 'alpha 35': 0.21379911334384863, FloatVariable 'cdc15 30': 0.18270335600911874, FloatVariable 'alpha 42': 0.13641650791763626, FloatVariable 'cdc15 50': 0.24968263583989325, FloatVariable 'alpha 70': 0.26459268873021585, FloatVariable 'alpha 49': 0.16085715739160683, FloatVariable 'cdc15 70': 0.13876265882583333, FloatVariable 'alpha 105': 0.14088060621674625, FloatVariable 'diau b': 0.23473821977067888, FloatVariable 'alpha 56': 0.3367416107117914, FloatVariable 'cdc15 90': 0.32729758144823035, FloatVariable 'alpha 63': 0.18433878873311124, FloatVariable 'cdc15 110': 0.564756618474929, FloatVariable 'Elu 60': 0.36698713537474476, FloatVariable 'dtt 60': 0.5951914850021424, FloatVariable 'cdc15 130': 0.3658301477295572, FloatVariable 'alpha 77': 0.20088381949723239, FloatVariable 'heat 80': 0.38909905042009185, FloatVariable 'cdc15 150': 0.693249161777514, FloatVariable 'alpha 84': 0.1308234316119738, FloatVariable 'cdc15 170': 0.44789694844623534, FloatVariable 'cold 20': 0.4097605043285248, FloatVariable 'cdc15 190': 0.16956982427965123, FloatVariable 'cold 40': 0.3092287272528724, FloatVariable 'alpha 112': 0.19329749923741518, FloatVariable 'cdc15 210': 0.15183429673463036, FloatVariable 'cold 160': 0.6947037871090163, FloatVariable 'diau f': 1.4452997087693935, FloatVariable 'cdc15 230': 0.5474715182870784, FloatVariable 'heat 0': 0.19091815990337407, FloatVariable 'diau a': 0.14935761521655416, FloatVariable 'heat 160': 0.3192185000510415, FloatVariable 'cdc15 250': 0.3573777070361102, FloatVariable 'heat 40': 0.4580618143377812, FloatVariable 'cdc15 270': 0.21951931922594184, FloatVariable 'spo5 2': 0.40417556232809326, FloatVariable 'Elu 0': 0.8466167037587657, FloatVariable 'alpha 7': 0.06557659077448137, FloatVariable 'cold 0': 0.27980454530046744, FloatVariable 'diau d': 0.44932601596409105, FloatVariable 'spo 0': 0.13024372486415015, FloatVariable 'alpha 119': 0.1699365258012951, FloatVariable 'diau e': 0.864767371223623, FloatVariable 'spo 2': 0.7078062232168753, FloatVariable 'heat 10': 1.000320207469925, FloatVariable 'spo 5': 1.0683498605674933, FloatVariable 'Elu 120': 0.5939764872379445, FloatVariable 'diau g': 2.248793727194904, FloatVariable 'spo 7': 0.8081568079276176, FloatVariable 'Elu 150': 0.5965599387054419, FloatVariable 'Elu 90': 0.3834942300173535, FloatVariable 'spo 9': 0.2498282401589412, FloatVariable 'dtt 30': 0.5838556086306895, FloatVariable 'alpha 91': 0.1905674816738207, FloatVariable 'spo 11': 0.20615575833508282, FloatVariable 'Elu 210': 0.12396520046361383, FloatVariable 'cdc15 290': 0.24965577080121784, FloatVariable 'dtt 15': 0.49451797411099035, FloatVariable 'Elu 240': 0.2093390824178926, FloatVariable 'diau c': 0.13741762346585432, FloatVariable 'spo5 7': 0.26780459416067937, FloatVariable 'dtt 120': 0.55305024494988, FloatVariable 'Elu 270': 
0.33471969574325466, FloatVariable 'spo5 11': 1.200079459496442, FloatVariable 'heat 20': 0.9867456006798212}) 
  • Orange/testing/regression/results_reference/CostMatrix.py.txt

    r9689 r9846  
    2121prescription: 0.000 
    2222astigmatic: 0.000 
    23 tear_rate: 0.167 
     23tear_rate: 0.083 
    2424 
    2525 
  • Orange/testing/regression/xtest.py

    r9751 r9873  
    33import os, re, sys, time, subprocess 
    44import getopt 
    5 import orngEnviron 
     5from Orange.misc import environ 
    66 
    77regtestdir = os.getcwd().replace("\\", "/") 
     
    184184    module = opts.get("--module", "all") 
    185185    if module in ["all"]: 
    186         root = "%s/.." % orngEnviron.orangeDir 
     186        root = "%s/.." % environ.install_dir 
    187187        module = "orange" 
    188188        dirs = [("modules", "Orange/doc/modules"), 
     
    191191                ("orange25", "docs/reference/rst/code")] 
    192192    elif module in ["orange"]: 
    193         root = "%s/.." % orngEnviron.orangeDir 
     193        root = "%s/.." % environ.install_dir 
    194194        module = "orange" 
    195195        dirs = [("modules", "Orange/doc/modules"), 
     
    197197                ("ofb", "docs/tutorial/rst/code")] 
    198198    elif module in ["ofb-rst"]: 
    199         root = "%s/.." % orngEnviron.orangeDir 
     199        root = "%s/.." % environ.install_dir 
    200200        module = "orange" 
    201201        dirs = [("ofb", "docs/tutorial/rst/code")] 
    202202    elif module in ["orange25"]: 
    203         root = "%s/.." % orngEnviron.orangeDir 
     203        root = "%s/.." % environ.install_dir 
    204204        module = "orange" 
    205205        dirs = [("orange25", "docs/reference/rst/code")] 
    206206    elif module == "obi": 
    207         root = orngEnviron.addOnsDirSys + "/Bioinformatics/doc" 
     207        root = environ.add_ons_dir + "/Bioinformatics/doc" 
    208208        dirs = [("modules", "modules")] 
    209209    elif module == "text": 
    210         root = orngEnviron.addOnsDirSys + "/Text/doc" 
     210        root = environ.add_ons_dir + "/Text/doc" 
    211211        dirs = [("modules", "modules")] 
    212212    else: 
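All of the xtest.py hunks above apply one and the same module migration, from the flat orngEnviron module to Orange.misc.environ. A minimal sketch of the pattern, using only the names visible in this changeset::

    # old, deprecated layout:
    #   import orngEnviron
    #   root = "%s/.." % orngEnviron.orangeDir
    # new layout, as used in the hunks above:
    from Orange.misc import environ

    root = "%s/.." % environ.install_dir   # orangeDir    -> install_dir
    addons = environ.add_ons_dir           # addOnsDirSys -> add_ons_dir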
  • docs/reference/rst/Orange.classification.logreg.rst

    r9372 r9818  
    11.. automodule:: Orange.classification.logreg 
     2 
     3.. index: logistic regression 
     4.. index: 
     5   single: classification; logistic regression 
     6 
     7******************************** 
     8Logistic regression (``logreg``) 
     9******************************** 
     10 
     11`Logistic regression <http://en.wikipedia.org/wiki/Logistic_regression>`_ 
      12is a statistical classification method that fits data to a logistic 
      13function. Orange's implementation of the algorithm 
     14can handle various anomalies in features, such as constant variables and 
     15singularities, that could make direct fitting of logistic regression almost 
     16impossible. Stepwise logistic regression, which iteratively selects the most 
     17informative features, is also supported. 
     18 
     19.. autoclass:: LogRegLearner 
     20   :members: 
     21 
     22.. class :: LogRegClassifier 
     23 
     24    A logistic regression classification model. Stores estimated values of 
     25    regression coefficients and their significances, and uses them to predict 
     26    classes and class probabilities. 
     27 
     28    .. attribute :: beta 
     29 
     30        Estimated regression coefficients. 
     31 
     32    .. attribute :: beta_se 
     33 
     34        Estimated standard errors for regression coefficients. 
     35 
     36    .. attribute :: wald_Z 
     37 
     38        Wald Z statistics for beta coefficients. Wald Z is computed 
     39        as beta/beta_se. 
     40 
     41    .. attribute :: P 
     42 
      43        List of P-values for the beta coefficients, that is, the 
      44        probabilities that the beta coefficients differ from 0.0. Each 
      45        probability is computed from the squared Wald Z statistic, which 
      46        follows a chi-square distribution. 
     47 
     48    .. attribute :: likelihood 
     49 
      50        The probability of the sample (i.e., the learning examples) observed on 
     51        the basis of the derived model, as a function of the regression 
     52        parameters. 
     53 
     54    .. attribute :: fit_status 
     55 
      56        Tells how the model fitting ended - either regularly 
      57        (:obj:`LogRegFitter.OK`), or it was interrupted because one of the 
      58        beta coefficients escaped towards infinity (:obj:`LogRegFitter.Infinity`) 
      59        or because the values did not converge (:obj:`LogRegFitter.Divergence`). The 
      60        value indicates the classifier's "reliability"; the classifier 
      61        itself is useful in either case. 
     62 
     63    .. method:: __call__(instance, result_type) 
     64 
     65        Classify a new instance. 
     66 
     67        :param instance: instance to be classified. 
     68        :type instance: :class:`~Orange.data.Instance` 
     69        :param result_type: :class:`~Orange.classification.Classifier.GetValue` or 
     70              :class:`~Orange.classification.Classifier.GetProbabilities` or 
     71              :class:`~Orange.classification.Classifier.GetBoth` 
     72 
     73        :rtype: :class:`~Orange.data.Value`, 
     74              :class:`~Orange.statistics.distribution.Distribution` or a 
     75              tuple with both 
     76 
     77 
     78.. class:: LogRegFitter 
     79 
      80    :obj:`LogRegFitter` is the abstract base class for logistic fitters. It 
      81    defines the form of the call operator and the constants denoting its 
      82    (un)success: 
     83 
     84    .. attribute:: OK 
     85 
      86        The fitter succeeded in converging to the optimal fit. 
     87 
     88    .. attribute:: Infinity 
     89 
     90        Fitter failed due to one or more beta coefficients escaping towards infinity. 
     91 
     92    .. attribute:: Divergence 
     93 
      94        Beta coefficients failed to converge, but none of them escaped towards infinity. 
     95 
     96    .. attribute:: Constant 
     97 
     98        There is a constant attribute that causes the matrix to be singular. 
     99 
     100    .. attribute:: Singularity 
     101 
     102        The matrix is singular. 
     103 
     104 
     105    .. method:: __call__(examples, weight_id) 
     106 
      107        Performs the fitting. There are two different cases: either 
      108        the fitting succeeds in finding a set of beta coefficients (although 
      109        possibly with difficulties) or it fails altogether. The 
      110        two cases return different results. 
     111 
     112        `(status, beta, beta_se, likelihood)` 
      113            The fitter managed to fit the model. The first element of 
      114            the tuple, status, tells about any problems that occurred; it can 
      115            be either :obj:`OK`, :obj:`Infinity` or :obj:`Divergence`. In 
      116            the latter two cases, the returned values may still be useful for 
      117            making predictions, but it is recommended to inspect 
      118            the coefficients and their errors before deciding 
      119            whether to use the model or not. 
     120 
     121        `(status, attribute)` 
     122            The fitter failed and the returned attribute is responsible 
     123            for it. The type of failure is reported in status, which 
     124            can be either :obj:`Constant` or :obj:`Singularity`. 
     125 
      126        The proper way of calling the fitter is to expect and handle all 
      127        the situations described. For instance, if ``fitter`` is an instance 
      128        of some fitter and ``examples`` contains a set of suitable examples, 
      129        a script should look like this:: 
     130 
      131            res = fitter(examples) 
      132            if res[0] in [fitter.OK, fitter.Infinity, fitter.Divergence]: 
      133                status, beta, beta_se, likelihood = res 
      134                # proceed by doing something with what you got 
      135            else: 
      136                status, attr = res 
      137                # remove the attribute or complain to the user or ... 
     138 
     139 
     140.. class :: LogRegFitter_Cholesky 
     141 
      142    The sole fitter available at the 
      143    moment. It is a C++ translation of `Alan Miller's logistic regression 
      144    code <http://users.bigpond.net.au/amiller/>`_. It uses the Newton-Raphson 
      145    algorithm to iteratively minimize the least squares error computed from 
      146    the learning examples. 
     147 
     148 
     149.. autoclass:: StepWiseFSS 
     150   :members: 
     151   :show-inheritance: 
     152 
     153.. autofunction:: dump 
     154 
     155 
     156 
     157Examples 
     158-------- 
     159 
     160The first example shows a very simple induction of a logistic regression 
     161classifier (:download:`logreg-run.py <code/logreg-run.py>`). 
     162 
     163.. literalinclude:: code/logreg-run.py 
     164 
     165Result:: 
     166 
     167    Classification accuracy: 0.778282598819 
     168 
     169    class attribute = survived 
     170    class values = <no, yes> 
     171 
     172        Attribute       beta  st. error     wald Z          P OR=exp(beta) 
     173 
     174        Intercept      -1.23       0.08     -15.15      -0.00 
     175     status=first       0.86       0.16       5.39       0.00       2.36 
     176    status=second      -0.16       0.18      -0.91       0.36       0.85 
     177     status=third      -0.92       0.15      -6.12       0.00       0.40 
     178        age=child       1.06       0.25       4.30       0.00       2.89 
     179       sex=female       2.42       0.14      17.04       0.00      11.25 
     180 
      181The next example shows how to handle singularities in data sets 
     182(:download:`logreg-singularities.py <code/logreg-singularities.py>`). 
     183 
     184.. literalinclude:: code/logreg-singularities.py 
     185 
     186The first few lines of the output of this script are:: 
     187 
     188    <=50K <=50K 
     189    <=50K <=50K 
     190    <=50K <=50K 
     191    >50K >50K 
     192    <=50K >50K 
     193 
     194    class attribute = y 
     195    class values = <>50K, <=50K> 
     196 
     197                               Attribute       beta  st. error     wald Z          P OR=exp(beta) 
     198 
     199                               Intercept       6.62      -0.00       -inf       0.00 
     200                                     age      -0.04       0.00       -inf       0.00       0.96 
     201                                  fnlwgt      -0.00       0.00       -inf       0.00       1.00 
     202                           education-num      -0.28       0.00       -inf       0.00       0.76 
     203                 marital-status=Divorced       4.29       0.00        inf       0.00      72.62 
     204            marital-status=Never-married       3.79       0.00        inf       0.00      44.45 
     205                marital-status=Separated       3.46       0.00        inf       0.00      31.95 
     206                  marital-status=Widowed       3.85       0.00        inf       0.00      46.96 
     207    marital-status=Married-spouse-absent       3.98       0.00        inf       0.00      53.63 
     208        marital-status=Married-AF-spouse       4.01       0.00        inf       0.00      55.19 
     209                 occupation=Tech-support      -0.32       0.00       -inf       0.00       0.72 
     210 
     211If :obj:`remove_singular` is set to 0, inducing a logistic regression 
      212classifier raises an error:: 
     213 
     214    Traceback (most recent call last): 
     215      File "logreg-singularities.py", line 4, in <module> 
     216        lr = classification.logreg.LogRegLearner(table, removeSingular=0) 
     217      File "/home/jure/devel/orange/Orange/classification/logreg.py", line 255, in LogRegLearner 
     218        return lr(examples, weightID) 
     219      File "/home/jure/devel/orange/Orange/classification/logreg.py", line 291, in __call__ 
     220        lr = learner(examples, weight) 
     221    orange.KernelException: 'orange.LogRegLearner': singularity in workclass=Never-worked 
     222 
     223We can see that the attribute workclass is causing a singularity. 
     224 
      225The example below shows how stepwise logistic regression can 
      226improve classification performance (:download:`logreg-stepwise.py <code/logreg-stepwise.py>`): 
     227 
     228.. literalinclude:: code/logreg-stepwise.py 
     229 
     230The output of this script is:: 
     231 
     232    Learner      CA 
     233    logistic     0.841 
     234    filtered     0.846 
     235 
     236    Number of times attributes were used in cross-validation: 
     237     1 x a21 
     238    10 x a22 
     239     8 x a23 
     240     7 x a24 
     241     1 x a25 
     242    10 x a26 
     243    10 x a27 
     244     3 x a28 
     245     7 x a29 
     246     9 x a31 
     247     2 x a16 
     248     7 x a12 
     249     1 x a32 
     250     8 x a15 
     251    10 x a14 
     252     4 x a17 
     253     7 x a30 
     254    10 x a11 
     255     1 x a10 
     256     1 x a13 
     257    10 x a34 
     258     2 x a19 
     259     1 x a18 
     260    10 x a3 
     261    10 x a5 
     262     4 x a4 
     263     4 x a7 
     264     8 x a6 
     265    10 x a9 
     266    10 x a8 
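As a quick orientation to the API documented above, here is a minimal sketch of inducing a logistic regression classifier and printing its coefficient table; it assumes the bundled titanic dataset used by logreg-run.py and that :obj:`dump` returns the formatted table as a string::

    import Orange
    from Orange.classification import logreg

    titanic = Orange.data.Table("titanic")
    classifier = logreg.LogRegLearner(titanic)  # calling the learner with data returns a classifier
    print logreg.dump(classifier)               # beta, st. error, wald Z, P, OR=exp(beta)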
  • docs/reference/rst/Orange.classification.rst

    r9754 r9820  
    55################################### 
    66 
    7 All classifiers in Orange consist of two parts, a Learner and a Classifier. A 
    8 learner is constructed with all parameters that will be used for learning. 
    9 When a data table is passed to its __call__ method, a model is fitted to the 
    10 data and return in a form of a Classifier, which is then used for predicting 
    11 the dependent variable(s) of new instances. 
     7To facilitate correct evaluation, all classifiers in Orange consist of two 
     8parts, a Learner and a Classifier. A learner is constructed with all 
     9parameters that will be used for learning. When a data table is passed to its 
      10__call__ method, a model is fitted to the data and returned in the form of a 
     11Classifier, which is then used for predicting the dependent variable(s) of 
     12new instances. 
    1213 
    1314.. class:: Learner() 
     
    5657              tuple with both 
    5758 
    58 You can often program learners and classifiers as classes or functions written 
    59 entirely in Python and independent from Orange. Such classes can participate, 
    60 for instance, in the common evaluation functions like those available in 
    61 modules :obj:`Orange.evaluation.testing` and :obj:`Orange.evaluation.scoring`. 
    6259 
    63 On the other hand, these classes can't be used as components for pure C++ 
    64 classes. For instance, :obj:`Orange.classification.tree.TreeLearner`'s 
    65 attribute nodeLearner should contain a (wrapped) C++ object derived from 
    66 :obj:`Learner`, such as :obj:`Orange.classification.majority.MajorityLearner` 
    67 or :obj:`Orange.classification.bayes.NaiveLearner`. They cannot accommodate 
    68 Python's classes or even functions. 
     60When developing new prediction models, one should extend :obj:`Learner` and 
     61:obj:`Classifier`\. Code that infers the model from the data should be placed 
      62in the Learner's :obj:`~Learner.__call__` method. This method should 
      63return a :obj:`Classifier`. The Classifier's :obj:`~Classifier.__call__` method 
      64should return the prediction: a :class:`~Orange.data.Value`, 
      65a :class:`~Orange.statistics.distribution.Distribution` or a tuple with both, 
      66depending on the value of the parameter :obj:`return_type`. 
    6967 
    70 There's a workaround, though. You can subtype Orange classes :obj:`Learner` or 
    71 :obj:`Classifier` as if the two classes were defined in Python, but later use your 
    72 derived Python classes as if they were written in Orange's core. That is, you 
    73 can define your class in a Python script like this:: 
    74  
    75     class MyLearner(Orange.classifier.Learner):  
    76         def __call__(self, examples, weightID = 0):  
    77             <do something smart here> 
    78  
    79 Such a learner can then be used as any regular learner written in 
    80 Orange. You can, for instance, construct a tree learner and use your 
    81 learner to learn node classifier:: 
    82  
    83     treeLearner = Orange.classification.tree.TreeLearner() 
    84     treeLearner.nodeLearner = MyLearner() 
    85  
    86 ----- 
    87  
    88 Orange contains implementations of various classifiers that are described in 
    89 detail on separate pages. 
     68Orange implements various classifiers that are described in detail on 
     69separate pages. 
    9070 
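A minimal sketch of the Learner/Classifier split described above; the pair below is hypothetical (a majority-class stand-in), not part of Orange itself::

    import Orange

    class MyClassifier(Orange.classification.Classifier):
        def __init__(self, value):
            self.value = value

        def __call__(self, instance,
                     result_type=Orange.classification.Classifier.GetValue):
            # a real classifier would honour result_type; this sketch
            # always returns the stored majority value
            return self.value

    class MyLearner(Orange.classification.Learner):
        def __call__(self, data, weight_id=0):
            # infer the "model": here, simply the majority class of the data
            dist = Orange.statistics.distribution.Distribution(
                data.domain.class_var, data)
            return MyClassifier(dist.modus())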
    9171.. toctree:: 
  • docs/reference/rst/Orange.data.domain.rst

    r9704 r9840  
    425425         keys and variables (:obj:`~Orange.data.variable.Variable`) as the 
    426426         corresponding values. The following example shows how to add all 
    427           meta attributes from another domain:: 
     427         meta attributes from another domain:: 
    428428 
    429429              >>> newdomain.add_metas(domain.get_metas()) 
  • docs/reference/rst/Orange.data.table.rst

    r9883 r9885  
    8080 
    8181        Read data from the given file. If the name includes the 
    82         extension, it must be one of the known file formats (see 
    83         :doc:`/Orange.data.formats`). If no extension is given, the directory 
    84         is searched for any file with recognized extensions. If the 
      82        extension, it must be one of the known file formats 
     83        (see :doc:`/Orange.data.formats`). If no extension is given, the 
     84        directory is searched for any file with recognized extensions. If the 
    8585        file is not found, Orange will also search the directories 
    8686        specified in the environment variable `ORANGE_DATA_PATH`. 
     
    257257 
    258258        Note: This method should be used when the selected data 
    259         instances are going to be modified. In all other cases, method 
    260         :obj:`select_ref` is preferred. 
     259        instances are going to be modified. In all other cases, 
     260        method :obj:`select_ref` is preferred. 
    261261 
    262262        :param filt: filter list 
  • docs/reference/rst/Orange.data.variable.rst

    r9727 r9848  
    114114            possible values for this variable. Changing the size - especially 
    115115            shrinking the list - can crash Python. Also, do not add values 
    116             to the list by calling its append or extend method: use 
    117             :obj:`add_value` method instead. 
     116            to the list by calling its append or extend method: 
     117            use :obj:`add_value` method instead. 
    118118 
    119119            It is also assumed that this attribute is always defined (but can 
  • docs/reference/rst/Orange.feature.discretization.rst

    r9372 r9863  
    1 .. automodule:: Orange.feature.discretization 
     1.. py:currentmodule:: Orange.feature.discretization 
     2 
     3################################### 
     4Discretization (``discretization``) 
     5################################### 
     6 
     7.. index:: discretization 
     8 
     9.. index:: 
     10   single: feature; discretization 
     11 
      12Continuous features can be discretized either one feature at a time or, as demonstrated in the following script, 
      13by applying a single discretization method to the entire set of data features: 
     14 
     15.. literalinclude:: code/discretization-table.py 
     16 
      17Discretization introduces new categorical features and computes their values in accordance with the 
      18selected (or default) discretization method:: 
     19 
     20    Original data set: 
     21    [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'] 
     22    [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'] 
     23    [4.7, 3.2, 1.3, 0.2, 'Iris-setosa'] 
     24 
     25    Discretized data set: 
     26    ['<=5.45', '>3.15', '<=2.45', '<=0.80', 'Iris-setosa'] 
     27    ['<=5.45', '(2.85, 3.15]', '<=2.45', '<=0.80', 'Iris-setosa'] 
     28    ['<=5.45', '>3.15', '<=2.45', '<=0.80', 'Iris-setosa'] 
     29 
     30The following discretization methods are supported: 
     31 
      32* equal width discretization, where the domain of a continuous feature is split into intervals of the 
      33  same width (:class:`EqualWidth`), 
      34* equal frequency discretization, where each interval contains an equal number of data instances (:class:`EqualFreq`), 
      35* entropy-based discretization, as originally proposed by [FayyadIrani1993]_, which infers the intervals that minimize the 
      36  within-interval entropy of class distributions (:class:`Entropy`), 
      37* bi-modal discretization, using three intervals to optimize the difference between the class distribution in 
      38  the middle interval and the distribution outside it (:class:`BiModal`), 
     39* fixed, with the user-defined cut-off points. 
     40 
     41The above script used the default discretization method (equal frequency with three intervals). This can be changed 
     42as demonstrated below: 
     43 
     44.. literalinclude:: code/discretization-table-method.py 
     45    :lines: 3-5 
     46 
      47With the exception of fixed discretization, discretization approaches infer the cut-off points from the 
      48training data set and thus construct a discretizer that converts continuous values of the feature into categorical 
      49values according to the inferred rule. In this respect, discretization behaves similarly to 
      50:class:`Orange.classification.Learner`. 
     51 
     52Utility functions 
     53================= 
     54 
      55This section describes functions and classes that can be used for the 
      56categorization of continuous features. Besides several general classes that 
      57can help in this task, we also provide a function that may help in 
      58entropy-based discretization (Fayyad & Irani), and a wrapper around classes for 
      59categorization that can be used for learning. 
     60 
     61.. autoclass:: Orange.feature.discretization.DiscretizedLearner_Class 
     62 
     63.. autoclass:: DiscretizeTable 
     64 
     65.. rubric:: Example 
     66 
     67FIXME. A chapter on `feature subset selection <../ofb/o_fss.htm>`_ in Orange 
     68for Beginners tutorial shows the use of DiscretizedLearner. Other 
     69discretization classes from core Orange are listed in chapter on 
     70`categorization <../ofb/o_categorization.htm>`_ of the same tutorial. 
     71 
     72Discretization Algorithms 
     73========================= 
     74 
     75Instances of discretization classes are all derived from :class:`Discretization`. 
     76 
     77.. class:: Discretization 
     78 
     79    .. method:: __call__(feature, data[, weightID]) 
     80 
      81        Given a continuous ``feature``, ``data`` and, optionally, the id of an 
      82        attribute with example weights, this function returns a discretized 
      83        feature. The argument ``feature`` can be a descriptor, index or 
      84        name of the attribute. 
     85 
     86 
     87.. class:: EqualWidth 
     88 
      89    Discretizes the feature by splitting its domain into a fixed number 
      90    of equal-width intervals. The span of the original domain is computed 
      91    from the training data and is defined by the smallest and the 
      92    largest feature value. 
     93 
     94    .. attribute:: n 
     95 
     96        Number of discretization intervals (default: 4). 
     97 
      98The following example discretizes the features of the Iris dataset into six 
     99intervals. The script constructs a :class:`Orange.data.Table` with discretized 
     100features and outputs their description: 
     101 
     102.. literalinclude:: code/discretization.py 
     103    :lines: 38-43 
     104 
     105The output of this script is:: 
     106 
     107    D_sepal length: <<4.90, [4.90, 5.50), [5.50, 6.10), [6.10, 6.70), [6.70, 7.30), >7.30> 
     108    D_sepal width: <<2.40, [2.40, 2.80), [2.80, 3.20), [3.20, 3.60), [3.60, 4.00), >4.00> 
     109    D_petal length: <<1.98, [1.98, 2.96), [2.96, 3.94), [3.94, 4.92), [4.92, 5.90), >5.90> 
     110    D_petal width: <<0.50, [0.50, 0.90), [0.90, 1.30), [1.30, 1.70), [1.70, 2.10), >2.10> 
     111 
     112The cut-off values are hidden in the discretizer and stored in ``attr.get_value_from.transformer``:: 
     113 
     114    >>> for attr in newattrs: 
     115    ...    print "%s: first interval at %5.3f, step %5.3f" % \ 
     116    ...    (attr.name, attr.get_value_from.transformer.first_cut, \ 
     117    ...    attr.get_value_from.transformer.step) 
     118    D_sepal length: first interval at 4.900, step 0.600 
     119    D_sepal width: first interval at 2.400, step 0.400 
     120    D_petal length: first interval at 1.980, step 0.980 
     121    D_petal width: first interval at 0.500, step 0.400 
     122 
     123All discretizers have the method 
     124``construct_variable``: 
     125 
     126.. literalinclude:: code/discretization.py 
     127    :lines: 69-73 
     128 
     129 
     130.. class:: EqualFreq 
     131 
     132    Infers the cut-off points so that the discretization intervals contain 
     133    approximately equal number of training data instances. 
     134 
     135    .. attribute:: n 
     136 
     137        Number of discretization intervals (default: 4). 
     138 
     139The resulting discretizer is of class :class:`IntervalDiscretizer`. Its ``transformer`` includes ``points`` 
     140that store the inferred cut-offs. 
     141 
     142.. class:: Entropy 
     143 
      144    Entropy-based discretization as originally proposed by [FayyadIrani1993]_. The approach infers the most 
      145    appropriate number of intervals by recursively splitting the domain of the continuous feature to minimize the 
      146    class-entropy of training examples. The splitting is repeated until the entropy decrease is smaller than the 
      147    increase of minimal description length (MDL) induced by the new cut-off point. 
     148 
      149    Entropy-based discretization can reduce a continuous feature to 
      150    a single interval if no suitable cut-off points are found. In this case the new feature is constant and can be 
      151    removed. This discretization can 
      152    therefore also serve to identify non-informative features and can thus be used for feature subset selection. 
     153 
     154    .. attribute:: force_attribute 
     155 
     156        Forces the algorithm to induce at least one cut-off point, even when 
     157        its information gain is lower than MDL (default: ``False``). 
     158 
     159Part of :download:`discretization.py <code/discretization.py>`: 
     160 
     161.. literalinclude:: code/discretization.py 
     162    :lines: 77-80 
     163 
      164The output shows that all attributes are discretized into three intervals:: 
     165 
     166    sepal length: <5.5, 6.09999990463> 
     167    sepal width: <2.90000009537, 3.29999995232> 
     168    petal length: <1.89999997616, 4.69999980927> 
     169    petal width: <0.600000023842, 1.0000004768> 
     170 
     171.. class:: BiModal 
     172 
      173    Infers two cut-off points to optimize the difference between the class distributions of data instances in the 
      174    middle interval and in the other two intervals. The 
      175    difference is scored by the chi-square statistic. All possible cut-off 
      176    points are examined, so the discretization runs in O(n^2). This discretization method is especially suitable 
      177    for attributes in 
      178    which the middle region corresponds to normal and the outer regions to 
      179    abnormal values of the feature. 
     180 
     181    .. attribute:: split_in_two 
     182 
     183        Decides whether the resulting attribute should have three or two values. 
     184        If ``True`` (default), the feature will be discretized to three 
     185        intervals and the discretizer is of type :class:`BiModalDiscretizer`. 
     186        If ``False`` the result is the ordinary :class:`IntervalDiscretizer`. 
     187 
      188The Iris dataset has a three-valued class attribute. The figure below, drawn using LOESS probability estimation, shows that 
      189sepal lengths of versicolors are between the lengths of setosas and virginicas. 
     190 
     191.. image:: files/bayes-iris.gif 
     192 
      193If we merge the classes setosa and virginica, we can observe whether 
      194the bi-modal discretization correctly recognizes the interval in 
      195which versicolors dominate. The following script performs the merging and constructs a new data set with a class 
      196that reports whether an iris is a versicolor or not. 
     197 
     198.. literalinclude:: code/discretization.py 
     199    :lines: 84-87 
     200 
     201The following script implements the discretization: 
     202 
     203.. literalinclude:: code/discretization.py 
     204    :lines: 97-100 
     205 
     206The middle intervals are printed:: 
     207 
     208    sepal length: (5.400, 6.200] 
     209    sepal width: (2.000, 2.900] 
     210    petal length: (1.900, 4.700] 
     211    petal width: (0.600, 1.600] 
     212 
     213Judging by the graph, the cut-off points inferred by discretization for "sepal length" make sense. 
     214 
     215Discretizers 
     216============ 
     217 
      218Discretizers construct a categorical feature from the continuous feature according to the method they implement and 
      219its parameters. The most general is 
      220:class:`IntervalDiscretizer`, which is also used by most discretization 
      221methods. Two other discretizers, :class:`EqualWidthDiscretizer` and 
      222:class:`ThresholdDiscretizer`, could easily be replaced by 
      223:class:`IntervalDiscretizer` but are used for speed and simplicity. 
      224The fourth discretizer, :class:`BiModalDiscretizer`, is specialized 
      225for discretizations induced by :class:`BiModal`. 
     226 
     227.. class:: Discretizer 
     228 
     229    A superclass implementing the construction of a new 
     230    attribute from an existing one. 
     231 
     232    .. method:: construct_variable(feature) 
     233 
     234        Constructs a descriptor for a new feature. The new feature's name is equal to ``feature.name`` 
     235        prefixed by "D\_". Its symbolic values are discretizer specific. 
     236 
     237.. class:: IntervalDiscretizer 
     238 
     239    Discretizer defined with a set of cut-off points. 
     240 
     241    .. attribute:: points 
     242 
     243        The cut-off points; feature values below or equal to the first point will be mapped to the first interval, 
     244        those between the first and the second point 
     245        (including those equal to the second) are mapped to the second interval and 
     246        so forth to the last interval which covers all values greater than 
     247        the last value in ``points``. The number of intervals is thus 
     248        ``len(points)+1``. 
     249 
      250The script that follows is an example of the manual construction of a discretizer with cut-off points 
      251at 3.0 and 5.0: 
     252 
     253.. literalinclude:: code/discretization.py 
     254    :lines: 22-26 
     255 
     256First five data instances of ``data2`` are:: 
     257 
     258    [5.1, '>5.00', 'Iris-setosa'] 
     259    [4.9, '(3.00, 5.00]', 'Iris-setosa'] 
     260    [4.7, '(3.00, 5.00]', 'Iris-setosa'] 
     261    [4.6, '(3.00, 5.00]', 'Iris-setosa'] 
     262    [5.0, '(3.00, 5.00]', 'Iris-setosa'] 
     263 
      264The same discretizer can be used on several features by calling the function ``construct_var``: 
     265 
     266.. literalinclude:: code/discretization.py 
     267    :lines: 30-34 
     268 
     269Each feature has its own instance of :class:`ClassifierFromVar` stored in 
     270``get_value_from``, but all use the same :class:`IntervalDiscretizer`, 
      271``idisc``. Changing any element of its ``points`` affects all attributes. 
     272 
     273.. note:: 
     274 
      275    The length of :obj:`~IntervalDiscretizer.points` should not be changed if the 
      276    discretizer is used by any attribute. The length of 
      277    :obj:`~IntervalDiscretizer.points` must always be consistent with the number of values 
      278    of the feature, which is determined by the length of the attribute's field 
      279    ``values``. If ``attr`` is a discretized attribute, then ``len(attr.values)`` must equal 
      280    ``len(attr.get_value_from.transformer.points)+1``. 
     281 
     282 
     283.. class:: EqualWidthDiscretizer 
     284 
      285    Discretizes to intervals of fixed width. All values lower than :obj:`~EqualWidthDiscretizer.first_cut` are mapped to the first 
      286    interval. Otherwise, value ``val``'s interval is ``floor((val-first_cut)/step)``. Possible overflows are mapped to the 
      287    last interval. 
     288 
     289 
     290    .. attribute:: first_cut 
     291 
     292        The first cut-off point. 
     293 
     294    .. attribute:: step 
     295 
     296        Width of the intervals. 
     297 
     298    .. attribute:: n 
     299 
     300        Number of the intervals. 
     301 
     302    .. attribute:: points (read-only) 
     303 
     304        The cut-off points; this is not a real attribute although it behaves 
     305        as one. Reading it constructs a list of cut-off points and returns it, 
      306        but changing the list doesn't affect the discretizer. It is only present to give 
      307        :obj:`EqualWidthDiscretizer` the same interface as that of 
      308        :obj:`IntervalDiscretizer`. 
     309 
     310 
     311.. class:: ThresholdDiscretizer 
     312 
      313    The threshold discretizer converts continuous values into binary values by comparing 
     314    them to a fixed threshold. Orange uses this discretizer for 
     315    binarization of continuous attributes in decision trees. 
     316 
     317    .. attribute:: threshold 
     318 
     319        The value threshold; values below or equal to the threshold belong to the first 
     320        interval and those that are greater go to the second. 
     321 
     322 
     323.. class:: BiModalDiscretizer 
     324 
      325    The bimodal discretizer has two cut-off points; values are 
      326    discretized according to whether or not they belong to the region between these points, 
      327    which includes the lower but not the upper boundary. The 
      328    discretizer is returned by :class:`BiModal` if its 
      329    field :obj:`~BiModal.split_in_two` is true (the default). 
     330 
     331    .. attribute:: low 
     332 
     333        Lower boundary of the interval (included in the interval). 
     334 
     335    .. attribute:: high 
     336 
     337        Upper boundary of the interval (not included in the interval). 
     338 
     339 
     340Implementational details 
     341======================== 
     342 
      343Consider the following example (part of :download:`discretization.py <code/discretization.py>`): 
     344 
     345.. literalinclude:: code/discretization.py 
     346    :lines: 7-15 
     347 
     348The discretized attribute ``sep_w`` is constructed with a call to 
     349:class:`Entropy`; instead of constructing it and calling 
      350it afterwards, we passed the calling arguments to the constructor. We then constructed a new 
     351:class:`Orange.data.Table` with attributes "sepal width" (the original 
     352continuous attribute), ``sep_w`` and the class attribute:: 
     353 
     354    Entropy discretization, first 5 data instances 
     355    [3.5, '>3.30', 'Iris-setosa'] 
     356    [3.0, '(2.90, 3.30]', 'Iris-setosa'] 
     357    [3.2, '(2.90, 3.30]', 'Iris-setosa'] 
     358    [3.1, '(2.90, 3.30]', 'Iris-setosa'] 
     359    [3.6, '>3.30', 'Iris-setosa'] 
     360 
      361The name of the new categorical variable derives from the name of the original 
      362continuous variable by adding the prefix ``D_``. The values of the new attribute 
     363are computed automatically when they are needed using a transformation 
     364function :obj:`~Orange.data.variable.Variable.get_value_from` 
     365(see :class:`Orange.data.variable.Variable`) which encodes the discretization:: 
     366 
     367    >>> sep_w 
     368    EnumVariable 'D_sepal width' 
     369    >>> sep_w.get_value_from 
     370    <ClassifierFromVar instance at 0x01BA7DC0> 
     371    >>> sep_w.get_value_from.whichVar 
     372    FloatVariable 'sepal width' 
     373    >>> sep_w.get_value_from.transformer 
     374    <IntervalDiscretizer instance at 0x01BA2100> 
     375    >>> sep_w.get_value_from.transformer.points 
     376    <2.90000009537, 3.29999995232> 
     377 
     378The ``select`` statement in the discretization script converted all data instances 
     379from ``data`` to the new domain. This includes a new feature 
     380``sep_w`` whose values are computed on the fly by calling ``sep_w.get_value_from`` for each data instance. 
     381The original, continuous sepal width 
     382is passed to the ``transformer`` that determines the interval by its field 
      383``points``. The transformer returns the discrete value, which is in turn returned 
     384by ``get_value_from`` and stored in the new example. 
     385 
     386References 
     387========== 
     388 
      389.. [FayyadIrani1993] U. M. Fayyad and K. B. Irani. Multi-interval discretization of continuous-valued 
      390  attributes for classification learning. In Proc. 13th International Joint Conference on Artificial Intelligence, pages 
      391  1022--1029, Chambery, France, 1993. 
  • docs/reference/rst/Orange.feature.imputation.rst

    r9372 r9853  
    1 .. automodule:: Orange.feature.imputation 
     1.. py:currentmodule:: Orange.feature.imputation 
     2 
     3.. index:: imputation 
     4 
     5.. index:: 
     6   single: feature; value imputation 
     7 
     8*************************** 
     9Imputation (``imputation``) 
     10*************************** 
     11 
     12Imputation replaces missing feature values with appropriate values, in this 
     13case with minimal values: 
     14 
     15.. literalinclude:: code/imputation-values.py 
     16   :lines: 7- 
     17 
     18The output of this code is:: 
     19 
     20    Example with missing values 
     21    ['A', 1853, 'RR', ?, 2, 'N', 'DECK', 'WOOD', '?', 'S', 'WOOD'] 
     22    Imputed values: 
     23    ['A', 1853, 'RR', 804, 2, 'N', 'DECK', 'WOOD', 'SHORT', 'S', 'WOOD'] 
     24    ['A', 1853, 'RR', 804, 2, 'N', 'DECK', 'WOOD', 'SHORT', 'S', 'WOOD'] 
     25 
     26Imputers 
     27================= 
     28 
      29:obj:`ImputerConstructor` is the abstract root of a hierarchy of classes 
      30that accept training data and construct an instance of a class derived from 
      31:obj:`Imputer`. When an :obj:`Imputer` is called with an 
      32:obj:`Orange.data.Instance`, it returns a new instance with the 
      33missing values imputed (leaving the original instance intact). If an imputer is 
      34called with an :obj:`Orange.data.Table`, it returns a new data table with 
      35imputed instances. 
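
The two-step protocol, in a minimal sketch (assuming the bridges data set
used in the examples on this page)::

    import Orange

    bridges = Orange.data.Table("bridges")
    # step 1: the constructor is given data and returns a trained imputer
    imputer = Orange.feature.imputation.ImputerConstructor_minimal(bridges)
    # step 2: the imputer fixes single instances or whole tables
    imputed_instance = imputer(bridges[10])
    imputed_table = imputer(bridges)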
     36 
     37.. class:: ImputerConstructor 
     38 
     39    .. attribute:: impute_class 
     40 
     41    Indicates whether to impute the class value. Defaults to True. 
     42 
     43Simple imputation 
     44================= 
     45 
     46Simple imputers always impute the same value for a particular feature, 
     47disregarding the values of other features. They all use the same class 
     48:obj:`Imputer_defaults`. 
     49 
     50.. class:: Imputer_defaults 
     51 
     52    .. attribute::  defaults 
     53 
      54    An instance of :obj:`Orange.data.Instance` with the default values to be 
      55    imputed instead of missing values. Examples to be imputed must be from the 
      56    same :obj:`~Orange.data.Domain` as :obj:`defaults`. 
     57 
     58Instances of this class can be constructed by 
     59:obj:`~Orange.feature.imputation.ImputerConstructor_minimal`, 
     60:obj:`~Orange.feature.imputation.ImputerConstructor_maximal`, 
     61:obj:`~Orange.feature.imputation.ImputerConstructor_average`. 
     62 
      63For continuous features, they impute the smallest, largest or average 
      64value encountered in the training examples. For discrete features, 
      65they impute the lowest value (the one with index 0, i.e. ``attr.values[0]``), 
      66the highest value (``attr.values[-1]``), or the most common value encountered 
      67in the data, respectively. If the values of a discrete feature are ordered 
      68according to their impact on the class (for example, possible values for 
      69symptoms of some disease can be ordered according to their seriousness), 
      70the minimal and maximal imputers then represent optimistic and 
      71pessimistic imputations. 
     72 
      73User-defined defaults can be given when constructing an 
      74:obj:`~Orange.feature.imputation.Imputer_defaults`. Values that are left 
      75unspecified do not get imputed. In the following example "LENGTH" is the 
      76only attribute that gets imputed, with value 1234: 
     77 
     78.. literalinclude:: code/imputation-complex.py 
     79    :lines: 56-69 
     80 
      81If the constructor of :obj:`~Orange.feature.imputation.Imputer_defaults` is given 
      82an argument of type :obj:`~Orange.data.Domain`, it constructs an empty instance 
      83for :obj:`defaults`. If an instance is given, a reference to that 
      84instance is kept, so later changes to it also change the defaults. To avoid 
      85such problems with ``Imputer_defaults(data[0])``, it is better to provide a 
      86copy of the instance: ``Imputer_defaults(Orange.data.Instance(data[0]))``. 
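
A minimal sketch of the safe pattern (assuming ``data`` is the bridges data
set used in the examples)::

    # a domain argument gives an empty defaults instance: nothing is
    # imputed until a default is set explicitly
    imputer = Orange.feature.imputation.Imputer_defaults(data.domain)
    imputer.defaults["LENGTH"] = 1234.0   # impute 1234 for "LENGTH" only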
     87 
     88Random imputation 
     89================= 
     90 
     91.. class:: Imputer_Random 
     92 
     93    Imputes random values. The corresponding constructor is 
     94    :obj:`ImputerConstructor_Random`. 
     95 
     96    .. attribute:: impute_class 
     97 
     98    Tells whether to impute the class values or not. Defaults to True. 
     99 
     100    .. attribute:: deterministic 
     101 
      102    If true (defaults to False), the random generator is initialized for each 
      103    instance using the instance's hash value as a seed. As a result, equal 
      104    instances are always imputed with the same (random) values. 
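
    A hedged usage sketch (the flag is set on the constructed imputer, as
    documented above; ``data`` is assumed to be a loaded data table)::

        constructor = Orange.feature.imputation.ImputerConstructor_Random()
        imputer = constructor(data)
        imputer.deterministic = True   # equal instances get equal imputations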
     105 
     106Model-based imputation 
     107====================== 
     108 
     109.. class:: ImputerConstructor_model 
     110 
      111    Model-based imputers learn to predict the feature's value from the values 
      112    of other features. :obj:`ImputerConstructor_model` is given two learning 
      113    algorithms and constructs a classifier for each attribute. The 
      114    constructed imputer :obj:`Imputer_model` stores a list of classifiers that 
      115    are used for imputation. 
     116 
     117    .. attribute:: learner_discrete, learner_continuous 
     118 
      119    Learners for discrete and for continuous attributes. If either is 
      120    missing, the attributes of the corresponding type will not get imputed. 
     121 
     122    .. attribute:: use_class 
     123 
     124    Tells whether the imputer can use the class attribute. Defaults to 
     125    False. It is useful in more complex designs in which one imputer is used 
     126    on learning instances, where it uses the class value, 
      127    and a second imputer on testing instances, where the class is not available. 
     128 
     129.. class:: Imputer_model 
     130 
     131    .. attribute:: models 
     132 
     133    A list of classifiers, each corresponding to one attribute to be imputed. 
     134    The :obj:`class_var`'s of the models should equal the instances' 
     135    attributes. If an element is :obj:`None`, the corresponding attribute's 
     136    values are not imputed. 
     137 
     138.. rubric:: Examples 
     139 
     140Examples are taken from :download:`imputation-complex.py 
     141<code/imputation-complex.py>`. The following imputer predicts the missing 
      142attribute values using classification and regression trees with a minimum 
      143of 20 examples in a leaf. 
     144 
     145.. literalinclude:: code/imputation-complex.py 
     146    :lines: 74-76 
     147 
     148A common setup, where different learning algorithms are used for discrete 
     149and continuous features, is to use 
     150:class:`~Orange.classification.bayes.NaiveLearner` for discrete and 
     151:class:`~Orange.regression.mean.MeanLearner` (which 
     152just remembers the average) for continuous attributes: 
     153 
     154.. literalinclude:: code/imputation-complex.py 
     155    :lines: 91-94 
     156 
     157To construct a user-defined :class:`Imputer_model`: 
     158 
     159.. literalinclude:: code/imputation-complex.py 
     160    :lines: 108-112 
     161 
      162A list of empty models is first initialized in :obj:`Imputer_model.models`. 
      163The continuous feature "LANES" is imputed with value 2 using 
      164:obj:`DefaultClassifier`. A float must be given, because integer values are 
      165interpreted as indices of values of discrete features. The discrete feature 
      166"T-OR-D" is imputed using :class:`Orange.classification.ConstantClassifier`, 
      167which is given the index of the value "THROUGH" as an argument. 
      168 
      169Feature "LENGTH" is imputed with a regression tree induced from "MATERIAL", 
      170"SPAN" and "ERECTED" (feature "LENGTH" is used as the class attribute here). 
      171The domain is constructed by giving a list of feature names and the data's 
      172domain as an additional argument, in which Orange looks up the features. 
     173 
     174.. literalinclude:: code/imputation-complex.py 
     175    :lines: 114-119 
     176 
      177This is what the inferred tree should look like:: 
      178 
      179    SPAN=SHORT: 1158 
      180    SPAN=LONG: 1907 
      181    SPAN=MEDIUM 
      182    |    ERECTED<1908.500: 1325 
      183    |    ERECTED>=1908.500: 1528 
     185 
      186Wooden bridges and walkways are short, while the others are mostly 
      187medium. This could be encoded in feature "SPAN" using 
      188:class:`Orange.classification.lookup.ClassifierByLookupTable`, which is 
      189faster than the Python function used here: 
     190 
     191.. literalinclude:: code/imputation-complex.py 
     192    :lines: 121-128 
     193 
      194If :obj:`compute_span` is written as a class, it must behave like a 
      195classifier: it accepts an example and returns a value. The second 
      196argument tells what the caller expects the classifier to return - a value, 
      197a distribution or both. Currently, :obj:`Imputer_model` 
      198always expects values, so the argument can be ignored. 
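
A hedged sketch of such a function (``span_var`` and the rule are
illustrative, following the observation above; the script's actual code is
in :download:`imputation-complex.py <code/imputation-complex.py>`)::

    def compute_span(example, return_what=orange.GetValue):
        # wooden bridges and walkways are short, the rest mostly medium
        if example["TYPE"] == "WOOD" or example["PURPOSE"] == "WALK":
            return orange.Value(span_var, "SHORT")
        return orange.Value(span_var, "MEDIUM")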
     199 
     200Missing values as special values 
     201================================ 
     202 
      203Missing values sometimes have a special meaning, so caution is needed when 
      204using such values in decision models. When the decision not to measure 
      205something (for example, not to perform a laboratory test on a patient) is based 
      206on the expert's knowledge of the class value, such missing values clearly 
      207should not be used in models. 
     208 
     209.. class:: ImputerConstructor_asValue 
     210 
     211    Constructs a new domain in which each discrete feature is replaced 
     212    with a new feature that has one more value: "NA". The new feature 
     213    computes its values on the fly from the old one, 
     214    copying the normal values and replacing the unknowns with "NA". 
     215 
      216    For continuous attributes, it constructs a two-valued discrete attribute 
      217    with values "def" and "undef", telling whether the value is defined or 
      218    not. The feature's name equals the original's with "_def" appended. 
      219    The original continuous feature remains in the domain and its 
      220    unknowns are replaced by averages. 
     221 
     222    :class:`ImputerConstructor_asValue` has no specific attributes. 
     223 
     224    It constructs :class:`Imputer_asValue` that converts the example into 
     225    the new domain. 
     226 
     227.. class:: Imputer_asValue 
     228 
     229    .. attribute:: domain 
     230 
     231        The domain with the new feature constructed by 
     232        :class:`ImputerConstructor_asValue`. 
     233 
     234    .. attribute:: defaults 
     235 
     236        Default values for continuous features. 
     237 
     238The following code shows what the imputer actually does to the domain: 
     239 
     240.. literalinclude:: code/imputation-complex.py 
     241    :lines: 137-151 
     242 
     243The script's output looks like this:: 
     244 
     245    [RIVER, ERECTED, PURPOSE, LENGTH, LANES, CLEAR-G, T-OR-D, MATERIAL, SPAN, REL-L, TYPE] 
     246 
     247    [RIVER, ERECTED_def, ERECTED, PURPOSE, LENGTH_def, LENGTH, LANES_def, LANES, CLEAR-G, T-OR-D, MATERIAL, SPAN, REL-L, TYPE] 
     248 
     249    RIVER: M -> M 
     250    ERECTED: 1874 -> 1874 (def) 
     251    PURPOSE: RR -> RR 
     252    LENGTH: ? -> 1567 (undef) 
     253    LANES: 2 -> 2 (def) 
     254    CLEAR-G: ? -> NA 
     255    T-OR-D: THROUGH -> THROUGH 
     256    MATERIAL: IRON -> IRON 
     257    SPAN: ? -> NA 
     258    REL-L: ? -> NA 
     259    TYPE: SIMPLE-T -> SIMPLE-T 
     260 
      261The two instances have the same attributes, with :samp:`imputed` having a few 
      262additional ones. Comparing :samp:`original.domain[0] == imputed.domain[0]` 
      263results in False. While the names are the same, they represent different 
      264features. Writing :samp:`imputed[i]` would fail, since :samp:`imputed` has 
      265no attribute :samp:`i`, only an attribute with the same name. 
      266Using :samp:`i.name` to index the attributes of :samp:`imputed` works, 
      267yet it is not fast. If used frequently, it is better to precompute the index 
      268with :samp:`imputed.domain.index(i.name)`. 
     269 
      270For each continuous feature, there is an additional feature whose name ends 
      271with the suffix "_def", accessible by :samp:`i.name+"_def"`. The value of the 
      272first continuous feature "ERECTED" remains 1874, and the additional attribute 
      273"ERECTED_def" has value "def". The undefined value in "LENGTH" is replaced 
      274by the average (1567) and the new attribute has value "undef". The 
      275undefined discrete attribute "CLEAR-G" (and all other undefined discrete 
      276attributes) is assigned the value "NA". 
     277 
     278Using imputers 
     279============== 
     280 
      281Imputers must be trained on the training data only. Imputing the missing 
      282values in the entire data set and only then running cross-validation gives 
      283overly optimistic results. 
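
A hedged sketch of a correct setup (:obj:`ImputeLearner`, described below,
retrains the imputer inside each fold)::

    learner = Orange.feature.imputation.ImputeLearner(
        base_learner=Orange.classification.bayes.NaiveLearner(),
        imputer_constructor=Orange.feature.imputation.ImputerConstructor_minimal())
    results = Orange.evaluation.testing.cross_validation([learner], data)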
     284 
     285Learners with imputer as a component 
     286------------------------------------ 
     287 
     288Learners that cannot handle missing values provide a slot for the imputer 
     289component. An example of such a class is 
     290:obj:`~Orange.classification.logreg.LogRegLearner` with an attribute called 
     291:obj:`~Orange.classification.logreg.LogRegLearner.imputer_constructor`. 
     292 
      293When given learning instances, 
      294:obj:`~Orange.classification.logreg.LogRegLearner` passes them to 
      295:obj:`~Orange.classification.logreg.LogRegLearner.imputer_constructor` to get 
      296an imputer and uses it to impute the missing values in the learning data. 
      297The imputed data is then used by the actual learning algorithm. When the 
      298classifier :obj:`Orange.classification.logreg.LogRegClassifier` is constructed, 
      299the imputer is stored in its attribute 
      300:obj:`Orange.classification.logreg.LogRegClassifier.imputer`. At 
      301classification time, the same imputer is used to impute missing values 
      302in (testing) examples. 
     303 
     304Details may vary from algorithm to algorithm, but this is how the imputation 
     305is generally used. When writing user-defined learners, 
     306it is recommended to use imputation according to the described procedure. 
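
A minimal sketch of this setup (assuming the voting data set used in the
example below)::

    voting = Orange.data.Table("voting")
    lr = Orange.classification.logreg.LogRegLearner()
    lr.imputer_constructor = Orange.feature.imputation.ImputerConstructor_minimal()
    classifier = lr(voting)   # the data is imputed before actual learning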
     307 
     308Wrapper for learning algorithms 
     309=============================== 
     310 
      311Imputation can also be wrapped around learning algorithms and other methods 
      312that cannot handle unknown values. The wrapper imputes the missing values, 
      313calls the learner and, if imputation is also needed by the classifier, 
      314wraps the classifier with one that imputes missing values in instances to classify. 
     315 
     316.. literalinclude:: code/imputation-logreg.py 
     317   :lines: 7- 
     318 
     319The output of this code is:: 
     320 
     321    Without imputation: 0.945 
     322    With imputation: 0.954 
     323 
     324Even so, the module is somewhat redundant, as all learners that cannot handle 
      325missing values should, in principle, provide a slot for an imputer constructor. 
     326For instance, :obj:`Orange.classification.logreg.LogRegLearner` has an 
     327attribute 
     328:obj:`Orange.classification.logreg.LogRegLearner.imputer_constructor`, 
     329and even if you don't set it, it will do some imputation by default. 
     330 
     331.. class:: ImputeLearner 
     332 
      333    Wraps a learner and performs data imputation before learning. 
     334 
     335    Most of Orange's learning algorithms do not use imputers because they can 
      336    appropriately handle the missing values. The Bayesian classifier, for instance, 
     337    simply skips the corresponding attributes in the formula, while 
     338    classification/regression trees have components for handling the missing 
     339    values in various ways. 
     340 
      341    If for any reason you want these algorithms to run on imputed data, 
      342    you can use this wrapper. The class description belongs to a separate 
      343    page, but we show its code here as another demonstration of how to 
      344    use the imputers - logistic regression is implemented essentially the same 
      345    way as the classes below. 
      346 
      347    This is basically a learner, so the constructor returns either an 
      348    instance of :obj:`ImputeLearner` or, if called with examples, an instance 
      349    of some classifier. There are a few attributes that need to be set, though. 
     350 
     351    .. attribute:: base_learner 
     352 
     353    A wrapped learner. 
     354 
     355    .. attribute:: imputer_constructor 
     356 
     357    An instance of a class derived from :obj:`ImputerConstructor` (or a class 
     358    with the same call operator). 
     359 
     360    .. attribute:: dont_impute_classifier 
     361 
     362    If given and set (this attribute is optional), the classifier will not be 
     363    wrapped into an imputer. Do this if the classifier doesn't mind if the 
     364    examples it is given have missing values. 
     365 
     366    The learner is best illustrated by its code - here's its complete 
     367    :obj:`__call__` method:: 
     368 
     369        def __call__(self, data, weight=0): 
     370            trained_imputer = self.imputer_constructor(data, weight) 
     371            imputed_data = trained_imputer(data, weight) 
     372            base_classifier = self.base_learner(imputed_data, weight) 
     373            if self.dont_impute_classifier: 
     374                return base_classifier 
     375            else: 
     376                return ImputeClassifier(base_classifier, trained_imputer) 
     377 
      378    So "learning" goes like this. :obj:`ImputeLearner` first constructs 
      379    the imputer, that is, calls :obj:`self.imputer_constructor` to get a 
      380    (trained) imputer. Then it uses the imputer to impute the data and calls 
      381    the given :obj:`base_learner` to construct a classifier. For instance, 
      382    :obj:`base_learner` could be a learner for logistic regression and the 
      383    result would be a logistic regression model. If the classifier can handle 
      384    unknown values (that is, if :obj:`dont_impute_classifier` is set), we 
      385    return it as it is; otherwise we wrap it into :obj:`ImputeClassifier`, 
      386    which is given the base classifier and the imputer it can use to impute 
      387    the missing values in (testing) examples. 
     388 
     389.. class:: ImputeClassifier 
     390 
     391    Objects of this class are returned by :obj:`ImputeLearner` when given data. 
     392 
      393    .. attribute:: base_classifier 
     394 
     395    A wrapped classifier. 
     396 
     397    .. attribute:: imputer 
     398 
     399    An imputer for imputation of unknown values. 
     400 
     401    .. method:: __call__ 
     402 
     403    This class is even more trivial than the learner. Its constructor accepts 
     404    two arguments, the classifier and the imputer, which are stored into the 
     405    corresponding attributes. The call operator which does the classification 
     406    then looks like this:: 
     407 
     408        def __call__(self, ex, what=orange.GetValue): 
     409            return self.base_classifier(self.imputer(ex), what) 
     410 
      411    It imputes the missing values by calling the :obj:`imputer` and passes the 
      412    imputed example to the base classifier. 
     413 
     414.. note:: 
     415   In this setup the imputer is trained on the training data - even if you do 
      416   cross-validation, the imputer will be trained on the right data. In the 
      417   classification phase we again use the imputer, which was trained on the 
      418   training data only. 
     419 
     420.. rubric:: Code of ImputeLearner and ImputeClassifier 
     421 
      422:obj:`Orange.feature.imputation.ImputeLearner` puts the keyword arguments into 
      423the instance's dictionary. You are expected to call it like 
      424:obj:`ImputeLearner(base_learner=<someLearner>, 
      425imputer_constructor=<someImputerConstructor>)`. When the learner is called 
      426with examples, it 
      427trains the imputer, imputes the data, induces a :obj:`base_classifier` by the 
      428:obj:`base_learner` and constructs an :obj:`ImputeClassifier` that stores the 
      429:obj:`base_classifier` and the :obj:`imputer`. For classification, the missing 
      430values are imputed and the classifier's prediction is returned. 
     431 
      432Note that this code is slightly simplified; the omitted details handle 
      433non-essential technical issues that are unrelated to imputation:: 
     434 
     435    class ImputeLearner(orange.Learner): 
      436        def __new__(cls, examples=None, weightID=0, **keyw): 
     437            self = orange.Learner.__new__(cls, **keyw) 
     438            self.__dict__.update(keyw) 
     439            if examples: 
     440                return self.__call__(examples, weightID) 
     441            else: 
     442                return self 
     443 
     444        def __call__(self, data, weight=0): 
     445            trained_imputer = self.imputer_constructor(data, weight) 
     446            imputed_data = trained_imputer(data, weight) 
     447            base_classifier = self.base_learner(imputed_data, weight) 
     448            return ImputeClassifier(base_classifier, trained_imputer) 
     449 
     450    class ImputeClassifier(orange.Classifier): 
     451        def __init__(self, base_classifier, imputer): 
     452            self.base_classifier = base_classifier 
     453            self.imputer = imputer 
     454 
     455        def __call__(self, ex, what=orange.GetValue): 
     456            return self.base_classifier(self.imputer(ex), what) 
     457 
     458.. rubric:: Example 
     459 
      460Although most of Orange's learning algorithms take care of imputation 
      461internally when needed, it can sometimes happen that an expert can 
      462tell you exactly what to put in the data instead of the missing values. In this 
      463example we suppose that we want to impute the minimal value of each 
      464feature. We will try to determine whether the naive Bayesian classifier with 
      465its implicit internal imputation works better than one that uses imputation by 
      466minimal values. 
     467 
     468:download:`imputation-minimal-imputer.py <code/imputation-minimal-imputer.py>` (uses :download:`voting.tab <code/voting.tab>`): 
     469 
     470.. literalinclude:: code/imputation-minimal-imputer.py 
     471    :lines: 7- 
     472 
      473Should output this:: 
     474 
     475    Without imputation: 0.903 
     476    With imputation: 0.899 
     477 
      478.. note:: 
      479   We constructed just one instance of 
      480   :obj:`Orange.classification.bayes.NaiveLearner`, but this same instance is 
      481   used twice in each fold. The first time it is given the examples as they 
      482   are and returns an instance of :obj:`Orange.classification.bayes.NaiveClassifier`. 
      483   The second time it is called by :obj:`imba`, and the 
      484   :obj:`Orange.classification.bayes.NaiveClassifier` it returns is wrapped 
      485   into :obj:`ImputeClassifier`. We thus have only one 
      486   learner, but it produces two different classifiers in each round of 
      487   testing. 
     488 
     489Write your own imputer 
     490====================== 
     491 
      492Imputation classes provide the Python-callback functionality (not all Orange 
      493classes do so; refer to the documentation on `subtyping the Orange classes 
      494in Python <callbacks.htm>`_ for a list). To write your own 
      495imputation constructor or imputer, you simply need to program a Python 
      496function that behaves like the built-in Orange classes. For an imputer, it 
      497is even simpler: you only need to write a function that gets an example as 
      498an argument; imputation of example tables will then use that function. 
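
A hedged sketch of such a function (the default value 0 is purely
illustrative)::

    def my_imputer(example):
        example = Orange.data.Instance(example)   # keep the original intact
        for i, value in enumerate(example):
            if value.is_special():                # unknown ('?') value
                example[i] = 0                    # hypothetical default
        return example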
     499 
      500You will most often write an imputation constructor when you have a special 
      501imputation procedure or separate procedures for various attributes, as we 
      502demonstrated in the description of 
      503:obj:`Orange.feature.imputation.ImputerConstructor_model`. You basically only 
      504need to pack everything we wrote there into an imputer constructor that 
      505accepts a data set and the id of the weight meta-attribute (ignore it if 
      506you will, but you must accept two arguments) and returns the imputer (probably 
      507an :obj:`Orange.feature.imputation.Imputer_model`). The benefit of implementing an 
      508imputer constructor as opposed to what we did above is that you can use such a 
      509constructor as a component for Orange learners (like logistic regression) or 
      510for wrappers from module orngImpute, and in that way properly use it in 
      511classifier testing procedures. 
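
A matching constructor sketch: two arguments in, an imputer out (here it
simply returns the hand-written function from above)::

    def my_imputer_constructor(data, weight_id=0):
        # a real constructor would inspect the data to train the imputer
        return my_imputer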
  • docs/reference/rst/code/discretization.py

    r9372 r9812  
    99 
    1010print "\nEntropy discretization, first 10 examples" 
    11 sep_w = Orange.feature.discretization.EntropyDiscretization("sepal width", data) 
     11sep_w = Orange.feature.discretization.Entropy("sepal width", data) 
    1212 
    1313data2 = data.select([data.domain["sepal width"], sep_w, data.domain.class_var]) 
     
    1919print "Cut-off points:", sep_w.get_value_from.transformer.points 
    2020 
    21 print "\nManual construction of IntervalDiscretizer - single attribute" 
    22 idisc = Orange.feature.discretization.IntervalDiscretizer(points = [3.0, 5.0]) 
     21print "\nManual construction of Interval discretizer - single attribute" 
     22idisc = Orange.feature.discretization.Interval(points = [3.0, 5.0]) 
    2323sep_l = idisc.construct_variable(data.domain["sepal length"]) 
    2424data2 = data.select([data.domain["sepal length"], sep_l, data.domain.classVar]) 
     
    2727 
    2828 
    29 print "\nManual construction of IntervalDiscretizer - all attributes" 
    30 idisc = Orange.feature.discretization.IntervalDiscretizer(points = [3.0, 5.0]) 
     29print "\nManual construction of Interval discretizer - all attributes" 
     30idisc = Orange.feature.discretization.Interval(points = [3.0, 5.0]) 
    3131newattrs = [idisc.construct_variable(attr) for attr in data.domain.attributes] 
    3232data2 = data.select(newattrs + [data.domain.class_var]) 
     
    3535 
    3636 
    37 print "\n\nEqual interval size discretization" 
    38 disc = Orange.feature.discretization.EquiDistDiscretization(numberOfIntervals = 6) 
     37print "\n\nDiscretization with equal width intervals" 
     38disc = Orange.feature.discretization.EqualWidth(numberOfIntervals = 6) 
    3939newattrs = [disc(attr, data) for attr in data.domain.attributes] 
    4040data2 = data.select(newattrs + [data.domain.classVar]) 
     
    5151 
    5252 
    53 print "\n\nQuartile discretization" 
    54 disc = Orange.feature.discretization.EquiNDiscretization(numberOfIntervals = 6) 
     53print "\n\nQuartile (equal frequency) discretization" 
     54disc = Orange.feature.discretization.EqualFreq(numberOfIntervals = 6) 
    5555newattrs = [disc(attr, data) for attr in data.domain.attributes] 
    5656data2 = data.select(newattrs + [data.domain.classVar]) 
     
    6666 
    6767 
    68 print "\nManual construction of EquiDistDiscretizer - all attributes" 
    69 edisc = Orange.feature.discretization.EquiDistDiscretizer(first_cut = 2.0, step = 1.0, number_of_intervals = 5) 
     68print "\nManual construction of EqualWidth - all attributes" 
     69edisc = Orange.feature.discretization.EqualWidth(first_cut = 2.0, step = 1.0, number_of_intervals = 5) 
    7070newattrs = [edisc.constructVariable(attr) for attr in data.domain.attributes] 
    7171data2 = data.select(newattrs + [data.domain.classVar]) 
     
    7474 
    7575 
    76 print "\nFayyad-Irani discretization" 
    77 entro = Orange.feature.discretization.EntropyDiscretization() 
     76print "\nFayyad-Irani entropy-based discretization" 
     77entro = Orange.feature.discretization.Entropy() 
    7878for attr in data.domain.attributes: 
    7979    disc = entro(attr, data) 
     
    8787data_v = Orange.data.Table(newdomain, data) 
    8888 
    89 print "\nBi-Modal discretization on binary problem" 
    90 bimod = Orange.feature.discretization.BiModalDiscretization(split_in_two = 0) 
     89print "\nBi-modal discretization on a binary problem" 
     90bimod = Orange.feature.discretization.BiModal(split_in_two = 0) 
    9191for attr in data_v.domain.attributes: 
    9292    disc = bimod(attr, data_v) 
     
    9494print 
    9595 
    96 print "\nBi-Modal discretization on binary problem" 
    97 bimod = Orange.feature.discretization.BiModalDiscretization() 
     96print "\nBi-modal discretization on a binary problem" 
     97bimod = Orange.feature.discretization.BiModal() 
    9898for attr in data_v.domain.attributes: 
    9999    disc = bimod(attr, data_v) 
     
    102102 
    103103 
    104 print "\nEntropy discretization on binary problem" 
     104print "\nEntropy-based discretization on a binary problem" 
    105105for attr in data_v.domain.attributes: 
    106106    disc = entro(attr, data_v) 
  • docs/reference/rst/code/distances-test.py

    r9724 r9823  
    55 
    66# Euclidean distance constructor 
    7 d2Constr = Orange.distance.Euclidean() 
     7d2Constr = Orange.distance.instances.EuclideanConstructor() 
    88d2 = d2Constr(iris) 
    99 
    1010# Constructs  
    11 dPears = Orange.distance.PearsonR(iris) 
     11dPears = Orange.distance.instances.PearsonRConstructor(iris) 
    1212 
    1313#reference instance