Ignore:
Files:
2 added
1 deleted
16 edited

Legend:

Unmodified
Added
Removed
  • Orange/testing/regression/results_reference/svm-linear-weights.py.txt

    r9954 r9971  
    1 defaultdict(<type 'float'>, {FloatVariable 'alpha 0': 0.19198054903386352, FloatVariable 'Elu 300': 0.15913983311663107, FloatVariable 'spo- mid': 3.2086605964825132, FloatVariable 'Elu 330': 0.11474308886955724, FloatVariable 'alpha 14': 0.18310901108005986, FloatVariable 'alpha 98': 0.21754881357923167, FloatVariable 'Elu 360': 0.16577258493775038, FloatVariable 'Elu 180': 0.42425268429856355, FloatVariable 'alpha 21': 0.030539018557891578, FloatVariable 'Elu 30': 0.4786304184632838, FloatVariable 'Elu 390': 0.1820761083768519, FloatVariable 'spo- early': 1.9466556509082333, FloatVariable 'alpha 28': 0.04645238275160125, FloatVariable 'cdc15 10': 0.11428450056762224, FloatVariable 'alpha 35': 0.21379911334384863, FloatVariable 'cdc15 30': 0.18270335600911874, FloatVariable 'alpha 42': 0.13641650791763626, FloatVariable 'cdc15 50': 0.24968263583989325, FloatVariable 'alpha 70': 0.26459268873021585, FloatVariable 'alpha 49': 0.16085715739160683, FloatVariable 'cdc15 70': 0.13876265882583333, FloatVariable 'alpha 105': 0.14088060621674625, FloatVariable 'diau b': 0.23473821977067888, FloatVariable 'alpha 56': 0.3367416107117914, FloatVariable 'cdc15 90': 0.32729758144823035, FloatVariable 'alpha 63': 0.18433878873311124, FloatVariable 'cdc15 110': 0.564756618474929, FloatVariable 'Elu 60': 0.36698713537474476, FloatVariable 'dtt 60': 0.5951914850021424, FloatVariable 'cdc15 130': 0.3658301477295572, FloatVariable 'alpha 77': 0.20088381949723239, FloatVariable 'heat 80': 0.38909905042009185, FloatVariable 'cdc15 150': 0.693249161777514, FloatVariable 'alpha 84': 0.1308234316119738, FloatVariable 'cdc15 170': 0.44789694844623534, FloatVariable 'cold 20': 0.4097605043285248, FloatVariable 'cdc15 190': 0.16956982427965123, FloatVariable 'cold 40': 0.3092287272528724, FloatVariable 'alpha 112': 0.19329749923741518, FloatVariable 'cdc15 210': 0.15183429673463036, FloatVariable 'cold 160': 0.6947037871090163, FloatVariable 'diau f': 1.4452997087693935, FloatVariable 
'cdc15 230': 0.5474715182870784, FloatVariable 'heat 0': 0.19091815990337407, FloatVariable 'diau a': 0.14935761521655416, FloatVariable 'heat 160': 0.3192185000510415, FloatVariable 'cdc15 250': 0.3573777070361102, FloatVariable 'heat 40': 0.4580618143377812, FloatVariable 'cdc15 270': 0.21951931922594184, FloatVariable 'spo5 2': 0.40417556232809326, FloatVariable 'Elu 0': 0.8466167037587657, FloatVariable 'alpha 7': 0.06557659077448137, FloatVariable 'cold 0': 0.27980454530046744, FloatVariable 'diau d': 0.44932601596409105, FloatVariable 'spo 0': 0.13024372486415015, FloatVariable 'alpha 119': 0.1699365258012951, FloatVariable 'diau e': 0.864767371223623, FloatVariable 'spo 2': 0.7078062232168753, FloatVariable 'heat 10': 1.000320207469925, FloatVariable 'spo 5': 1.0683498605674933, FloatVariable 'Elu 120': 0.5939764872379445, FloatVariable 'diau g': 2.248793727194904, FloatVariable 'spo 7': 0.8081568079276176, FloatVariable 'Elu 150': 0.5965599387054419, FloatVariable 'Elu 90': 0.3834942300173535, FloatVariable 'spo 9': 0.2498282401589412, FloatVariable 'dtt 30': 0.5838556086306895, FloatVariable 'alpha 91': 0.1905674816738207, FloatVariable 'spo 11': 0.20615575833508282, FloatVariable 'Elu 210': 0.12396520046361383, FloatVariable 'cdc15 290': 0.24965577080121784, FloatVariable 'dtt 15': 0.49451797411099035, FloatVariable 'Elu 240': 0.2093390824178926, FloatVariable 'diau c': 0.13741762346585432, FloatVariable 'spo5 7': 0.26780459416067937, FloatVariable 'dtt 120': 0.55305024494988, FloatVariable 'Elu 270': 0.33471969574325466, FloatVariable 'spo5 11': 1.200079459496442, FloatVariable 'heat 20': 0.9867456006798212}) 
     1defaultdict(<type 'float'>, {FloatVariable 'Elu 0': 0.8466167037587657, FloatVariable 'Elu 30': 0.4786304184632838, FloatVariable 'spo 0': 0.13024372486415015, FloatVariable 'Elu 60': 0.36698713537474476, FloatVariable 'spo 2': 0.7078062232168753, FloatVariable 'alpha 63': 0.18433878873311124, FloatVariable 'Elu 90': 0.3834942300173535, FloatVariable 'spo 5': 1.0683498605674933, FloatVariable 'alpha 7': 0.06557659077448137, FloatVariable 'Elu 120': 0.5939764872379445, FloatVariable 'spo 7': 0.8081568079276176, FloatVariable 'diau d': 0.44932601596409105, FloatVariable 'Elu 150': 0.5965599387054419, FloatVariable 'spo 9': 0.2498282401589412, FloatVariable 'Elu 180': 0.42425268429856355, FloatVariable 'alpha 77': 0.20088381949723239, FloatVariable 'spo 11': 0.20615575833508282, FloatVariable 'alpha 70': 0.26459268873021585, FloatVariable 'Elu 210': 0.12396520046361383, FloatVariable 'spo5 2': 0.40417556232809326, FloatVariable 'alpha 98': 0.21754881357923167, FloatVariable 'Elu 240': 0.2093390824178926, FloatVariable 'spo5 7': 0.26780459416067937, FloatVariable 'Elu 270': 0.33471969574325466, FloatVariable 'spo5 11': 1.200079459496442, FloatVariable 'diau e': 0.864767371223623, FloatVariable 'alpha 119': 0.1699365258012951, FloatVariable 'spo- early': 1.9466556509082333, FloatVariable 'alpha 112': 0.19329749923741518, FloatVariable 'Elu 330': 0.11474308886955724, FloatVariable 'alpha 42': 0.13641650791763626, FloatVariable 'spo- mid': 3.2086605964825132, FloatVariable 'alpha 91': 0.1905674816738207, FloatVariable 'Elu 360': 0.16577258493775038, FloatVariable 'alpha 14': 0.18310901108005986, FloatVariable 'alpha 105': 0.14088060621674625, FloatVariable 'Elu 390': 0.1820761083768519, FloatVariable 'alpha 21': 0.030539018557891578, FloatVariable 'cdc15 10': 0.11428450056762224, FloatVariable 'alpha 28': 0.04645238275160125, FloatVariable 'cdc15 30': 0.18270335600911874, FloatVariable 'heat 40': 0.4580618143377812, FloatVariable 'heat 0': 0.19091815990337407, 
FloatVariable 'cdc15 50': 0.24968263583989325, FloatVariable 'cdc15 170': 0.44789694844623534, FloatVariable 'heat 80': 0.38909905042009185, FloatVariable 'diau f': 1.4452997087693935, FloatVariable 'cdc15 70': 0.13876265882583333, FloatVariable 'heat 160': 0.3192185000510415, FloatVariable 'alpha 49': 0.16085715739160683, FloatVariable 'alpha 56': 0.3367416107117914, FloatVariable 'alpha 84': 0.1308234316119738, FloatVariable 'cdc15 110': 0.564756618474929, FloatVariable 'dtt 30': 0.5838556086306895, FloatVariable 'cdc15 130': 0.3658301477295572, FloatVariable 'dtt 60': 0.5951914850021424, FloatVariable 'cdc15 90': 0.32729758144823035, FloatVariable 'cdc15 150': 0.693249161777514, FloatVariable 'dtt 120': 0.55305024494988, FloatVariable 'heat 10': 1.000320207469925, FloatVariable 'diau g': 2.248793727194904, FloatVariable 'cold 0': 0.27980454530046744, FloatVariable 'cdc15 190': 0.16956982427965123, FloatVariable 'dtt 15': 0.49451797411099035, FloatVariable 'cold 20': 0.4097605043285248, FloatVariable 'Elu 300': 0.15913983311663107, FloatVariable 'cdc15 210': 0.15183429673463036, FloatVariable 'cold 40': 0.3092287272528724, FloatVariable 'cdc15 230': 0.5474715182870784, FloatVariable 'alpha 35': 0.21379911334384863, FloatVariable 'cold 160': 0.6947037871090163, FloatVariable 'cdc15 250': 0.3573777070361102, FloatVariable 'diau a': 0.14935761521655416, FloatVariable 'cdc15 270': 0.21951931922594184, FloatVariable 'diau b': 0.23473821977067888, FloatVariable 'heat 20': 0.9867456006798212, FloatVariable 'cdc15 290': 0.24965577080121784, FloatVariable 'diau c': 0.13741762346585432, FloatVariable 'alpha 0': 0.19198054903386352}) 
  • Orange/testing/regression/results_tests_20/modules_ensemble.py.txt

    r9951 r9972  
    11Classification Accuracy: 
    2            tree: 0.804 
    3    boosted tree: 0.811 
    4     bagged tree: 0.797 
     2           tree: 0.764 
     3   boosted tree: 0.770 
     4    bagged tree: 0.790 
  • Orange/testing/regression/results_tests_20/modules_kmeans-cmp-init.py.txt

    r9951 r9968  
    11           Rnd Div  HC 
    2       iris  11   2  10 
    3    housing  13   5   3 
    4    vehicle  10   3   2 
     2  iris.tab  11   2  10 
     3housing.tab  13   5   3 
     4vehicle.tab  10   3   2 
  • Orange/testing/regression/results_tests_20/modules_logreg2.py.txt

    r9956 r9965  
    44>50K >50K 
    55<=50K >50K 
    6  
    7 class attribute = y 
    8 class values = <>50K, <=50K> 
    9  
    10                              Feature       beta  st. error     wald Z          P OR=exp(beta) 
    11  
    12                            Intercept       6.62       0.00        inf       0.00 
    13                                  age      -0.04       0.00       -inf       0.00       0.96 
    14                               fnlwgt      -0.00       0.00       -inf       0.00       1.00 
    15                        education-num      -0.28       0.00       -inf       0.00       0.76 
    16              marital-status=Divorced       4.29       0.00        inf       0.00      72.62 
    17         marital-status=Never-married       3.79       0.00        inf       0.00      44.45 
    18             marital-status=Separated       3.46        nan        nan        nan      31.95 
    19               marital-status=Widowed       3.85       0.00        inf       0.00      46.96 
    20 marital-status=Married-spouse-absent       3.98       0.00        inf       0.00      53.63 
    21     marital-status=Married-AF-spouse       4.01        nan        nan        nan      55.19 
    22              occupation=Tech-support      -0.32       0.00       -inf       0.00       0.72 
    23              occupation=Craft-repair       0.37       0.00        inf       0.00       1.45 
    24             occupation=Other-service       2.68        nan        nan        nan      14.61 
    25                     occupation=Sales       0.22       0.00        inf       0.00       1.24 
    26            occupation=Prof-specialty       0.18       0.00        inf       0.00       1.19 
    27         occupation=Handlers-cleaners       1.29        nan        nan        nan       3.64 
    28         occupation=Machine-op-inspct       0.86       0.00        inf       0.00       2.37 
    29              occupation=Adm-clerical       0.30       0.00        inf       0.00       1.35 
    30           occupation=Farming-fishing       1.12        nan        nan        nan       3.06 
    31          occupation=Transport-moving       0.62       0.00        inf       0.00       1.85 
    32           occupation=Priv-house-serv       3.46       0.00        inf       0.00      31.87 
    33           occupation=Protective-serv       0.11        nan        nan        nan       1.12 
    34              occupation=Armed-Forces       0.59       0.00        inf       0.00       1.81 
    35                    relationship=Wife      -1.06       0.00       -inf       0.00       0.35 
    36               relationship=Own-child      -1.04        nan        nan        nan       0.35 
    37           relationship=Not-in-family      -1.94       0.00       -inf       0.00       0.14 
    38          relationship=Other-relative      -2.42       0.00       -inf       0.00       0.09 
    39               relationship=Unmarried      -1.92        nan        nan        nan       0.15 
    40              race=Asian-Pac-Islander      -0.19       0.00       -inf       0.00       0.83 
    41              race=Amer-Indian-Eskimo       2.88       0.00        inf       0.00      17.78 
    42                           race=Other       3.93        nan        nan        nan      51.07 
    43                           race=Black       0.11       0.00        inf       0.00       1.12 
    44                           sex=Female       0.30       0.00        inf       0.00       1.36 
    45                         capital-gain      -0.00        nan        nan        nan       1.00 
    46                         capital-loss      -0.00       0.00       -inf       0.00       1.00 
    47                       hours-per-week      -0.04       0.00       -inf       0.00       0.96 
     6age -0.0365046039224 
     7fnlwgt -1.13033081561e-06 
     8education-num -0.278378069401 
     9marital-status=Divorced 4.28520584106 
     10marital-status=Never-married 3.79432463646 
     11marital-status=Separated 3.4642136097 
     12marital-status=Widowed 3.84919857979 
     13marital-status=Married-spouse-absent 3.98207736015 
     14marital-status=Married-AF-spouse 4.01079034805 
     15occupation=Tech-support -0.324787259102 
     16occupation=Craft-repair 0.371972113848 
     17occupation=Other-service 2.68194651604 
     18occupation=Sales 0.215603515506 
     19occupation=Prof-specialty 0.176146954298 
     20occupation=Handlers-cleaners 1.29317069054 
     21occupation=Machine-op-inspct 0.8613935709 
     22occupation=Adm-clerical 0.301324903965 
     23occupation=Farming-fishing 1.11930930614 
     24occupation=Transport-moving 0.616262614727 
     25occupation=Priv-house-serv 3.46170806885 
     26occupation=Protective-serv 0.113764844835 
     27occupation=Armed-Forces 0.593791663647 
     28relationship=Wife -1.0589966774 
     29relationship=Own-child -1.03764116764 
     30relationship=Not-in-family -1.93763542175 
     31relationship=Other-relative -2.420140028 
     32relationship=Unmarried -1.92468094826 
     33race=Asian-Pac-Islander -0.191510245204 
     34race=Amer-Indian-Eskimo 2.87814831734 
     35race=Other 3.93312478065 
     36race=Black 0.111131064594 
     37sex=Female 0.304161816835 
     38capital-gain -0.000317209050991 
     39capital-loss -0.000606149493251 
     40hours-per-week -0.0415332503617 
  • Orange/testing/regression/results_tests_20/modules_misc_bestOnTheFly.py.txt

    r9951 r9968  
    1 0.565: EnumVariable 'lym_dimin' 
    2 0.565: EnumVariable 'lym_dimin' 
    3 0.565: EnumVariable 'lym_dimin' 
     11.000: EnumVariable 'milk' 
     21.000: EnumVariable 'milk' 
     31.000: EnumVariable 'milk' 
  • Orange/testing/regression/results_tests_20/modules_statExamples.py.txt

    r9951 r9968  
    1717 
    1818Confusion matrix for naive Bayes for 'van': 
    19 TP: 192, FP: 151, FN: 7.0, TN: 496 
     19TP: 192, FP: 152, FN: 7.0, TN: 495 
    2020 
    2121Confusion matrix for naive Bayes for 'opel': 
    22 TP: 79, FP: 75, FN: 133.0, TN: 559 
     22TP: 80, FP: 71, FN: 132.0, TN: 563 
    2323 
    24     bus van saab    opel 
    25 bus 156 19  17  26 
    26 van 4   192 2   1 
    27 saab    8   68  93  48 
    28 opel    8   64  61  79 
     24    bus opel    saab    van 
     25bus 156 25  17  20 
     26opel    6   80  61  65 
     27saab    7   46  97  67 
     28van 4   0   3   192 
    2929 
    3030Sensitivity and specificity for 'voting' 
     
    3636Sensitivity and specificity for 'vehicle=van' 
    3737method  sens    spec 
    38 bayes   0.965   0.767 
    39 tree    0.834   0.966 
     38bayes   0.965   0.765 
     39tree    0.794   0.969 
    4040majrty  0.000   1.000 
    4141 
     
    4747AUC for vehicle using weighted single-out method 
    4848bayes   tree    majority 
    49 0.840   0.816   0.500 
     490.841   0.795   0.500 
    5050 
    5151AUC for vehicle, using different methods 
    5252                            bayes   tree    majority 
    53        by pairs, weighted:  0.861   0.883   0.500 
    54                  by pairs:  0.863   0.884   0.500 
    55     one vs. all, weighted:  0.840   0.816   0.500 
    56               one vs. all:  0.840   0.816   0.500 
     53       by pairs, weighted:  0.858   0.869   0.500 
     54                 by pairs:  0.859   0.870   0.500 
     55    one vs. all, weighted:  0.841   0.795   0.500 
     56              one vs. all:  0.841   0.795   0.500 
    5757 
    5858AUC for detecting class 'van' in 'vehicle' 
    59 0.923   0.900   0.500 
     590.924   0.881   0.500 
    6060 
    6161AUCs for detecting various classes in 'vehicle' 
    62 bus (218.000) vs others:    0.952   0.936   0.500 
    63 van (199.000) vs others:    0.923   0.900   0.500 
    64 saab (217.000) vs others:   0.737   0.707   0.500 
    65 opel (212.000) vs others:   0.749   0.718   0.500 
     62bus (218.000) vs others:    0.954   0.943   0.500 
     63opel (212.000) vs others:   0.749   0.685   0.500 
     64saab (217.000) vs others:   0.739   0.672   0.500 
     65van (199.000) vs others:    0.924   0.881   0.500 
    6666 
    67     bus van saab 
    68 van 0.987 
    69 saab    0.927   0.860 
    70 opel    0.921   0.894   0.587 
     67    bus opel    saab 
     68opel    0.922 
     69saab    0.927   0.561 
     70van 0.991   0.898   0.857 
    7171 
    7272AUCs for detecting various pairs of classes in 'vehicle' 
    73 van vs bus:     0.987   0.976   0.500 
    74 saab vs bus:    0.927   0.936   0.500 
    75 saab vs van:    0.860   0.906   0.500 
    76 opel vs bus:    0.921   0.951   0.500 
    77 opel vs van:    0.894   0.915   0.500 
    78 opel vs saab:   0.587   0.622   0.500 
     73opel vs bus:    0.922   0.949   0.500 
     74saab vs bus:    0.927   0.941   0.500 
     75saab vs opel:   0.561   0.578   0.500 
     76van vs bus:     0.991   0.977   0.500 
     77van vs opel:    0.898   0.902   0.500 
     78van vs saab:    0.857   0.872   0.500 
    7979 
    8080AUC and SE for voting 
  • Orange/testing/regression/results_tests_20/reference_example2.py.txt

    r9951 r9968  
    1 ['young', 'myope', 'no', 'reduced', 'none'], {-42:0.84} 
     1['young', 'myope', 'no', 'reduced', 'none'], {-42:0.64} 
    22<15.000, 4.000, 5.000> 
    3 <9.691, 1.969, 3.232> 
    4 ['young', 'myope', 'no', 'reduced', 'none'], {"w":0.844} 
    5 0.844 
    6 0.844 
    7 0.844 
     3<7.326, 0.822, 1.628> 
     4['young', 'myope', 'no', 'reduced', 'none'], {"w":0.639} 
     50.639 
     60.639 
     70.639 
  • Orange/testing/regression/results_tests_20/reference_example3.py.txt

    r9951 r9968  
    88['young', 'myope', 'yes', 'normal', 'hard'], {"ok?":'no'} 
    99['young', 'hypermetrope', 'no', 'reduced', 'none'], {"ok?":'yes'} 
    10 ['young', 'hypermetrope', 'no', 'normal', 'soft'], {"ok?":'no'} 
     10['young', 'hypermetrope', 'no', 'normal', 'soft'], {"ok?":'yes'} 
    1111 
    1212 
     
    1717['young', 'myope', 'yes', 'normal', 'hard'], {"ok?":'no'} 
    1818['young', 'hypermetrope', 'no', 'reduced', 'none'], {"ok?":'yes'} 
    19 ['young', 'hypermetrope', 'no', 'normal', 'soft'], {"ok?":'no'} 
     19['young', 'hypermetrope', 'no', 'normal', 'soft'], {"ok?":'yes'} 
  • Orange/testing/regression/tests_20/modules_logreg2.py

    r9952 r9965  
    1111for ex in data[:5]: 
    1212    print ex.getclass(), lr(ex) 
    13      
    14 orngLR.printOUT(lr)  
     13 
     14out = [''] 
     15 
     16# get the longest attribute name 
     17longest=0 
     18for at in lr.continuized_domain.features: 
     19    if len(at.name)>longest: 
     20        longest=len(at.name) 
     21 
     22# print out the head 
     23for i in range(len(lr.continuized_domain.features)): 
     24    print lr.continuized_domain.features[i].name, lr.beta[i+1] 
  • Orange/testing/regression/tests_20/modules_misc_bestOnTheFly.py

    r9952 r9969  
    11import orange, orngMisc 
    22 
    3 data = orange.ExampleTable("lymphography") 
     3data = orange.ExampleTable("zoo") 
    44 
    55findBest = orngMisc.BestOnTheFly(orngMisc.compare2_firstBigger) 
     
    1111 
    1212 
    13 findBest = orngMisc.BestOnTheFly(callCompareOn1st = True) 
     13findBest = orngMisc.BestOnTheFly(callCompareOn1st=True) 
    1414for attr in data.domain.attributes: 
    1515    findBest.candidate((orange.MeasureAttribute_gainRatio(attr, data), attr)) 
  • Orange/testing/regression/xtest.py

    r9949 r9972  
    144144                    p.kill() 
    145145                    result2 = "timedout" 
    146                     print "timedout" 
     146                    print "timedout (use: --timeout #)" 
    147147                    # remove output file and change it for *.timedout.* 
    148148                    for state in states: 
     
    154154                    timeoutname = "%s/%s.%s.%s.%s.txt" % (outputsdir, name, sys.platform, sys.version[:3], "timedout") 
    155155                    open(timeoutname, "wt").close() 
     156                    result = "timedout" 
    156157                else: 
    157158                    stdout, stderr = p.communicate() 
  • docs/reference/rst/Orange.data.continuization.rst

    r9941 r9966  
    1111variable separately. 
    1212 
    13 .. class DomainContinuizer 
     13.. class:: DomainContinuizer 
    1414 
    1515    Returns a new domain containing only continuous attributes given a 
     
    2929      ``multinomial_treatment``. 
    3030 
    31     .. attribute zero_based 
     31    The typical use of the class is as follows:: 
     32 
     33        continuizer = orange.DomainContinuizer() 
     34        continuizer.multinomialTreatment = continuizer.LowestIsBase 
     35        domain0 = continuizer(data) 
     36        data0 = data.translate(domain0) 
     37 
     38    .. attribute:: zero_based 
    3239 
    3340        Determines the value used as the "low" value of the variable. When 
     
    3845        following text assumes the default case. 
    3946 
    40     .. attribute multinomial_treatment 
     47    .. attribute:: multinomial_treatment 
    4148 
    4249       Decides the treatment of multinomial variables. Let N be the 
     
    5461           used (directly) in, for instance, linear or logistic regression. 
    5562 
     63           For example, data set "bridges" has feature "RIVER" with 
     64           values "M", "A", "O" and "Y", in that order. Its value for 
     65           the 15th row is "M". Continuization replaces the variable 
     66           with variables "RIVER=M", "RIVER=A", "RIVER=O" and 
     67           "RIVER=Y". For the 15th row, the first has value 1 and 
     68           others are 0. 
     69 
    5670       DomainContinuizer.LowestIsBase 
    5771           Similar to the above except that it creates only N-1 
     
    6377           specified value is used as base instead of the lowest one. 
    6478 
     79           Continuizing the variable "RIVER" gives similar results as 
     80           above except that it would omit "RIVER=M"; all three 
     81           variables would be zero for the 15th data instance. 
     82 
    6583       DomainContinuizer.FrequentIsBase 
    66  
    6784           Like above, except that the most frequent value is used as the 
    6885           base (this can again be overidden by setting the descriptor's 
     
    7188           extracted from data, so this option cannot be used if constructor 
    7289           is given only a domain. 
     90 
     91           Variable "RIVER" would be continuized similarly to above 
     92           except that it omits "RIVER=A", which is the most frequent value. 
    7393            
    7494       DomainContinuizer.Ignore 
     
    87107           variable. 
    88108 
    89     .. attribute normalize_continuous 
     109    .. attribute:: normalize_continuous 
    90110 
    91111        If ``False`` (default), continues variables are left unchanged. If 
  • docs/reference/rst/Orange.data.discretization.rst

    r9943 r9963  
    11.. py:currentmodule:: Orange.data.discretization 
    22 
    3 ################################### 
     3######################################## 
    44Data discretization (``discretization``) 
    5 ################################### 
     5######################################## 
    66 
    77.. index:: discretization 
     
    1010   single: data; discretization 
    1111 
    12 Continues features in the data can be discretized using a uniform discretization method. The approach will consider 
    13 only continues features, and replace them in the data set with corresponding categorical features: 
     12Continuous features in the data can be discretized using a uniform discretization method. Discretization considers 
     13only continuous features, and replaces them in the new data set with corresponding categorical features: 
    1414 
    1515.. literalinclude:: code/discretization-table.py 
    1616 
    17 Discretization introduces new categorical features and computes their values in accordance to 
    18 a discretization method:: 
     17Discretization introduces new categorical features with discretized values:: 
    1918 
    2019    Original data set: 
     
    2827    ['<=5.45', '>3.15', '<=2.45', '<=0.80', 'Iris-setosa'] 
    2928 
    30 The procedure uses feature discretization classes as defined in :doc:`Orange.feature.discretization` and applies them 
    31 on entire data set. The suported discretization methods are: 
     29Data discretization uses feature discretization classes from
     30:doc:`Orange.feature.discretization` and applies them on entire data set. The supported discretization methods are: 
    3231 
    3332* equal width discretization, where the domain of continuous feature is split to intervals of the same 
     
    4342.. FIXME give a corresponding class for fixed discretization 
    4443 
    45 The above script used the default discretization method (equal frequency with three intervals). This can be 
    46 changed while some selected discretization approach as demonstrated below: 
     44Default discretization method (equal frequency with three intervals) can be replaced with other 
     45discretization approaches as demonstrated below: 
    4746 
    4847.. literalinclude:: code/discretization-table-method.py 
    4948    :lines: 3-5 
     49 
     50Entropy-based discretization is special as it may infer new features that are constant and have only one value. Such 
     51features are redundant and provide no information about the class. By default, 
     52:class:`DiscretizeTable` would remove them, a way of performing feature subset selection. The effect of removal of 
     53non-informative features is also demonstrated in the following script: 
     54 
     55.. literalinclude:: code/discretization-entropy.py 
     56    :lines: 3- 
     57 
     58In the sampled data set above, three features were discretized to a constant and thus removed:: 
     59 
     60    Redundant features (3 of 13): 
     61    cholesterol, rest SBP, age 
     62 
     63.. note:: 
     64    Entropy-based and bi-modal discretization require class-labeled data sets. 
    5065 
    5166Data discretization classes 
  • docs/reference/rst/Orange.feature.discretization.rst

    r9944 r9964  
    1010   single: feature; discretization 
    1111 
    12 Continues features can be discretized either one feature at a time, or, as demonstrated in the following script, 
    13 using a single discretization method on entire set of data features: 
    14  
    15 .. literalinclude:: code/discretization-table.py 
    16  
    17 Discretization introduces new categorical features and computes their values in accordance to 
    18 selected (or default) discretization method:: 
    19  
    20     Original data set: 
    21     [5.1, 3.5, 1.4, 0.2, 'Iris-setosa'] 
    22     [4.9, 3.0, 1.4, 0.2, 'Iris-setosa'] 
    23     [4.7, 3.2, 1.3, 0.2, 'Iris-setosa'] 
    24  
    25     Discretized data set: 
    26     ['<=5.45', '>3.15', '<=2.45', '<=0.80', 'Iris-setosa'] 
    27     ['<=5.45', '(2.85, 3.15]', '<=2.45', '<=0.80', 'Iris-setosa'] 
    28     ['<=5.45', '>3.15', '<=2.45', '<=0.80', 'Iris-setosa'] 
    29  
    30 The following discretization methods are supported: 
    31  
    32 * equal width discretization, where the domain of continuous feature is split to intervals of the same 
    33   width equal-sized intervals (:class:`EqualWidth`), 
    34 * equal frequency discretization, where each intervals contains equal number of data instances (:class:`EqualFreq`), 
    35 * entropy-based, as originally proposed by [FayyadIrani1993]_ that infers the intervals to minimize 
    36   within-interval entropy of class distributions (:class:`Entropy`), 
    37 * bi-modal, using three intervals to optimize the difference of the class distribution in 
    38   the middle with the distribution outside it (:class:`BiModal`), 
    39 * fixed, with the user-defined cut-off points. 
    40  
    41 The above script used the default discretization method (equal frequency with three intervals). This can be changed 
    42 as demonstrated below: 
    43  
    44 .. literalinclude:: code/discretization-table-method.py 
    45     :lines: 3-5 
    46  
    47 With exception to fixed discretization, discretization approaches infer the cut-off points from the 
    48 training data set and thus construct a discretizer to convert continuous values of this feature into categorical 
    49 value according to the rule found by discretization. In this respect, the discretization behaves similar to 
    50 :class:`Orange.classification.Learner`. 
    51  
    52 Discretization Algorithms 
    53 ========================= 
    54  
    55 Instances of discretization classes are all derived from :class:`Discretization`. 
    56  
    57 .. class:: Discretization 
    58  
    59     .. method:: __call__(feature, data[, weightID]) 
    60  
    61         Given a continuous ``feature``, ``data`` and, optionally id of 
    62         attribute with example weight, this function returns a discretized 
    63         feature. Argument ``feature`` can be a descriptor, index or 
    64         name of the attribute. 
    65  
    66  
    67 .. class:: EqualWidth 
    68  
    69     Discretizes the feature by spliting its domain to a fixed number 
    70     of equal-width intervals. The span of original domain is computed 
    71     from the training data and is defined by the smallest and the 
    72     largest feature value. 
    73  
    74     .. attribute:: n 
    75  
    76         Number of discretization intervals (default: 4). 
    77  
    78 The following example discretizes Iris dataset features using six 
    79 intervals. The script constructs a :class:`Orange.data.Table` with discretized 
    80 features and outputs their description: 
    81  
    82 .. literalinclude:: code/discretization.py 
    83     :lines: 38-43 
    84  
    85 The output of this script is:: 
    86  
    87     D_sepal length: <<4.90, [4.90, 5.50), [5.50, 6.10), [6.10, 6.70), [6.70, 7.30), >7.30> 
    88     D_sepal width: <<2.40, [2.40, 2.80), [2.80, 3.20), [3.20, 3.60), [3.60, 4.00), >4.00> 
    89     D_petal length: <<1.98, [1.98, 2.96), [2.96, 3.94), [3.94, 4.92), [4.92, 5.90), >5.90> 
    90     D_petal width: <<0.50, [0.50, 0.90), [0.90, 1.30), [1.30, 1.70), [1.70, 2.10), >2.10> 
    91  
    92 The cut-off values are hidden in the discretizer and stored in ``attr.get_value_from.transformer``:: 
    93  
    94     >>> for attr in newattrs: 
    95     ...    print "%s: first interval at %5.3f, step %5.3f" % \ 
    96     ...    (attr.name, attr.get_value_from.transformer.first_cut, \ 
    97     ...    attr.get_value_from.transformer.step) 
    98     D_sepal length: first interval at 4.900, step 0.600 
    99     D_sepal width: first interval at 2.400, step 0.400 
    100     D_petal length: first interval at 1.980, step 0.980 
    101     D_petal width: first interval at 0.500, step 0.400 
    102  
    103 All discretizers have the method 
    104 ``construct_variable``: 
    105  
    106 .. literalinclude:: code/discretization.py 
    107     :lines: 69-73 
    108  
    109  
    110 .. class:: EqualFreq 
    111  
    112     Infers the cut-off points so that the discretization intervals contain 
    113     approximately equal number of training data instances. 
    114  
    115     .. attribute:: n 
    116  
    117         Number of discretization intervals (default: 4). 
    118  
    119 The resulting discretizer is of class :class:`IntervalDiscretizer`. Its ``transformer`` includes ``points`` 
    120 that store the inferred cut-offs. 
    121  
    122 .. class:: Entropy 
    123  
    124     Entropy-based discretization as originally proposed by [FayyadIrani1993]_. The approach infers the most 
    125     appropriate number of intervals by recursively splitting the domain of continuous feature to minimize the 
    126     class-entropy of training examples. The splitting is repeated until the entropy decrease is smaller than the 
    127     increase of minimal description length (MDL) induced by the new cut-off point. 
    128  
    129     Entropy-based discretization can reduce a continuous feature into 
    130     a single interval if no suitable cut-off points are found. In this case the new feature is constant and can be 
    131     removed. This discretization can 
    132     therefore also serve for identification of non-informative features and thus used for feature subset selection. 
    133  
    134     .. attribute:: force_attribute 
    135  
    136         Forces the algorithm to induce at least one cut-off point, even when 
    137         its information gain is lower than MDL (default: ``False``). 
    138  
    139 Part of :download:`discretization.py <code/discretization.py>`: 
    140  
    141 .. literalinclude:: code/discretization.py 
    142     :lines: 77-80 
    143  
    144 The output shows that all attributes are discretized onto three intervals:: 
    145  
    146     sepal length: <5.5, 6.09999990463> 
    147     sepal width: <2.90000009537, 3.29999995232> 
    148     petal length: <1.89999997616, 4.69999980927> 
    149     petal width: <0.600000023842, 1.0000004768> 
    150  
    151 .. class:: BiModal 
    152  
    153     Infers two cut-off points to optimize the difference of class distribution of data instances in the 
    154     middle and in the other two intervals. The 
    155     difference is scored by chi-square statistics. All possible cut-off 
    156     points are examined, thus the discretization runs in O(n^2). This discretization method is especially suitable 
    157     for the attributes in 
    158     which the middle region corresponds to normal and the outer regions to 
    159     abnormal values of the feature. 
    160  
    161     .. attribute:: split_in_two 
    162  
    163         Decides whether the resulting attribute should have three or two values. 
    164         If ``True`` (default), the feature will be discretized to three 
    165         intervals and the discretizer is of type :class:`BiModalDiscretizer`. 
    166         If ``False`` the result is the ordinary :class:`IntervalDiscretizer`. 
    167  
    168 Iris dataset has three-valued class attribute. The figure below, drawn using LOESS probability estimation, shows that 
    169 sepal lengths of versicolors are between lengths of setosas and virginicas. 
    170  
    171 .. image:: files/bayes-iris.gif 
    172  
    173 If we merge classes setosa and virginica, we can observe if 
    174 the bi-modal discretization would correctly recognize the interval in 
    175 which versicolors dominate. The following script performs the merging and construction of a new data set with a class 
    176 that reports if iris is versicolor or not. 
    177  
    178 .. literalinclude:: code/discretization.py 
    179     :lines: 84-87 
    180  
    181 The following script implements the discretization: 
    182  
    183 .. literalinclude:: code/discretization.py 
    184     :lines: 97-100 
    185  
    186 The middle intervals are printed:: 
    187  
    188     sepal length: (5.400, 6.200] 
    189     sepal width: (2.000, 2.900] 
    190     petal length: (1.900, 4.700] 
    191     petal width: (0.600, 1.600] 
    192  
    193 Judging by the graph, the cut-off points inferred by discretization for "sepal length" make sense. 
    194  
    195 Discretizers 
    196 ============ 
    197  
    198 Discretizers construct a categorical feature from the continuous feature according to the method they implement and 
    199 its parameters. The most general is 
    200 :class:`IntervalDiscretizer` that is also used by most discretization 
    201 methods. Two other discretizers, :class:`EquiDistDiscretizer` and 
    202 :class:`ThresholdDiscretizer`, could easily be replaced by 
    203 :class:`IntervalDiscretizer` but are used for speed and simplicity. 
    204 The fourth discretizer, :class:`BiModalDiscretizer` is specialized 
    205 for discretizations induced by :class:`BiModalDiscretization`. 
    206  
    207 .. class:: Discretizer 
    208  
    209     A superclass implementing the construction of a new 
    210     attribute from an existing one. 
    211  
    212     .. method:: construct_variable(feature) 
    213  
    214         Constructs a descriptor for a new feature. The new feature's name is equal to ``feature.name`` 
    215         prefixed by "D\_". Its symbolic values are discretizer specific. 
    216  
    217 .. class:: IntervalDiscretizer 
    218  
    219     Discretizer defined with a set of cut-off points. 
    220  
    221     .. attribute:: points 
    222  
    223         The cut-off points; feature values below or equal to the first point will be mapped to the first interval, 
    224         those between the first and the second point 
    225         (including those equal to the second) are mapped to the second interval and 
    226         so forth to the last interval which covers all values greater than 
    227         the last value in ``points``. The number of intervals is thus 
    228         ``len(points)+1``. 
    229  
    230 The script that follows is an example of a manual construction of a discretizer with cut-off points 
    231 at 3.0 and 5.0: 
    232  
    233 .. literalinclude:: code/discretization.py 
    234     :lines: 22-26 
    235  
    236 First five data instances of ``data2`` are:: 
    237  
    238     [5.1, '>5.00', 'Iris-setosa'] 
    239     [4.9, '(3.00, 5.00]', 'Iris-setosa'] 
    240     [4.7, '(3.00, 5.00]', 'Iris-setosa'] 
    241     [4.6, '(3.00, 5.00]', 'Iris-setosa'] 
    242     [5.0, '(3.00, 5.00]', 'Iris-setosa'] 
    243  
    244 The same discretizer can be used on several features by calling the function construct_var: 
    245  
    246 .. literalinclude:: code/discretization.py 
    247     :lines: 30-34 
    248  
    249 Each feature has its own instance of :class:`ClassifierFromVar` stored in 
    250 ``get_value_from``, but all use the same :class:`IntervalDiscretizer`, 
    251 ``idisc``. Changing any element of its ``points`` affects all attributes. 
    252  
    253 .. note:: 
    254  
    255     The length of :obj:`~IntervalDiscretizer.points` should not be changed if the 
    256     discretizer is used by any attribute. The length of 
    257     :obj:`~IntervalDiscretizer.points` should always match the number of values 
    258     of the feature, which is determined by the length of the attribute's field 
    259     ``values``. If ``attr`` is a discretized attribute, then ``len(attr.values)`` must equal 
    260     ``len(attr.get_value_from.transformer.points)+1``. 
    261  
    262  
    263 .. class:: EqualWidthDiscretizer 
    264  
    265     Discretizes to intervals of the fixed width. All values lower than :obj:`~EquiDistDiscretizer.first_cut` are mapped to the first 
    266     interval. Otherwise, value ``val``'s interval is ``floor((val-first_cut)/step)``. Possible overflows are mapped to the 
    267     last intervals. 
    268  
    269  
    270     .. attribute:: first_cut 
    271  
    272         The first cut-off point. 
    273  
    274     .. attribute:: step 
    275  
    276         Width of the intervals. 
    277  
    278     .. attribute:: n 
    279  
    280         Number of the intervals. 
    281  
    282     .. attribute:: points (read-only) 
    283  
    284         The cut-off points; this is not a real attribute although it behaves 
    285         as one. Reading it constructs a list of cut-off points and returns it, 
    286         but changing the list doesn't affect the discretizer. Only present to provide 
    287         the :obj:`EquiDistDiscretizer` the same interface as that of 
    288         :obj:`IntervalDiscretizer`. 
    289  
    290  
    291 .. class:: ThresholdDiscretizer 
    292  
    293     Threshold discretizer converts continuous values into binary by comparing 
    294     them to a fixed threshold. Orange uses this discretizer for 
    295     binarization of continuous attributes in decision trees. 
    296  
    297     .. attribute:: threshold 
    298  
    299         The value threshold; values below or equal to the threshold belong to the first 
    300         interval and those that are greater go to the second. 
    301  
    302  
    303 .. class:: BiModalDiscretizer 
    304  
    305     Bimodal discretizer has two cut off points and values are 
    306     discretized according to whether or not they belong to the region between these points 
    307     which includes the lower but not the upper boundary. The 
    308     discretizer is returned by :class:`BiModalDiscretization` if its 
    309     field :obj:`~BiModalDiscretization.split_in_two` is true (the default). 
    310  
    311     .. attribute:: low 
    312  
    313         Lower boundary of the interval (included in the interval). 
    314  
    315     .. attribute:: high 
    316  
    317         Upper boundary of the interval (not included in the interval). 
    318  
    319  
    320 Implementational details 
    321 ======================== 
     12Feature discretization module provides routines that consider continuous features and 
     13introduce a new discretized feature based on the training data set. Most often such procedure would be executed 
     14on all the features of the data set using implementations from :doc:`Orange.feature.discretization`. Implementations 
     15in this module are concerned with discretization of one feature at a time, and do not provide wrappers for 
     16whole-data set discretization. The discretization is data-specific, and consists of learning of discretization 
     17procedure (see `Discretization Algorithms`_) and actual discretization (see Discretizers_) of the data. Splitting of 
     18these 
     19two phases is intentional, 
     20as in machine learning discretization may be learned from the training set and executed on the test set. 
    32221 
    32322Consider a following example (part of :download:`discretization.py <code/discretization.py>`): 
     
    36463by ``get_value_from`` and stored in the new example. 
    36564 
     65With exception to fixed discretization, discretization approaches infer the cut-off points from the 
     66training data set and thus construct a discretizer to convert continuous values of this feature into categorical 
     67value according to the rule found by discretization. In this respect, the discretization behaves similar to 
     68:class:`Orange.classification.Learner`. 
     69 
      70.. _`Discretization Algorithms`: 
     71 
     72Discretization Algorithms 
     73========================= 
     74 
     75Instances of discretization classes are all derived from :class:`Discretization`. 
     76 
     77.. class:: Discretization 
     78 
     79    .. method:: __call__(feature, data[, weightID]) 
     80 
     81        Given a continuous ``feature``, ``data`` and, optionally id of 
     82        attribute with example weight, this function returns a discretized 
     83        feature. Argument ``feature`` can be a descriptor, index or 
     84        name of the attribute. 
     85 
     86 
     87.. class:: EqualWidth 
     88 
      89    Discretizes the feature by splitting its domain into a fixed number 
     90    of equal-width intervals. The span of original domain is computed 
     91    from the training data and is defined by the smallest and the 
     92    largest feature value. 
     93 
     94    .. attribute:: n 
     95 
     96        Number of discretization intervals (default: 4). 
     97 
     98The following example discretizes Iris dataset features using six 
     99intervals. The script constructs a :class:`Orange.data.Table` with discretized 
     100features and outputs their description: 
     101 
     102.. literalinclude:: code/discretization.py 
     103    :lines: 38-43 
     104 
     105The output of this script is:: 
     106 
     107    D_sepal length: <<4.90, [4.90, 5.50), [5.50, 6.10), [6.10, 6.70), [6.70, 7.30), >7.30> 
     108    D_sepal width: <<2.40, [2.40, 2.80), [2.80, 3.20), [3.20, 3.60), [3.60, 4.00), >4.00> 
     109    D_petal length: <<1.98, [1.98, 2.96), [2.96, 3.94), [3.94, 4.92), [4.92, 5.90), >5.90> 
     110    D_petal width: <<0.50, [0.50, 0.90), [0.90, 1.30), [1.30, 1.70), [1.70, 2.10), >2.10> 
     111 
     112The cut-off values are hidden in the discretizer and stored in ``attr.get_value_from.transformer``:: 
     113 
     114    >>> for attr in newattrs: 
     115    ...    print "%s: first interval at %5.3f, step %5.3f" % \ 
     116    ...    (attr.name, attr.get_value_from.transformer.first_cut, \ 
     117    ...    attr.get_value_from.transformer.step) 
     118    D_sepal length: first interval at 4.900, step 0.600 
     119    D_sepal width: first interval at 2.400, step 0.400 
     120    D_petal length: first interval at 1.980, step 0.980 
     121    D_petal width: first interval at 0.500, step 0.400 
     122 
     123All discretizers have the method 
     124``construct_variable``: 
     125 
     126.. literalinclude:: code/discretization.py 
     127    :lines: 69-73 
     128 
     129 
     130.. class:: EqualFreq 
     131 
     132    Infers the cut-off points so that the discretization intervals contain 
     133    approximately equal number of training data instances. 
     134 
     135    .. attribute:: n 
     136 
     137        Number of discretization intervals (default: 4). 
     138 
     139The resulting discretizer is of class :class:`IntervalDiscretizer`. Its ``transformer`` includes ``points`` 
     140that store the inferred cut-offs. 
     141 
     142.. class:: Entropy 
     143 
     144    Entropy-based discretization as originally proposed by [FayyadIrani1993]_. The approach infers the most 
     145    appropriate number of intervals by recursively splitting the domain of continuous feature to minimize the 
     146    class-entropy of training examples. The splitting is repeated until the entropy decrease is smaller than the 
      147    increase of minimal description length (MDL) induced by the new cut-off point. 
     148 
     149    Entropy-based discretization can reduce a continuous feature into 
     150    a single interval if no suitable cut-off points are found. In this case the new feature is constant and can be 
     151    removed. This discretization can 
     152    therefore also serve for identification of non-informative features and thus used for feature subset selection. 
     153 
     154    .. attribute:: force_attribute 
     155 
     156        Forces the algorithm to induce at least one cut-off point, even when 
     157        its information gain is lower than MDL (default: ``False``). 
     158 
     159Part of :download:`discretization.py <code/discretization.py>`: 
     160 
     161.. literalinclude:: code/discretization.py 
     162    :lines: 77-80 
     163 
     164The output shows that all attributes are discretized onto three intervals:: 
     165 
     166    sepal length: <5.5, 6.09999990463> 
     167    sepal width: <2.90000009537, 3.29999995232> 
     168    petal length: <1.89999997616, 4.69999980927> 
     169    petal width: <0.600000023842, 1.0000004768> 
     170 
     171.. class:: BiModal 
     172 
     173    Infers two cut-off points to optimize the difference of class distribution of data instances in the 
     174    middle and in the other two intervals. The 
     175    difference is scored by chi-square statistics. All possible cut-off 
     176    points are examined, thus the discretization runs in O(n^2). This discretization method is especially suitable 
     177    for the attributes in 
     178    which the middle region corresponds to normal and the outer regions to 
     179    abnormal values of the feature. 
     180 
     181    .. attribute:: split_in_two 
     182 
     183        Decides whether the resulting attribute should have three or two values. 
     184        If ``True`` (default), the feature will be discretized to three 
     185        intervals and the discretizer is of type :class:`BiModalDiscretizer`. 
     186        If ``False`` the result is the ordinary :class:`IntervalDiscretizer`. 
     187 
     188Iris dataset has three-valued class attribute. The figure below, drawn using LOESS probability estimation, shows that 
      189sepal lengths of versicolors are between lengths of setosas and virginicas. 
     190 
     191.. image:: files/bayes-iris.gif 
     192 
     193If we merge classes setosa and virginica, we can observe if 
     194the bi-modal discretization would correctly recognize the interval in 
      195which versicolors dominate. The following script performs the merging and construction of a new data set with a class 
     196that reports if iris is versicolor or not. 
     197 
     198.. literalinclude:: code/discretization.py 
     199    :lines: 84-87 
     200 
     201The following script implements the discretization: 
     202 
     203.. literalinclude:: code/discretization.py 
     204    :lines: 97-100 
     205 
     206The middle intervals are printed:: 
     207 
     208    sepal length: (5.400, 6.200] 
     209    sepal width: (2.000, 2.900] 
     210    petal length: (1.900, 4.700] 
     211    petal width: (0.600, 1.600] 
     212 
     213Judging by the graph, the cut-off points inferred by discretization for "sepal length" make sense. 
     214 
     215.. _Discretizers: 
     216 
     217Discretizers 
     218============= 
     219 
     220Discretizers construct a categorical feature from the continuous feature according to the method they implement and 
     221its parameters. The most general is 
     222:class:`IntervalDiscretizer` that is also used by most discretization 
     223methods. Two other discretizers, :class:`EquiDistDiscretizer` and 
      224:class:`ThresholdDiscretizer`, could easily be replaced by 
     225:class:`IntervalDiscretizer` but are used for speed and simplicity. 
     226The fourth discretizer, :class:`BiModalDiscretizer` is specialized 
     227for discretizations induced by :class:`BiModalDiscretization`. 
     228 
     229.. class:: Discretizer 
     230 
     231    A superclass implementing the construction of a new 
     232    attribute from an existing one. 
     233 
     234    .. method:: construct_variable(feature) 
     235 
     236        Constructs a descriptor for a new feature. The new feature's name is equal to ``feature.name`` 
     237        prefixed by "D\_". Its symbolic values are discretizer specific. 
     238 
     239.. class:: IntervalDiscretizer 
     240 
     241    Discretizer defined with a set of cut-off points. 
     242 
     243    .. attribute:: points 
     244 
     245        The cut-off points; feature values below or equal to the first point will be mapped to the first interval, 
     246        those between the first and the second point 
     247        (including those equal to the second) are mapped to the second interval and 
     248        so forth to the last interval which covers all values greater than 
     249        the last value in ``points``. The number of intervals is thus 
     250        ``len(points)+1``. 
     251 
      252The script that follows is an example of a manual construction of a discretizer with cut-off points 
     253at 3.0 and 5.0: 
     254 
     255.. literalinclude:: code/discretization.py 
     256    :lines: 22-26 
     257 
     258First five data instances of ``data2`` are:: 
     259 
     260    [5.1, '>5.00', 'Iris-setosa'] 
     261    [4.9, '(3.00, 5.00]', 'Iris-setosa'] 
     262    [4.7, '(3.00, 5.00]', 'Iris-setosa'] 
     263    [4.6, '(3.00, 5.00]', 'Iris-setosa'] 
     264    [5.0, '(3.00, 5.00]', 'Iris-setosa'] 
     265 
     266The same discretizer can be used on several features by calling the function construct_var: 
     267 
     268.. literalinclude:: code/discretization.py 
     269    :lines: 30-34 
     270 
     271Each feature has its own instance of :class:`ClassifierFromVar` stored in 
     272``get_value_from``, but all use the same :class:`IntervalDiscretizer`, 
      273``idisc``. Changing any element of its ``points`` affects all attributes. 
     274 
     275.. note:: 
     276 
     277    The length of :obj:`~IntervalDiscretizer.points` should not be changed if the 
     278    discretizer is used by any attribute. The length of 
     279    :obj:`~IntervalDiscretizer.points` should always match the number of values 
     280    of the feature, which is determined by the length of the attribute's field 
      281    ``values``. If ``attr`` is a discretized attribute, then ``len(attr.values)`` must equal 
     282    ``len(attr.get_value_from.transformer.points)+1``. 
     283 
     284 
     285.. class:: EqualWidthDiscretizer 
     286 
     287    Discretizes to intervals of the fixed width. All values lower than :obj:`~EquiDistDiscretizer.first_cut` are mapped to the first 
     288    interval. Otherwise, value ``val``'s interval is ``floor((val-first_cut)/step)``. Possible overflows are mapped to the 
     289    last intervals. 
     290 
     291 
     292    .. attribute:: first_cut 
     293 
     294        The first cut-off point. 
     295 
     296    .. attribute:: step 
     297 
     298        Width of the intervals. 
     299 
     300    .. attribute:: n 
     301 
     302        Number of the intervals. 
     303 
     304    .. attribute:: points (read-only) 
     305 
     306        The cut-off points; this is not a real attribute although it behaves 
     307        as one. Reading it constructs a list of cut-off points and returns it, 
     308        but changing the list doesn't affect the discretizer. Only present to provide 
     309        the :obj:`EquiDistDiscretizer` the same interface as that of 
     310        :obj:`IntervalDiscretizer`. 
     311 
     312 
     313.. class:: ThresholdDiscretizer 
     314 
     315    Threshold discretizer converts continuous values into binary by comparing 
     316    them to a fixed threshold. Orange uses this discretizer for 
     317    binarization of continuous attributes in decision trees. 
     318 
     319    .. attribute:: threshold 
     320 
     321        The value threshold; values below or equal to the threshold belong to the first 
     322        interval and those that are greater go to the second. 
     323 
     324 
     325.. class:: BiModalDiscretizer 
     326 
     327    Bimodal discretizer has two cut off points and values are 
     328    discretized according to whether or not they belong to the region between these points 
     329    which includes the lower but not the upper boundary. The 
     330    discretizer is returned by :class:`BiModalDiscretization` if its 
     331    field :obj:`~BiModalDiscretization.split_in_two` is true (the default). 
     332 
     333    .. attribute:: low 
     334 
     335        Lower boundary of the interval (included in the interval). 
     336 
     337    .. attribute:: high 
     338 
     339        Upper boundary of the interval (not included in the interval). 
     340 
    366341References 
    367342========== 
  • docs/reference/rst/code/discretization-table-method.py

    r9812 r9973  
    11import Orange 
    22iris = Orange.data.Table("iris.tab") 
    3 disc = Orange.feature.discretization.DiscretizeTable() 
     3disc = Orange.data.discretization.DiscretizeTable() 
    44disc.method = Orange.feature.discretization.EquiNDiscretization(numberOfIntervals=2) 
    55disc_iris = disc(iris) 
  • docs/reference/rst/code/transformvalue-d2c.py

    r9945 r9970  
    22import Orange.feature 
    33 
    4 data = Orange.data.Table("monk1") 
     4data = Orange.data.Table("monks-1") 
    55 
    66e1 = Orange.feature.Continuous("e=1") 
    7 e1.getValueFrom = Orange.core.ClassifierFromVar(whichVar = data.domain["e"]) 
     7e1.getValueFrom = Orange.core.ClassifierFromVar(whichVar=data.domain["e"]) 
    88e1.getValueFrom.transformer = Orange.core.Discrete2Continuous() 
    9 e1.getValueFrom.transformer.value = int(Orange.data.Value(e, "1")) 
Note: See TracChangeset for help on using the changeset viewer.