Changeset 7501:5df4fc7a80f9 in orange


Ignore:
Timestamp:
02/04/11 18:27:15 (3 years ago)
Author:
mocnik <mocnik@…>
Branch:
default
Convert:
331e931b8ed43a283f58679c9bfff193beff72b9
Message:

Modifying Orange.evaluation.scoring and cleaning orngStat.

Location:
orange
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • orange/Orange/evaluation/scoring.py

    r7464 r7501  
    66regression. Most functions require an argument named res, an instance of 
    77:class:`Orange.evaluation.testing.ExperimentResults` as computed by 
    8 functions from orngTest and which contains predictions obtained through 
     8functions from Orange.evaluation.testing and which contains predictions 
     9obtained through 
    910cross-validation, leave one-out, testing on training data or test set examples. 
    1011 
     
    5051 
    5152.. literalinclude:: code/statExample1.py 
     53   :lines: 13- 
    5254 
    5355.. _voting.tab: code/voting.tab 
     
    338340    train = voting.selectref(ri2, 0) 
    339341    test = voting.selectref(ri2, 1) 
    340     res1 = orngTest.learnAndTestOnTestData(learners, train, test) 
     342    res1 = Orange.evaluation.testing.learnAndTestOnTestData(learners, train, test) 
    341343 
    342344 
     
    421423""" 
    422424 
    423 import statc 
     425import statc, operator, math 
    424426from operator import add 
    425427import numpy 
    426428 
    427 import orngMisc, orngTest 
     429import Orange 
     430 
    428431 
    429432#### Private stuff 
     
    458461        return [res] 
    459462         
    460     ress = [orngTest.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
     463    ress = [Orange.evaluation.testing.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
    461464            for i in range(res.numberOfIterations)] 
    462465    for te in res.results: 
     
    705708     
    706709    If results are from multiple repetitions of experiments (like those 
    707     returned by orngTest.crossValidation or orngTest.proportionTest) the 
     710    returned by Orange.evaluation.testing.crossValidation or 
     711    Orange.evaluation.testing.proportionTest) the 
    708712    standard error (SE) is estimated from deviation of classification 
    709713    accuracy across folds (SD), as SE = SD/sqrt(N), where N is number 
     
    15931597    import corn 
    15941598    ## merge multiple iterations into one 
    1595     mres = orngTest.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
     1599    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
    15961600    for te in res.results: 
    15971601        mres.results.append( te ) 
     
    16551659    import corn 
    16561660    ## merge multiple iterations into one 
    1657     mres = orngTest.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
     1661    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
    16581662    for te in res.results: 
    16591663        mres.results.append( te ) 
     
    20782082 
    20792083def plotLearningCurveLearners(file, allResults, proportions, learners, noConfidence=0): 
    2080     plotLearningCurve(file, allResults, proportions, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], noConfidence) 
     2084    plotLearningCurve(file, allResults, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], noConfidence) 
    20812085     
    20822086def plotLearningCurve(file, allResults, proportions, legend, noConfidence=0): 
     
    21302134 
    21312135def plotROCLearners(file, curves, learners): 
    2132     plotROC(file, curves, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]) 
     2136    plotROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]) 
    21332137     
    21342138def plotROC(file, curves, legend): 
     
    21612165 
    21622166def plotMcNemarCurveLearners(file, allResults, proportions, learners, reference=-1): 
    2163     plotMcNemarCurve(file, allResults, proportions, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference) 
     2167    plotMcNemarCurve(file, allResults, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference) 
    21642168 
    21652169def plotMcNemarCurve(file, allResults, proportions, legend, reference=-1): 
     
    22752279 
    22762280def legendLearners2PiCTeX(file, learners, **options): 
    2277   return apply(legend2PiCTeX, (file, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options) 
     2281  return apply(legend2PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options) 
    22782282     
    22792283def legend2PiCTeX(file, legend, **options): 
  • orange/doc/Orange/rst/code/statExample1.py

    r7465 r7501  
     1import orange, orngTest, orngTree 
     2 
     3learners = [orange.BayesLearner(name = "bayes"), 
     4            orngTree.TreeLearner(name="tree"), 
     5            orange.MajorityLearner(name="majrty")] 
     6 
     7voting = orange.ExampleTable("voting") 
     8res = orngTest.crossValidation(learners, voting) 
     9 
     10vehicle = orange.ExampleTable("vehicle") 
     11resVeh = orngTest.crossValidation(learners, vehicle) 
     12 
    113import orngStat 
    214 
  • orange/orngStat.py

    r6951 r7501  
    1 import statc, operator, math 
    2 from operator import add 
    3 import orngMisc, orngTest 
    4 import numpy 
    5  
    6 #### Private stuff 
    7  
    8 def log2(x): 
    9     return math.log(x)/math.log(2) 
    10  
    11 def checkNonZero(x): 
    12     if x==0.0: 
    13         raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0." 
    14  
    15 def gettotweight(res): 
    16     totweight = reduce(lambda x, y: x+y.weight, res.results, 0) 
    17     if totweight==0.0: 
    18         raise ValueError, "Cannot compute the score: sum of weights is 0.0." 
    19     return totweight 
    20  
    21 def gettotsize(res): 
    22     if len(res.results): 
    23         return len(res.results) 
    24     else: 
    25         raise ValueError, "Cannot compute the score: no examples." 
    26  
    27  
    28 def splitByIterations(res): 
    29     if res.numberOfIterations < 2: 
    30         return [res] 
    31          
    32     ress = [orngTest.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
    33             for i in range(res.numberOfIterations)] 
    34     for te in res.results: 
    35         ress[te.iterationNumber].results.append(te) 
    36     return ress     
    37  
    38  
    39 def classProbabilitiesFromRes(res, **argkw): 
    40     probs = [0.0] * len(res.classValues) 
    41     if argkw.get("unweighted", 0) or not res.weights: 
    42         for tex in res.results: 
    43             probs[int(tex.actualClass)] += 1.0 
    44         totweight = gettotsize(res) 
    45     else: 
    46         totweight = 0.0 
    47         for tex in res.results: 
    48             probs[tex.actualClass] += tex.weight 
    49             totweight += tex.weight 
    50         checkNonZero(totweight) 
    51     return [prob/totweight for prob in probs] 
    52  
    53  
    54 def statisticsByFolds(stats, foldN, reportSE, iterationIsOuter): 
    55     # remove empty folds, turn the matrix so that learner is outer 
    56     if iterationIsOuter: 
    57         if not stats: 
    58             raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0." 
    59         numberOfLearners = len(stats[0]) 
    60         stats = filter(lambda (x, fN): fN>0.0, zip(stats,foldN)) 
    61         stats = [ [x[lrn]/fN for x, fN in stats] for lrn in range(numberOfLearners)] 
    62     else: 
    63         stats = [ [x/Fn for x, Fn in filter(lambda (x, Fn): Fn > 0.0, zip(lrnD, foldN))] for lrnD in stats] 
    64  
    65     if not stats: 
    66         raise ValueError, "Cannot compute the score: no classifiers" 
    67     if not stats[0]: 
    68         raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0." 
    69      
    70     if reportSE: 
    71         return [(statc.mean(x), statc.sterr(x)) for x in stats] 
    72     else: 
    73         return [statc.mean(x) for x in stats] 
    74      
    75 def ME(res, **argkw): 
    76     MEs = [0.0]*res.numberOfLearners 
    77  
    78     if argkw.get("unweighted", 0) or not res.weights: 
    79         for tex in res.results: 
    80             MEs = map(lambda res, cls, ac = float(tex.actualClass): 
    81                       res + abs(float(cls) - ac), MEs, tex.classes) 
    82         totweight = gettotsize(res) 
    83     else: 
    84         for tex in res.results: 
    85             MEs = map(lambda res, cls, ac = float(tex.actualClass), tw = tex.weight: 
    86                        res + tw*abs(float(cls) - ac), MEs, tex.classes) 
    87         totweight = gettotweight(res) 
    88  
    89     return [x/totweight for x in MEs] 
    90  
    91 MAE = ME 
    92  
    93 ######################################################################### 
    94 # PERFORMANCE MEASURES: 
    95 # Scores for evaluation of numeric predictions 
    96  
    97 def checkArgkw(dct, lst): 
    98     """checkArgkw(dct, lst) -> returns true if any items have non-zero value in dct""" 
    99     return reduce(lambda x,y: x or y, [dct.get(k, 0) for k in lst]) 
    100  
    101 def regressionError(res, **argkw): 
    102     """regressionError(res) -> regression error (default: MSE)""" 
    103     if argkw.get("SE", 0) and res.numberOfIterations > 1: 
    104         # computes the scores for each iteration, then averages 
    105         scores = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)] 
    106         if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0): 
    107             norm = [0.0] * res.numberOfIterations 
    108  
    109         nIter = [0]*res.numberOfIterations       # counts examples in each iteration 
    110         a = [0]*res.numberOfIterations           # average class in each iteration 
    111         for tex in res.results: 
    112             nIter[tex.iterationNumber] += 1 
    113             a[tex.iterationNumber] += float(tex.actualClass) 
    114         a = [a[i]/nIter[i] for i in range(res.numberOfIterations)] 
    115  
    116         if argkw.get("unweighted", 0) or not res.weights: 
    117             # iterate across test cases 
    118             for tex in res.results: 
    119                 ai = float(tex.actualClass) 
    120                 nIter[tex.iterationNumber] += 1 
    121  
    122                 # compute normalization, if required 
    123                 if argkw.get("norm-abs", 0): 
    124                     norm[tex.iterationNumber] += abs(ai - a[tex.iterationNumber]) 
    125                 elif argkw.get("norm-sqr", 0): 
    126                     norm[tex.iterationNumber] += (ai - a[tex.iterationNumber])**2 
    127  
    128                 # iterate across results of different regressors 
    129                 for i, cls in enumerate(tex.classes): 
    130                     if argkw.get("abs", 0): 
    131                         scores[i][tex.iterationNumber] += abs(float(cls) - ai) 
    132                     else: 
    133                         scores[i][tex.iterationNumber] += (float(cls) - ai)**2 
    134         else: # unweighted<>0 
    135             raise NotImplementedError, "weighted error scores with SE not implemented yet" 
    136  
    137         if argkw.get("norm-abs") or argkw.get("norm-sqr"): 
    138             scores = [[x/n for x, n in zip(y, norm)] for y in scores] 
    139         else: 
    140             scores = [[x/ni for x, ni in zip(y, nIter)] for y in scores] 
    141  
    142         if argkw.get("R2"): 
    143             scores = [[1.0 - x for x in y] for y in scores] 
    144  
    145         if argkw.get("sqrt", 0): 
    146             scores = [[math.sqrt(x) for x in y] for y in scores] 
    147  
    148         return [(statc.mean(x), statc.std(x)) for x in scores] 
    149          
    150     else: # single iteration (testing on a single test set) 
    151         scores = [0.0] * res.numberOfLearners 
    152         norm = 0.0 
    153  
    154         if argkw.get("unweighted", 0) or not res.weights: 
    155             a = sum([tex.actualClass for tex in res.results]) \ 
    156                 / len(res.results) 
    157             for tex in res.results: 
    158                 if argkw.get("abs", 0): 
    159                     scores = map(lambda res, cls, ac = float(tex.actualClass): 
    160                                  res + abs(float(cls) - ac), scores, tex.classes) 
    161                 else: 
    162                     scores = map(lambda res, cls, ac = float(tex.actualClass): 
    163                                  res + (float(cls) - ac)**2, scores, tex.classes) 
    164  
    165                 if argkw.get("norm-abs", 0): 
    166                     norm += abs(tex.actualClass - a) 
    167                 elif argkw.get("norm-sqr", 0): 
    168                     norm += (tex.actualClass - a)**2 
    169             totweight = gettotsize(res) 
    170         else: 
    171             # UNFINISHED 
    172             for tex in res.results: 
    173                 MSEs = map(lambda res, cls, ac = float(tex.actualClass), 
    174                            tw = tex.weight: 
    175                            res + tw * (float(cls) - ac)**2, MSEs, tex.classes) 
    176             totweight = gettotweight(res) 
    177  
    178         if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0): 
    179             scores = [s/norm for s in scores] 
    180         else: # normalize by number of instances (or sum of weights) 
    181             scores = [s/totweight for s in scores] 
    182  
    183         if argkw.get("R2"): 
    184             scores = [1.0 - s for s in scores] 
    185  
    186         if argkw.get("sqrt", 0): 
    187             scores = [math.sqrt(x) for x in scores] 
    188  
    189         return scores 
    190  
    191 def MSE(res, **argkw): 
    192     """MSE(res) -> mean-squared error""" 
    193     return regressionError(res, **argkw) 
    194      
    195 def RMSE(res, **argkw): 
    196     """RMSE(res) -> root mean-squared error""" 
    197     argkw.setdefault("sqrt", True) 
    198     return regressionError(res, **argkw) 
    199  
    200 def MAE(res, **argkw): 
    201     """MAE(res) -> mean absolute error""" 
    202     argkw.setdefault("abs", True) 
    203     return regressionError(res, **argkw) 
    204  
    205 def RSE(res, **argkw): 
    206     """RSE(res) -> relative squared error""" 
    207     argkw.setdefault("norm-sqr", True) 
    208     return regressionError(res, **argkw) 
    209  
    210 def RRSE(res, **argkw): 
    211     """RRSE(res) -> root relative squared error""" 
    212     argkw.setdefault("norm-sqr", True) 
    213     argkw.setdefault("sqrt", True) 
    214     return regressionError(res, **argkw) 
    215  
    216 def RAE(res, **argkw): 
    217     """RAE(res) -> relative absolute error""" 
    218     argkw.setdefault("abs", True) 
    219     argkw.setdefault("norm-abs", True) 
    220     return regressionError(res, **argkw) 
    221  
    222 def R2(res, **argkw): 
    223     """R2(res) -> R-squared""" 
    224     argkw.setdefault("norm-sqr", True) 
    225     argkw.setdefault("R2", True) 
    226     return regressionError(res, **argkw) 
    227  
    228 def MSE_old(res, **argkw): 
    229     """MSE(res) -> mean-squared error""" 
    230     if argkw.get("SE", 0) and res.numberOfIterations > 1: 
    231         MSEs = [[0.0] * res.numberOfIterations for i in range(res.numberOfLearners)] 
    232         nIter = [0]*res.numberOfIterations 
    233         if argkw.get("unweighted", 0) or not res.weights: 
    234             for tex in res.results: 
    235                 ac = float(tex.actualClass) 
    236                 nIter[tex.iterationNumber] += 1 
    237                 for i, cls in enumerate(tex.classes): 
    238                     MSEs[i][tex.iterationNumber] += (float(cls) - ac)**2 
    239         else: 
    240             raise ValueError, "weighted RMSE with SE not implemented yet" 
    241         MSEs = [[x/ni for x, ni in zip(y, nIter)] for y in MSEs] 
    242         if argkw.get("sqrt", 0): 
    243             MSEs = [[math.sqrt(x) for x in y] for y in MSEs] 
    244         return [(statc.mean(x), statc.std(x)) for x in MSEs] 
    245          
    246     else: 
    247         MSEs = [0.0]*res.numberOfLearners 
    248         if argkw.get("unweighted", 0) or not res.weights: 
    249             for tex in res.results: 
    250                 MSEs = map(lambda res, cls, ac = float(tex.actualClass): 
    251                            res + (float(cls) - ac)**2, MSEs, tex.classes) 
    252             totweight = gettotsize(res) 
    253         else: 
    254             for tex in res.results: 
    255                 MSEs = map(lambda res, cls, ac = float(tex.actualClass), tw = tex.weight: 
    256                            res + tw * (float(cls) - ac)**2, MSEs, tex.classes) 
    257             totweight = gettotweight(res) 
    258  
    259         if argkw.get("sqrt", 0): 
    260             MSEs = [math.sqrt(x) for x in MSEs] 
    261         return [x/totweight for x in MSEs] 
    262  
    263 def RMSE_old(res, **argkw): 
    264     """RMSE(res) -> root mean-squared error""" 
    265     argkw.setdefault("sqrt", 1) 
    266     return MSE_old(res, **argkw) 
    267  
    268  
    269 ######################################################################### 
    270 # PERFORMANCE MEASURES: 
    271 # Scores for evaluation of classifiers 
    272  
    273 def CA(res, reportSE = False, **argkw): 
    274     if res.numberOfIterations==1: 
    275         if type(res)==ConfusionMatrix: 
    276             div = nm.TP+nm.FN+nm.FP+nm.TN 
    277             checkNonZero(div) 
    278             ca = [(nm.TP+nm.TN)/div] 
    279         else: 
    280             CAs = [0.0]*res.numberOfLearners 
    281             if argkw.get("unweighted", 0) or not res.weights: 
    282                 totweight = gettotsize(res) 
    283                 for tex in res.results: 
    284                     CAs = map(lambda res, cls: res+(cls==tex.actualClass), CAs, tex.classes) 
    285             else: 
    286                 totweight = 0. 
    287                 for tex in res.results: 
    288                     CAs = map(lambda res, cls: res+(cls==tex.actualClass and tex.weight), CAs, tex.classes) 
    289                     totweight += tex.weight 
    290             checkNonZero(totweight) 
    291             ca = [x/totweight for x in CAs] 
    292              
    293         if reportSE: 
    294             return [(x, x*(1-x)/math.sqrt(totweight)) for x in ca] 
    295         else: 
    296             return ca 
    297          
    298     else: 
    299         CAsByFold = [[0.0]*res.numberOfIterations for i in range(res.numberOfLearners)] 
    300         foldN = [0.0]*res.numberOfIterations 
    301  
    302         if argkw.get("unweighted", 0) or not res.weights: 
    303             for tex in res.results: 
    304                 for lrn in range(res.numberOfLearners): 
    305                     CAsByFold[lrn][tex.iterationNumber] += (tex.classes[lrn]==tex.actualClass) 
    306                 foldN[tex.iterationNumber] += 1 
    307         else: 
    308             for tex in res.results: 
    309                 for lrn in range(res.numberOfLearners): 
    310                     CAsByFold[lrn][tex.iterationNumber] += (tex.classes[lrn]==tex.actualClass) and tex.weight 
    311                 foldN[tex.iterationNumber] += tex.weight 
    312  
    313         return statisticsByFolds(CAsByFold, foldN, reportSE, False) 
    314  
    315  
    316 # Obsolete, but kept for compatibility 
    317 def CA_se(res, **argkw): 
    318     return CA(res, True, **argkw) 
    319  
    320  
    321 def AP(res, reportSE = False, **argkw): 
    322     if res.numberOfIterations == 1: 
    323         APs=[0.0]*res.numberOfLearners 
    324         if argkw.get("unweighted", 0) or not res.weights: 
    325             for tex in res.results: 
    326                 APs = map(lambda res, probs: res + probs[tex.actualClass], APs, tex.probabilities) 
    327             totweight = gettotsize(res) 
    328         else: 
    329             totweight = 0. 
    330             for tex in res.results: 
    331                 APs = map(lambda res, probs: res + probs[tex.actualClass]*tex.weight, APs, tex.probabilities) 
    332                 totweight += tex.weight 
    333         checkNonZero(totweight) 
    334         return [AP/totweight for AP in APs] 
    335  
    336     APsByFold = [[0.0]*res.numberOfLearners for i in range(res.numberOfIterations)] 
    337     foldN = [0.0] * res.numberOfIterations 
    338     if argkw.get("unweighted", 0) or not res.weights: 
    339         for tex in res.results: 
    340             APsByFold[tex.iterationNumber] = map(lambda res, probs: res + probs[tex.actualClass], APsByFold[tex.iterationNumber], tex.probabilities) 
    341             foldN[tex.iterationNumber] += 1 
    342     else: 
    343         for tex in res.results: 
    344             APsByFold[tex.iterationNumber] = map(lambda res, probs: res + probs[tex.actualClass] * tex.weight, APsByFold[tex.iterationNumber], tex.probabilities) 
    345             foldN[tex.iterationNumber] += tex.weight 
    346  
    347     return statisticsByFolds(APsByFold, foldN, reportSE, True) 
    348  
    349  
    350 def BrierScore(res, reportSE = False, **argkw): 
    351     """Computes Brier score""" 
    352     # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where 
    353     #    x is class, 
    354     #    t(x) is 0 for 'wrong' and 1 for 'correct' class 
    355     #    p(x) is predicted probability. 
    356     # There's a trick: since t(x) is zero for all classes but the 
    357     # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1 
    358     # Since +1 is there for each example, it adds 1 to the average 
    359     # We skip the +1 inside the sum and add it just at the end of the function 
    360     # We take max(result, 0) to avoid -0.0000x due to rounding errors 
    361  
    362     if res.numberOfIterations == 1: 
    363         MSEs=[0.0]*res.numberOfLearners 
    364         if argkw.get("unweighted", 0) or not res.weights: 
    365             totweight = 0.0 
    366             for tex in res.results: 
    367                 MSEs = map(lambda res, probs: 
    368                            res + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actualClass], MSEs, tex.probabilities) 
    369                 totweight += tex.weight 
    370         else: 
    371             for tex in res.results: 
    372                 MSEs = map(lambda res, probs: 
    373                            res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actualClass], MSEs, tex.probabilities) 
    374             totweight = gettotweight(res) 
    375         checkNonZero(totweight) 
    376         if reportSE: 
    377             return [(max(x/totweight+1.0, 0), 0) for x in MSEs]  ## change this, not zero!!! 
    378         else: 
    379             return [max(x/totweight+1.0, 0) for x in MSEs] 
    380  
    381     BSs = [[0.0]*res.numberOfLearners for i in range(res.numberOfIterations)] 
    382     foldN = [0.] * res.numberOfIterations 
    383  
    384     if argkw.get("unweighted", 0) or not res.weights: 
    385         for tex in res.results: 
    386             BSs[tex.iterationNumber] = map(lambda rr, probs: 
    387                        rr + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actualClass], BSs[tex.iterationNumber], tex.probabilities) 
    388             foldN[tex.iterationNumber] += 1 
    389     else: 
    390         for tex in res.results: 
    391             BSs[tex.iterationNumber] = map(lambda res, probs: 
    392                        res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actualClass], BSs[tex.iterationNumber], tex.probabilities) 
    393             foldN[tex.iterationNumber] += tex.weight 
    394  
    395     stats = statisticsByFolds(BSs, foldN, reportSE, True) 
    396     if reportSE: 
    397         return [(x+1.0, y) for x, y in stats] 
    398     else: 
    399         return [x+1.0 for x in stats] 
    400  
    401 def BSS(res, **argkw): 
    402     return [1-x/2 for x in apply(BrierScore, (res, ), argkw)] 
    403  
    404 ##def _KL_div(actualClass, predicted): 
    405 ##     
    406 ##def KL(res, **argkw): 
    407 ##    KLs = [0.0]*res.numberOfLearners 
    408 ## 
    409 ##    if argkw.get("unweighted", 0) or not res.weights: 
    410 ##        for tex in res.results: 
    411 ##            KLs = map(lambda res, predicted: res+KL(tex.actualClass, predicted), KLs, tex.probabilities) 
    412 ##        totweight = gettotsize(res) 
    413 ##    else: 
    414 ##        for tex in res.results: 
    415 ##            ## TEGA SE NISI! 
    416 ##            CAs = map(lambda res, cls: res+(cls==tex.actualClass and tex.weight), CAs, tex.classes) 
    417 ##        totweight = gettotweight(res) 
    418 ## 
    419 ##    return [x/totweight for x in CAs] 
    420  
    421      
    422 ##def KL_se(res, **argkw): 
    423 ##    # Kullback-Leibler divergence 
    424 ##    if res.numberOfIterations==1: 
    425 ##        if argkw.get("unweighted", 0) or not res.weights: 
    426 ##            totweight = gettotsize(res) 
    427 ##        else: 
    428 ##            totweight = gettotweight(res) 
    429 ##        return [(x, x*(1-x)/math.sqrt(totweight)) for x in apply(CA, (res,), argkw)] 
    430 ##    else: 
    431 ##        KLsByFold = [[0.0]*res.numberOfIterations for i in range(res.numberOfLearners)] 
    432 ##        foldN = [0.0]*res.numberOfIterations 
    433 ## 
    434 ##        if argkw.get("unweighted", 0) or not res.weights: 
    435 ##            for tex in res.results: 
    436 ##                for lrn in range(res.numberOfLearners): 
    437 ##                    CAsByFold[lrn][tex.iterationNumber] +=  
    438 ##                foldN[tex.iterationNumber] += 1 
    439 ##        else: 
    440 ##            for tex in res.results: 
    441 ##                for lrn in range(res.numberOfLearners): 
    442 ##                    CAsByFold[lrn][tex.iterationNumber] +=  
    443 ##                foldN[tex.iterationNumber] += tex.weight 
    444 ## 
    445 ##        newFolds = [] 
    446 ##        for lrn in range(res.numberOfLearners): 
    447 ##            newF = [] 
    448 ##            for fold in range(res.numberOfIterations): 
    449 ##                if foldN[fold]>0.0: 
    450 ##                        newF.append(CAsByFold[lrn][fold]/foldN[fold]) 
    451 ##            newFolds.append(newF) 
    452 ## 
    453 ##        checkNonZero(len(newFolds)) 
    454 ##        return [(statc.mean(cas), statc.sterr(cas)) for cas in newFolds] 
    455 ## 
    456  
    457 def IS_ex(Pc, P): 
    458     "Pc is the a posteriori probability, P the a priori probability" 
    459     if (Pc>=P): 
    460         return -log2(P)+log2(Pc) 
    461     else: 
    462         return -(-log2(1-P)+log2(1-Pc)) 
    463      
    464 def IS(res, apriori=None, reportSE = False, **argkw): 
    465     if not apriori: 
    466         apriori = classProbabilitiesFromRes(res) 
    467  
    468     if res.numberOfIterations==1: 
    469         ISs = [0.0]*res.numberOfLearners 
    470         if argkw.get("unweighted", 0) or not res.weights: 
    471             for tex in res.results: 
    472               for i in range(len(tex.probabilities)): 
    473                     cls = tex.actualClass 
    474                     ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) 
    475             totweight = gettotsize(res) 
    476         else: 
    477             for tex in res.results: 
    478               for i in range(len(tex.probabilities)): 
    479                     cls = tex.actualClass 
    480                     ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight 
    481             totweight = gettotweight(res) 
    482         if reportSE: 
    483             return [(IS/totweight,0) for IS in ISs] 
    484         else: 
    485             return [IS/totweight for IS in ISs] 
    486  
    487          
    488     ISs = [[0.0]*res.numberOfIterations for i in range(res.numberOfLearners)] 
    489     foldN = [0.] * res.numberOfIterations 
    490  
    491     # compute info scores for each fold     
    492     if argkw.get("unweighted", 0) or not res.weights: 
    493         for tex in res.results: 
    494             for i in range(len(tex.probabilities)): 
    495                 cls = tex.actualClass 
    496                 ISs[i][tex.iterationNumber] += IS_ex(tex.probabilities[i][cls], apriori[cls]) 
    497             foldN[tex.iterationNumber] += 1 
    498     else: 
    499         for tex in res.results: 
    500             for i in range(len(tex.probabilities)): 
    501                 cls = tex.actualClass 
    502                 ISs[i][tex.iterationNumber] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight 
    503             foldN[tex.iterationNumber] += tex.weight 
    504  
    505     return statisticsByFolds(ISs, foldN, reportSE, False) 
    506  
    507  
    508 def Friedman(res, statistics, **argkw): 
    509     sums = None 
    510     for ri in splitByIterations(res): 
    511         ranks = statc.rankdata(apply(statistics, (ri,), argkw)) 
    512         if sums: 
    513             sums = sums and [ranks[i]+sums[i] for i in range(k)] 
    514         else: 
    515             sums = ranks 
    516             k = len(sums) 
    517     N = res.numberOfIterations 
    518     k = len(sums) 
    519     T = sum([x*x for x in sums]) 
    520     F = 12.0 / (N*k*(k+1)) * T  - 3 * N * (k+1) 
    521     return F, statc.chisqprob(F, k-1) 
    522      
    523  
    524 def Wilcoxon(res, statistics, **argkw): 
    525     res1, res2 = [], [] 
    526     for ri in splitByIterations(res): 
    527         stats = apply(statistics, (ri,), argkw) 
    528         if (len(stats) != 2): 
    529             raise TypeError, "Wilcoxon compares two classifiers, no more, no less" 
    530         res1.append(stats[0]) 
    531         res2.append(stats[1]) 
    532     return statc.wilcoxont(res1, res2) 
    533  
    534 def rankDifference(res, statistics, **argkw): 
    535     if not res.results: 
    536         raise TypeError, "no experiments" 
    537  
    538     k = len(res.results[0].classes) 
    539     if (k<2): 
    540         raise TypeError, "nothing to compare (less than two classifiers given)" 
    541     if k==2: 
    542         return apply(Wilcoxon, (res, statistics), argkw) 
    543     else: 
    544         return apply(Friedman, (res, statistics), argkw) 
    545      
    546 class ConfusionMatrix: 
    547     def __init__(self): 
    548         self.TP = self.FN = self.FP = self.TN = 0.0 
    549  
    550     def addTFPosNeg(self, predictedPositive, isPositive, weight = 1.0): 
    551         if predictedPositive: 
    552             if isPositive: 
    553                 self.TP += weight 
    554             else: 
    555                 self.FP += weight 
    556         else: 
    557             if isPositive: 
    558                 self.FN += weight 
    559             else: 
    560                 self.TN += weight 
    561  
    562  
    563 def confusionMatrices(res, classIndex=-1, **argkw): 
    564     tfpns = [ConfusionMatrix() for i in range(res.numberOfLearners)] 
    565      
    566     if classIndex<0: 
    567         numberOfClasses = len(res.classValues) 
    568         if classIndex < -1 or numberOfClasses > 2: 
    569             cm = [[[0.0] * numberOfClasses for i in range(numberOfClasses)] for l in range(res.numberOfLearners)] 
    570             if argkw.get("unweighted", 0) or not res.weights: 
    571                 for tex in res.results: 
    572                     trueClass = int(tex.actualClass) 
    573                     for li, pred in enumerate(tex.classes): 
    574                         predClass = int(pred) 
    575                         if predClass < numberOfClasses: 
    576                             cm[li][trueClass][predClass] += 1 
    577             else: 
    578                 for tex in enumerate(res.results): 
    579                     trueClass = int(tex.actualClass) 
    580                     for li, pred in tex.classes: 
    581                         predClass = int(pred) 
    582                         if predClass < numberOfClasses: 
    583                             cm[li][trueClass][predClass] += tex.weight 
    584             return cm 
    585              
    586         elif res.baseClass>=0: 
    587             classIndex = res.baseClass 
    588         else: 
    589             classIndex = 1 
    590              
    591     cutoff = argkw.get("cutoff") 
    592     if cutoff: 
    593         if argkw.get("unweighted", 0) or not res.weights: 
    594             for lr in res.results: 
    595                 isPositive=(lr.actualClass==classIndex) 
    596                 for i in range(res.numberOfLearners): 
    597                     tfpns[i].addTFPosNeg(lr.probabilities[i][classIndex]>cutoff, isPositive) 
    598         else: 
    599             for lr in res.results: 
    600                 isPositive=(lr.actualClass==classIndex) 
    601                 for i in range(res.numberOfLearners): 
    602                     tfpns[i].addTFPosNeg(lr.probabilities[i][classIndex]>cutoff, isPositive, lr.weight) 
    603     else: 
    604         if argkw.get("unweighted", 0) or not res.weights: 
    605             for lr in res.results: 
    606                 isPositive=(lr.actualClass==classIndex) 
    607                 for i in range(res.numberOfLearners): 
    608                     tfpns[i].addTFPosNeg(lr.classes[i]==classIndex, isPositive) 
    609         else: 
    610             for lr in res.results: 
    611                 isPositive=(lr.actualClass==classIndex) 
    612                 for i in range(res.numberOfLearners): 
    613                     tfpns[i].addTFPosNeg(lr.classes[i]==classIndex, isPositive, lr.weight) 
    614     return tfpns 
    615  
    616  
# Obsolete alias, kept for backward compatibility (renamed to confusionMatrices).
computeConfusionMatrices = confusionMatrices
    619  
    620  
    621 def confusionChiSquare(confusionMatrix): 
    622     dim = len(confusionMatrix) 
    623     rowPriors = [sum(r) for r in confusionMatrix] 
    624     colPriors = [sum([r[i] for r in confusionMatrix]) for i in range(dim)] 
    625     total = sum(rowPriors) 
    626     rowPriors = [r/total for r in rowPriors] 
    627     colPriors = [r/total for r in colPriors] 
    628     ss = 0 
    629     for ri, row in enumerate(confusionMatrix): 
    630         for ci, o in enumerate(row): 
    631             e = total * rowPriors[ri] * colPriors[ci] 
    632             if not e: 
    633                 return -1, -1, -1 
    634             ss += (o-e)**2 / e 
    635     df = (dim-1)**2 
    636     return ss, df, statc.chisqprob(ss, df) 
    637          
    638      
    639 def sens(confm): 
    640     """Return sensitivity (recall rate) over the given confusion matrix.""" 
    641     if type(confm) == list: 
    642         return [sens(cm) for cm in confm] 
    643     else: 
    644         tot = confm.TP+confm.FN 
    645         if tot < 1e-6: 
    646             import warnings 
    647             warnings.warn("Can't compute sensitivity: one or both classes have no instances") 
    648             return -1 
    649  
    650         return confm.TP/tot 
    651  
def recall(confm):
    """Return recall rate (sensitivity) over the given confusion matrix."""
    # recall and sensitivity are the same measure; delegate to sens()
    return sens(confm)
    655  
    656  
    657 def spec(confm): 
    658     """Return specificity over the given confusion matrix.""" 
    659     if type(confm) == list: 
    660         return [spec(cm) for cm in confm] 
    661     else: 
    662         tot = confm.FP+confm.TN 
    663         if tot < 1e-6: 
    664             import warnings 
    665             warnings.warn("Can't compute specificity: one or both classes have no instances") 
    666             return -1 
    667         return confm.TN/tot 
    668    
    669  
    670 def PPV(confm): 
    671     """Return positive predictive value (precision rate) over the given confusion matrix.""" 
    672     if type(confm) == list: 
    673         return [PPV(cm) for cm in confm] 
    674     else: 
    675         tot = confm.TP+confm.FP 
    676         if tot < 1e-6: 
    677             import warnings 
    678             warnings.warn("Can't compute PPV: one or both classes have no instances") 
    679             return -1 
    680         return confm.TP/tot 
    681  
    682  
def precision(confm):
    """Return precision rate (positive predictive value) over the given confusion matrix."""
    # precision and PPV are the same measure; delegate to PPV()
    return PPV(confm)
    686  
    687  
    688 def NPV(confm): 
    689     """Return negative predictive value over the given confusion matrix.""" 
    690     if type(confm) == list: 
    691         return [NPV(cm) for cm in confm] 
    692     else: 
    693         tot = confm.FN+confm.TN 
    694         if tot < 1e-6: 
    695             import warnings 
    696             warnings.warn("Can't compute NPV: one or both classes have no instances") 
    697             return -1 
    698         return confm.TN/tot 
    699  
    700 def F1(confm): 
    701     """Return F1 score (harmonic mean of precision and recall) over the given confusion matrix.""" 
    702     if type(confm) == list: 
    703         return [F1(cm) for cm in confm] 
    704     else: 
    705         p = precision(confm) 
    706         r = recall(confm) 
    707         if p + r > 0: 
    708             return 2. * p * r / (p + r) 
    709         else: 
    710             import warnings 
    711             warnings.warn("Can't compute F1: P + R is zero or not defined") 
    712             return -1 
    713  
    714 def Falpha(confm, alpha=1.0): 
    715     """Return the alpha-mean of precision and recall over the given confusion matrix.""" 
    716     if type(confm) == list: 
    717         return [Falpha(cm, alpha=alpha) for cm in confm] 
    718     else: 
    719         p = precision(confm) 
    720         r = recall(confm) 
    721         return (1. + alpha) * p * r / (alpha * p + r) 
    722      
    723 def MCC(confm): 
    724     ''' 
    725     Return Mattew correlation coefficient over the given confusion matrix. 
    726  
    727     MCC is calculated as follows: 
    728     MCC = (TP*TN - FP*FN) / sqrt( (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN) ) 
    729      
    730     [1] Matthews, B.W., Comparison of the predicted and observed secondary  
    731     structure of T4 phage lysozyme. Biochim. Biophys. Acta 1975, 405, 442-451 
    732  
    733     code by Boris Gorelik 
    734     ''' 
    735     if type(confm) == list: 
    736         return [MCC(cm) for cm in confm] 
    737     else: 
    738         truePositive = confm.TP 
    739         trueNegative = confm.TN 
    740         falsePositive = confm.FP 
    741         falseNegative = confm.FN  
    742            
    743         try:    
    744             r = (((truePositive * trueNegative) - (falsePositive * falseNegative))/  
    745                 math.sqrt(  (truePositive + falsePositive)  *  
    746                 ( truePositive + falseNegative ) *  
    747                 ( trueNegative + falsePositive ) *  
    748                 ( trueNegative + falseNegative ) ) 
    749                 ) 
    750         except ZeroDivisionError: 
    751             # Zero difision occurs when there is either no true positives  
    752             # or no true negatives i.e. the problem contains only one  
    753             # type of classes. 
    754             import warnings 
    755             warnings.warn("Can't compute MCC: TP or TN is zero or not defined") 
    756             r = None 
    757  
    758     return r 
    759  
    760 def scottsPi(confm, bIsListOfMatrices=True): 
    761    """Compute Scott's Pi for measuring inter-rater agreement for nominal data 
    762  
    763    http://en.wikipedia.org/wiki/Scott%27s_Pi 
    764    Scott's Pi is a statistic for measuring inter-rater reliability for nominal 
    765    raters. 
    766  
    767    @param confm: confusion matrix, or list of confusion matrices. To obtain 
    768                            non-binary confusion matrix, call 
    769                            orngStat.computeConfusionMatrices and set the 
    770                            classIndex parameter to -2. 
    771    @param bIsListOfMatrices: specifies whether confm is list of matrices. 
    772                            This function needs to operate on non-binary 
    773                            confusion matrices, which are represented by python 
    774                            lists, therefore one needs a way to distinguish 
    775                            between a single matrix and list of matrices 
    776    """ 
    777  
    778    if bIsListOfMatrices: 
    779        try: 
    780            return [scottsPi(cm, bIsListOfMatrices=False) for cm in confm] 
    781        except TypeError: 
    782            # Nevermind the parameter, maybe this is a "conventional" binary 
    783            # confusion matrix and bIsListOfMatrices was specified by mistake 
    784            return scottsPiSingle(confm, bIsListOfMatrices=False) 
    785    else: 
    786        if isinstance(confm, ConfusionMatrix): 
    787            confm = numpy.array( [[confm.TP, confm.FN], 
    788                    [confm.FP, confm.TN]], dtype=float) 
    789        else: 
    790            confm = numpy.array(confm, dtype=float) 
    791  
    792        marginalSumOfRows = numpy.sum(confm, axis=0) 
    793        marginalSumOfColumns = numpy.sum(confm, axis=1) 
    794        jointProportion = (marginalSumOfColumns + marginalSumOfRows)/ \ 
    795                            (2.0 * numpy.sum(confm, axis=None)) 
    796        # In the eq. above, 2.0 is what the Wikipedia page calls 
    797        # the number of annotators. Here we have two annotators: 
    798        # the observed (true) labels (annotations) and the predicted by 
    799        # the learners. 
    800  
    801        prExpected = numpy.sum(jointProportion ** 2, axis=None) 
    802        prActual = numpy.sum(numpy.diag(confm), axis=None)/numpy.sum(confm, axis=None) 
    803  
    804        ret = (prActual - prExpected) / (1.0 - prExpected) 
    805        return ret 
    806  
def AUCWilcoxon(res, classIndex=-1, **argkw):
    """Compute AUC and its standard error for each learner.

    The AUC is estimated as the Wilcoxon-Mann-Whitney statistic W from the
    cumulative score distributions returned by corn.computeROCCumulative.
    Instance weights are used unless 'unweighted' is given in argkw.
    Returns a list of (AUC, SE) pairs, one per learner.
    NOTE(review): the SE formula looks like Hanley & McNeil (1982) -- confirm.
    """
    import corn
    useweights = res.weights and not argkw.get("unweighted", 0)
    problists, tots = corn.computeROCCumulative(res, classIndex, useweights)

    results=[]

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg  # NOTE(review): N is computed but never used below
    for plist in problists:
        # highPos: positives scoring strictly above the current score;
        # lowNeg: negatives scoring strictly below it
        highPos, lowNeg = totPos, 0.0
        W, Q1, Q2 = 0.0, 0.0, 0.0
        for prob in plist:
            # prob is (score, (negatives at score, positives at score))
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos/2.)  # ties contribute one half
            Q2 += thisPos * (lowNeg**2  + lowNeg*thisNeg  + thisNeg**2 /3.)
            Q1 += thisNeg * (highPos**2 + highPos*thisPos + thisPos**2 /3.)

            lowNeg += thisNeg

        # normalize the accumulated sums into probabilities
        W  /= (totPos*totNeg)
        Q1 /= (totNeg*totPos**2)
        Q2 /= (totPos*totNeg**2)

        SE = math.sqrt( (W*(1-W) + (totPos-1)*(Q1-W**2) + (totNeg-1)*(Q2-W**2)) / (totPos*totNeg) )
        results.append((W, SE))
    return results
    835  
    836 AROC = AUCWilcoxon # for backward compatibility, AROC is obsolote 
    837  
def compare2AUCs(res, lrn1, lrn2, classIndex=-1, **argkw):
    """Statistically compare the AUCs of learners lrn1 and lrn2
    (delegates to corn.compare2ROCs).  Instance weights are used unless
    'unweighted' is given in argkw."""
    import corn
    return corn.compare2ROCs(res, lrn1, lrn2, classIndex, res.weights and not argkw.get("unweighted"))
    841  
    842 compare2AROCs = compare2AUCs # for backward compatibility, compare2AROCs is obsolote 
    843  
    844      
    845 def computeROC(res, classIndex=-1): 
    846     import corn 
    847     problists, tots = corn.computeROCCumulative(res, classIndex) 
    848  
    849     results = [] 
    850     totPos, totNeg = tots[1], tots[0] 
    851  
    852     for plist in problists: 
    853         curve=[(1., 1.)] 
    854         TP, TN = totPos, 0.0 
    855         FN, FP = 0., totNeg 
    856         for prob in plist: 
    857             thisPos, thisNeg = prob[1][1], prob[1][0] 
    858             # thisPos go from TP to FN 
    859             TP -= thisPos 
    860             FN += thisPos 
    861             # thisNeg go from FP to TN 
    862             TN += thisNeg 
    863             FP -= thisNeg 
    864  
    865             sens = TP/(TP+FN) 
    866             spec = TN/(FP+TN) 
    867             curve.append((1-spec, sens)) 
    868         results.append(curve) 
    869  
    870     return results     
    871  
    872 ## TC's implementation of algorithms, taken from: 
    873 ## T Fawcett: ROC Graphs: Notes and Practical Considerations for Data Mining Researchers, submitted to KDD Journal.  
    874 def ROCslope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)): 
    875     if (P1x == P2x): 
    876         return 1e300 
    877     return (P1y - P2y) / (P1x - P2x) 
    878  
    879 def ROCaddPoint(P, R, keepConcavities=1): 
    880     if keepConcavities: 
    881         R.append(P) 
    882     else: 
    883         while (1): 
    884             if len(R) < 2: 
    885                 R.append(P) 
    886                 return R 
    887             else: 
    888                 T = R.pop() 
    889                 T2 = R[-1] 
    890                 if ROCslope(T2, T) > ROCslope(T, P): 
    891                     R.append(T) 
    892                     R.append(P) 
    893                     return R 
    894     return R 
    895  
    896 def TCcomputeROC(res, classIndex=-1, keepConcavities=1): 
    897     import corn 
    898     problists, tots = corn.computeROCCumulative(res, classIndex) 
    899  
    900     results = [] 
    901     P, N = tots[1], tots[0] 
    902  
    903     for plist in problists: 
    904         ## corn gives an increasing by scores list, we need a decreasing by scores 
    905         plist.reverse() 
    906         TP = 0.0 
    907         FP = 0.0 
    908         curve=[] 
    909         fPrev = 10e300 # "infinity" score at 0.0, 0.0 
    910         for prob in plist: 
    911             f = prob[0] 
    912             if f <> fPrev: 
    913                 if P: 
    914                     tpr = TP/P 
    915                 else: 
    916                     tpr = 0.0 
    917                 if N: 
    918                     fpr = FP/N 
    919                 else: 
    920                     fpr = 0.0 
    921                 curve = ROCaddPoint((fpr, tpr, fPrev), curve, keepConcavities) 
    922                 fPrev = f 
    923             thisPos, thisNeg = prob[1][1], prob[1][0] 
    924             TP += thisPos 
    925             FP += thisNeg 
    926         if P: 
    927             tpr = TP/P 
    928         else: 
    929             tpr = 0.0 
    930         if N: 
    931             fpr = FP/N 
    932         else: 
    933             fpr = 0.0 
    934         curve = ROCaddPoint((fpr, tpr, f), curve, keepConcavities) ## ugly 
    935         results.append(curve) 
    936  
    937     return results 
    938  
    939 ## returns a list of points at the intersection of the tangential iso-performance line and the given ROC curve 
    940 ## for given values of FPcost, FNcost and pval 
    941 def TCbestThresholdsOnROCcurve(FPcost, FNcost, pval, curve): 
    942     m = (FPcost*(1.0 - pval)) / (FNcost*pval) 
    943  
    944     ## put the iso-performance line in point (0.0, 1.0) 
    945     x0, y0 = (0.0, 1.0) 
    946     x1, y1 = (1.0, 1.0 + m) 
    947     d01 = math.sqrt((x1 - x0)*(x1 - x0) + (y1 - y0)*(y1 - y0)) 
    948  
    949     ## calculate and find the closest point to the line 
    950     firstp = 1 
    951     mind = 0.0 
    952     a = (x0*y1 - x1*y0) 
    953     closestPoints = [] 
    954     for (x, y, fscore) in curve: 
    955         d = ((y0 - y1)*x + (x1 - x0)*y + a) / d01 
    956         d = abs(d) 
    957         if firstp or d < mind: 
    958             mind, firstp = d, 0 
    959             closestPoints = [(x, y, fscore)] 
    960         else: 
    961             if abs(d - mind) <= 0.0001: ## close enough 
    962                 closestPoints.append( (x, y, fscore) ) 
    963     return closestPoints           
    964  
    965 def frange(start, end=None, inc=None): 
    966     "A range function, that does accept float increments..." 
    967  
    968     if end == None: 
    969         end = start + 0.0 
    970         start = 0.0 
    971  
    972     if inc == None or inc == 0: 
    973         inc = 1.0 
    974  
    975     L = [start] 
    976     while 1: 
    977         next = start + len(L) * inc 
    978         if inc > 0 and next >= end: 
    979             L.append(end) 
    980             break 
    981         elif inc < 0 and next <= end: 
    982             L.append(end) 
    983             break 
    984         L.append(next) 
    985          
    986     return L 
    987  
    988 ## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN], 
    989 ## where ROCcurvesX is a set of ROC curves, 
    990 ## where a (one) ROC curve is a set of (FP, TP) points 
    991 ## 
    992 ## for each (sub)set of input ROC curves 
    993 ## returns the average ROC curve and an array of (vertical) standard deviations 
    994 def TCverticalAverageROC(ROCcurves, samples = 10): 
    995     def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X): 
    996         if (P1x == P2x) or ((X > P1x) and (X > P2x)) or ((X < P1x) and (X < P2x)): 
    997             raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X) 
    998         dx = float(P2x) - float(P1x) 
    999         dy = float(P2y) - float(P1y) 
    1000         m = dy/dx 
    1001         return P1y + m*(X - P1x) 
    1002  
    1003     def TP_FOR_FP(FPsample, ROC, npts): 
    1004         i = 0 
    1005         while i < npts - 1: 
    1006             (fp, _, _) = ROC[i + 1] 
    1007             if (fp <= FPsample): 
    1008                 i += 1 
    1009             else: 
    1010                 break 
    1011         (fp, tp, _) = ROC[i] 
    1012         if fp == FPsample: 
    1013             return tp 
    1014         elif fp < FPsample and i + 1 < len(ROC): 
    1015             return INTERPOLATE(ROC[i], ROC[i+1], FPsample) 
    1016         elif fp < FPsample and i + 1 == len(ROC): # return the last 
    1017             return ROC[i][1] 
    1018         raise ValueError, "cannot compute: TP_FOR_FP in TCverticalAverageROC" 
    1019         #return 0.0 
    1020  
    1021     average = [] 
    1022     stdev = [] 
    1023     for ROCS in ROCcurves: 
    1024         npts = [] 
    1025         for c in ROCS: 
    1026             npts.append(len(c)) 
    1027         nrocs = len(ROCS) 
    1028  
    1029         TPavg = [] 
    1030         TPstd = [] 
    1031         for FPsample in frange(0.0, 1.0, 1.0/samples): 
    1032             TPsum = [] 
    1033             for i in range(nrocs): 
    1034                 TPsum.append( TP_FOR_FP(FPsample, ROCS[i], npts[i]) ) ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i]) 
    1035             TPavg.append( (FPsample, statc.mean(TPsum)) ) 
    1036             if len(TPsum) > 1: 
    1037                 stdv = statc.std(TPsum) 
    1038             else: 
    1039                 stdv = 0.0 
    1040             TPstd.append( stdv ) 
    1041  
    1042         average.append(TPavg) 
    1043         stdev.append(TPstd) 
    1044  
    1045     return (average, stdev) 
    1046  
    1047 ## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN], 
    1048 ## where ROCcurvesX is a set of ROC curves, 
    1049 ## where a (one) ROC curve is a set of (FP, TP) points 
    1050 ## 
    1051 ## for each (sub)set of input ROC curves 
    1052 ## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations 
def TCthresholdlAverageROC(ROCcurves, samples = 10):
    """Threshold-average sets of ROC curves.

    ROCcurves is [ROCcurves1, ..., ROCcurvesN], where each ROCcurvesX is a
    set of ROC curves and each curve is a list of (FP, TP, threshold) points
    ordered by decreasing threshold (as produced by TCcomputeROC).  For each
    subset, the curves are sampled at 'samples' threshold values pooled from
    all curves; returns (average, stdevV, stdevH): the average curve plus
    the vertical and horizontal standard deviations at each sample.
    """
    def POINT_AT_THRESH(ROC, npts, thresh):
        # walk down the curve while the point's score is above the threshold
        i = 0
        while i < npts - 1:
            (px, py, pfscore) = ROC[i]
            if (pfscore > thresh):
                i += 1
            else:
                break
        return ROC[i]

    average = []
    stdevV = []
    stdevH = []
    for ROCS in ROCcurves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        # pool the threshold values of all curves, sorted descending
        T = []
        for c in ROCS:
            for (px, py, pfscore) in c:
##                try:
##                    T.index(pfscore)
##                except:
                T.append(pfscore)
        T.sort()
        T.reverse() ## ugly

        TPavg = []
        TPstdV = []
        TPstdH = []
        for tidx in frange(0, (len(T) - 1.0), float(len(T))/samples):
            FPsum = []
            TPsum = []
            for i in range(nrocs):
                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
                FPsum.append(fp)
                TPsum.append(tp)
            TPavg.append( (statc.mean(FPsum), statc.mean(TPsum)) )
            ## vertical standard deviation
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstdV.append( stdv )
            ## horizontal standard deviation
            if len(FPsum) > 1:
                stdh = statc.std(FPsum)
            else:
                stdh = 0.0
            TPstdH.append( stdh )

        average.append(TPavg)
        stdevV.append(TPstdV)
        stdevH.append(TPstdH)

    return (average, stdevV, stdevH)
    1112  
    1113 ## Calibration Curve 
    1114 ## returns an array of (curve, yesClassPredictions, noClassPredictions) elements, where: 
    1115 ##  - curve is an array of points (x, y) on the calibration curve 
    1116 ##  - yesClassRugPoints is an array of (x, 1) points 
    1117 ##  - noClassRugPoints is an array of (x, 0) points 
    1118 def computeCalibrationCurve(res, classIndex=-1): 
    1119     import corn 
    1120     ## merge multiple iterations into one 
    1121     mres = orngTest.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
    1122     for te in res.results: 
    1123         mres.results.append( te ) 
    1124  
    1125     problists, tots = corn.computeROCCumulative(mres, classIndex) 
    1126  
    1127     results = [] 
    1128     P, N = tots[1], tots[0] 
    1129  
    1130     bins = 10 ## divide interval between 0.0 and 1.0 into N bins 
    1131  
    1132     for plist in problists: 
    1133         yesClassRugPoints = []  
    1134         noClassRugPoints = [] 
    1135  
    1136         yesBinsVals = [0] * bins 
    1137         noBinsVals = [0] * bins 
    1138         for (f, (thisNeg, thisPos)) in plist: 
    1139             yesClassRugPoints.append( (f, thisPos) ) #1.0 
    1140             noClassRugPoints.append( (f, thisNeg) ) #1.0 
    1141  
    1142             index = int(f * bins ) 
    1143             index = min(index, bins - 1) ## just in case for value 1.0 
    1144             yesBinsVals[index] += thisPos 
    1145             noBinsVals[index] += thisNeg 
    1146  
    1147         curve = [] 
    1148         for cn in range(bins): 
    1149             f = float(cn * 1.0 / bins) + (1.0 / 2.0 / bins) 
    1150             yesVal = yesBinsVals[cn] 
    1151             noVal = noBinsVals[cn] 
    1152             allVal = yesVal + noVal 
    1153             if allVal == 0.0: continue 
    1154             y = float(yesVal)/float(allVal) 
    1155             curve.append( (f,  y) ) 
    1156  
    1157         ## smooth the curve 
    1158         maxnPoints = 100 
    1159         if len(curve) >= 3: 
    1160 #            loessCurve = statc.loess(curve, -3, 0.6) 
    1161             loessCurve = statc.loess(curve, maxnPoints, 0.5, 3) 
    1162         else: 
    1163             loessCurve = curve 
    1164         clen = len(loessCurve) 
    1165         if clen > maxnPoints: 
    1166             df = clen / maxnPoints 
    1167             if df < 1: df = 1 
    1168             curve = [loessCurve[i]  for i in range(0, clen, df)] 
    1169         else: 
    1170             curve = loessCurve 
    1171         curve = [(c)[:2] for c in curve] ## remove the third value (variance of epsilon?) that suddenly appeared in the output of the statc.loess function 
    1172         results.append((curve, yesClassRugPoints, noClassRugPoints)) 
    1173  
    1174     return results 
    1175  
    1176  
    1177 ## Lift Curve 
    1178 ## returns an array of curve elements, where: 
    1179 ##  - curve is an array of points ((TP+FP)/(P + N), TP/P, (th, FP/N)) on the Lift Curve 
    1180 def computeLiftCurve(res, classIndex=-1): 
    1181     import corn 
    1182     ## merge multiple iterations into one 
    1183     mres = orngTest.ExperimentResults(1, res.classifierNames, res.classValues, res.weights, classifiers=res.classifiers, loaded=res.loaded) 
    1184     for te in res.results: 
    1185         mres.results.append( te ) 
    1186  
    1187     problists, tots = corn.computeROCCumulative(mres, classIndex) 
    1188  
    1189     results = [] 
    1190     P, N = tots[1], tots[0] 
    1191     for plist in problists: 
    1192         ## corn gives an increasing by scores list, we need a decreasing by scores 
    1193         plist.reverse() 
    1194         TP = 0.0 
    1195         FP = 0.0 
    1196         curve = [(0.0, 0.0, (10e300, 0.0))] 
    1197         for (f, (thisNeg, thisPos)) in plist: 
    1198             TP += thisPos 
    1199             FP += thisNeg 
    1200             if FP > N: 
    1201                 import warnings 
    1202                 warnings.warn("The sky is falling!!") 
    1203             curve.append( ((TP+FP)/(P + N), TP, (f, FP/(N or 1))) ) 
    1204         results.append(curve) 
    1205  
    1206     return P, N, results 
    1207 ### 
    1208  
    1209 class CDT: 
    1210   """ Stores number of concordant (C), discordant (D) and tied (T) pairs (used for AUC) """ 
    1211   def __init__(self, C=0.0, D=0.0, T=0.0): 
    1212     self.C, self.D, self.T = C, D, T 
    1213     
    1214 def isCDTEmpty(cdt): 
    1215     return cdt.C + cdt.D + cdt.T < 1e-20 
    1216  
    1217  
    1218 def computeCDT(res, classIndex=-1, **argkw): 
    1219     """Obsolete, don't use""" 
    1220     import corn 
    1221     if classIndex<0: 
    1222         if res.baseClass>=0: 
    1223             classIndex = res.baseClass 
    1224         else: 
    1225             classIndex = 1 
    1226              
    1227     useweights = res.weights and not argkw.get("unweighted", 0) 
    1228     weightByClasses = argkw.get("weightByClasses", True) 
    1229  
    1230     if (res.numberOfIterations>1): 
    1231         CDTs = [CDT() for i in range(res.numberOfLearners)] 
    1232         iterationExperiments = splitByIterations(res) 
    1233         for exp in iterationExperiments: 
    1234             expCDTs = corn.computeCDT(exp, classIndex, useweights) 
    1235             for i in range(len(CDTs)): 
    1236                 CDTs[i].C += expCDTs[i].C 
    1237                 CDTs[i].D += expCDTs[i].D 
    1238                 CDTs[i].T += expCDTs[i].T 
    1239         for i in range(res.numberOfLearners): 
    1240             if isCDTEmpty(CDTs[0]): 
    1241                 return corn.computeCDT(res, classIndex, useweights) 
    1242          
    1243         return CDTs 
    1244     else: 
    1245         return corn.computeCDT(res, classIndex, useweights) 
    1246  
    1247 ## THIS FUNCTION IS OBSOLETE AND ITS AVERAGING OVER FOLDS IS QUESTIONABLE 
    1248 ## DON'T USE IT 
    1249 def ROCsFromCDT(cdt, **argkw): 
    1250     """Obsolete, don't use""" 
    1251     if type(cdt) == list: 
    1252         return [ROCsFromCDT(c) for c in cdt] 
    1253  
    1254     C, D, T = cdt.C, cdt.D, cdt.T 
    1255     N = C+D+T 
    1256     if N < 1e-6: 
    1257         import warnings 
    1258         warnings.warn("Can't compute AUC: one or both classes have no instances") 
    1259         return (-1,)*8 
    1260     if N < 2: 
    1261         import warnings 
    1262         warnings.warn("Can't compute AUC: one or both classes have too few examples") 
    1263  
    1264     som = (C-D)/N 
    1265     c = 0.5*(1+som) 
    1266    
    1267     if (C+D): 
    1268         res = (C/N*100, D/N*100, T/N*100, N, som, (C-D)/(C+D), (C-D)/(N*(N-1)/2), 0.5*(1+som)) 
    1269     else: 
    1270         res = (C/N*100, D/N*100, T/N*100, N, som, -1.0, (C-D)/(N*(N-1)/2), 0.5*(1+som)) 
    1271  
    1272     if argkw.get("print"): 
    1273         print "Concordant  = %5.1f       Somers' D = %1.3f" % (res[0], res[4]) 
    1274         print "Discordant  = %5.1f       Gamma     = %1.3f" % (res[1], res[5]>0 and res[5] or "N/A") 
    1275         print "Tied        = %5.1f       Tau-a     = %1.3f" % (res[2], res[6]) 
    1276         print " %6d pairs             c         = %1.3f"    % (res[3], res[7]) 
    1277  
    1278     return res 
    1279  
    1280 AROCFromCDT = ROCsFromCDT  # for backward compatibility, AROCFromCDT is obsolote 
    1281  
    1282  
    1283  
    1284 # computes AUC using a specified 'cdtComputer' function 
    1285 # It tries to compute AUCs from 'ite' (examples from a single iteration) and, 
    1286 # if C+D+T=0, from 'all_ite' (entire test set). In the former case, the AUCs 
    1287 # are divided by 'divideByIfIte'. Additional flag is returned which is True in 
    1288 # the former case, or False in the latter. 
    1289 def AUC_x(cdtComputer, ite, all_ite, divideByIfIte, computerArgs): 
    1290     cdts = cdtComputer(*(ite, ) + computerArgs) 
    1291     if not isCDTEmpty(cdts[0]): 
    1292         return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T)/divideByIfIte for cdt in cdts], True 
    1293          
    1294     if all_ite: 
    1295          cdts = cdtComputer(*(all_ite, ) + computerArgs) 
    1296          if not isCDTEmpty(cdts[0]): 
    1297              return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T) for cdt in cdts], False 
    1298  
    1299     return False, False 
    1300  
    1301      
    1302 # computes AUC between classes i and j as if there we no other classes 
    1303 def AUC_ij(ite, classIndex1, classIndex2, useWeights = True, all_ite = None, divideByIfIte = 1.0): 
    1304     import corn 
    1305     return AUC_x(corn.computeCDTPair, ite, all_ite, divideByIfIte, (classIndex1, classIndex2, useWeights)) 
    1306  
    1307  
    1308 # computes AUC between class i and the other classes (treating them as the same class) 
    1309 def AUC_i(ite, classIndex, useWeights = True, all_ite = None, divideByIfIte = 1.0): 
    1310     import corn 
    1311     return AUC_x(corn.computeCDT, ite, all_ite, divideByIfIte, (classIndex, useWeights)) 
    1312     
    1313  
    1314 # computes the average AUC over folds using a "AUCcomputer" (AUC_i or AUC_ij) 
    1315 # it returns the sum of what is returned by the computer, unless at a certain 
    1316 # fold the computer has to resort to computing over all folds or even this failed; 
    1317 # in these cases the result is returned immediately 
    1318 def AUC_iterations(AUCcomputer, iterations, computerArgs): 
    1319     subsum_aucs = [0.] * iterations[0].numberOfLearners 
    1320     for ite in iterations: 
    1321         aucs, foldsUsed = AUCcomputer(*(ite, ) + computerArgs) 
    1322         if not aucs: 
    1323             return None 
    1324         if not foldsUsed: 
    1325             return aucs 
    1326         subsum_aucs = map(add, subsum_aucs, aucs) 
    1327     return subsum_aucs 
    1328  
    1329  
    1330 # AUC for binary classification problems 
    1331 def AUC_binary(res, useWeights = True): 
    1332     if res.numberOfIterations > 1: 
    1333         return AUC_iterations(AUC_i, splitByIterations(res), (-1, useWeights, res, res.numberOfIterations)) 
    1334     else: 
    1335         return AUC_i(res, -1, useWeights)[0] 
    1336  
    1337 # AUC for multiclass problems 
    1338 def AUC_multi(res, useWeights = True, method = 0): 
    1339     numberOfClasses = len(res.classValues) 
    1340      
    1341     if res.numberOfIterations > 1: 
    1342         iterations = splitByIterations(res) 
    1343         all_ite = res 
    1344     else: 
    1345         iterations = [res] 
    1346         all_ite = None 
    1347      
    1348     # by pairs 
    1349     sum_aucs = [0.] * res.numberOfLearners 
    1350     usefulClassPairs = 0. 
    1351  
    1352     if method in [0, 2]: 
    1353         prob = classProbabilitiesFromRes(res) 
    1354          
    1355     if method <= 1: 
    1356         for classIndex1 in range(numberOfClasses): 
    1357             for classIndex2 in range(classIndex1): 
    1358                 subsum_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, useWeights, all_ite, res.numberOfIterations)) 
    1359                 if subsum_aucs: 
    1360                     if method == 0: 
    1361                         p_ij = prob[classIndex1] * prob[classIndex2] 
    1362                         subsum_aucs = [x * p_ij  for x in subsum_aucs] 
    1363                         usefulClassPairs += p_ij 
    1364                     else: 
    1365                         usefulClassPairs += 1 
    1366                     sum_aucs = map(add, sum_aucs, subsum_aucs) 
    1367     else: 
    1368         for classIndex in range(numberOfClasses): 
    1369             subsum_aucs = AUC_iterations(AUC_i, iterations, (classIndex, useWeights, all_ite, res.numberOfIterations)) 
    1370             if subsum_aucs: 
    1371                 if method == 0: 
    1372                     p_i = prob[classIndex] 
    1373                     subsum_aucs = [x * p_i  for x in subsum_aucs] 
    1374                     usefulClassPairs += p_i 
    1375                 else: 
    1376                     usefulClassPairs += 1 
    1377                 sum_aucs = map(add, sum_aucs, subsum_aucs) 
    1378                      
    1379     if usefulClassPairs > 0: 
    1380         sum_aucs = [x/usefulClassPairs for x in sum_aucs] 
    1381  
    1382     return sum_aucs 
    1383  
    1384  
    1385 # Computes AUC, possibly for multiple classes (the averaging method can be specified) 
    1386 # Results over folds are averages; if some folds examples from one class only, the folds are merged 
    1387 def AUC(res, method = 0, useWeights = True): 
    1388     if len(res.classValues) < 2: 
    1389         raise ValueError("Cannot compute AUC on a single-class problem") 
    1390     elif len(res.classValues) == 2: 
    1391         return AUC_binary(res, useWeights) 
    1392     else: 
    1393         return AUC_multi(res, useWeights, method) 
    1394  
# symbolic names for the multi-class averaging 'method' argument of AUC()
AUC.ByWeightedPairs = 0
AUC.ByPairs = 1
AUC.WeightedOneAgainstAll = 2
AUC.OneAgainstAll = 3
    1399  
    1400  
    1401 # Computes AUC; in multivalued class problem, AUC is computed as one against all 
    1402 # Results over folds are averages; if some folds examples from one class only, the folds are merged 
    1403 def AUC_single(res, classIndex = -1, useWeights = True): 
    1404     if classIndex<0: 
    1405         if res.baseClass>=0: 
    1406             classIndex = res.baseClass 
    1407         else: 
    1408             classIndex = 1 
    1409  
    1410     if res.numberOfIterations > 1: 
    1411         return AUC_iterations(AUC_i, splitByIterations(res), (classIndex, useWeights, res, res.numberOfIterations)) 
    1412     else: 
    1413         return AUC_i( res, classIndex, useWeights)[0] 
    1414  
    1415 # Computes AUC for a pair of classes (as if there were no other classes) 
    1416 # Results over folds are averages; if some folds have examples from one class only, the folds are merged 
    1417 def AUC_pair(res, classIndex1, classIndex2, useWeights = True): 
    1418     if res.numberOfIterations > 1: 
    1419         return AUC_iterations(AUC_ij, splitByIterations(res), (classIndex1, classIndex2, useWeights, res, res.numberOfIterations)) 
    1420     else: 
    1421         return AUC_ij(res, classIndex1, classIndex2, useWeights) 
    1422    
    1423  
# AUC for multiclass problems
def AUC_matrix(res, useWeights = True):
    """Return a matrix of pairwise AUCs.

    For each learner, aucs[learner][classIndex1] is a list whose element
    for each classIndex2 < classIndex1 is the AUC of that class pair
    (summed over folds by AUC_iterations), or -1 when it could not be
    computed.
    """
    numberOfClasses = len(res.classValues)
    numberOfLearners = res.numberOfLearners
    
    # with several folds, compute per fold and keep the whole set as fallback
    if res.numberOfIterations > 1:
        iterations, all_ite = splitByIterations(res), res
    else:
        iterations, all_ite = [res], None
    
    aucs = [[[] for i in range(numberOfClasses)] for i in range(numberOfLearners)]
    # NOTE(review): 'prob' is computed but never used below -- confirm
    # whether probability weighting was intended here
    prob = classProbabilitiesFromRes(res)
        
    for classIndex1 in range(numberOfClasses):
        for classIndex2 in range(classIndex1):
            pair_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, useWeights, all_ite, res.numberOfIterations))
            if pair_aucs:
                for lrn in range(numberOfLearners):
                    aucs[lrn][classIndex1].append(pair_aucs[lrn])
            else:
                # pair could not be computed; mark it with -1
                for lrn in range(numberOfLearners):
                    aucs[lrn][classIndex1].append(-1)
    return aucs
    1447                  
    1448  
    1449 def McNemar(res, **argkw): 
    1450     nLearners = res.numberOfLearners 
    1451     mcm = [] 
    1452     for i in range(nLearners): 
    1453        mcm.append([0.0]*res.numberOfLearners) 
    1454  
    1455     if not res.weights or argkw.get("unweighted"): 
    1456         for i in res.results: 
    1457             actual = i.actualClass 
    1458             classes = i.classes 
    1459             for l1 in range(nLearners): 
    1460                 for l2 in range(l1, nLearners): 
    1461                     if classes[l1]==actual: 
    1462                         if classes[l2]!=actual: 
    1463                             mcm[l1][l2] += 1 
    1464                     elif classes[l2]==actual: 
    1465                         mcm[l2][l1] += 1 
    1466     else: 
    1467         for i in res.results: 
    1468             actual = i.actualClass 
    1469             classes = i.classes 
    1470             for l1 in range(nLearners): 
    1471                 for l2 in range(l1, nLearners): 
    1472                     if classes[l1]==actual: 
    1473                         if classes[l2]!=actual: 
    1474                             mcm[l1][l2] += i.weight 
    1475                     elif classes[l2]==actual: 
    1476                         mcm[l2][l1] += i.weight 
    1477  
    1478     for l1 in range(nLearners): 
    1479         for l2 in range(l1, nLearners): 
    1480             su=mcm[l1][l2] + mcm[l2][l1] 
    1481             if su: 
    1482                 mcm[l2][l1] = (abs(mcm[l1][l2]-mcm[l2][l1])-1)**2 / su 
    1483             else: 
    1484                 mcm[l2][l1] = 0 
    1485  
    1486     for l1 in range(nLearners): 
    1487         mcm[l1]=mcm[l1][:l1] 
    1488  
    1489     return mcm 
    1490  
    1491  
    1492 def McNemarOfTwo(res, lrn1, lrn2): 
    1493     tf = ft = 0.0 
    1494     if not res.weights or argkw.get("unweighted"): 
    1495         for i in res.results: 
    1496             actual=i.actualClass 
    1497             if i.classes[lrn1]==actual: 
    1498                 if i.classes[lrn2]!=actual: 
    1499                     tf += i.weight 
    1500             elif i.classes[lrn2]==actual: 
    1501                     ft += i.weight 
    1502     else: 
    1503         for i in res.results: 
    1504             actual=i.actualClass 
    1505             if i.classes[lrn1]==actual: 
    1506                 if i.classes[lrn2]!=actual: 
    1507                     tf += 1.0 
    1508             elif i.classes[lrn2]==actual: 
    1509                     ft += 1.0 
    1510  
    1511     su = tf + ft 
    1512     if su: 
    1513         return (abs(tf-ft)-1)**2 / su 
    1514     else: 
    1515         return 0 
    1516  
    1517  
def Friedman(res, stat=CA):
    """ Compares classifiers by Friedman test, treating folds as different examples.
        Returns F, p and average ranks
    """
    # one score per (fold, learner); each fold is ranked separately
    res_split = splitByIterations(res)
    res = [stat(r) for r in res_split]
    
    N = len(res)            # number of folds
    k = len(res[0])         # number of learners
    sums = [0.0]*k          # per-learner rank sums
    for r in res:
        # statc.rankdata ranks ascending; k-x+1 flips so that the highest
        # (best) score gets rank 1
        ranks = [k-x+1 for x in statc.rankdata(r)]
        if stat==BrierScore: # reverse ranks for BrierScore (lower better)
            ranks = [k+1-x for x in ranks]
        sums = [ranks[i]+sums[i] for i in range(k)]

    T = sum([x*x for x in sums])
    sums = [x/N for x in sums]  # average ranks

    # Friedman chi-square statistic, k-1 degrees of freedom
    F = 12.0 / (N*k*(k+1)) * T  - 3 * N * (k+1)

    return F, statc.chisqprob(F, k-1), sums
    1540  
    1541  
def WilcoxonPairs(res, avgranks, stat=CA):
    """ Returns a triangular matrix, where element[i][j] stores significance of difference
        between i-th and j-th classifier, as computed by Wilcoxon test. The element is positive
        if i-th is better than j-th, negative if it is worse, and 1 if they are equal.
        Arguments to function are ExperimentResults, average ranks (as returned by Friedman)
        and, optionally, a statistic; greater values should mean better results.
    """
    # one score per (fold, learner)
    res_split = splitByIterations(res)
    res = [stat(r) for r in res_split]

    k = len(res[0])  # number of learners
    bt = []
    for m1 in range(k):
        nl = []
        for m2 in range(m1+1, k):
            # paired Wilcoxon test over the per-fold scores of the two learners
            t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res])
            # sign encodes which learner has the better (lower) average rank
            if avgranks[m1]<avgranks[m2]:
                nl.append(p)
            elif avgranks[m2]<avgranks[m1]:
                nl.append(-p)
            else:
                nl.append(1)
        bt.append(nl)
    return bt
    1566  
    1567  
    1568 def plotLearningCurveLearners(file, allResults, proportions, learners, noConfidence=0): 
    1569     plotLearningCurve(file, allResults, proportions, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], noConfidence) 
    1570      
def plotLearningCurve(file, allResults, proportions, legend, noConfidence=0):
    """Write a gnuplot script plotting a learning curve (accuracy vs.
    training-set proportion) for each learner, optionally with error bars.

    'file' may be a file name or an open file-like object; a file opened
    here is closed here.
    """
    import types
    fopened=0
    # accept either a file name or an already-open file object
    if (type(file)==types.StringType):
        file=open(file, "wt")
        fopened=1
        
    file.write("set yrange [0:1]\n")
    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    # per-proportion accuracies with deviations (CA_dev presumably returns
    # (mean, deviation) pairs per learner -- confirm)
    CAs = [CA_dev(x) for x in allResults]

    # one 'plot' command listing every data series up front
    file.write("plot \\\n")
    for i in range(len(legend)-1):
        if not noConfidence:
            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i+1))
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    if not noConfidence:
        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))

    # inline data blocks follow, each terminated by 'e'
    for i in range(len(legend)):
        if not noConfidence:
            # error bars: x, y, 1.96*deviation (95% interval for a normal dev)
            for p in range(len(proportions)):
                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96*CAs[p][i][1]))
            file.write("e\n\n")

        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
        file.write("e\n\n")

    if fopened:
        file.close()
    1604  
    1605  
    1606 def printSingleROCCurveCoordinates(file, curve): 
    1607     import types 
    1608     fopened=0 
    1609     if (type(file)==types.StringType): 
    1610         file=open(file, "wt") 
    1611         fopened=1 
    1612  
    1613     for coord in curve: 
    1614         file.write("%5.3f\t%5.3f\n" % tuple(coord)) 
    1615  
    1616     if fopened: 
    1617         file.close() 
    1618  
    1619  
    1620 def plotROCLearners(file, curves, learners): 
    1621     plotROC(file, curves, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]) 
    1622      
    1623 def plotROC(file, curves, legend): 
    1624     import types 
    1625     fopened=0 
    1626     if (type(file)==types.StringType): 
    1627         file=open(file, "wt") 
    1628         fopened=1 
    1629  
    1630     file.write("set yrange [0:1]\n") 
    1631     file.write("set xrange [0:1]\n") 
    1632     file.write("set multiplot\n\n") 
    1633  
    1634     file.write("plot \\\n") 
    1635     for leg in legend: 
    1636         file.write("'-' title '%s' with lines,\\\n" % leg) 
    1637     file.write("'-' title '' with lines\n") 
    1638  
    1639     for curve in curves: 
    1640         for coord in curve: 
    1641             file.write("%5.3f\t%5.3f\n" % tuple(coord)) 
    1642         file.write("e\n\n") 
    1643  
    1644     file.write("1.0\t1.0\n0.0\t0.0e\n\n")           
    1645  
    1646     if fopened: 
    1647         file.close() 
    1648  
    1649  
    1650  
    1651 def plotMcNemarCurveLearners(file, allResults, proportions, learners, reference=-1): 
    1652     plotMcNemarCurve(file, allResults, proportions, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference) 
    1653  
def plotMcNemarCurve(file, allResults, proportions, legend, reference=-1):
    """Write a gnuplot script plotting, per learner, the McNemar statistic
    against the reference learner (default: the last one) as a function of
    the training-set proportion.

    'file' may be a file name or an open file-like object; a file opened
    here is closed here.
    """
    if reference<0:
        reference=len(legend)-1  # default reference: the last learner
        
    import types
    fopened=0
    if (type(file)==types.StringType):
        file=open(file, "wt")
        fopened=1
        
    #file.write("set yrange [0:1]\n")
    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    file.write("plot \\\n")
    # indices of all learners except the reference one
    tmap=range(reference)+range(reference+1, len(legend))
    for i in tmap[:-1]:
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    # NOTE(review): the last series uses pointtype tmap[-1] while the others
    # use i+1 -- looks like an off-by-one; confirm intended
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1]))
    file.write("\n")

    # inline data blocks, each terminated by 'e'
    for i in tmap:
        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], McNemarOfTwo(allResults[p], i, reference)))
        file.write("e\n\n")

    if fopened:
        file.close()
    1681  
# default PiCTeX marker symbols and line styles used by the *2PiCTeX plotters
defaultPointTypes=("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}")+tuple([chr(x) for x in range(97, 122)])
defaultLineTypes=("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
    1684  
    1685 def learningCurveLearners2PiCTeX(file, allResults, proportions, **options): 
    1686     return apply(learningCurve2PiCTeX, (file, allResults, proportions), options) 
    1687      
    1688 def learningCurve2PiCTeX(file, allResults, proportions, **options): 
    1689     import types 
    1690     fopened=0 
    1691     if (type(file)==types.StringType): 
    1692         file=open(file, "wt") 
    1693         fopened=1 
    1694  
    1695     nexamples=len(allResults[0].results) 
    1696     CAs = [CA_dev(x) for x in allResults] 
    1697  
    1698     graphsize=float(options.get("graphsize", 10.0)) #cm 
    1699     difprop=proportions[-1]-proportions[0] 
    1700     ntestexamples=nexamples*proportions[-1] 
    1701     xunit=graphsize/ntestexamples 
    1702  
    1703     yshift=float(options.get("yshift", -ntestexamples/20.)) 
    1704      
    1705     pointtypes=options.get("pointtypes", defaultPointTypes) 
    1706     linetypes=options.get("linetypes", defaultLineTypes) 
    1707  
    1708     if options.has_key("numberedx"): 
    1709         numberedx=options["numberedx"] 
    1710         if type(numberedx)==types.IntType: 
    1711             if numberedx>0: 
    1712                 numberedx=[nexamples*proportions[int(i/float(numberedx)*len(proportions))] for i in range(numberedx)]+[proportions[-1]*nexamples] 
    1713             elif numberedx<0: 
    1714                 numberedx = -numberedx 
    1715                 newn=[] 
    1716                 for i in range(numberedx+1): 
    1717                     wanted=proportions[0]+float(i)/numberedx*difprop 
    1718                     best=(10, 0) 
    1719                     for t in proportions: 
    1720                         td=abs(wanted-t) 
    1721                         if td<best[0]: 
    1722                             best=(td, t) 
    1723                     if not best[1] in newn: 
    1724                         newn.append(best[1]) 
    1725                 newn.sort() 
    1726                 numberedx=[nexamples*x for x in newn] 
    1727         elif type(numberedx[0])==types.FloatType: 
    1728             numberedx=[nexamples*x for x in numberedx] 
    1729     else: 
    1730         numberedx=[nexamples*x for x in proportions] 
    1731  
    1732     file.write("\\mbox{\n") 
    1733     file.write("  \\beginpicture\n") 
    1734     file.write("  \\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))     
    1735     file.write("  \\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))     
    1736     file.write("  \\axis bottom invisible\n")# label {#examples}\n") 
    1737     file.write("      ticks short at %s /\n" % reduce(lambda x,y:x+" "+y, ["%i"%(x*nexamples+0.5) for x in proportions])) 
    1738     if numberedx: 
    1739         file.write("            long numbered at %s /\n" % reduce(lambda x,y:x+y, ["%i " % int(x+0.5) for x in numberedx])) 
    1740     file.write("  /\n") 
    1741     file.write("  \\axis left invisible\n")# label {classification accuracy}\n") 
    1742     file.write("      shiftedto y=%5.3f\n" % yshift) 
    1743     file.write("      ticks short from 0.0 to 1.0 by 0.05\n") 
    1744     file.write("            long numbered from 0.0 to 1.0 by 0.25\n") 
    1745     file.write("  /\n") 
    1746     if options.has_key("default"): 
    1747         file.write("  \\setdashpattern<1pt, 1pt>\n") 
    1748         file.write("  \\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"])) 
    1749      
    1750     for i in range(len(CAs[0])): 
    1751         coordinates=reduce(lambda x,y:x+" "+y, ["%i %5.3f" % (proportions[p]*nexamples, CAs[p][i][0]) for p in range(len(proportions))]) 
    1752         if linetypes: 
    1753             file.write("  %s\n" % linetypes[i]) 
    1754             file.write("  \\plot %s /\n" % coordinates) 
    1755         if pointtypes: 
    1756             file.write("  \\multiput %s at %s /\n" % (pointtypes[i], coordinates)) 
    1757  
    1758     file.write("  \\endpicture\n") 
    1759     file.write("}\n") 
    1760     if fopened: 
    1761         file.close() 
    1762     file.close() 
    1763     del file 
    1764  
    1765 def legendLearners2PiCTeX(file, learners, **options): 
    1766   return apply(legend2PiCTeX, (file, [orngMisc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options) 
    1767      
    1768 def legend2PiCTeX(file, legend, **options): 
    1769     import types 
    1770     fopened=0 
    1771     if (type(file)==types.StringType): 
    1772         file=open(file, "wt") 
    1773         fopened=1 
    1774  
    1775     pointtypes=options.get("pointtypes", defaultPointTypes) 
    1776     linetypes=options.get("linetypes", defaultLineTypes) 
    1777  
    1778     file.write("\\mbox{\n") 
    1779     file.write("  \\beginpicture\n") 
    1780     file.write("  \\setcoordinatesystem units <5cm, 1pt>\n\n") 
    1781     file.write("  \\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend)) 
    1782  
    1783     for i in range(len(legend)): 
    1784         if linetypes: 
    1785             file.write("  %s\n" % linetypes[i]) 
    1786             file.write("  \\plot %5.3f 6 %5.3f 6 /\n" % (i, i+0.2)) 
    1787         if pointtypes: 
    1788             file.write("  \\put {%s} at %5.3f 6\n" % (pointtypes[i], i+0.1)) 
    1789         file.write("  \\put {%s} [lb] at %5.3f 0\n" % (legend[i], i+0.25)) 
    1790  
    1791     file.write("  \\endpicture\n") 
    1792     file.write("}\n") 
    1793     if fopened: 
    1794         file.close() 
    1795     file.close() 
    1796     del file 
    1797  
    1798  
    1799 def compute_friedman(avranks, N): 
    1800     """ 
    1801     Returns a tuple (friedman statistic, degrees of freedom) 
    1802     and (Iman statistic - F-distribution, degrees of freedom) 
    1803     """ 
    1804  
    1805     k = len(avranks) 
    1806  
    1807     def friedman(N, k, ranks): 
    1808         return 12*N*(sum([rank**2.0 for rank in ranks]) - (k*(k+1)*(k+1)/4.0) )/(k*(k+1)) 
    1809  
    1810     def iman(fried, N, k): 
    1811         return (N-1)*fried/(N*(k-1) - fried) 
    1812  
    1813     f = friedman(N, k, avranks) 
    1814     im = iman(f, N, k) 
    1815     fdistdof = (k-1, (k-1)*(N-1)) 
    1816  
    1817     return (f, k-1), (im, fdistdof) 
    1818  
    1819 def compute_CD(avranks, N, alpha="0.05", type="nemenyi"): 
    1820     """ 
    1821     if type == "nemenyi": 
    1822         critical difference for Nemenyi two tailed test. 
    1823     if type == "bonferroni-dunn": 
    1824         critical difference for Bonferroni-Dunn test 
    1825     """ 
    1826  
    1827     k = len(avranks) 
    1828     
    1829     d = {} 
    1830  
    1831     d[("nemenyi", "0.05")] = [0, 0, 1.960, 2.343, 2.568, 2.728, 2.850, 2.949, 3.031, 3.102, 3.164 ] 
    1832     d[("nemenyi", "0.1")] = [0, 0, 1.645, 2.052, 2.291, 2.459, 2.589, 2.693, 2.780, 2.855, 2.920 ]     
    1833     d[("bonferroni-dunn", "0.05")] =  [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576, 2.638, 2.690, 2.724, 2.773 ] 
    1834     d[("bonferroni-dunn", "0.1")] = [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326, 2.394, 2.450, 2.498, 2.539 ] 
    1835  
    1836     q = d[(type, alpha)] 
    1837  
    1838     cd = q[k]*(k*(k+1)/(6.0*N))**0.5 
    1839  
    1840     return cd 
    1841   
    1842  
    1843 def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs): 
    1844     """ 
    1845     Draws a CD graph, which is used to display  the differences in methods'  
    1846     performance. 
    1847     See Janez Demsar, Statistical Comparisons of Classifiers over  
    1848     Multiple Data Sets, 7(Jan):1--30, 2006.  
    1849  
    1850     Needs matplotlib to work. 
    1851  
    1852     Arguments: 
    1853     filename -- Output file name (with extension). Formats supported 
    1854         by matplotlib can be used. 
    1855     avranks -- List of average methods' ranks. 
    1856     names -- List of methods' names. 
    1857  
    1858     Keyword arguments: 
    1859     cd -- Critical difference. Used for marking methods that whose 
    1860         difference is not statistically significant. 
    1861     lowv -- The lowest shown rank, if None, use 1. 
    1862     highv -- The highest shown rank, if None, use len(avranks). 
    1863     width -- Width of the drawn figure in inches, default 6 in. 
    1864     textspace -- Space on figure sides left for the description 
    1865         of methods, default 1 in. 
    1866     reverse -- If True, the lowest rank is on the right. Default: 
    1867         False. 
    1868     cdmethod -- None by default. It can be an index of element in avranks or 
    1869         or names which specifies the method which should be marked 
    1870         with an interval. 
    1871  
    1872     Maintainer: Marko Toplak 
    1873     """ 
    1874  
    1875     width = float(width) 
    1876     textspace = float(textspace) 
    1877  
    1878     def nth(l,n): 
    1879         """ 
    1880         Returns only nth elemnt in a list. 
    1881         """ 
    1882         n = lloc(l,n) 
    1883         return [ a[n] for a in l ] 
    1884  
    1885     def lloc(l,n): 
    1886         """ 
    1887         List location in list of list structure. 
    1888         Enable the use of negative locations: 
    1889         -1 is the last element, -2 second last... 
    1890         """ 
    1891         if n < 0: 
    1892             return len(l[0])+n 
    1893         else: 
    1894             return n  
    1895  
    1896     def mxrange(lr): 
    1897         """ 
    1898         Multiple xranges. Can be used to traverse matrices. 
    1899         This function is very slow due to unknown number of 
    1900         parameters. 
    1901  
    1902         >>> mxrange([3,5])  
    1903         [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)] 
    1904  
    1905         >>> mxrange([[3,5,1],[9,0,-3]]) 
    1906         [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)] 
    1907  
    1908         """ 
    1909         if len(lr) == 0: 
    1910             yield () 
    1911         else: 
    1912             #it can work with single numbers 
    1913             index = lr[0] 
    1914             if type(1) == type(index): 
    1915                 index = [ index ] 
    1916             for a in range(*index): 
    1917                 for b in mxrange(lr[1:]): 
    1918                     yield tuple([a] + list(b)) 
    1919  
    1920     try: 
    1921         from matplotlib.figure import Figure 
    1922         from matplotlib.patches import Polygon 
    1923         from matplotlib.backends.backend_agg import FigureCanvasAgg 
    1924     except: 
    1925         import sys 
    1926         print >> sys.stderr, "Function requires matplotlib. Please install it." 
    1927         return 
    1928  
    1929     def printFigure(fig, *args, **kwargs): 
    1930         canvas = FigureCanvasAgg(fig) 
    1931         canvas.print_figure(*args, **kwargs) 
    1932  
    1933     sums = avranks 
    1934  
    1935     tempsort =  sorted([ (a,i) for i,a in  enumerate(sums) ], reverse=reverse) 
    1936     ssums = nth(tempsort, 0) 
    1937     sortidx = nth(tempsort, 1) 
    1938     nnames = [ names[x] for x in sortidx ] 
    1939      
    1940     if lowv == None: 
    1941         lowv = min(1, int(math.floor(min(ssums)))) 
    1942     if highv == None: 
    1943         highv = max(len(avranks), int(math.ceil(max(ssums)))) 
    1944  
    1945     cline = 0.4 
    1946  
    1947     k = len(sums) 
    1948  
    1949     lines = None 
    1950     sums = sorted(sums) 
    1951  
    1952     linesblank = 0 
    1953     scalewidth = width - 2*textspace 
    1954  
    1955     def rankpos(rank): 
    1956         if not reverse: 
    1957             a = rank - lowv 
    1958         else: 
    1959             a = highv - rank 
    1960         return textspace+scalewidth/(highv-lowv)*a 
    1961  
    1962     distanceh = 0.25 
    1963  
    1964     if cd and cdmethod == None: 
    1965      
    1966         #get pairs of non significant methods 
    1967  
    1968         def getLines(sums, hsd): 
    1969  
    1970             #get all pairs 
    1971             lsums = len(sums) 
    1972             allpairs = [ (i,j) for i,j in mxrange([[lsums], [lsums]]) if j > i ] 
    1973             #remove not significant 
    1974             notSig = [ (i,j) for i,j in allpairs if abs(sums[i]-sums[j]) <= hsd ] 
    1975             #keep only longest 
    1976              
    1977             def noLonger((i,j), notSig): 
    1978                 for i1,j1 in notSig: 
    1979                     if (i1 <= i and j1 > j) or (i1 < i and j1 >= j): 
    1980                         return False 
    1981                 return True 
    1982  
    1983             longest = [ (i,j) for i,j in notSig if noLonger((i,j),notSig) ] 
    1984              
    1985             return longest 
    1986  
    1987         lines = getLines(ssums, cd) 
    1988         linesblank = 0.2 + 0.2 + (len(lines)-1)*0.1 
    1989  
    1990         #add scale 
    1991         distanceh = 0.25 
    1992         cline += distanceh 
    1993  
    1994     #calculate height needed height of an image 
    1995     minnotsignificant = max(2*0.2, linesblank) 
    1996     height = cline + ((k+1)/2)*0.2 + minnotsignificant 
    1997  
    1998     fig = Figure(figsize=(width, height)) 
    1999     ax = fig.add_axes([0,0,1,1]) #reverse y axis 
    2000     ax.set_axis_off() 
    2001  
    2002     hf = 1./height # height factor 
    2003     wf = 1./width 
    2004  
    def hfl(coords):
        """Scale a list of y coordinates into the unit-height figure."""
        return [hf * v for v in coords]
    2007  
    def wfl(coords):
        """Scale a list of x coordinates into the unit-width figure."""
        return [wf * v for v in coords]
    2010  
    2011     """ 
    2012     Upper left corner is (0,0). 
    2013     """ 
    2014  
    2015     ax.plot([0,1], [0,1], c="w") 
    2016     ax.set_xlim(0, 1) 
    2017     ax.set_ylim(1, 0) 
    2018  
    def line(l, color='k', **kwargs):
        """Draw a polyline through a list of (x, y) point pairs,
        with coordinates scaled to figure-relative units."""
        xs = wfl(nth(l, 0))
        ys = hfl(nth(l, 1))
        ax.plot(xs, ys, color=color, **kwargs)
    2024  
    def text(x, y, s, *args, **kwargs):
        """Place string *s* at the figure-relative position (x, y)."""
        ax.text(x * wf, y * hf, s, *args, **kwargs)
    2027  
    2028     line([(textspace, cline), (width-textspace, cline)], linewidth=0.7) 
    2029      
    2030     bigtick = 0.1 
    2031     smalltick = 0.05 
    2032  
    2033  
    2034     import numpy 
    2035  
    2036     for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]: 
    2037         tick = smalltick 
    2038         if a == int(a): tick = bigtick 
    2039         line([(rankpos(a), cline-tick/2),(rankpos(a), cline)], linewidth=0.7) 
    2040  
    2041     for a in range(lowv, highv+1): 
    2042         text(rankpos(a), cline-tick/2-0.05, str(a), ha="center", va="bottom") 
    2043  
    2044     k = len(ssums) 
    2045  
    2046     for i in range((k+1)/2): 
    2047         chei = cline+ minnotsignificant + (i)*0.2 
    2048         line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace-0.1, chei)], linewidth=0.7) 
    2049         text(textspace-0.2, chei, nnames[i], ha="right", va="center") 
    2050  
    2051     for i in range((k+1)/2, k): 
    2052         chei = cline + minnotsignificant + (k-i-1)*0.2 
    2053         line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace+scalewidth+0.1, chei)], linewidth=0.7) 
    2054         text(textspace+scalewidth+0.2, chei, nnames[i], ha="left", va="center") 
    2055  
    2056     if cd and cdmethod == None: 
    2057  
    2058         #upper scale 
    2059         if not reverse: 
    2060             begin, end = rankpos(lowv), rankpos(lowv+cd) 
    2061         else: 
    2062             begin, end = rankpos(highv), rankpos(highv - cd) 
    2063              
    2064         line([(begin, distanceh), (end, distanceh)], linewidth=0.7) 
    2065         line([(begin, distanceh + bigtick/2), (begin, distanceh - bigtick/2)], linewidth=0.7) 
    2066         line([(end, distanceh + bigtick/2), (end, distanceh - bigtick/2)], linewidth=0.7) 
    2067         text((begin+end)/2, distanceh - 0.05, "CD", ha="center", va="bottom") 
    2068  
    2069         #non significance lines     
    2070         def drawLines(lines, side=0.05, height=0.1): 
    2071             start = cline + 0.2 
    2072             for l,r in lines:   
    2073                 line([(rankpos(ssums[l])-side, start), (rankpos(ssums[r])+side, start)], linewidth=2.5)  
    2074                 start += height 
    2075  
    2076         drawLines(lines) 
    2077  
    2078     elif cd: 
    2079         begin = rankpos(avranks[cdmethod]-cd) 
    2080         end = rankpos(avranks[cdmethod]+cd) 
    2081         line([(begin, cline), (end, cline)], linewidth=2.5)  
    2082         line([(begin, cline + bigtick/2), (begin, cline - bigtick/2)], linewidth=2.5) 
    2083         line([(end, cline + bigtick/2), (end, cline - bigtick/2)], linewidth=2.5) 
    2084   
    2085     printFigure(fig, filename, **kwargs) 
    2086  
    2087 if __name__ == "__main__": 
    2088     avranks =  [3.143, 2.000, 2.893, 1.964] 
    2089     names = ["prva", "druga", "tretja", "cetrta" ] 
    2090     cd = compute_CD(avranks, 14) 
    2091     #cd = compute_CD(avranks, 10, type="bonferroni-dunn") 
    2092     print cd 
    2093  
    2094     print compute_friedman(avranks, 14) 
    2095  
    2096     #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5) 
     1from Orange.evaluation.scoring import * 
Note: See TracChangeset for help on using the changeset viewer.