# source:orange/Orange/evaluation/scoring.py@9892:3b220d15fb39

# Revision 9892:3b220d15fb39, 84.9 KB, checked in by anze <anze.staric@...>
#
# Underscored function parameters and moved documentation to rst dir.
1import operator, math
3import numpy
4
5import Orange
6from Orange import statc
7from Orange.misc import deprecated_keywords
8
9#### Private stuff
10
def log2(x):
    """Return the base-2 logarithm of x."""
    natural = math.log(x)
    return natural / math.log(2)
14
def check_non_zero(x):
    """Raise ValueError when x equals 0.0 (no examples, or zero weight sum).

    The parenthesized raise form is valid in both Python 2 and 3, unlike
    the old ``raise ValueError, "..."`` statement.
    """
    if x == 0.0:
        raise ValueError("Cannot compute the score: no examples or sum of weights is 0.0.")
19
def gettotweight(res):
    """Return the sum of instance weights in res.

    Raises ValueError when the sum is 0.0, since scores cannot be
    normalized by a zero total weight.  Uses sum() instead of reduce(),
    which is not a builtin under Python 3.
    """
    totweight = sum(tex.weight for tex in res.results)
    if totweight == 0.0:
        raise ValueError("Cannot compute the score: sum of weights is 0.0.")
    return totweight
26
def gettotsize(res):
    """Return the number of tested instances in res.

    Raises ValueError when there are none.
    """
    if res.results:
        return len(res.results)
    raise ValueError("Cannot compute the score: no examples.")
33
34
def split_by_iterations(res):
    """Split ExperimentResults of a multiple-iteration test into a list
    of ExperimentResults, one for each iteration.
    """
    if res.number_of_iterations < 2:
        return [res]

    # NOTE(review): the line constructing the per-iteration result objects
    # was lost in this copy of the file; reconstructed by analogy with
    # split_by_classifiers below -- confirm against upstream history.
    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifierNames, res.classValues,
                weights=res.weights, baseClass=res.baseClass,
                classifiers=res.classifiers,
                test_type=res.test_type, labels=res.labels)
            for i in range(res.number_of_iterations)]
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress
47
def split_by_classifiers(res):
    """Split an instance of :obj:ExperimentResults into a list of
    :obj:ExperimentResults, one per classifier.
    """
    split_res = []
    for i in range(len(res.classifierNames)):
        single = Orange.evaluation.testing.ExperimentResults(
            res.numberOfIterations, [res.classifierNames[i]], res.classValues,
            weights=res.weights, baseClass=res.baseClass,
            classifiers=[res.classifiers[i]] if res.classifiers else [],
            test_type=res.test_type, labels=res.labels)
        single.results = []
        for te in res.results:
            # Copy the tested example, keeping only this classifier's
            # prediction and probability vector.
            copy = Orange.evaluation.testing.TestedExample(
                te.iterationNumber, te.actualClass, n=1, weight=te.weight)
            copy.classes = [te.classes[i]]
            copy.probabilities = [te.probabilities[i]]
            single.results.append(copy)
        split_res.append(single)
    return split_res
67
68
def class_probabilities_from_res(res, **argkw):
    """Return the distribution of actual classes in res as a list of
    probabilities, one entry per class value.
    """
    probs = [0.0] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            probs[int(tex.actual_class)] += 1.0
        totweight = gettotsize(res)
    else:
        totweight = 0.0
        for tex in res.results:
            # int() cast added for consistency with the unweighted branch;
            # actual_class may be a float-like Value.
            probs[int(tex.actual_class)] += tex.weight
            totweight += tex.weight
        check_non_zero(totweight)
    return [prob / totweight for prob in probs]
83
84
@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    """Average per-fold statistics into one score per learner.

    stats is indexed [fold][learner] when iteration_is_outer is true,
    otherwise [learner][fold]; fold_n holds per-fold example counts (or
    weight sums).  Empty folds (fold_n == 0) are dropped.  Returns a list
    of means, or of (mean, standard error) tuples when report_se is true.

    Rewritten without tuple-parameter lambdas (``lambda (x, fN): ...``),
    which are Python-2-only syntax.
    """
    # Remove empty folds and turn the matrix so that learner is outer.
    if iteration_is_outer:
        if not stats:
            raise ValueError("Cannot compute the score: no examples or sum of weights is 0.0.")
        number_of_learners = len(stats[0])
        non_empty = [(x, fN) for x, fN in zip(stats, fold_n) if fN > 0.0]
        stats = [[x[lrn] / fN for x, fN in non_empty]
                 for lrn in range(number_of_learners)]
    else:
        stats = [[x / fN for x, fN in zip(lrnD, fold_n) if fN > 0.0]
                 for lrnD in stats]

    if not stats:
        raise ValueError("Cannot compute the score: no classifiers")
    if not stats[0]:
        raise ValueError("Cannot compute the score: no examples or sum of weights is 0.0.")

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]
109
def ME(res, **argkw):
    """Return the mean (absolute) error for each learner in res.

    Rewritten with explicit loops: the original re-bound the accumulator
    through ``map`` with default-argument lambdas, an idiom that relies on
    Python 2's eager map and is fragile under Python 3's lazy map.
    """
    MEs = [0.0] * res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            actual = float(tex.actual_class)
            for i, cls in enumerate(tex.classes):
                MEs[i] += abs(float(cls) - actual)
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            actual = float(tex.actual_class)
            for i, cls in enumerate(tex.classes):
                MEs[i] += tex.weight * abs(float(cls) - actual)
        totweight = gettotweight(res)

    return [x / totweight for x in MEs]

# NOTE(review): this alias is shadowed by the MAE function defined further
# down in this module, so it is effectively dead code.
MAE = ME
127
128#########################################################################
129# PERFORMANCE MEASURES:
130# Scores for evaluation of numeric predictions
131
def check_argkw(dct, lst):
    """Return a true value if dct maps any key from lst to a non-zero value.

    Uses any() instead of reduce(): it also works on an empty lst (reduce
    without an initial value raises TypeError) and under Python 3, where
    reduce is no longer a builtin.
    """
    return any(dct.get(k, 0) for k in lst)
135
def regression_error(res, **argkw):
    """regression_error(res) -> regression error (default: MSE)

    Flags in argkw select the variant: "abs" uses absolute instead of
    squared error; "norm-abs"/"norm-sqr" normalize by the error of the
    mean predictor; "R2" reports 1 - error; "sqrt" takes the square root;
    "SE" (with multiple iterations) returns (mean, std) per learner.
    """
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # Computes the scores for each iteration, then averages.
        scores = [[0.0] * res.number_of_iterations
                  for i in range(res.number_of_learners)]
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.0] * res.number_of_iterations

        # Count examples and compute the average actual class per iteration.
        nIter = [0] * res.number_of_iterations
        a = [0] * res.number_of_iterations
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i] / nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # Iterate across test cases.
            # BUG FIX: nIter was incremented a second time in this loop,
            # doubling the counts and halving every per-fold score.
            for tex in res.results:
                ai = float(tex.actual_class)

                # Compute normalization, if required.
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number])**2

                # Iterate across results of different regressors.
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai)**2
        else:
            raise NotImplementedError("weighted error scores with SE not implemented yet")

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x / n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x / ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else:  # single iteration (testing on a single test set)
        scores = [0.0] * res.number_of_learners
        norm = 0.0

        if argkw.get("unweighted", 0) or not res.weights:
            a = sum([tex.actual_class for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                ai = float(tex.actual_class)
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i] += abs(float(cls) - ai)
                    else:
                        scores[i] += (float(cls) - ai)**2

                if argkw.get("norm-abs", 0):
                    norm += abs(tex.actual_class - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (tex.actual_class - a)**2
            totweight = gettotsize(res)
        else:
            # BUG FIX: this branch accumulated into an uninitialized name
            # (MSEs, a NameError when reached) and ignored the "abs" flag;
            # it now mirrors the unweighted branch with per-example weights.
            # The normalized variants ("norm-abs"/"norm-sqr") remain
            # unsupported with weights, as before.
            for tex in res.results:
                ai = float(tex.actual_class)
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i] += tex.weight * abs(float(cls) - ai)
                    else:
                        scores[i] += tex.weight * (float(cls) - ai)**2
            totweight = gettotweight(res)

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s / norm for s in scores]
        else:  # normalize by number of instances (or sum of weights)
            scores = [s / totweight for s in scores]

        if argkw.get("R2"):
            scores = [1.0 - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores
225
def MSE(res, **argkw):
    """Return the mean-squared error for each learner in res."""
    return regression_error(res, **argkw)
229
def RMSE(res, **argkw):
    """Return the root mean-squared error for each learner in res."""
    if "sqrt" not in argkw:
        argkw["sqrt"] = True
    return regression_error(res, **argkw)
234
def MAE(res, **argkw):
    """Return the mean absolute error for each learner in res."""
    if "abs" not in argkw:
        argkw["abs"] = True
    return regression_error(res, **argkw)
239
def RSE(res, **argkw):
    """Return the relative squared error for each learner in res."""
    if "norm-sqr" not in argkw:
        argkw["norm-sqr"] = True
    return regression_error(res, **argkw)
244
def RRSE(res, **argkw):
    """ Computes root relative squared error. """
    # Docstring fixed: this variant takes the square root of the relative
    # squared error (it sets both "norm-sqr" and "sqrt"); the old docstring
    # was a copy-paste of RSE's.
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)
250
def RAE(res, **argkw):
    """Return the relative absolute error for each learner in res."""
    if "abs" not in argkw:
        argkw["abs"] = True
    if "norm-abs" not in argkw:
        argkw["norm-abs"] = True
    return regression_error(res, **argkw)
256
def R2(res, **argkw):
    """Return the coefficient of determination (R-squared) for each learner."""
    if "norm-sqr" not in argkw:
        argkw["norm-sqr"] = True
    if "R2" not in argkw:
        argkw["R2"] = True
    return regression_error(res, **argkw)
262
def MSE_old(res, **argkw):
    """MSE(res) -> mean-squared error

    Deprecated in favour of MSE/regression_error; kept for backward
    compatibility.  Rewritten with explicit loops and the parenthesized
    raise form, both valid in Python 2 and 3.
    """
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # Per-iteration scores, averaged at the end.
        MSEs = [[0.0] * res.number_of_iterations
                for i in range(res.number_of_learners)]
        nIter = [0] * res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac)**2
        else:
            raise ValueError("weighted RMSE with SE not implemented yet")
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.0] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                for i, cls in enumerate(tex.classes):
                    MSEs[i] += (float(cls) - ac)**2
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                ac = float(tex.actual_class)
                for i, cls in enumerate(tex.classes):
                    MSEs[i] += tex.weight * (float(cls) - ac)**2
            totweight = gettotweight(res)

        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return [x / totweight for x in MSEs]
297
def RMSE_old(res, **argkw):
    """RMSE(res) -> root mean-squared error (deprecated; see RMSE)."""
    if "sqrt" not in argkw:
        argkw["sqrt"] = 1
    return MSE_old(res, **argkw)
302
303#########################################################################
304# PERFORMANCE MEASURES:
305# Scores for evaluation of classifiers
306
@deprecated_keywords({"reportSE": "report_se"})
def CA(res, report_se = False, **argkw):
    """ Computes classification accuracy, i.e. percentage of matches between
    predicted and actual class. The function returns a list of classification
    accuracies of all classifiers tested. If report_se is set to true, the list
    will contain tuples with accuracies and standard errors.

    If results are from multiple repetitions of experiments (like those
    returned by Orange.evaluation.testing.crossValidation or
    Orange.evaluation.testing.proportionTest) the
    standard error (SE) is estimated from deviation of classification
    accuracy across folds (SD), as SE = SD/sqrt(N), where N is number
    of repetitions (e.g. number of folds).

    If results are from a single repetition, we assume independency of
    instances and treat the classification accuracy as distributed according
    to binomial distribution. This can be approximated by normal distribution,
    so we report the SE of sqrt(CA*(1-CA)/N), where CA is classification
    accuracy and N is number of test instances.

    Instead of ExperimentResults, this function can be given a list of
    confusion matrices (see below). Standard errors are in this case
    estimated using the latter method.
    """
    if res.number_of_iterations == 1:
        if type(res) == ConfusionMatrix:
            # BUG FIX: this branch referenced an undefined name "nm" instead
            # of "res", and left totweight unset for the report_se case.
            div = res.TP + res.FN + res.FP + res.TN
            check_non_zero(div)
            ca = [(res.TP + res.TN) / div]
            totweight = div
        else:
            CAs = [0.0] * res.number_of_learners
            if argkw.get("unweighted", 0) or not res.weights:
                totweight = gettotsize(res)
                for tex in res.results:
                    for i, cls in enumerate(tex.classes):
                        CAs[i] += (cls == tex.actual_class)
            else:
                totweight = 0.
                for tex in res.results:
                    for i, cls in enumerate(tex.classes):
                        # "and" yields the weight on a match, False (== 0)
                        # otherwise -- same trick as the original lambdas.
                        CAs[i] += (cls == tex.actual_class) and tex.weight
                    totweight += tex.weight
            check_non_zero(totweight)
            ca = [x / totweight for x in CAs]

        if report_se:
            # BUG FIX: the standard error is sqrt(CA*(1-CA)/N), as the
            # docstring states; the old code computed CA*(1-CA)/sqrt(N).
            return [(x, math.sqrt(x * (1 - x) / totweight)) for x in ca]
        else:
            return ca

    else:
        CAsByFold = [[0.0] * res.number_of_iterations
                     for i in range(res.number_of_learners)]
        foldN = [0.0] * res.number_of_iterations

        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for lrn in range(res.number_of_learners):
                    CAsByFold[lrn][tex.iteration_number] += \
                        (tex.classes[lrn] == tex.actual_class)
                foldN[tex.iteration_number] += 1
        else:
            for tex in res.results:
                for lrn in range(res.number_of_learners):
                    CAsByFold[lrn][tex.iteration_number] += \
                        (tex.classes[lrn] == tex.actual_class) and tex.weight
                foldN[tex.iteration_number] += tex.weight

        return statistics_by_folds(CAsByFold, foldN, report_se, False)
371
372
# Obsolete, but kept for compatibility
def CA_se(res, **argkw):
    """Deprecated shorthand for CA(res, report_se=True)."""
    return CA(res, True, **argkw)
376
@deprecated_keywords({"reportSE": "report_se"})
def AP(res, report_se = False, **argkw):
    """ Computes the average probability assigned to the correct class.

    Rewritten with explicit loops: the original lambdas closed over the
    loop variable ``tex`` and relied on Python 2's eager ``map``; under a
    lazy map they would all see the last example.
    """
    if res.number_of_iterations == 1:
        APs = [0.0] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i, probs in enumerate(tex.probabilities):
                    APs[i] += probs[tex.actual_class]
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                for i, probs in enumerate(tex.probabilities):
                    APs[i] += probs[tex.actual_class] * tex.weight
                totweight += tex.weight
        check_non_zero(totweight)
        return [AP / totweight for AP in APs]

    # Multiple iterations: accumulate per-fold sums, then average.
    APsByFold = [[0.0] * res.number_of_learners
                 for i in range(res.number_of_iterations)]
    foldN = [0.0] * res.number_of_iterations
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            fold = APsByFold[tex.iteration_number]
            for i, probs in enumerate(tex.probabilities):
                fold[i] += probs[tex.actual_class]
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            fold = APsByFold[tex.iteration_number]
            for i, probs in enumerate(tex.probabilities):
                fold[i] += probs[tex.actual_class] * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(APsByFold, foldN, report_se, True)
406
407
@deprecated_keywords({"reportSE": "report_se"})
def Brier_score(res, report_se = False, **argkw):
    """ Computes the Brier score, defined as the average (over test examples)
    of sum_x(t(x)-p(x))^2, where x is a class, t(x) is 1 for the correct class
    and 0 for the others, and p(x) is the probability that the classifier
    assigned to the class x.
    """
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1.
    # Since +1 is there for each example, it adds 1 to the average; we
    # skip it inside the sum and add it just at the end of the function.
    # We take max(result, 0) to avoid -0.0000x due to rounding errors.

    if res.number_of_iterations == 1:
        MSEs = [0.0] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i, probs in enumerate(tex.probabilities):
                    MSEs[i] += sum(pi**2 for pi in probs) \
                               - 2 * probs[tex.actual_class]
            # BUG FIX: the unweighted branch summed example weights; count
            # the examples instead, like every other score in this module.
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                for i, probs in enumerate(tex.probabilities):
                    # BUG FIX: the weight previously scaled only the
                    # sum-of-squares term, not the -2*p(c) part.
                    MSEs[i] += tex.weight * (sum(pi**2 for pi in probs)
                                             - 2 * probs[tex.actual_class])
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            ## TODO: the SE is not computed here; 0 is a placeholder
            return [(max(x / totweight + 1.0, 0), 0) for x in MSEs]
        else:
            return [max(x / totweight + 1.0, 0) for x in MSEs]

    BSs = [[0.0] * res.number_of_learners
           for i in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            fold = BSs[tex.iteration_number]
            for i, probs in enumerate(tex.probabilities):
                fold[i] += sum(pi**2 for pi in probs) \
                           - 2 * probs[tex.actual_class]
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            fold = BSs[tex.iteration_number]
            for i, probs in enumerate(tex.probabilities):
                # Same weight fix as above.
                fold[i] += tex.weight * (sum(pi**2 for pi in probs)
                                         - 2 * probs[tex.actual_class])
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x + 1.0, y) for x, y in stats]
    else:
        return [x + 1.0 for x in stats]
463
def BSS(res, **argkw):
    """Return 1 - BS/2 (a Brier-skill-style score) for each learner."""
    # apply() is Python-2-only; call Brier_score directly.
    return [1 - x / 2 for x in Brier_score(res, **argkw)]
466
def IS_ex(Pc, P):
    """Information score of a single answer (Kononenko & Bratko):
    Pc is the posterior probability of the correct class, P its prior."""
    if Pc >= P:
        # The answer is at least as confident as the prior: reward.
        return -log2(P) + log2(Pc)
    # Otherwise penalize, measured on the complementary probabilities.
    return -(-log2(1 - P) + log2(1 - Pc))
473
474
@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se = False, **argkw):
    """ Computes the information score as defined by
    Kononenko and Bratko (1991).
    Argument :obj:apriori gives the apriori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:res.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations == 1:
        ISs = [0.0] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                cls = tex.actual_class
                for i in range(len(tex.probabilities)):
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                cls = tex.actual_class
                for i in range(len(tex.probabilities)):
                    ISs[i] += IS_ex(tex.probabilities[i][cls],
                                    apriori[cls]) * tex.weight
            totweight = gettotweight(res)
        if report_se:
            return [(IS / totweight, 0) for IS in ISs]
        else:
            return [IS / totweight for IS in ISs]

    ISs = [[0.0] * res.number_of_iterations
           for i in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # Compute info scores for each fold.
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            cls = tex.actual_class
            for i in range(len(tex.probabilities)):
                ISs[i][tex.iteration_number] += \
                    IS_ex(tex.probabilities[i][cls], apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            cls = tex.actual_class
            for i in range(len(tex.probabilities)):
                ISs[i][tex.iteration_number] += \
                    IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)
525
526
def Friedman(res, statistics, **argkw):
    """Friedman test over per-iteration ranks of the given statistics.

    Returns the F statistic and the corresponding chi-square probability.
    Rewritten without Python-2-only apply() and the redundant
    ``sums = sums and [...]`` construct.
    """
    sums = None
    for ri in split_by_iterations(res):
        ranks = statc.rankdata(statistics(ri, **argkw))
        if sums:
            sums = [ranks[i] + sums[i] for i in range(len(sums))]
        else:
            sums = ranks
    N = res.number_of_iterations
    k = len(sums)
    T = sum(x * x for x in sums)
    F = 12.0 / (N * k * (k + 1)) * T - 3 * N * (k + 1)
    return F, statc.chisqprob(F, k - 1)
541
542
def Wilcoxon(res, statistics, **argkw):
    """Wilcoxon signed-rank test on per-iteration values of the given
    statistics for exactly two classifiers.

    Rewritten without Python-2-only apply() and raise-statement syntax.
    """
    res1, res2 = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError("Wilcoxon compares two classifiers, no more, no less")
        res1.append(stats[0])
        res2.append(stats[1])
    return statc.wilcoxont(res1, res2)
552
def rank_difference(res, statistics, **argkw):
    """Dispatch to Wilcoxon (two classifiers) or Friedman (more than two).

    Rewritten without Python-2-only apply() and raise-statement syntax.
    """
    if not res.results:
        raise TypeError("no experiments")

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError("nothing to compare (less than two classifiers given)")
    if k == 2:
        return Wilcoxon(res, statistics, **argkw)
    else:
        return Friedman(res, statistics, **argkw)
564
class ConfusionMatrix:
    """Accumulates counts of True Positives, False Positives, False
    Negatives and True Negatives for predictions compared to the real
    class.
    """
    def __init__(self):
        # All four cells start at zero; they accumulate (possibly
        # fractional) example weights.
        self.TP = self.FN = self.FP = self.TN = 0.0

    def addTFPosNeg(self, predictedPositive, isPositive, weight = 1.0):
        """Add one example's weight to the cell selected by the
        (predicted, actual) pair."""
        if predictedPositive and isPositive:
            self.TP += weight
        elif predictedPositive:
            self.FP += weight
        elif isPositive:
            self.FN += weight
        else:
            self.TN += weight
584
585
@deprecated_keywords({"classIndex": "class_index"})
def confusion_matrices(res, class_index=-1, **argkw):
    """ This function can compute two different forms of confusion matrix:
    one in which a certain class is marked as positive and the other(s)
    negative, and another in which no class is singled out. The way to
    specify what we want is somewhat confusing due to backward
    compatibility issues.
    """
    tfpns = [ConfusionMatrix() for i in range(res.number_of_learners)]

    if class_index < 0:
        numberOfClasses = len(res.class_values)
        if class_index < -1 or numberOfClasses > 2:
            # General (non-binary) confusion matrix as nested lists,
            # cm[learner][true class][predicted class].
            cm = [[[0.0] * numberOfClasses for i in range(numberOfClasses)]
                  for l in range(res.number_of_learners)]
            if argkw.get("unweighted", 0) or not res.weights:
                for tex in res.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += 1
            else:
                # BUG FIX: this branch iterated enumerate(res.results) as if
                # the items were examples, and tex.classes without
                # enumerate, so it crashed whenever it was reached.
                for tex in res.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += tex.weight
            return cm

        elif res.baseClass >= 0:
            class_index = res.baseClass
        else:
            class_index = 1

    # NOTE(review): the four accumulation statements below were missing from
    # this copy of the file; reconstructed from the surrounding loops and
    # the ConfusionMatrix.addTFPosNeg interface -- confirm against upstream.
    cutoff = argkw.get("cutoff")
    if cutoff:
        # Predict positive when the probability of the chosen class
        # exceeds the cutoff.
        if argkw.get("unweighted", 0) or not res.weights:
            for lr in res.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(res.number_of_learners):
                    tfpns[i].addTFPosNeg(
                        lr.probabilities[i][class_index] > cutoff, isPositive)
        else:
            for lr in res.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(res.number_of_learners):
                    tfpns[i].addTFPosNeg(
                        lr.probabilities[i][class_index] > cutoff, isPositive,
                        lr.weight)
    else:
        # Predict positive when the predicted class equals the chosen class.
        if argkw.get("unweighted", 0) or not res.weights:
            for lr in res.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(res.number_of_learners):
                    tfpns[i].addTFPosNeg(
                        lr.classes[i] == class_index, isPositive)
        else:
            for lr in res.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(res.number_of_learners):
                    tfpns[i].addTFPosNeg(
                        lr.classes[i] == class_index, isPositive, lr.weight)
    return tfpns
645
646
# Obsolete alias (the function was renamed); kept for backward compatibility.
compute_confusion_matrices = confusion_matrices
649
650
@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """Chi-square test of independence on a (non-binary) confusion matrix.

    Returns (chi-square statistic, degrees of freedom, p-value), or
    (-1, -1, -1) when some expected frequency is zero.
    """
    dim = len(confusion_matrix)
    rowPriors = [sum(row) for row in confusion_matrix]
    colPriors = [sum(row[i] for row in confusion_matrix) for i in range(dim)]
    total = sum(rowPriors)
    rowPriors = [p / total for p in rowPriors]
    colPriors = [p / total for p in colPriors]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, observed in enumerate(row):
            expected = total * rowPriors[ri] * colPriors[ci]
            if not expected:
                return -1, -1, -1
            ss += (observed - expected) ** 2 / expected
    df = (dim - 1) ** 2
    return ss, df, statc.chisqprob(ss, df)
668
669
def sens(confm):
    """Return sensitivity (recall rate) over the given confusion matrix."""
    if type(confm) == list:
        return [sens(cm) for cm in confm]
    positives = confm.TP + confm.FN
    if positives < 1e-6:
        import warnings
        warnings.warn("Can't compute sensitivity: one or both classes have no instances")
        return -1
    return confm.TP / positives
682
def recall(confm):
    """Return the recall rate (an alias for sensitivity) over the given confusion matrix."""
    return sens(confm)
686
687
def spec(confm):
    """Return specificity over the given confusion matrix."""
    if type(confm) == list:
        return [spec(cm) for cm in confm]
    negatives = confm.FP + confm.TN
    if negatives < 1e-6:
        import warnings
        warnings.warn("Can't compute specificity: one or both classes have no instances")
        return -1
    return confm.TN / negatives
699
700
def PPV(confm):
    """Return positive predictive value (precision rate) over the given confusion matrix."""
    if type(confm) == list:
        return [PPV(cm) for cm in confm]
    predicted_pos = confm.TP + confm.FP
    if predicted_pos < 1e-6:
        import warnings
        warnings.warn("Can't compute PPV: one or both classes have no instances")
        return -1
    return confm.TP / predicted_pos
712
713
def precision(confm):
    """Return the precision rate (an alias for PPV) over the given confusion matrix."""
    return PPV(confm)
717
718
def NPV(confm):
    """Return negative predictive value over the given confusion matrix."""
    if type(confm) == list:
        return [NPV(cm) for cm in confm]
    predicted_neg = confm.FN + confm.TN
    if predicted_neg < 1e-6:
        import warnings
        warnings.warn("Can't compute NPV: one or both classes have no instances")
        return -1
    return confm.TN / predicted_neg
730
def F1(confm):
    """Return F1 score (harmonic mean of precision and recall) over the given confusion matrix."""
    if type(confm) == list:
        return [F1(cm) for cm in confm]
    p = precision(confm)
    r = recall(confm)
    if p + r > 0:
        return 2. * p * r / (p + r)
    import warnings
    warnings.warn("Can't compute F1: P + R is zero or not defined")
    return -1
744
def Falpha(confm, alpha=1.0):
    """Return the F-measure with parameter alpha (the alpha-weighted
    harmonic mean of precision and recall) over the given confusion matrix."""
    if type(confm) == list:
        return [Falpha(cm, alpha=alpha) for cm in confm]
    p = precision(confm)
    r = recall(confm)
    # Guard against a zero denominator, consistently with F1 above
    # (the old code raised ZeroDivisionError when alpha*p + r == 0).
    if alpha * p + r == 0:
        import warnings
        warnings.warn("Can't compute Falpha: alpha*P + R is zero or not defined")
        return -1
    return (1. + alpha) * p * r / (alpha * p + r)
753
def MCC(confm):
    '''
    Return the Matthews correlation coefficient over the given confusion matrix.

    MCC is calculated as follows:
    MCC = (TP*TN - FP*FN) / sqrt( (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN) )

    [1] Matthews, B.W., Comparison of the predicted and observed secondary
    structure of T4 phage lysozyme. Biochim. Biophys. Acta 1975, 405, 442-451

    code by Boris Gorelik
    '''
    if type(confm) == list:
        return [MCC(cm) for cm in confm]

    tp, tn = confm.TP, confm.TN
    fp, fn = confm.FP, confm.FN
    try:
        result = (tp * tn - fp * fn) / math.sqrt(
            (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    except ZeroDivisionError:
        # The denominator vanishes when there are either no true positives
        # or no true negatives, i.e. the problem effectively contains only
        # one type of class.
        import warnings
        warnings.warn("Can't compute MCC: TP or TN is zero or not defined")
        result = None
    return result
790
791
@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confm, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for nominal
    raters.

    @param confm: confusion matrix, or list of confusion matrices. To obtain
                  non-binary confusion matrix, call
                  Orange.evaluation.scoring.compute_confusion_matrices and set
                  the classIndex parameter to -2.
    @param b_is_list_of_matrices: specifies whether confm is list of matrices.
                  This function needs to operate on non-binary confusion
                  matrices, which are represented by python lists, therefore
                  one needs a way to distinguish between a single matrix and
                  list of matrices
    """

    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False) for cm in confm]
        except TypeError:
            # Nevermind the parameter, maybe this is a "conventional" binary
            # confusion matrix and bIsListOfMatrices was specified by mistake.
            # (Fixed: the original called the non-existent name
            # scottsPiSingle here, which raised a NameError.)
            return scotts_pi(confm, b_is_list_of_matrices=False)
    else:
        if isinstance(confm, ConfusionMatrix):
            # A binary matrix: expand TP/FN/FP/TN into a 2x2 numpy array.
            confm = numpy.array([[confm.TP, confm.FN],
                    [confm.FP, confm.TN]], dtype=float)
        else:
            confm = numpy.array(confm, dtype=float)

        marginalSumOfRows = numpy.sum(confm, axis=0)
        marginalSumOfColumns = numpy.sum(confm, axis=1)
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                            (2.0 * numpy.sum(confm, axis=None))
        # In the eq. above, 2.0 is what the Wikipedia page calls
        # the number of annotators. Here we have two annotators:
        # the observed (true) labels (annotations) and the predicted by
        # the learners.

        # Expected agreement (squared joint proportions) vs the observed
        # agreement (the diagonal of the matrix).
        prExpected = numpy.sum(jointProportion ** 2, axis=None)
        prActual = numpy.sum(numpy.diag(confm), axis=None) / numpy.sum(confm, axis=None)

        ret = (prActual - prExpected) / (1.0 - prExpected)
        return ret
839
@deprecated_keywords({"classIndex": "class_index"})
def AUCWilcoxon(res, class_index=-1, **argkw):
    """ Computes the area under ROC (AUC) and its standard error using
    Wilcoxon's approach proposed by Hanley and McNeal (1982). If
    :obj:classIndex is not specified, the first class is used as
    "the positive" and others are negative. The result is a list of
    tuples (aROC, standard error).
    """
    import corn
    # Use instance weights only if the results carry them and the caller did
    # not explicitly ask for an unweighted score.
    useweights = res.weights and not argkw.get("unweighted", 0)
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results=[]

    # tots[1]/tots[0] are used below as the total (possibly weighted) counts
    # of positive and negative examples.
    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:  # presumably one list per learner -- see corn.computeROCCumulative
        # W accumulates the Wilcoxon statistic (probability that a random
        # positive example is ranked above a random negative one); Q1 and Q2
        # are the auxiliary sums of the Hanley-McNeil variance estimate.
        highPos, lowNeg = totPos, 0.0
        W, Q1, Q2 = 0.0, 0.0, 0.0
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos/2.)
            Q2 += thisPos * (lowNeg**2  + lowNeg*thisNeg  + thisNeg**2 /3.)
            Q1 += thisNeg * (highPos**2 + highPos*thisPos + thisPos**2 /3.)

            lowNeg += thisNeg

        # Normalize the raw pair counts into probabilities.
        W  /= (totPos*totNeg)
        Q1 /= (totNeg*totPos**2)
        Q2 /= (totPos*totNeg**2)

        # Hanley-McNeil standard error of the AUC estimate.
        SE = math.sqrt( (W*(1-W) + (totPos-1)*(Q1-W**2) + (totNeg-1)*(Q2-W**2)) / (totPos*totNeg) )
        results.append((W, SE))
    return results
875
AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete
877
878
@deprecated_keywords({"classIndex": "class_index"})
def compare_2_AUCs(res, lrn1, lrn2, class_index=-1, **argkw):
    """Compare the ROC curves / AUCs of the two learners given by indices
    lrn1 and lrn2; the computation is delegated to corn.compare2ROCs.
    Weights are used unless absent or disabled with unweighted=1."""
    import corn
    return corn.compare2ROCs(res, lrn1, lrn2, class_index, res.weights and not argkw.get("unweighted"))
883
compare_2_AROCs = compare_2_AUCs # for backward compatibility, compare_2_AROCs is obsolete
885
886
@deprecated_keywords({"classIndex": "class_index"})
def compute_ROC(res, class_index=-1):
    """ Computes a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity. Returns one curve per learner.
    """
    import corn
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    totPos, totNeg = tots[1], tots[0]

    for plist in problists:
        # Start at the top-right corner: everything classified as positive.
        curve=[(1., 1.)]
        TP, TN = totPos, 0.0
        FN, FP = 0., totNeg
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            # thisPos go from TP to FN
            TP -= thisPos
            FN += thisPos
            # thisNeg go from FP to TN
            TN += thisNeg
            FP -= thisNeg

            # Append the point (1 - specificity, sensitivity) after moving
            # the examples at this score across the threshold.
            sens = TP/(TP+FN)
            spec = TN/(FP+TN)
            curve.append((1-spec, sens))
        results.append(curve)

    return results
917
918## TC's implementation of algorithms, taken from:
919## T Fawcett: ROC Graphs: Notes and Practical Considerations for Data Mining Researchers, submitted to KDD Journal.
def ROC_slope(point1, point2):
    """Return the slope of the segment between two ROC points, each given as
    an (x, y, fscore) triple; a vertical segment yields the huge constant
    1e300 instead of a division by zero.

    (The original signature used Python-2-only tuple parameter unpacking,
    removed by PEP 3113; callers pass the same positional tuples as before.)
    """
    P1x, P1y, _ = point1
    P2x, P2y, _ = point2
    if P1x == P2x:
        # Vertical segment: stand in a very large finite slope.
        return 1e300
    return (P1y - P2y) / (P1x - P2x)
924
925
926@deprecated_keywords({"keepConcavities": "keep_concavities"})
928    if keepConcavities:
929        R.append(P)
930    else:
931        while (1):
932            if len(R) < 2:
933                R.append(P)
934                return R
935            else:
936                T = R.pop()
937                T2 = R[-1]
938                if ROC_slope(T2, T) > ROC_slope(T, P):
939                    R.append(T)
940                    R.append(P)
941                    return R
942    return R
943
944
@deprecated_keywords({"classIndex": "class_index",
                      "keepConcavities": "keep_concavities"})
def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
    """Compute one ROC curve per learner following Fawcett's algorithm; when
    keep_concavities is false, ROC_add_point convexifies each curve as it is
    built."""
    import corn
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]

    for plist in problists:
        ## corn gives an increasing by scores list, we need a decreasing by scores
        plist.reverse()
        TP = 0.0
        FP = 0.0
        curve=[]
        fPrev = 10e300 # "infinity" score at 0.0, 0.0
        for prob in plist:
            f = prob[0]
            # Emit a point only when the score changes, so all examples that
            # share a score collapse into a single ROC point.
            if f <> fPrev:
                if P:
                    tpr = TP/P
                else:
                    tpr = 0.0
                if N:
                    fpr = FP/N
                else:
                    fpr = 0.0
                curve = ROC_add_point((fpr, tpr, fPrev), curve, keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        if P:
            tpr = TP/P
        else:
            tpr = 0.0
        if N:
            fpr = FP/N
        else:
            fpr = 0.0
        # NOTE(review): if plist is empty, 'f' is unbound at this point and
        # the next line raises a NameError.
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly
        results.append(curve)

    return results
989
990## returns a list of points at the intersection of the tangential iso-performance line and the given ROC curve
991## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    """Return the ROC points closest to the tangential iso-performance line
    determined by the misclassification costs FPcost/FNcost and the positive
    class prior pval; all points within 0.0001 of the minimal distance are
    returned."""
    slope = (FPcost * (1.0 - pval)) / (FNcost * pval)

    ## anchor the iso-performance line in point (0.0, 1.0)
    x0, y0 = 0.0, 1.0
    x1, y1 = 1.0, 1.0 + slope
    length = math.sqrt((x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0))
    offset = x0 * y1 - x1 * y0

    ## find the curve point(s) with minimal distance to the line
    best = []
    best_distance = None
    for x, y, fscore in curve:
        distance = abs(((y0 - y1) * x + (x1 - x0) * y + offset) / length)
        if best_distance is None or distance < best_distance:
            best_distance = distance
            best = [(x, y, fscore)]
        elif distance - best_distance <= 0.0001:  ## close enough
            best.append((x, y, fscore))
    return best
1015
def frange(start, end=None, inc=None):
    """A range function that accepts float increments; the end value is
    always included as the last element of the returned list."""
    if end is None:
        # Single-argument form: frange(stop) counts from 0.0.
        start, end = 0.0, start + 0.0
    if not inc:
        # Missing or zero increment defaults to 1.0.
        inc = 1.0

    values = [start]
    while True:
        # Compute each value from the start to avoid accumulating error.
        candidate = start + len(values) * inc
        if (inc > 0 and candidate >= end) or (inc < 0 and candidate <= end):
            # Clamp the final element exactly to the end point.
            values.append(end)
            break
        values.append(candidate)

    return values
1038
1039## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1040## where ROCcurvesX is a set of ROC curves,
1041## where a (one) ROC curve is a set of (FP, TP) points
1042##
1043## for each (sub)set of input ROC curves
1044## returns the average ROC curve and an array of (vertical) standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_vertical_average_ROC(roc_curves, samples = 10):
    """Vertically average each set of ROC curves in roc_curves: the TP rate
    is sampled at `samples` evenly spaced FP rates in [0, 1]. Returns a pair
    (average, stdev) holding, per input set, the averaged curve and the
    vertical standard deviations."""
    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
        # Linearly interpolate the TP rate between two ROC points; X must
        # lie strictly between P1x and P2x.
        if (P1x == P2x) or ((X > P1x) and (X > P2x)) or ((X < P1x) and (X < P2x)):
            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
        dx = float(P2x) - float(P1x)
        dy = float(P2y) - float(P1y)
        m = dy/dx
        return P1y + m*(X - P1x)

    def TP_FOR_FP(FPsample, ROC, npts):
        # TP rate of curve ROC at the given FP rate, interpolating between
        # the two neighbouring points of the curve when needed.
        i = 0
        while i < npts - 1:
            (fp, _, _) = ROC[i + 1]
            if (fp <= FPsample):
                i += 1
            else:
                break
        (fp, tp, _) = ROC[i]
        if fp == FPsample:
            return tp
        elif fp < FPsample and i + 1 < len(ROC):
            return INTERPOLATE(ROC[i], ROC[i+1], FPsample)
        elif fp < FPsample and i + 1 == len(ROC): # return the last
            return ROC[i][1]
        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
        #return 0.0

    average = []
    stdev = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        TPavg = []
        TPstd = []
        for FPsample in frange(0.0, 1.0, 1.0/samples):
            TPsum = []
            for i in range(nrocs):
                TPsum.append( TP_FOR_FP(FPsample, ROCS[i], npts[i]) ) ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
            TPavg.append( (FPsample, statc.mean(TPsum)) )
            # A single curve gives no spread; report zero deviation.
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstd.append( stdv )

        average.append(TPavg)
        stdev.append(TPstd)

    return (average, stdev)
1098
1099## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1100## where ROCcurvesX is a set of ROC curves,
1101## where a (one) ROC curve is a set of (FP, TP) points
1102##
1103## for each (sub)set of input ROC curves
1104## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_threshold_average_ROC(roc_curves, samples = 10):
    """Threshold-average each set of ROC curves in roc_curves: the curves
    are sampled at `samples` score thresholds taken from the pooled, sorted
    scores of all curves in the set. Returns (average, stdevV, stdevH) with
    the averaged curves and the vertical and horizontal standard
    deviations."""
    def POINT_AT_THRESH(ROC, npts, thresh):
        # First point of the curve whose score is <= thresh (or the last
        # point when no such point exists).
        i = 0
        while i < npts - 1:
            (px, py, pfscore) = ROC[i]
            if (pfscore > thresh):
                i += 1
            else:
                break
        return ROC[i]

    average = []
    stdevV = []
    stdevH = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        # Pool the scores of all curves (duplicates included) and sort them
        # in decreasing order; these are the candidate thresholds.
        T = []
        for c in ROCS:
            for (px, py, pfscore) in c:
##                try:
##                    T.index(pfscore)
##                except:
                T.append(pfscore)
        T.sort()
        T.reverse() ## ugly

        TPavg = []
        TPstdV = []
        TPstdH = []
        for tidx in frange(0, (len(T) - 1.0), float(len(T))/samples):
            FPsum = []
            TPsum = []
            for i in range(nrocs):
                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
                FPsum.append(fp)
                TPsum.append(tp)
            TPavg.append( (statc.mean(FPsum), statc.mean(TPsum)) )
            ## vertical standard deviation
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstdV.append( stdv )
            ## horizontal standard deviation
            if len(FPsum) > 1:
                stdh = statc.std(FPsum)
            else:
                stdh = 0.0
            TPstdH.append( stdh )

        average.append(TPavg)
        stdevV.append(TPstdV)
        stdevH.append(TPstdH)

    return (average, stdevV, stdevH)
1165
1166## Calibration Curve
1167## returns an array of (curve, yesClassPredictions, noClassPredictions) elements, where:
1168##  - curve is an array of points (x, y) on the calibration curve
1169##  - yesClassRugPoints is an array of (x, 1) points
1170##  - noClassRugPoints is an array of (x, 0) points
1171@deprecated_keywords({"classIndex": "class_index"})
1172def compute_calibration_curve(res, class_index=-1):
1173    import corn
1174    ## merge multiple iterations into one
1176    for te in res.results:
1177        mres.results.append( te )
1178
1179    problists, tots = corn.computeROCCumulative(mres, class_index)
1180
1181    results = []
1182    P, N = tots[1], tots[0]
1183
1184    bins = 10 ## divide interval between 0.0 and 1.0 into N bins
1185
1186    for plist in problists:
1187        yesClassRugPoints = []
1188        noClassRugPoints = []
1189
1190        yesBinsVals = [0] * bins
1191        noBinsVals = [0] * bins
1192        for (f, (thisNeg, thisPos)) in plist:
1193            yesClassRugPoints.append( (f, thisPos) ) #1.0
1194            noClassRugPoints.append( (f, thisNeg) ) #1.0
1195
1196            index = int(f * bins )
1197            index = min(index, bins - 1) ## just in case for value 1.0
1198            yesBinsVals[index] += thisPos
1199            noBinsVals[index] += thisNeg
1200
1201        curve = []
1202        for cn in range(bins):
1203            f = float(cn * 1.0 / bins) + (1.0 / 2.0 / bins)
1204            yesVal = yesBinsVals[cn]
1205            noVal = noBinsVals[cn]
1206            allVal = yesVal + noVal
1207            if allVal == 0.0: continue
1208            y = float(yesVal)/float(allVal)
1209            curve.append( (f,  y) )
1210
1211        ## smooth the curve
1212        maxnPoints = 100
1213        if len(curve) >= 3:
1214#            loessCurve = statc.loess(curve, -3, 0.6)
1215            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
1216        else:
1217            loessCurve = curve
1218        clen = len(loessCurve)
1219        if clen > maxnPoints:
1220            df = clen / maxnPoints
1221            if df < 1: df = 1
1222            curve = [loessCurve[i]  for i in range(0, clen, df)]
1223        else:
1224            curve = loessCurve
1225        curve = [(c)[:2] for c in curve] ## remove the third value (variance of epsilon?) that suddenly appeared in the output of the statc.loess function
1226        results.append((curve, yesClassRugPoints, noClassRugPoints))
1227
1228    return results
1229
1230
1231## Lift Curve
1232## returns an array of curve elements, where:
1233##  - curve is an array of points ((TP+FP)/(P + N), TP/P, (th, FP/N)) on the Lift Curve
1234@deprecated_keywords({"classIndex": "class_index"})
1235def compute_lift_curve(res, class_index=-1):
1236    import corn
1237    ## merge multiple iterations into one
1239    for te in res.results:
1240        mres.results.append( te )
1241
1242    problists, tots = corn.computeROCCumulative(mres, class_index)
1243
1244    results = []
1245    P, N = tots[1], tots[0]
1246    for plist in problists:
1247        ## corn gives an increasing by scores list, we need a decreasing by scores
1248        plist.reverse()
1249        TP = 0.0
1250        FP = 0.0
1251        curve = [(0.0, 0.0, (10e300, 0.0))]
1252        for (f, (thisNeg, thisPos)) in plist:
1253            TP += thisPos
1254            FP += thisNeg
1255            curve.append( ((TP+FP)/(P + N), TP, (f, FP/(N or 1))) )
1256        results.append(curve)
1257
1258    return P, N, results
1259###
1260
class CDT:
    """Stores the number of concordant (C), discordant (D) and tied (T)
    pairs of predictions; used when computing AUC."""
    def __init__(self, C=0.0, D=0.0, T=0.0):
        # Counts may be fractional when the instances are weighted.
        self.C = C
        self.D = D
        self.T = T
1265
def is_CDT_empty(cdt):
    """Return True when the concordant, discordant and tied pair counts are
    all (numerically) zero."""
    total = cdt.C + cdt.D + cdt.T
    return total < 1e-20
1268
1269
@deprecated_keywords({"classIndex": "class_index"})
def compute_CDT(res, class_index=-1, **argkw):
    """Obsolete, don't use"""
    import corn
    # Default to the recorded base class if there is one, else to class 1.
    if class_index<0:
        if res.baseClass>=0:
            class_index = res.baseClass
        else:
            class_index = 1

    useweights = res.weights and not argkw.get("unweighted", 0)
    weightByClasses = argkw.get("weightByClasses", True)  # NOTE(review): read but never used below

    if (res.number_of_iterations>1):
        # Sum the concordant/discordant/tied counts over all iterations.
        CDTs = [CDT() for i in range(res.number_of_learners)]
        iterationExperiments = split_by_iterations(res)
        for exp in iterationExperiments:
            expCDTs = corn.computeCDT(exp, class_index, useweights)
            for i in range(len(CDTs)):
                CDTs[i].C += expCDTs[i].C
                CDTs[i].D += expCDTs[i].D
                CDTs[i].T += expCDTs[i].T
        for i in range(res.number_of_learners):
            # NOTE(review): this always tests CDTs[0]; CDTs[i] was probably
            # intended -- confirm before relying on the per-learner fallback.
            if is_CDT_empty(CDTs[0]):
                return corn.computeCDT(res, class_index, useweights)

        return CDTs
    else:
        return corn.computeCDT(res, class_index, useweights)
1299
1300## THIS FUNCTION IS OBSOLETE AND ITS AVERAGING OVER FOLDS IS QUESTIONABLE
1301## DON'T USE IT
1302def ROCs_from_CDT(cdt, **argkw):
1303    """Obsolete, don't use"""
1304    if type(cdt) == list:
1305        return [ROCs_from_CDT(c) for c in cdt]
1306
1307    C, D, T = cdt.C, cdt.D, cdt.T
1308    N = C+D+T
1309    if N < 1e-6:
1310        import warnings
1311        warnings.warn("Can't compute AUC: one or both classes have no instances")
1312        return (-1,)*8
1313    if N < 2:
1314        import warnings
1315        warnings.warn("Can't compute AUC: one or both classes have too few examples")
1316
1317    som = (C-D)/N
1318    c = 0.5*(1+som)
1319
1320    if (C+D):
1321        res = (C/N*100, D/N*100, T/N*100, N, som, (C-D)/(C+D), (C-D)/(N*(N-1)/2), 0.5*(1+som))
1322    else:
1323        res = (C/N*100, D/N*100, T/N*100, N, som, -1.0, (C-D)/(N*(N-1)/2), 0.5*(1+som))
1324
1325    if argkw.get("print"):
1326        print "Concordant  = %5.1f       Somers' D = %1.3f" % (res[0], res[4])
1327        print "Discordant  = %5.1f       Gamma     = %1.3f" % (res[1], res[5]>0 and res[5] or "N/A")
1328        print "Tied        = %5.1f       Tau-a     = %1.3f" % (res[2], res[6])
1329        print " %6d pairs             c         = %1.3f"    % (res[3], res[7])
1330
1331    return res
1332
AROC_from_CDT = ROCs_from_CDT  # for backward compatibility, AROC_from_CDT is obsolete
1334
1335
1336
1337# computes AUC using a specified 'cdtComputer' function
1338# It tries to compute AUCs from 'ite' (examples from a single iteration) and,
1339# if C+D+T=0, from 'all_ite' (entire test set). In the former case, the AUCs
1340# are divided by 'divideByIfIte'. Additional flag is returned which is True in
1341# the former case, or False in the latter.
@deprecated_keywords({"divideByIfIte": "divide_by_if_ite",
                      "computerArgs": "computer_args"})
def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
    """Compute AUCs with the given CDT-computing function: first from 'ite'
    (examples of a single iteration) and, if that yields no pairs at all,
    from 'all_ite' (the entire test set). In the former case every AUC is
    divided by divide_by_if_ite and the returned flag is True; in the latter
    it is False. Returns (False, False) when nothing can be computed."""
    def auc_from(cdt):
        # AUC from pair counts: concordant pairs plus half of the ties.
        return (cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T)

    cdts = cdtComputer(*((ite, ) + computer_args))
    if not is_CDT_empty(cdts[0]):
        return [auc_from(cdt) / divide_by_if_ite for cdt in cdts], True

    if all_ite:
        cdts = cdtComputer(*((all_ite, ) + computer_args))
        if not is_CDT_empty(cdts[0]):
            return [auc_from(cdt) for cdt in cdts], False

    return False, False
1355
1356
# computes AUC between classes i and j as if there were no other classes
@deprecated_keywords({"classIndex1": "class_index1",
                      "classIndex2": "class_index2",
                      "useWeights": "use_weights",
                      "divideByIfIte": "divide_by_if_ite"})
def AUC_ij(ite, class_index1, class_index2, use_weights = True, all_ite = None, divide_by_if_ite = 1.0):
    """Compute the AUC between classes class_index1 and class_index2,
    ignoring instances of all other classes; a thin wrapper over AUC_x with
    corn.computeCDTPair as the pair-counting routine."""
    import corn
    return AUC_x(corn.computeCDTPair, ite, all_ite, divide_by_if_ite, (class_index1, class_index2, use_weights))
1365
1366
# computes AUC between class i and the other classes (treating them as the same class)
@deprecated_keywords({"classIndex": "class_index",
                      "useWeights": "use_weights",
                      "divideByIfIte": "divide_by_if_ite"})
def AUC_i(ite, class_index, use_weights = True, all_ite = None, divide_by_if_ite = 1.0):
    """Compute the AUC of class class_index against all remaining classes
    merged into one; a thin wrapper over AUC_x with corn.computeCDT as the
    pair-counting routine."""
    import corn
    return AUC_x(corn.computeCDT, ite, all_ite, divide_by_if_ite, (class_index, use_weights))
1374
1375
1376# computes the average AUC over folds using a "AUCcomputer" (AUC_i or AUC_ij)
1377# it returns the sum of what is returned by the computer, unless at a certain
1378# fold the computer has to resort to computing over all folds or even this failed;
1379# in these cases the result is returned immediately
1380
@deprecated_keywords({"AUCcomputer": "auc_computer",
                      "computerArgs": "computer_args"})
def AUC_iterations(auc_computer, iterations, computer_args):
    """Sum, per learner, the AUCs returned by auc_computer (AUC_i or AUC_ij)
    over the given iterations (folds).

    If for some fold the computer had to fall back to the entire test set,
    that result is returned immediately; if it could compute nothing at all,
    None is returned.
    """
    subsum_aucs = [0.] * iterations[0].number_of_learners
    for ite in iterations:
        aucs, foldsUsed = auc_computer(*((ite, ) + computer_args))
        if not aucs:
            return None
        if not foldsUsed:
            return aucs
        # Element-wise accumulation over learners. (The original used
        # map(add, ...) with a bare 'add' that does not appear to be imported
        # in the module header, which raises a NameError.)
        subsum_aucs = [s + a for s, a in zip(subsum_aucs, aucs)]
    return subsum_aucs
1393
1394
# AUC for binary classification problems
@deprecated_keywords({"useWeights": "use_weights"})
def AUC_binary(res, use_weights = True):
    """Compute AUC for a two-class problem: averaged over iterations when
    there are several, otherwise directly from the single result set."""
    if res.number_of_iterations > 1:
        return AUC_iterations(AUC_i, split_by_iterations(res), (-1, use_weights, res, res.number_of_iterations))
    else:
        return AUC_i(res, -1, use_weights)[0]
1402
# AUC for multiclass problems
@deprecated_keywords({"useWeights": "use_weights"})
def AUC_multi(res, use_weights = True, method = 0):
    """Compute multi-class AUC as an average over all class pairs (method 0
    or 1) or over all one-against-all problems (method 2 or 3); methods 0
    and 2 weight the average by the classes' prior probabilities.

    Returns a list with one averaged AUC per learner.
    """
    numberOfClasses = len(res.class_values)

    # With several iterations (folds) compute per fold, keeping the merged
    # results as a fallback for folds that contain only one class.
    if res.number_of_iterations > 1:
        iterations = split_by_iterations(res)
        all_ite = res
    else:
        iterations = [res]
        all_ite = None

    sum_aucs = [0.] * res.number_of_learners
    usefulClassPairs = 0.

    # Class prior probabilities are needed only by the weighted methods.
    if method in [0, 2]:
        prob = class_probabilities_from_res(res)

    if method <= 1:
        # Average the AUC over all unordered pairs of classes.
        for classIndex1 in range(numberOfClasses):
            for classIndex2 in range(classIndex1):
                subsum_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, use_weights, all_ite, res.number_of_iterations))
                if subsum_aucs:
                    if method == 0:
                        p_ij = prob[classIndex1] * prob[classIndex2]
                        subsum_aucs = [x * p_ij  for x in subsum_aucs]
                        usefulClassPairs += p_ij
                    else:
                        usefulClassPairs += 1
                    # Element-wise sum (the original map(add, ...) relied on
                    # a bare 'add' that is not imported in the module header).
                    sum_aucs = [s + x for s, x in zip(sum_aucs, subsum_aucs)]
    else:
        # Average the AUC of each class against all the others.
        for classIndex in range(numberOfClasses):
            subsum_aucs = AUC_iterations(AUC_i, iterations, (classIndex, use_weights, all_ite, res.number_of_iterations))
            if subsum_aucs:
                # Fixed: this branch handles methods 2 and 3, so the weighted
                # case is method == 2 (the original tested the unreachable
                # method == 0, making WeightedOneAgainstAll unweighted).
                if method == 2:
                    p_i = prob[classIndex]
                    subsum_aucs = [x * p_i  for x in subsum_aucs]
                    usefulClassPairs += p_i
                else:
                    usefulClassPairs += 1
                sum_aucs = [s + x for s, x in zip(sum_aucs, subsum_aucs)]

    if usefulClassPairs > 0:
        sum_aucs = [x/usefulClassPairs for x in sum_aucs]

    return sum_aucs
1450
# Placeholder definition: it exists only so that the real AUC below can use
# AUC.ByWeightedPairs as the default value of its 'method' argument; it is
# replaced by the actual implementation immediately afterwards.
def AUC():
    pass

AUC.ByWeightedPairs = 0

# Computes AUC, possibly for multiple classes (the averaging method can be specified)
# Results over folds are averages; if some folds have examples from one class only, the folds are merged
@deprecated_keywords({"useWeights": "use_weights"})
def AUC(res, method = AUC.ByWeightedPairs, use_weights = True):
    """ Returns the area under ROC curve (AUC) given a set of experimental
    results. For multivalued class problems, it will compute some sort of
    average, as specified by the argument method.
    """
    if len(res.class_values) < 2:
        raise ValueError("Cannot compute AUC on a single-class problem")
    elif len(res.class_values) == 2:
        # Two-class problems need no averaging over class pairs.
        return AUC_binary(res, use_weights)
    else:
        return AUC_multi(res, use_weights, method)

# Symbolic names for the 'method' argument of AUC.
AUC.ByWeightedPairs = 0
AUC.ByPairs = 1
AUC.WeightedOneAgainstAll = 2
AUC.OneAgainstAll = 3
1475
1476
# Computes AUC; in multivalued class problem, AUC is computed as one against all
# Results over folds are averages; if some folds have examples from one class only, the folds are merged
@deprecated_keywords({"classIndex": "class_index",
                      "useWeights": "use_weights"})
def AUC_single(res, class_index = -1, use_weights = True):
    """ Computes AUC where the class given classIndex is singled out, and
    all other classes are treated as a single class. To find how good our
    classifiers are in distinguishing between vans and other vehicle, call
    the function like this::

        Orange.evaluation.scoring.AUC_single(resVeh, \
classIndex = vehicle.domain.classVar.values.index("van"))
    """
    # Default to the recorded base class if there is one, else to class 1.
    if class_index<0:
        if res.baseClass>=0:
            class_index = res.baseClass
        else:
            class_index = 1

    if res.number_of_iterations > 1:
        return AUC_iterations(AUC_i, split_by_iterations(res), (class_index, use_weights, res, res.number_of_iterations))
    else:
        return AUC_i( res, class_index, use_weights)[0]
1500
# Computes AUC for a pair of classes (as if there were no other classes)
# Results over folds are averages; if some folds have examples from one class only, the folds are merged
@deprecated_keywords({"classIndex1": "class_index1",
                      "classIndex2": "class_index2",
                      "useWeights": "use_weights"})
def AUC_pair(res, class_index1, class_index2, use_weights = True):
    """ Computes AUC between a pair of instances, ignoring instances from all
    other classes.
    """
    if res.number_of_iterations > 1:
        return AUC_iterations(AUC_ij, split_by_iterations(res), (class_index1, class_index2, use_weights, res, res.number_of_iterations))
    else:
        # NOTE(review): unlike AUC_single, this branch returns the full
        # (aucs, flag) pair from AUC_ij instead of indexing [0] -- confirm
        # whether callers rely on this.
        return AUC_ij(res, class_index1, class_index2, use_weights)
1514
1515
# AUC for multiclass problems
@deprecated_keywords({"useWeights": "use_weights"})
def AUC_matrix(res, use_weights = True):
    """ Computes a (lower diagonal) matrix with AUCs for all pairs of classes.
    If there are empty classes, the corresponding elements in the matrix
    are -1. Remember the beautiful(?) code for printing out the confusion
    matrix? Here it strikes again::

        classes = vehicle.domain.classVar.values
        AUCmatrix = Orange.evaluation.scoring.AUC_matrix(resVeh)[0]
        print "\t"+"\t".join(classes[:-1])
        for className, AUCrow in zip(classes[1:], AUCmatrix[1:]):
            print ("%s" + ("\t%5.3f" * len(AUCrow))) % ((className, ) + tuple(AUCrow))
    """
    numberOfClasses = len(res.class_values)
    number_of_learners = res.number_of_learners

    if res.number_of_iterations > 1:
        iterations, all_ite = split_by_iterations(res), res
    else:
        iterations, all_ite = [res], None

    # aucs[learner][class1] collects the AUCs of class1 against every class
    # with a lower index (a ragged, lower-triangular matrix).
    aucs = [[[] for i in range(numberOfClasses)] for i in range(number_of_learners)]
    prob = class_probabilities_from_res(res)  # NOTE(review): computed but never used below

    for classIndex1 in range(numberOfClasses):
        for classIndex2 in range(classIndex1):
            pair_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, use_weights, all_ite, res.number_of_iterations))
            if pair_aucs:
                for lrn in range(number_of_learners):
                    aucs[lrn][classIndex1].append(pair_aucs[lrn])
            else:
                # Pair could not be scored (e.g. an empty class): mark -1.
                for lrn in range(number_of_learners):
                    aucs[lrn][classIndex1].append(-1)
    return aucs
1551
1552
def McNemar(res, **argkw):
    """ Computes a triangular matrix with McNemar statistics for each pair of
    classifiers. The statistics is distributed by chi-square distribution with
    one degree of freedom; critical value for 5% significance is around 3.84.
    """
    nLearners = res.number_of_learners
    # During counting, mcm[l1][l2] (l1 < l2) is the (weighted) number of
    # cases where learner l1 is correct and l2 is not; mcm[l2][l1] counts
    # the opposite disagreements.
    mcm = [[0.0] * nLearners for _ in range(nLearners)]

    # Single counting pass over the results; instance weights are applied
    # unless the results are unweighted or the caller asked for an
    # unweighted score. (The original had two near-identical loops.)
    use_weights = res.weights and not argkw.get("unweighted")
    for tex in res.results:
        actual = tex.actual_class
        classes = tex.classes
        w = tex.weight if use_weights else 1.0
        for l1 in range(nLearners):
            for l2 in range(l1, nLearners):
                if classes[l1] == actual:
                    if classes[l2] != actual:
                        mcm[l1][l2] += w
                elif classes[l2] == actual:
                    mcm[l2][l1] += w

    # McNemar's statistic with continuity correction, stored in the lower
    # triangle; 0 when the pair of learners never disagrees.
    for l1 in range(nLearners):
        for l2 in range(l1, nLearners):
            su = mcm[l1][l2] + mcm[l2][l1]
            if su:
                mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1)**2 / su
            else:
                mcm[l2][l1] = 0

    # Keep only the lower triangle of each row.
    for l1 in range(nLearners):
        mcm[l1] = mcm[l1][:l1]

    return mcm
1598
1599
def McNemar_of_two(res, lrn1, lrn2, **argkw):
    """ McNemar_of_two computes a McNemar statistics for a pair of classifiers,
    specified by indices lrn1 and lrn2.

    :param res: ExperimentResults with the classifiers' predictions.
    :param lrn1: index of the first classifier.
    :param lrn2: index of the second classifier.
    :param argkw: if "unweighted" is set (or the results carry no weights),
        each example counts 1.0 instead of its weight.
    """
    # tf: weight of examples lrn1 classifies correctly and lrn2 does not;
    # ft: the opposite disagreements.
    tf = ft = 0.0
    # Fixes two defects in the original: it referenced an undefined `argkw`
    # (NameError on every call) and had the weighted/unweighted increments
    # swapped (the unweighted branch added i.weight and vice versa).
    unweighted = not res.weights or argkw.get("unweighted")
    for i in res.results:
        actual = i.actual_class
        w = 1.0 if unweighted else i.weight
        if i.classes[lrn1] == actual:
            if i.classes[lrn2] != actual:
                tf += w
        elif i.classes[lrn2] == actual:
            ft += w

    su = tf + ft
    # Continuity-corrected chi-square statistic; 0 if they never disagree.
    if su:
        return (abs(tf-ft)-1)**2 / su
    else:
        return 0
1627
1628
def Friedman(res, stat=CA):
    """ Compares classifiers by the Friedman test, treating folds as
        different examples. Returns F, p and the average ranks.
    """
    per_fold = [stat(r) for r in split_by_iterations(res)]

    n_folds = len(per_fold)
    k = len(per_fold[0])
    sums = [0.0] * k
    for scores in per_fold:
        # rankdata gives rank 1 to the smallest score; flip so that the
        # best (largest) score gets rank 1.
        ranks = [k - r + 1 for r in statc.rankdata(scores)]
        if stat == Brier_score:  # reverse ranks for Brier_score (lower better)
            ranks = [k + 1 - r for r in ranks]
        sums = [s + r for s, r in zip(sums, ranks)]

    T = sum(x * x for x in sums)
    sums = [x / n_folds for x in sums]

    F = 12.0 / (n_folds * k * (k + 1)) * T - 3 * n_folds * (k + 1)

    return F, statc.chisqprob(F, k - 1), sums
1651
1652
def Wilcoxon_pairs(res, avgranks, stat=CA):
    """ Returns a triangular matrix, where element[i][j] stores significance of difference
        between i-th and j-th classifier, as computed by Wilcoxon test. The element is positive
        if i-th is better than j-th, negative if it is worse, and 1 if they are equal.
        Arguments to function are ExperimentResults, average ranks (as returned by Friedman)
        and, optionally, a statistics; greater values should mean better results.
    """
    scores = [stat(r) for r in split_by_iterations(res)]
    k = len(scores[0])

    bt = []
    for m1 in range(k):
        row = []
        for m2 in range(m1 + 1, k):
            t, p = statc.wilcoxont([s[m1] for s in scores],
                                   [s[m2] for s in scores])
            if avgranks[m1] == avgranks[m2]:
                row.append(1)
            else:
                # Sign encodes which classifier ranked better on average.
                row.append(p if avgranks[m1] < avgranks[m2] else -p)
        bt.append(row)
    return bt
1677
1678
@deprecated_keywords({"allResults": "all_results",
                      "noConfidence": "no_confidence"})
def plot_learning_curve_learners(file, all_results, proportions, learners, no_confidence=0):
    """Label each learner by its name and plot the learning curves."""
    names = [Orange.misc.getobjectname(l, "Learner %i" % i)
             for i, l in enumerate(learners)]
    plot_learning_curve(file, all_results, proportions, names, no_confidence)
1683
1684
@deprecated_keywords({"allResults": "all_results",
                      "noConfidence": "no_confidence"})
def plot_learning_curve(file, all_results, proportions, legend, no_confidence=0):
    """Write a gnuplot script plotting one learning curve per entry of
    `legend`.

    :param file: output file name or an already opened file-like object.
    :param all_results: list of ExperimentResults, one per proportion.
    :param proportions: proportions of the learning data used.
    :param legend: curve titles, one per learner.
    :param no_confidence: unless set, 1.96*stderr error bars are emitted.
    """
    # Accept a file name or an open file; replaces the dated
    # type(file)==types.StringType check with isinstance.
    fopened = 0
    if isinstance(file, str):
        file = open(file, "wt")
        fopened = 1

    file.write("set yrange [0:1]\n")
    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    # CAs[p][i] is (mean, stderr) of accuracy of learner i at proportion p.
    CAs = [CA_dev(x) for x in all_results]

    file.write("plot \\\n")
    for i in range(len(legend)-1):
        if not no_confidence:
            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i+1))
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    if not no_confidence:
        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))

    # Emit the inline data blocks in the same order as the plot clauses;
    # each block is terminated by a line containing only "e".
    for i in range(len(legend)):
        if not no_confidence:
            for p in range(len(proportions)):
                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96*CAs[p][i][1]))
            file.write("e\n\n")

        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
        file.write("e\n\n")

    if fopened:
        file.close()
1720
1721
def print_single_ROC_curve_coordinates(file, curve):
    """Write the coordinates of a single ROC curve, one "x<TAB>y" pair per
    line.

    :param file: output file name or an already opened file-like object.
    :param curve: sequence of (x, y) coordinate pairs.
    """
    # Accept a file name or an open file; replaces the dated
    # type(file)==types.StringType check with isinstance.
    fopened = 0
    if isinstance(file, str):
        file = open(file, "wt")
        fopened = 1

    for coord in curve:
        file.write("%5.3f\t%5.3f\n" % tuple(coord))

    # Close only files we opened ourselves.
    if fopened:
        file.close()
1734
1735
def plot_ROC_learners(file, curves, learners):
    """Label each learner by its name and plot its ROC curve."""
    names = [Orange.misc.getobjectname(l, "Learner %i" % i)
             for i, l in enumerate(learners)]
    plot_ROC(file, curves, names)
1738
def plot_ROC(file, curves, legend):
    """Write a gnuplot script plotting the given ROC curves together with
    the diagonal reference line.

    :param file: output file name or an already opened file-like object.
    :param curves: list of ROC curves, each a sequence of (x, y) pairs.
    :param legend: curve titles, one per curve.
    """
    # Accept a file name or an open file; replaces the dated
    # type(file)==types.StringType check with isinstance.
    fopened = 0
    if isinstance(file, str):
        file = open(file, "wt")
        fopened = 1

    file.write("set yrange [0:1]\n")
    file.write("set xrange [0:1]\n")
    file.write("set multiplot\n\n")

    file.write("plot \\\n")
    for leg in legend:
        file.write("'-' title '%s' with lines,\\\n" % leg)
    file.write("'-' title '' with lines\n")

    for curve in curves:
        for coord in curve:
            file.write("%5.3f\t%5.3f\n" % tuple(coord))
        file.write("e\n\n")

    # Diagonal reference line. Gnuplot's inline-data terminator "e" must be
    # on a line of its own; the original fused it with the last coordinate
    # ("0.0\t0.0e\n\n"), producing an invalid data value.
    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")

    if fopened:
        file.close()
1764
1765
@deprecated_keywords({"allResults": "all_results"})
def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference=-1):
    """Label each learner by its name and plot the McNemar curves."""
    names = [Orange.misc.getobjectname(l, "Learner %i" % i)
             for i, l in enumerate(learners)]
    plot_McNemar_curve(file, all_results, proportions, names, reference)
1769
1770
@deprecated_keywords({"allResults": "all_results"})
def plot_McNemar_curve(file, all_results, proportions, legend, reference=-1):
    """Write a gnuplot script plotting, for each non-reference learner, its
    McNemar statistic against the reference learner over the proportions.

    :param file: output file name or an already opened file-like object.
    :param all_results: list of ExperimentResults, one per proportion.
    :param proportions: proportions of the learning data used.
    :param legend: learner titles.
    :param reference: index of the reference learner; default is the last.
    """
    if reference < 0:
        reference = len(legend)-1

    # Accept a file name or an open file; replaces the dated
    # type(file)==types.StringType check with isinstance.
    fopened = 0
    if isinstance(file, str):
        file = open(file, "wt")
        fopened = 1

    #file.write("set yrange [0:1]\n")
    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    file.write("plot \\\n")
    # Indices of all learners except the reference one; list(range(...)) so
    # the concatenation also works if the module is ever run under Python 3.
    tmap = list(range(reference)) + list(range(reference+1, len(legend)))
    for i in tmap[:-1]:
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    # NOTE(review): the last curve uses pointtype tmap[-1] rather than
    # tmap[-1]+1 like the others -- kept as in the original; confirm intent.
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1]))
    file.write("\n")

    for i in tmap:
        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
        file.write("e\n\n")

    if fopened:
        file.close()
1799
# Default marker glyphs (TeX math symbols, then the lowercase letters a-y)
# used by the PiCTeX output routines below.
default_point_types=("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}")+tuple([chr(x) for x in range(97, 122)])
# Default PiCTeX line styles used by the PiCTeX output routines below.
default_line_types=("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
1802
@deprecated_keywords({"allResults": "all_results"})
def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
    """Delegate to learning_curve_to_PiCTeX with the same arguments."""
    # apply() is deprecated (and removed in Python 3); use unpacking instead.
    return learning_curve_to_PiCTeX(file, all_results, proportions, **options)
1806
1807
@deprecated_keywords({"allResults": "all_results"})
def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
    """Write a PiCTeX picture of the learning curves.

    :param file: output file name or an already opened file-like object.
    :param all_results: list of ExperimentResults, one per proportion.
    :param proportions: proportions of the learning data used.
    :param options: recognized keys: graphsize (cm), yshift, pointtypes,
        linetypes, numberedx (which x ticks get numbers) and default
        (accuracy at which a dashed horizontal reference line is drawn).

    NOTE(review): several string literals in this function had lost their
    opening quote in the checked-in revision (e.g. ``file.write(\\beginpicture...``),
    which is a syntax error; they are reconstructed here. The trailing
    unconditional ``file.close()`` that also closed caller-supplied files
    has been removed.
    """
    # Accept a file name or an open file; replaces the dated
    # type(file)==types.StringType check with isinstance.
    fopened = 0
    if isinstance(file, str):
        file = open(file, "wt")
        fopened = 1

    nexamples = len(all_results[0].results)
    CAs = [CA_dev(x) for x in all_results]

    graphsize = float(options.get("graphsize", 10.0))  # in cm
    difprop = proportions[-1]-proportions[0]
    ntestexamples = nexamples*proportions[-1]
    xunit = graphsize/ntestexamples

    yshift = float(options.get("yshift", -ntestexamples/20.))

    pointtypes = options.get("pointtypes", default_point_types)
    linetypes = options.get("linetypes", default_line_types)

    if "numberedx" in options:
        numberedx = options["numberedx"]
        if isinstance(numberedx, int):
            if numberedx > 0:
                # Evenly pick `numberedx` proportions to number on the x axis.
                numberedx = [nexamples*proportions[int(i/float(numberedx)*len(proportions))] for i in range(numberedx)]+[proportions[-1]*nexamples]
            elif numberedx < 0:
                # Pick -numberedx+1 equally spaced target proportions,
                # snapping each to the closest actual proportion.
                numberedx = -numberedx
                newn = []
                for i in range(numberedx+1):
                    wanted = proportions[0]+float(i)/numberedx*difprop
                    best = (10, 0)
                    for t in proportions:
                        td = abs(wanted-t)
                        if td < best[0]:
                            best = (td, t)
                    if not best[1] in newn:
                        newn.append(best[1])
                newn.sort()
                numberedx = [nexamples*x for x in newn]
        elif isinstance(numberedx[0], float):
            numberedx = [nexamples*x for x in numberedx]
    else:
        numberedx = [nexamples*x for x in proportions]

    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
    file.write("\\axis bottom invisible\n")  # label {#examples}
    file.write("      ticks short at %s /\n" % " ".join(["%i" % (x*nexamples+0.5) for x in proportions]))
    if numberedx:
        file.write("            long numbered at %s /\n" % "".join(["%i " % int(x+0.5) for x in numberedx]))
    file.write("  /\n")
    file.write("\\axis left invisible\n")  # label {classification accuracy}
    file.write("      shiftedto y=%5.3f\n" % yshift)
    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
    file.write("  /\n")
    if "default" in options:
        # Dashed horizontal reference line at the given accuracy.
        file.write("\\setdashpattern<1pt, 1pt>\n")
        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))

    for i in range(len(CAs[0])):
        coordinates = " ".join(["%i %5.3f" % (proportions[p]*nexamples, CAs[p][i][0]) for p in range(len(proportions))])
        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %s /\n" % coordinates)
        if pointtypes:
            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))

    file.write("\\endpicture\n")
    file.write("}\n")
    # Close only files we opened ourselves.
    if fopened:
        file.close()
1885
def legend_learners_to_PiCTeX(file, learners, **options):
    """Name each learner and write a PiCTeX legend for them."""
    # apply() is deprecated (and removed in Python 3); use unpacking instead.
    names = [Orange.misc.getobjectname(l, "Learner %i" % i)
             for i, l in enumerate(learners)]
    return legend_to_PiCTeX(file, names, **options)
1888
def legend_to_PiCTeX(file, legend, **options):
    """Write a PiCTeX picture with a legend: a line/point sample followed by
    the corresponding label for each entry of `legend`.

    :param file: output file name or an already opened file-like object.
    :param legend: labels to show.
    :param options: pointtypes and linetypes override the module defaults.

    NOTE(review): several string literals in this function had lost their
    opening quote in the checked-in revision (a syntax error); they are
    reconstructed here. The trailing unconditional ``file.close()`` that
    also closed caller-supplied files has been removed.
    """
    # Accept a file name or an open file; replaces the dated
    # type(file)==types.StringType check with isinstance.
    fopened = 0
    if isinstance(file, str):
        file = open(file, "wt")
        fopened = 1

    pointtypes = options.get("pointtypes", default_point_types)
    linetypes = options.get("linetypes", default_line_types)

    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))

    for i in range(len(legend)):
        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i+0.2))
        if pointtypes:
            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i+0.1))
        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i+0.25))

    file.write("\\endpicture\n")
    file.write("}\n")
    # Close only files we opened ourselves.
    if fopened:
        file.close()
1918
1919
def compute_friedman(avranks, N):
    """ Returns a tuple composed of (friedman statistic, degrees of freedom)
    and (Iman statistic - F-distribution, degrees of freedom) given average
    ranks and a number of tested data sets N.
    """
    k = len(avranks)

    # Friedman's chi-square statistic over the average ranks.
    rank_sq = sum(rank**2.0 for rank in avranks)
    f = 12*N*(rank_sq - k*(k+1)*(k+1)/4.0)/(k*(k+1))

    # Iman-Davenport correction, distributed as F with these d.o.f.
    im = (N-1)*f/(N*(k-1) - f)
    fdistdof = (k-1, (k-1)*(N-1))

    return (f, k-1), (im, fdistdof)
1939
def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
    """ Returns critical difference for Nemenyi or Bonferroni-Dunn test
    according to given alpha (either alpha="0.05" or alpha="0.1") for average
    ranks and number of tested data sets N. Type can be either "nemenyi" for
    the Nemenyi two tailed test or "bonferroni-dunn" for the Bonferroni-Dunn
    test.
    """
    k = len(avranks)

    # Critical values q_alpha, indexed by the number of compared methods k.
    # The Nemenyi rows can be computed in R as qtukey(1-alpha, k, Inf)/2**0.5.
    crit = {
        ("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
                              2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
                              3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
                              3.426041, 3.458425, 3.488685, 3.517073, 3.543799],
        ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
                             2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
                             2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
                             3.195743, 3.229723, 3.261461, 3.291224, 3.319233],
        ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
                                      2.638, 2.690, 2.724, 2.773],
        ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
                                     2.394, 2.450, 2.498, 2.539],
    }

    q = crit[(type, alpha)][k]
    return q*(k*(k+1)/(6.0*N))**0.5
1964
1965
def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
    """
    Draws a CD graph, which is used to display the differences in methods'
    performance.
    See Janez Demsar, Statistical Comparisons of Classifiers over
    Multiple Data Sets, 7(Jan):1--30, 2006.

    Needs matplotlib to work.

    :param filename: Output file name (with extension). Formats supported
                     by matplotlib can be used.
    :param avranks: List of average methods' ranks.
    :param names: List of methods' names.

    :param cd: Critical difference. Used for marking methods whose
               difference is not statistically significant.
    :param lowv: The lowest shown rank, if None, use 1.
    :param highv: The highest shown rank, if None, use len(avranks).
    :param width: Width of the drawn figure in inches, default 6 in.
    :param textspace: Space on figure sides left for the description
                      of methods, default 1 in.
    :param reverse:  If True, the lowest rank is on the right. Default: False.
    :param cdmethod: None by default. It can be an index of element in avranks
                     or names which specifies the method which should be
                     marked with an interval.
    """

    width = float(width)
    textspace = float(textspace)

    def nth(l,n):
        """
        Returns only nth element in a list (a column of a list of lists).
        """
        n = lloc(l,n)
        return [ a[n] for a in l ]

    def lloc(l,n):
        """
        List location in list of list structure.
        Enable the use of negative locations:
        -1 is the last element, -2 second last...
        """
        if n < 0:
            return len(l[0])+n
        else:
            return n

    def mxrange(lr):
        """
        Multiple xranges. Can be used to traverse matrices.
        This function is very slow due to unknown number of
        parameters.

        >>> mxrange([3,5])
        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

        >>> mxrange([[3,5,1],[9,0,-3]])
        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]

        """
        if len(lr) == 0:
            yield ()
        else:
            #it can work with single numbers
            index = lr[0]
            if type(1) == type(index):
                index = [ index ]
            for a in range(*index):
                for b in mxrange(lr[1:]):
                    yield tuple([a] + list(b))

    try:
        from matplotlib.figure import Figure
        from matplotlib.patches import Polygon
        from matplotlib.backends.backend_agg import FigureCanvasAgg
    except:
        import sys
        print >> sys.stderr, "Function requires matplotlib. Please install it."
        return

    def print_figure(fig, *args, **kwargs):
        # Render through the Agg backend so no GUI backend is required.
        canvas = FigureCanvasAgg(fig)
        canvas.print_figure(*args, **kwargs)

    sums = avranks

    # Sort the ranks (descending when reverse) and permute names to match.
    tempsort =  sorted([ (a,i) for i,a in  enumerate(sums) ], reverse=reverse)
    ssums = nth(tempsort, 0)
    sortidx = nth(tempsort, 1)
    nnames = [ names[x] for x in sortidx ]

    if lowv == None:
        lowv = min(1, int(math.floor(min(ssums))))
    if highv == None:
        highv = max(len(avranks), int(math.ceil(max(ssums))))

    # Vertical position (inches from the top) of the rank axis.
    cline = 0.4

    k = len(sums)

    lines = None
    sums = sorted(sums)

    linesblank = 0
    scalewidth = width - 2*textspace

    def rankpos(rank):
        # Map a rank value to an x position (inches) on the axis.
        if not reverse:
            a = rank - lowv
        else:
            a = highv - rank
        return textspace+scalewidth/(highv-lowv)*a

    distanceh = 0.25

    if cd and cdmethod == None:

        #get pairs of non significant methods

        def get_lines(sums, hsd):

            #get all pairs
            lsums = len(sums)
            allpairs = [ (i,j) for i,j in mxrange([[lsums], [lsums]]) if j > i ]
            #remove not significant
            notSig = [ (i,j) for i,j in allpairs if abs(sums[i]-sums[j]) <= hsd ]
            #keep only longest

            def no_longer((i,j), notSig):
                for i1,j1 in notSig:
                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
                        return False
                return True

            longest = [ (i,j) for i,j in notSig if no_longer((i,j),notSig) ]

            return longest

        lines = get_lines(ssums, cd)
        linesblank = 0.2 + 0.2 + (len(lines)-1)*0.1

        # Make room above the axis for the CD scale.
        distanceh = 0.25
        cline += distanceh

    #calculate height needed height of an image
    minnotsignificant = max(2*0.2, linesblank)
    height = cline + ((k+1)/2)*0.2 + minnotsignificant

    fig = Figure(figsize=(width, height))
    ax = fig.add_axes([0,0,1,1]) #reverse y axis
    ax.set_axis_off()

    hf = 1./height # height factor
    wf = 1./width

    def hfl(l):
        # Scale a list of y coordinates (inches) to figure fractions.
        return [ a*hf for a in l ]

    def wfl(l):
        # Scale a list of x coordinates (inches) to figure fractions.
        return [ a*wf for a in l ]

    """
    Upper left corner is (0,0).
    """

    ax.plot([0,1], [0,1], c="w")
    ax.set_xlim(0, 1)
    ax.set_ylim(1, 0)

    def line(l, color='k', **kwargs):
        """
        Input is a list of pairs of points.
        """
        ax.plot(wfl(nth(l,0)), hfl(nth(l,1)), color=color, **kwargs)

    def text(x, y, s, *args, **kwargs):
        ax.text(wf*x, hf*y, s, *args, **kwargs)

    # The rank axis itself.
    line([(textspace, cline), (width-textspace, cline)], linewidth=0.7)

    bigtick = 0.1
    smalltick = 0.05


    import numpy

    # Ticks every 0.5 rank; integer ranks get the big tick.
    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
        tick = smalltick
        if a == int(a): tick = bigtick
        line([(rankpos(a), cline-tick/2),(rankpos(a), cline)], linewidth=0.7)

    # NOTE(review): `tick` here is whatever value the previous loop left
    # behind -- presumably bigtick was intended; kept as is.
    for a in range(lowv, highv+1):
        text(rankpos(a), cline-tick/2-0.05, str(a), ha="center", va="bottom")

    k = len(ssums)

    # First half of the methods: connector lines and labels on the left.
    for i in range((k+1)/2):
        chei = cline+ minnotsignificant + (i)*0.2
        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace-0.1, chei)], linewidth=0.7)
        text(textspace-0.2, chei, nnames[i], ha="right", va="center")

    # Second half: connector lines and labels on the right.
    for i in range((k+1)/2, k):
        chei = cline + minnotsignificant + (k-i-1)*0.2
        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace+scalewidth+0.1, chei)], linewidth=0.7)
        text(textspace+scalewidth+0.2, chei, nnames[i], ha="left", va="center")

    if cd and cdmethod == None:

        #upper scale
        if not reverse:
            begin, end = rankpos(lowv), rankpos(lowv+cd)
        else:
            begin, end = rankpos(highv), rankpos(highv - cd)

        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
        line([(begin, distanceh + bigtick/2), (begin, distanceh - bigtick/2)], linewidth=0.7)
        line([(end, distanceh + bigtick/2), (end, distanceh - bigtick/2)], linewidth=0.7)
        text((begin+end)/2, distanceh - 0.05, "CD", ha="center", va="bottom")

        #non significance lines
        def draw_lines(lines, side=0.05, height=0.1):
            # One thick horizontal bar per group of mutually non-significant
            # methods, stacked below the axis.
            start = cline + 0.2
            for l,r in lines:
                line([(rankpos(ssums[l])-side, start), (rankpos(ssums[r])+side, start)], linewidth=2.5)
                start += height

        draw_lines(lines)

    elif cd:
        # Mark only the method at index cdmethod with a +/- cd interval.
        begin = rankpos(avranks[cdmethod]-cd)
        end = rankpos(avranks[cdmethod]+cd)
        line([(begin, cline), (end, cline)], linewidth=2.5)
        line([(begin, cline + bigtick/2), (begin, cline - bigtick/2)], linewidth=2.5)
        line([(end, cline + bigtick/2), (end, cline - bigtick/2)], linewidth=2.5)

    print_figure(fig, filename, **kwargs)
2204
def mlc_hamming_loss(res):
    """
    Schapire and Singer (2000) presented Hamming Loss, which is defined as:

    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{Y_i \\vartriangle Z_i}{|L|}`

    :param res: multi-label experiment results; each result holds one
        predicted label vector per learner in `classes`.
    :return: list with one Hamming loss per learner.
    """
    losses = [0]*res.number_of_learners
    label_num = len(res.labels)
    example_num = gettotsize(res)

    for e in res.results:
        aclass = e.actual_class
        for i, labels in enumerate(e.classes):
            # `<>` and `raise E, "msg"` were Python-2-only spellings; the
            # forms below behave identically and also parse under Python 3.
            labels = list(map(int, labels))
            if len(labels) != len(aclass):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")
            # Count the labels on which prediction and truth differ.
            for j in range(label_num):
                if labels[j] != aclass[j]:
                    losses[i] += 1

    return [float(x)/(label_num*example_num) for x in losses]
2226
def mlc_accuracy(res, forgiveness_rate=1.0):
    """
    Godbole & Sarawagi, 2004 uses the metrics accuracy, precision, recall as follows:

    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`

    Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`,
    called forgiveness rate:

    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`

    :param res: multi-label experiment results.
    :param forgiveness_rate: the exponent alpha applied to each per-example
        Jaccard overlap (1.0 gives the plain Godbole & Sarawagi accuracy).
    :return: list with one accuracy per learner.
    """
    accuracies = [0.0]*res.number_of_learners
    example_num = gettotsize(res)

    for e in res.results:
        aclass = e.actual_class
        for i, labels in enumerate(e.classes):
            # Python-3-compatible forms of map/!=/raise (behavior unchanged).
            labels = list(map(int, labels))
            if len(labels) != len(aclass):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # Jaccard overlap between the predicted and actual label sets.
            intersection = 0.0
            union = 0.0
            for predicted_label, actual_label in zip(labels, aclass):
                if predicted_label and actual_label:
                    intersection += 1
                if predicted_label or actual_label:
                    union += 1

            if union != 0:
                accuracies[i] += intersection/union

    return [math.pow(x/example_num, forgiveness_rate) for x in accuracies]
2261
def mlc_precision(res):
    """
    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`

    :param res: multi-label experiment results.
    :return: list with one precision per learner.
    """
    precisions = [0.0]*res.number_of_learners
    example_num = gettotsize(res)

    for e in res.results:
        aclass = e.actual_class
        for i, labels in enumerate(e.classes):
            # Python-3-compatible forms of map/!=/raise (behavior unchanged).
            labels = list(map(int, labels))
            if len(labels) != len(aclass):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # `labels` holds the predicted label vector, `aclass` the truth
            # (the original confusingly named the predicted value `real`).
            intersection = 0.0
            predicted = 0.0
            for predicted_label, actual_label in zip(labels, aclass):
                if predicted_label and actual_label:
                    intersection += 1
                if predicted_label:
                    predicted += 1
            if predicted != 0:
                precisions[i] += intersection/predicted

    return [x/example_num for x in precisions]
2288
def mlc_recall(res):
    """
    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`

    :param res: multi-label experiment results.
    :return: list with one recall per learner.
    """
    recalls = [0.0]*res.number_of_learners
    example_num = gettotsize(res)

    for e in res.results:
        aclass = e.actual_class
        for i, labels in enumerate(e.classes):
            # Python-3-compatible forms of map/!=/raise (behavior unchanged).
            labels = list(map(int, labels))
            if len(labels) != len(aclass):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # `labels` holds the predicted label vector, `aclass` the truth
            # (the original confusingly named the predicted value `real`).
            intersection = 0.0
            actual = 0.0
            for predicted_label, actual_label in zip(labels, aclass):
                if predicted_label and actual_label:
                    intersection += 1
                if actual_label:
                    actual += 1
            if actual != 0:
                recalls[i] += intersection/actual

    return [x/example_num for x in recalls]
2315
2316#def mlc_ranking_loss(res):
2317#    pass
2318#
2319#def mlc_average_precision(res):
2320#    pass
2321#
2322#def mlc_hierarchical_loss(res):
2323#    pass
2324
2325#########################################################################################
2326if __name__ == "__main__":
2327    avranks =  [3.143, 2.000, 2.893, 1.964]
2328    names = ["prva", "druga", "tretja", "cetrta" ]
2329    cd = compute_CD(avranks, 14)
2330    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2331    print cd
2332
2333    print compute_friedman(avranks, 14)
2334
2335    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)
# Note: See TracBrowser for help on using the repository browser.