# source:orange/Orange/evaluation/scoring.py@10902:393ef8a8dd2d

Revision 10902:393ef8a8dd2d, 100.9 KB checked in by Matija Polajnar <matija.polajnar@…>, 23 months ago (diff)

Add probability conversion decorator to more scoring functions as needed, fix decorator usage.

Line
1import math
2import functools
4
5import numpy
6
7import Orange
8from Orange import statc, corn
9from Orange.utils import deprecated_keywords, deprecated_function_name, \
10    deprecation_warning, environ
11from Orange.evaluation import testing
12
# Optional dependency: matplotlib is only required by the plotting helpers
# in this module; everything else works without it, so record availability
# in a flag instead of failing at import time.
try:
    import matplotlib
    HAS_MATPLOTLIB = True
except ImportError:
    matplotlib = None
    HAS_MATPLOTLIB = False
19
20#### Private stuff
21
def log2(x):
    """Return the base-2 logarithm of x."""
    return math.log(x, 2)
25
def check_non_zero(x):
    """Raise ValueError when x equals 0 (no examples or zero total weight).

    Uses the modern ``raise E(msg)`` form, which is valid in both
    Python 2 and 3 (the old ``raise E, msg`` is Python-2-only).
    """
    if x == 0.:
        raise ValueError("Cannot compute the score: no examples or sum of weights is 0.")
30
def gettotweight(res):
    """Return the sum of instance weights over all tested examples.

    :raises ValueError: if the sum of weights is 0.
    """
    # sum() with a generator replaces the old reduce(); same result,
    # clearer, and avoids the Python-2-only builtin reduce.
    totweight = sum(r.weight for r in res.results)
    if totweight == 0.:
        raise ValueError("Cannot compute the score: sum of weights is 0.")
    return totweight
37
def gettotsize(res):
    """Return the number of tested instances in res.

    :raises ValueError: if there are no instances.
    """
    if len(res.results):
        return len(res.results)
    raise ValueError("Cannot compute the score: no examples.")
44
# Backward compatibility
def replace_use_weights(fun):
    """Decorator translating the deprecated ``useWeights`` keyword into
    ``ignore_weights`` (with inverted meaning), emitting a deprecation
    warning. A no-op when deprecated members are disabled."""
    if environ.orange_no_deprecated_members:
        return fun

    @functools.wraps(fun)
    def wrapped(*args, **kwargs):
        legacy = kwargs.pop("useWeights", None)
        if legacy is None:
            return fun(*args, **kwargs)
        deprecation_warning("useWeights", "ignore_weights")
        kwargs["ignore_weights"] = not legacy
        return fun(*args, **kwargs)
    return wrapped
58
def replace_discrete_probabilities_with_list(method):
    # Backward-compatibility decorator factory: while deprecated members
    # are enabled, temporarily replace Discrete distribution objects in the
    # results' probabilities with plain lists for the duration of the
    # wrapped scoring function, then restore the originals.
    #
    # :param method: positional index at which the wrapped function
    #     receives the results object; falls back to the "res" /
    #     "test_results" keyword arguments when not passed positionally.
    if environ.orange_no_deprecated_members:
        return lambda fun: fun

    def decorator(fun):
        @functools.wraps(fun)
        def wrapped(*args, **kwargs):
            res = args[method] if len(args)>method else kwargs.get("res", kwargs.get("test_results", None))
            convert = res is not None

            if convert:
                # Remember the original probability objects so they can be
                # restored after the call.
                old_probs = []
                for r in res.results:
                    old_probs.append(r.probabilities)
                    # Only exact Discrete instances are converted; other
                    # probability representations pass through untouched.
                    r.probabilities = [list(p) if type(p) is Orange.statistics.distribution.Discrete
                                       else p for p in r.probabilities]
            result = fun(*args, **kwargs)
            if convert:
                for r, old in zip(res.results, old_probs):
                    r.probabilities = old
            return result
        return wrapped
    return decorator
82
def split_by_iterations(res):
    """Split ExperimentResults of a multiple iteration test into a list
    of ExperimentResults, one for each iteration.
    """
    if res.number_of_iterations < 2:
        return [res]

    # One empty ExperimentResults shell per iteration, mirroring the
    # metadata of the source results.
    # NOTE(review): the "loaded=..., test_type=..., labels=...)" line was
    # missing from this copy of the file and has been restored to match
    # the constructor usage in split_by_classifiers — verify against
    # upstream.
    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifier_names, res.class_values,
                res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type,
                labels=res.labels)
            for _ in range(res.number_of_iterations)]
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress
98
def split_by_classifiers(res):
    """Split an instance of :obj:`ExperimentResults` into a list of
    :obj:`ExperimentResults`, one for each classifier.
    """
    split_res = []
    # NOTE(review): this function reads camelCase attributes
    # (classifierNames, numberOfIterations, iterationNumber, actualClass)
    # while the rest of the module uses snake_case names — presumably both
    # resolve to the same data through deprecation aliases; confirm before
    # renaming.
    for i in range(len(res.classifierNames)):
        r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations,
                    [res.classifierNames[i]], res.classValues,
                    weights=res.weights, base_class=res.base_class,
                    classifiers=[res.classifiers[i]] if res.classifiers else [],
                    test_type=res.test_type, labels=res.labels)
        # Rebuild the per-example results, keeping only classifier i's
        # prediction and probabilities in each copied TestedExample.
        r.results = []
        for te in res.results:
            r.results.append(Orange.evaluation.testing.TestedExample(
                te.iterationNumber, te.actualClass, n=1, weight=te.weight))
            r.results[-1].classes = [te.classes[i]]
            r.results[-1].probabilities = [te.probabilities[i]]
        split_res.append(r)
    return split_res
118
119
def class_probabilities_from_res(res, **argkw):
    """Calculate class probabilities from the actual classes in res.

    :param res: experiment results with tested examples.
    :param argkw: pass ``unweighted=1`` to force instance weights to be
        ignored.
    :rtype: list of probabilities, one per class value.
    :raises ValueError: if there are no examples / total weight is 0.
    """
    probs = [0.] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            probs[int(tex.actual_class)] += 1.
        totweight = gettotsize(res)
    else:
        totweight = 0.
        for tex in res.results:
            # int() mirrors the unweighted branch above; actual_class is
            # not guaranteed to be a plain int index.
            probs[int(tex.actual_class)] += tex.weight
            totweight += tex.weight
        check_non_zero(totweight)
    return [prob / totweight for prob in probs]
134
135
@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    """Average per-fold statistics over non-empty folds.

    :param stats: per-fold accumulated statistics; the outer dimension is
        iterations when iteration_is_outer is true, learners otherwise.
    :param fold_n: per-fold example counts (or weight sums); folds with a
        non-positive count are dropped.
    :param report_se: also return the standard error per learner.
    :param iteration_is_outer: layout of the stats matrix (see above).
    :raises ValueError: when there are no classifiers or no usable folds.
    """
    # Remove empty folds and turn the matrix so that the learner is outer.
    # (Comprehensions replace the Python-2-only tuple-parameter lambdas
    # previously used with filter().)
    if iteration_is_outer:
        if not stats:
            raise ValueError("Cannot compute the score: no examples or sum of weights is 0.")
        number_of_learners = len(stats[0])
        stats = [(x, fn) for x, fn in zip(stats, fold_n) if fn > 0]
        stats = [[x[lrn] / fn for x, fn in stats]
                 for lrn in range(number_of_learners)]
    else:
        stats = [[x / fn for x, fn in zip(lrn_d, fold_n) if fn > 0]
                 for lrn_d in stats]

    if not stats:
        raise ValueError("Cannot compute the score: no classifiers")
    if not stats[0]:
        raise ValueError("Cannot compute the score: no examples or sum of weights is 0.")

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]
162
def ME(res, **argkw):
    """Mean error: average absolute difference between predicted and
    actual class, optionally weighted by instance weights."""
    errors = [0.] * res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            actual = float(tex.actual_class)
            errors = [err + abs(float(cls) - actual)
                      for err, cls in zip(errors, tex.classes)]
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            actual = float(tex.actual_class)
            w = tex.weight
            errors = [err + w * abs(float(cls) - actual)
                      for err, cls in zip(errors, tex.classes)]
        totweight = gettotweight(res)

    return [err / totweight for err in errors]

MAE = ME
180
181
class ConfusionMatrix:
    """
    Classification result summary.
    """
    #: True Positive predictions
    TP = 0.
    #: True Negative predictions
    TN = 0.
    #: False Positive predictions
    FP = 0.
    #: False Negative predictions
    FN = 0.

    @deprecated_keywords({"predictedPositive": "predicted_positive",
                          "isPositive": "is_positive"})
    def addTFPosNeg(self, predicted_positive, is_positive, weight=1.0):
        """
        Update confusion matrix with result of a single classification.

        :param predicted_positive: positive class value was predicted
        :param is_positive: correct class value is positive
        :param weight: weight of the selected instance
        """
        if predicted_positive and is_positive:
            self.TP += weight
        elif predicted_positive:
            self.FP += weight
        elif is_positive:
            self.FN += weight
        else:
            self.TN += weight
215
216
217
218#########################################################################
219# PERFORMANCE MEASURES:
220# Scores for evaluation of numeric predictions
221
def check_argkw(dct, lst):
    """Return True if any item from lst has a non-zero value in dct.

    Uses any() instead of the old reduce(or) fold: the result is now a
    proper bool (as documented) and an empty lst yields False instead of
    raising TypeError.
    """
    return any(dct.get(k, 0) for k in lst)
225
def regression_error(res, **argkw):
    """Return the regression error (default: MSE).

    Recognized argkw options:
      - "abs": accumulate absolute instead of squared differences (MAE family)
      - "norm-abs" / "norm-sqr": normalize by deviation from the mean
        actual class (relative errors)
      - "R2": report 1 - score (coefficient of determination)
      - "sqrt": take the square root of the final score (RMSE family)
      - "SE": with multiple iterations, return (mean, std) across folds
      - "unweighted": ignore instance weights
    """
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # Compute the score for each iteration separately, then average.
        scores = [[0.] * res.number_of_iterations
                  for _ in range(res.number_of_learners)]
        norm = None
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.] * res.number_of_iterations

        # Number of examples and average actual class in each iteration.
        nIter = [0] * res.number_of_iterations
        a = [0] * res.number_of_iterations
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i] / nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # Iterate across test cases.
            # (The previous version incremented nIter again in this loop,
            # double-counting every instance and halving the per-fold
            # scores; MSE_old counts each instance once.)
            for tex in res.results:
                ai = float(tex.actual_class)

                # compute normalization, if required
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number]) ** 2

                # iterate across results of different regressors
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai) ** 2
        else:
            raise NotImplementedError(
                "weighted error scores with SE not implemented yet")

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x / n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x / ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else:  # single iteration (testing on a single test set)
        scores = [0.] * res.number_of_learners
        norm = 0.

        if argkw.get("unweighted", 0) or not res.weights:
            a = sum([tex.actual_class for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                ai = float(tex.actual_class)
                if argkw.get("abs", 0):
                    scores = [s + abs(float(cls) - ai)
                              for s, cls in zip(scores, tex.classes)]
                else:
                    scores = [s + (float(cls) - ai) ** 2
                              for s, cls in zip(scores, tex.classes)]

                if argkw.get("norm-abs", 0):
                    norm += abs(tex.actual_class - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (tex.actual_class - a) ** 2
            totweight = gettotsize(res)
        else:
            # The previous version (marked UNFINISHED) accumulated the
            # weighted errors into a separate MSEs list and then returned
            # the untouched zero-filled scores; accumulate into scores and
            # honor the "abs" option like the unweighted branch does.
            for tex in res.results:
                ai = float(tex.actual_class)
                w = tex.weight
                if argkw.get("abs", 0):
                    scores = [s + w * abs(float(cls) - ai)
                              for s, cls in zip(scores, tex.classes)]
                else:
                    scores = [s + w * (float(cls) - ai) ** 2
                              for s, cls in zip(scores, tex.classes)]
            totweight = gettotweight(res)
            # NOTE(review): norm is never accumulated for weighted results,
            # so "norm-abs"/"norm-sqr" would divide by zero below — confirm
            # intended behavior before using relative scores with weights.

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s / norm for s in scores]
        else:  # normalize by number of instances (or sum of weights)
            scores = [s / totweight for s in scores]

        if argkw.get("R2"):
            scores = [1. - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores
320
def MSE(res, **argkw):
    """Compute mean-squared error."""
    return regression_error(res, **argkw)

def RMSE(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def MAE(res, **argkw):
    """Compute mean absolute error."""
    argkw.setdefault("abs", True)
    return regression_error(res, **argkw)

def RSE(res, **argkw):
    """Compute relative squared error."""
    argkw.setdefault("norm-sqr", True)
    return regression_error(res, **argkw)

def RRSE(res, **argkw):
    """Compute root relative squared error."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def RAE(res, **argkw):
    """Compute relative absolute error."""
    argkw.setdefault("abs", True)
    argkw.setdefault("norm-abs", True)
    return regression_error(res, **argkw)

def R2(res, **argkw):
    """Compute the coefficient of determination, R-squared."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("R2", True)
    return regression_error(res, **argkw)
357
def MSE_old(res, **argkw):
    """Compute mean-squared error (legacy implementation).

    With "SE" and multiple iterations, return (mean, std) over folds;
    with "sqrt", return the square root of the score (RMSE).
    :raises ValueError: for weighted results with "SE" (not implemented).
    """
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        MSEs = [[0.] * res.number_of_iterations
                for _ in range(res.number_of_learners)]
        nIter = [0] * res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac) ** 2
        else:
            raise ValueError("weighted RMSE with SE not implemented yet")
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                MSEs = [s + (float(cls) - ac) ** 2
                        for s, cls in zip(MSEs, tex.classes)]
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                ac = float(tex.actual_class)
                MSEs = [s + tex.weight * (float(cls) - ac) ** 2
                        for s, cls in zip(MSEs, tex.classes)]
            totweight = gettotweight(res)

        # Normalize BEFORE taking the square root: the old code applied
        # sqrt to the raw error sums, yielding sqrt(sum)/n instead of
        # sqrt(sum/n) for RMSE (the SE branch above divides first).
        MSEs = [x / totweight for x in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return MSEs
394
def RMSE_old(res, **argkw):
    """Compute root mean-squared error (legacy implementation)."""
    if "sqrt" not in argkw:
        argkw["sqrt"] = 1
    return MSE_old(res, **argkw)
399
400#########################################################################
401# PERFORMANCE MEASURES:
402# Scores for evaluation of classifiers
403
class CA(list):
    """
    Compute percentage of matches between predicted and actual class.

    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :param report_se: include standard error in result.
    :param ignore_weights: ignore instance weights.
    :rtype: list of scores, one for each learner.

    Standard errors are estimated from deviation of CAs across folds (if
    test_results were produced by cross_validation) or approximated under
    the assumption of normal distribution otherwise.
    """
    # Input-type tags returned by get_input_type().
    CONFUSION_MATRIX = 0
    CONFUSION_MATRIX_LIST = 1
    CLASSIFICATION = 2
    CROSS_VALIDATION = 3

    @deprecated_keywords({"reportSE": "report_se",
                          "unweighted": "ignore_weights"})
    def __init__(self, test_results, report_se=False, ignore_weights=False):
        super(CA, self).__init__()
        self.report_se = report_se
        self.ignore_weights = ignore_weights

        input_type = self.get_input_type(test_results)
        if input_type == self.CONFUSION_MATRIX:
            self[:] = [self.from_confusion_matrix(test_results)]
        elif input_type == self.CONFUSION_MATRIX_LIST:
            self[:] = self.from_confusion_matrix_list(test_results)
        elif input_type == self.CLASSIFICATION:
            self[:] = self.from_classification_results(test_results)
        elif input_type == self.CROSS_VALIDATION:
            self[:] = self.from_crossvalidation_results(test_results)
        else:
            # Previously an unrecognized argument silently produced an
            # empty score list; fail loudly instead.
            raise ValueError("Cannot compute CA: unsupported input type %r"
                             % (type(test_results),))

    def from_confusion_matrix(self, cm):
        """Score from one ConfusionMatrix or a square contingency table."""
        all_predictions = 0.
        correct_predictions = 0.
        if isinstance(cm, ConfusionMatrix):
            all_predictions += cm.TP + cm.FN + cm.FP + cm.TN
            correct_predictions += cm.TP + cm.TN
        else:
            # 2D table: diagonal entries are the correct predictions.
            for r, row in enumerate(cm):
                for c, column in enumerate(row):
                    if r == c:
                        correct_predictions += column
                    all_predictions += column

        check_non_zero(all_predictions)
        ca = correct_predictions / all_predictions

        if self.report_se:
            # NOTE(review): matches from_classification_results, but the
            # usual normal-approximation SE is sqrt(ca*(1-ca)/n) — confirm
            # the intended formula before changing.
            return ca, ca * (1 - ca) / math.sqrt(all_predictions)
        else:
            return ca

    def from_confusion_matrix_list(self, confusion_matrices):
        """Scores from a list of confusion matrices, one per learner."""
        return [self.from_confusion_matrix(cm) for cm in confusion_matrices]

    def from_classification_results(self, test_results):
        """Scores from single-iteration experiment results."""
        CAs = [0.] * test_results.number_of_learners
        totweight = 0.
        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            # (cls == actual and w) adds w on a match, 0 (False) otherwise.
            CAs = map(lambda res, cls: res + (cls == tex.actual_class and w),
                      CAs, tex.classes)
            totweight += w
        check_non_zero(totweight)
        ca = [x / totweight for x in CAs]

        if self.report_se:
            return [(x, x * (1 - x) / math.sqrt(totweight)) for x in ca]
        else:
            return ca

    def from_crossvalidation_results(self, test_results):
        """Scores averaged over cross-validation folds."""
        CAsByFold = [[0.] * test_results.number_of_iterations
                     for _ in range(test_results.number_of_learners)]
        foldN = [0.] * test_results.number_of_iterations

        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            for lrn in range(test_results.number_of_learners):
                CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn] ==
                    tex.actual_class) and w
            foldN[tex.iteration_number] += w

        return statistics_by_folds(CAsByFold, foldN, self.report_se, False)

    def get_input_type(self, test_results):
        """Map the constructor argument to one of the input-type tags
        (None for unsupported input)."""
        if isinstance(test_results, ConfusionMatrix):
            return self.CONFUSION_MATRIX
        elif isinstance(test_results, testing.ExperimentResults):
            if test_results.number_of_iterations == 1:
                return self.CLASSIFICATION
            else:
                return self.CROSS_VALIDATION
        elif isinstance(test_results, list):
            return self.CONFUSION_MATRIX_LIST
504
505
@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def AP(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the average probability assigned to the correct class."""
    if res.number_of_iterations == 1:
        sums = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            for tex in res.results:
                sums = [s + probs[tex.actual_class]
                        for s, probs in zip(sums, tex.probabilities)]
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                sums = [s + probs[tex.actual_class] * tex.weight
                        for s, probs in zip(sums, tex.probabilities)]
                totweight += tex.weight
        check_non_zero(totweight)
        return [s / totweight for s in sums]

    # Multiple iterations: accumulate per fold, then average across folds.
    by_fold = [[0.] * res.number_of_learners
               for _ in range(res.number_of_iterations)]
    fold_n = [0.] * res.number_of_iterations
    unweighted = ignore_weights or not res.weights
    for tex in res.results:
        it = tex.iteration_number
        w = 1. if unweighted else tex.weight
        by_fold[it] = [s + probs[tex.actual_class] * w
                       for s, probs in zip(by_fold[it], tex.probabilities)]
        fold_n[it] += w

    return statistics_by_folds(by_fold, fold_n, report_se, True)
543
544
@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def Brier_score(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the Brier score, defined as the average (over test instances)
    of :math:\sum_x(t(x) - p(x))^2, where x is a class value, t(x) is 1 for
    the actual class value and 0 otherwise, and p(x) is the predicted
    probability of x.
    """
    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
    #    x is class,
    #    t(x) is 0 for 'wrong' and 1 for 'correct' class
    #    p(x) is predicted probabilty.
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1
    # Since +1 is there for each example, it adds 1 to the average
    # We skip the +1 inside the sum and add it just at the end of the function
    # We take max(result, 0) to avoid -0.0000x due to rounding errors

    if res.number_of_iterations == 1:
        MSEs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            totweight = 0.
            for tex in res.results:
                MSEs = [s + sum(pi ** 2 for pi in probs) -
                        2 * probs[tex.actual_class]
                        for s, probs in zip(MSEs, tex.probabilities)]
                # Count instances, not weights: this branch ignores weights
                # in the numerator, so the denominator must too (the old
                # code summed tex.weight here).
                totweight += 1.
        else:
            for tex in res.results:
                # The weight multiplies the whole per-instance term,
                # including -2*p(c) (the old code weighted only the sum of
                # squared probabilities).
                MSEs = [s + tex.weight * (sum(pi ** 2 for pi in probs) -
                        2 * probs[tex.actual_class])
                        for s, probs in zip(MSEs, tex.probabilities)]
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            ## change this, not zero!!!
            return [(max(x / totweight + 1., 0), 0) for x in MSEs]
        else:
            return [max(x / totweight + 1., 0) for x in MSEs]

    BSs = [[0.] * res.number_of_learners
           for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if ignore_weights or not res.weights:
        for tex in res.results:
            BSs[tex.iteration_number] = [
                b + sum(pi ** 2 for pi in probs) - 2 * probs[tex.actual_class]
                for b, probs in zip(BSs[tex.iteration_number],
                                    tex.probabilities)]
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            # Weight the whole per-instance term (see note above).
            BSs[tex.iteration_number] = [
                b + tex.weight * (sum(pi ** 2 for pi in probs) -
                                  2 * probs[tex.actual_class])
                for b, probs in zip(BSs[tex.iteration_number],
                                    tex.probabilities)]
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x + 1., y) for x, y in stats]
    else:
        return [x + 1. for x in stats]
609
def BSS(res, **argkw):
    """Compute the Brier skill score, 1 - Brier_score / 2."""
    # apply() is deprecated (and removed in Python 3); call directly.
    return [1 - x / 2 for x in Brier_score(res, **argkw)]
612
def IS_ex(Pc, P):
    """Information score of a single answer.

    :param Pc: posterior probability of the correct class.
    :param P: prior (apriori) probability of the correct class.
    """
    if Pc < P:
        # The prediction made the correct class less likely than the prior.
        return log2(1 - P) - log2(1 - Pc)
    return log2(Pc) - log2(P)
619
620
@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se=False, **argkw):
    """Compute the information score as defined by
    Kononenko and Bratko (1991): Information-based evaluation criterion
    for classifier's performance (Machine Learning, 6).
    Argument :obj:`apriori` gives the apriori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:`res`.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations == 1:
        ISs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += (IS_ex(tex.probabilities[i][cls], apriori[cls]) *
                               tex.weight)
            totweight = gettotweight(res)
        if report_se:
            # NOTE: the reported standard error is a placeholder zero.
            return [(IS / totweight, 0) for IS in ISs]
        else:
            return [IS / totweight for IS in ISs]

    ISs = [[0.] * res.number_of_iterations
           for _ in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # compute info scores for each fold
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(
                    tex.probabilities[i][cls], apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(
                    tex.probabilities[i][cls], apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)
675
676
def Wilcoxon(res, statistics, **argkw):
    """Compare exactly two classifiers with the Wilcoxon signed-rank test,
    using the per-iteration values of the given statistics function.

    :raises TypeError: if statistics yields other than two scores.
    """
    res1, res2 = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError("Wilcoxon compares two classifiers, no more, no less")
        res1.append(stats[0])
        res2.append(stats[1])
    return statc.wilcoxont(res1, res2)
686
def rank_difference(res, statistics, **argkw):
    """Dispatch to Wilcoxon (two classifiers) or Friedman (more) based on
    the number of classifiers in the results.

    :raises TypeError: if there are no experiments or fewer than two
        classifiers.
    """
    if not res.results:
        raise TypeError("no experiments")

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError("nothing to compare (less than two classifiers given)")
    # apply() is deprecated (removed in Python 3); call directly instead.
    if k == 2:
        return Wilcoxon(res, statistics, **argkw)
    return Friedman(res, statistics, **argkw)
698
699
700@deprecated_keywords({"res": "test_results",
701                      "classIndex": "class_index",
702                      "unweighted": "ignore_weights"})
703def confusion_matrices(test_results, class_index= -1,
704                       ignore_weights=False, cutoff=.5):
705    """
706    Return confusion matrices for test_results.
707
708    :param test_results: test results
709    :param class_index: index of class value for which the confusion matrices
710        are to be computed (by default unspecified - see note below).
711    :param ignore_weights: ignore instance weights.
712    :param cutoff: cutoff for probability
713
714    :rtype: list of :obj:ConfusionMatrix or list-of-list-of-lists (see
715        note below)
716
717    .. note:: If class_index is not specified and test_results
718        contain predictions for multi-class problem, then the return
719        value is a list of 2D tables (list-of-lists) of all class
720        value pairwise misclassifications.
721
722    """
723    tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)]
724
725    if class_index < 0:
726        numberOfClasses = len(test_results.class_values)
727        if class_index < -1 or numberOfClasses > 2:
728            cm = [[[0.] * numberOfClasses for _ in range(numberOfClasses)]
729                  for _ in range(test_results.number_of_learners)]
730            if ignore_weights or not test_results.weights:
731                for tex in test_results.results:
732                    trueClass = int(tex.actual_class)
733                    for li, pred in enumerate(tex.classes):
734                        predClass = int(pred)
735                        if predClass < numberOfClasses:
736                            cm[li][trueClass][predClass] += 1
737            else:
738                for tex in test_results.results:
739                    trueClass = int(tex.actual_class)
740                    for li, pred in tex.classes:
741                        predClass = int(pred)
742                        if predClass < numberOfClasses:
743                            cm[li][trueClass][predClass] += tex.weight
744            return cm
745
746        elif test_results.base_class >= 0:
747            class_index = test_results.base_class
748        else:
749            class_index = 1
750
751    if cutoff != .5:
752        if ignore_weights or not test_results.weights:
753            for lr in test_results.results:
754                isPositive = (lr.actual_class == class_index)
755                for i in range(test_results.number_of_learners):
757                                         cutoff, isPositive)
758        else:
759            for lr in test_results.results:
760                isPositive = (lr.actual_class == class_index)
761                for i in range(test_results.number_of_learners):
763                                         cutoff, isPositive, lr.weight)
764    else:
765        if ignore_weights or not test_results.weights:
766            for lr in test_results.results:
767                isPositive = (lr.actual_class == class_index)
768                for i in range(test_results.number_of_learners):
770                                         isPositive)
771        else:
772            for lr in test_results.results:
773                isPositive = (lr.actual_class == class_index)
774                for i in range(test_results.number_of_learners):
776                                         isPositive, lr.weight)
777    return tfpns
778
779
# obsolete (renamed): legacy alias kept so code using the old name keeps working
compute_confusion_matrices = confusion_matrices
782
783
@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """
    Return chi square statistic of the confusion matrix
    (higher value indicates that prediction is not by chance).
    """
    # A single matrix is either a ConfusionMatrix instance or a structure
    # whose second element is not a list; anything else is treated as a
    # sequence of matrices and each one is scored in turn.
    is_single_matrix = isinstance(confusion_matrix, ConfusionMatrix) or \
        not isinstance(confusion_matrix[1], list)
    if is_single_matrix:
        return _confusion_chi_square(confusion_matrix)
    return map(_confusion_chi_square, confusion_matrix)
795
def _confusion_chi_square(confusion_matrix):
    """Chi-square test of independence on a single confusion matrix.

    Returns a (chi-square, degrees-of-freedom, p-value) triple, or
    (-1, -1, -1) when some expected frequency is zero.
    """
    if isinstance(confusion_matrix, ConfusionMatrix):
        c = confusion_matrix
        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]]
    dim = len(confusion_matrix)
    rowPriors = [sum(r) for r in confusion_matrix]
    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)]
    # Force a float total: with an integer matrix, old-style division
    # would floor the priors to zero and wrongly report (-1, -1, -1).
    total = float(sum(rowPriors))
    rowPriors = [r / total for r in rowPriors]
    colPriors = [r / total for r in colPriors]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, o in enumerate(row):
            # e is the expected count under independence of rows/columns.
            e = total * rowPriors[ri] * colPriors[ci]
            if not e:
                return -1, -1, -1
            ss += (o - e) ** 2 / e
    df = (dim - 1) ** 2
    return ss, df, statc.chisqprob(ss, df)
815
class CMScore(list):
    """
    :param test_results: :obj:~Orange.evaluation.testing.ExperimentResults
                         or list of :obj:ConfusionMatrix.
    :rtype: list of scores, one for each learner."""
    def __new__(cls, test_results, **kwargs):
        self = list.__new__(cls)
        if isinstance(test_results, ConfusionMatrix):
            # Single matrix: initialize explicitly and return the lone
            # score itself rather than a one-element list.  Because the
            # returned object is not a CMScore instance, Python will not
            # call __init__ again after __new__.
            self.__init__(test_results, **kwargs)
            return self[0]
        return self


    @deprecated_keywords({"confm": "test_results"})
    def __init__(self, test_results=None):
        super(CMScore, self).__init__()

        if test_results is not None:
            # Fill self with one score per learner.
            self[:] = self.__call__(test_results)

    def __call__(self, test_results):
        # Normalize the input to a list of per-learner confusion matrices,
        # then apply the subclass-specific compute() to each of them.
        if isinstance(test_results, testing.ExperimentResults):
            test_results = confusion_matrices(test_results, class_index=1)
        if isinstance(test_results, ConfusionMatrix):
            test_results = [test_results]

        return map(self.compute, test_results)
843
844
845
class Sensitivity(CMScore):
    __doc__ = """Compute sensitivity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>_ (proportion
    of actual positives which are correctly identified as such).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        # Sensitivity = TP / (TP + FN); undefined when there are no
        # actual positives at all.
        actual_positives = confusion_matrix.TP + confusion_matrix.FN
        if actual_positives < 1e-6:
            import warnings
            warnings.warn("Can't compute sensitivity: one or both classes have no instances")
            return None
        return confusion_matrix.TP / actual_positives
860
861
class Recall(Sensitivity):
    __doc__ = """ Compute recall
    <http://en.wikipedia.org/wiki/Precision_and_recall>_
    (fraction of relevant instances that are retrieved).
    """ + CMScore.__doc__
    pass # Recall == Sensitivity: both are TP / (TP + FN); compute() is inherited
868
869
class Specificity(CMScore):
    __doc__ = """Compute specificity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>_
    (proportion of negatives which are correctly identified).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        # Specificity = TN / (TN + FP); undefined when there are no
        # actual negatives at all.
        actual_negatives = confusion_matrix.FP + confusion_matrix.TN
        if actual_negatives < 1e-6:
            import warnings
            warnings.warn("Can't compute specificity: one or both classes have no instances")
            return None
        return confusion_matrix.TN / actual_negatives
883
884
class PPV(CMScore):
    __doc__ = """Compute positive predictive value
    <http://en.wikipedia.org/wiki/Positive_predictive_value>_ (proportion of
    subjects with positive test results who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        # PPV = TP / (TP + FP); undefined when nothing was predicted positive.
        predicted_positives = confusion_matrix.TP + confusion_matrix.FP
        if predicted_positives < 1e-6:
            import warnings
            warnings.warn("Can't compute PPV: one or both classes have no instances")
            return None
        return confusion_matrix.TP / predicted_positives
898
899
class Precision(PPV):
    __doc__ = """Compute precision <http://en.wikipedia.org/wiki/Precision_and_recall>_
    (retrieved instances that are relevant).
    """ + CMScore.__doc__
    pass # Precision == PPV: both are TP / (TP + FP); compute() is inherited
905
906
class NPV(CMScore):
    __doc__ = """Compute negative predictive value
    <http://en.wikipedia.org/wiki/Negative_predictive_value>_ (proportion of
    subjects with a negative test result who are correctly diagnosed).
     """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        # NPV = TN / (TN + FN); undefined when nothing was predicted negative.
        predicted_negatives = confusion_matrix.FN + confusion_matrix.TN
        if predicted_negatives < 1e-6:
            import warnings
            warnings.warn("Can't compute NPV: one or both classes have no instances")
            return None
        return confusion_matrix.TN / predicted_negatives
920
921
class F1(CMScore):
    __doc__ = """Return F1 score
    <http://en.wikipedia.org/wiki/F1_score>_
    (harmonic mean of precision and recall).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        # Harmonic mean of precision and recall; undefined when either
        # component is undefined or when both are zero.
        precision_score = Precision.compute(confusion_matrix)
        recall_score = Recall.compute(confusion_matrix)
        if precision_score is None or recall_score is None or \
           precision_score + recall_score == 0:
            import warnings
            warnings.warn("Can't compute F1: P + R is zero or not defined")
            return None
        return 2. * precision_score * recall_score / \
            (precision_score + recall_score)
937
938
class Falpha(CMScore):
    __doc__ = """Compute the alpha-mean of precision and recall over the given confusion
    matrix.
    """ + CMScore.__doc__

    def __init__(self, test_results, alpha=1.):
        # alpha must be set before super().__init__, which triggers
        # compute() through __call__; alpha=1 yields the F1 score.
        self.alpha = alpha
        super(Falpha, self).__init__(test_results)

    def compute(self, confusion_matrix):
        """Return the alpha-weighted harmonic mean of precision and recall,
        or None (with a warning) when it is undefined."""
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        # Precision/Recall return None for degenerate matrices, and the
        # denominator vanishes when both scores are zero; the original
        # code raised TypeError/ZeroDivisionError in those cases.
        if p is None or r is None or self.alpha * p + r == 0:
            import warnings
            warnings.warn("Can't compute Falpha: precision or recall is zero or not defined")
            return None
        return (1. + self.alpha) * p * r / (self.alpha * p + r)
952
953
class MCC(CMScore):
    __doc__ = """Compute Matthew correlation coefficient
    <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>_
    (correlation coefficient between the observed and predicted binary
    classifications).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, cm):
        # code by Boris Gorelik
        true_pos, true_neg = cm.TP, cm.TN
        false_pos, false_neg = cm.FP, cm.FN
        try:
            numerator = true_pos * true_neg - false_pos * false_neg
            denominator = math.sqrt((true_pos + false_pos) *
                                    (true_pos + false_neg) *
                                    (true_neg + false_pos) *
                                    (true_neg + false_neg))
            return numerator / denominator
        except ZeroDivisionError:
            # The denominator vanishes whenever one of the four marginal
            # sums is zero, i.e. some row or column of the matrix is
            # empty; None is returned implicitly in that case.
            import warnings
            warnings.warn("Can't compute MCC: TP or TN is zero or not defined")
974
975
@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for nominal
    raters.

    @param confusion_matrix: confusion matrix, or list of confusion matrices.
                             To obtain non-binary confusion matrix, call
                             Orange.evaluation.scoring.confusion_matrices
                             and set the class_index parameter to -2.
    @param b_is_list_of_matrices: specifies whether confm is list of matrices.
                             This function needs to operate on non-binary
                             confusion matrices, which are represented by python
                             lists, therefore one needs a way to distinguish
                             between a single matrix and list of matrices
    """
    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False)
                    for cm in confusion_matrix]
        except TypeError:
            # Nevermind the parameter, maybe this is a "conventional" binary
            # confusion matrix and bIsListOfMatrices was specified by mistake.
            # (This fallback used to call a nonexistent scottsPiSingle and
            # raised NameError instead of recovering.)
            return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
    else:
        if isinstance(confusion_matrix, ConfusionMatrix):
            # Binary case: expand into an explicit 2x2 array.
            confusion_matrix = numpy.array([[confusion_matrix.TP,
                confusion_matrix.FN], [confusion_matrix.FP,
                confusion_matrix.TN]], dtype=float)
        else:
            confusion_matrix = numpy.array(confusion_matrix, dtype=float)

        marginalSumOfRows = numpy.sum(confusion_matrix, axis=0)
        marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1)
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                          (2. * numpy.sum(confusion_matrix))
        # In the eq. above, 2. is what the Wikipedia page calls
        # the number of annotators. Here we have two annotators:
        # the observed (true) labels (annotations) and the predicted by
        # the learners.

        prExpected = numpy.sum(jointProportion ** 2)
        prActual = numpy.sum(numpy.diag(confusion_matrix)) / \
                   numpy.sum(confusion_matrix)

        return (prActual - prExpected) / (1.0 - prExpected)
1026
# Backward compatibility: obsolete lowercase aliases for the scoring classes
sens = Sensitivity
spec = Specificity
precision = Precision
recall = Recall
1032
1033
1034
@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
@replace_discrete_probabilities_with_list(False)
def AUCWilcoxon(res, class_index= -1, ignore_weights=False, **argkw):
    """Compute the area under ROC (AUC) and its standard error using
    Wilcoxon's approach proposed by Hanley and McNeal (1982). If
    :obj:class_index is not specified, the first class is used as
    "the positive" and others are negative. The result is a list of
    tuples (aROC, standard error).

    If test results consist of multiple folds, you need to split them using
    :obj:split_by_iterations and perform this test on each fold separately.
    """
    useweights = res.weights and not ignore_weights
    # problists: one list per learner of (score, (neg_count, pos_count))
    # pairs; tots = (total_negatives, total_positives) -- inferred from the
    # prob[1][0]/prob[1][1] and tots[0]/tots[1] usage below.
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results = []

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:
        # W accumulates the Wilcoxon statistic (the AUC); Q1 and Q2 are
        # the auxiliary sums of the Hanley-McNeil standard-error formula.
        highPos, lowNeg = totPos, 0.
        W, Q1, Q2 = 0., 0., 0.
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            # Each negative here is "beaten" by all higher positives and
            # ties with half of the positives at the same score.
            W += thisNeg * (highPos + thisPos / 2.)
            Q2 += thisPos * (lowNeg ** 2 + lowNeg * thisNeg + thisNeg ** 2 / 3.)
            Q1 += thisNeg * (highPos ** 2 + highPos * thisPos + thisPos ** 2 / 3.)

            lowNeg += thisNeg

        # Normalize the raw pair counts into probabilities.
        W /= (totPos * totNeg)
        Q1 /= (totNeg * totPos ** 2)
        Q2 /= (totPos * totNeg ** 2)

        SE = math.sqrt((W * (1 - W) + (totPos - 1) * (Q1 - W ** 2) +
                       (totNeg - 1) * (Q2 - W ** 2)) / (totPos * totNeg))
        results.append((W, SE))
    return results

AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete
1077
1078
@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
@replace_discrete_probabilities_with_list(False)
def compare_2_AUCs(res, lrn1, lrn2, class_index= -1,
                   ignore_weights=False, **argkw):
    """Compare the AUCs of two learners on the given test results.

    Thin wrapper that delegates entirely to corn.compare2ROCs;
    lrn1 and lrn2 presumably identify the two learners within res --
    confirm against the corn module before relying on it.
    """
    return corn.compare2ROCs(res, lrn1, lrn2, class_index,
                             res.weights and not ignore_weights)

# for backward compatibility, compare_2_AROCs is obsolete
compare_2_AROCs = compare_2_AUCs
1089
1090
@deprecated_keywords({"classIndex": "class_index"})
@replace_discrete_probabilities_with_list(False)
def compute_ROC(res, class_index= -1):
    """Compute a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    total_pos, total_neg = tots[1], tots[0]
    curves = []

    for plist in problists:
        # Start at the top-right corner: everything predicted positive.
        curve = [(1., 1.)]
        tp, tn = total_pos, 0.
        fn, fp = 0., total_neg
        for prob in plist:
            n_pos, n_neg = prob[1][1], prob[1][0]
            # Positives at this score move from TP to FN ...
            tp -= n_pos
            fn += n_pos
            # ... while negatives move from FP to TN.
            tn += n_neg
            fp -= n_neg
            sensitivity = tp / (tp + fn)
            specificity = tn / (fp + tn)
            curve.append((1 - specificity, sensitivity))
        curves.append(curve)

    return curves
1121
1122## TC's implementation of algorithms, taken from:
1123## T Fawcett: ROC Graphs: Notes and Practical Considerations for
1124## Data Mining Researchers, submitted to KDD Journal.
def ROC_slope(point1, point2):
    """Return the slope of the segment between two ROC points.

    Each point is an (x, y, fscore) triple; the fscore is ignored.
    A vertical segment yields the huge finite value 1e300 instead of
    raising ZeroDivisionError.

    Tuple-parameter unpacking in the signature (removed by PEP 3113)
    was replaced by explicit unpacking; the call syntax is unchanged.
    """
    P1x, P1y, _ = point1
    P2x, P2y, _ = point2
    if P1x == P2x:
        return 1e300
    return (P1y - P2y) / (P1x - P2x)
1129
1130
1131@deprecated_keywords({"keepConcavities": "keep_concavities"})
1133    if keep_concavities:
1134        R.append(P)
1135    else:
1136        while True:
1137            if len(R) < 2:
1138                R.append(P)
1139                return R
1140            else:
1141                T = R.pop()
1142                T2 = R[-1]
1143                if ROC_slope(T2, T) > ROC_slope(T, P):
1144                    R.append(T)
1145                    R.append(P)
1146                    return R
1147    return R
1148
1149
@deprecated_keywords({"classIndex": "class_index",
                      "keepConcavities": "keep_concavities"})
@replace_discrete_probabilities_with_list(False)
def TC_compute_ROC(res, class_index= -1, keep_concavities=1):
    """Compute one ROC curve per learner with Fawcett's algorithm:
    walk the predictions in order of decreasing score and emit an
    (FPR, TPR, threshold) point whenever the score changes; concavities
    are optionally removed by ROC_add_point.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]

    for plist in problists:
        ## corn gives an increasing by scores list, we need a decreasing
        plist.reverse()
        TP = 0.
        FP = 0.
        curve = []
        fPrev = 10e300 # "infinity" score at 0., 0.
        # Guard: f was previously unbound after an empty plist, which made
        # the final ROC_add_point call below raise NameError.
        f = fPrev
        for prob in plist:
            f = prob[0]
            if f != fPrev:
                tpr = TP / P if P else 0.
                fpr = FP / N if N else 0.
                curve = ROC_add_point((fpr, tpr, fPrev), curve,
                                      keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        # Close the curve with the point for the lowest observed score.
        tpr = TP / P if P else 0.
        fpr = FP / N if N else 0.
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities)
        results.append(curve)

    return results
1195
1196## returns a list of points at the intersection of the tangential
1197## iso-performance line and the given ROC curve
1198## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    """Return the ROC-curve points closest to the tangential
    iso-performance line determined by the misclassification costs
    FPcost/FNcost and the positive-class prior pval."""
    slope = (FPcost * (1. - pval)) / (FNcost * pval)

    ## anchor the iso-performance line in point (0., 1.)
    x0, y0 = 0., 1.
    x1, y1 = 1., 1. + slope
    line_length = math.sqrt((x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0))
    cross_term = x0 * y1 - x1 * y0

    ## collect the curve points nearest to the line
    best_points = []
    best_dist = None
    for (px, py, pfscore) in curve:
        dist = abs(((y0 - y1) * px + (x1 - x0) * py + cross_term)
                   / line_length)
        if best_dist is None or dist < best_dist:
            best_dist = dist
            best_points = [(px, py, pfscore)]
        elif abs(dist - best_dist) <= 0.0001: ## close enough
            best_points.append((px, py, pfscore))
    return best_points
1222
def frange(start, end=None, inc=None):
    """A range function, that does accept float increments...

    With one argument, counts from 0. to start.  Unlike the builtin
    range, the end point is always included as the last element.
    A zero or missing inc defaults to 1.
    """
    if end is None:
        end = start + 0.
        start = 0.

    if inc is None or inc == 0:
        inc = 1.

    values = [start]
    while True:
        # Multiply instead of accumulating so float error does not grow;
        # 'step' replaces a local that shadowed the builtin next().
        step = start + len(values) * inc
        if inc > 0 and step >= end:
            values.append(end)
            break
        elif inc < 0 and step <= end:
            values.append(end)
            break
        values.append(step)

    return values
1245
1246## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1247## where ROCcurvesX is a set of ROC curves,
1248## where a (one) ROC curve is a set of (FP, TP) points
1249##
1250## for each (sub)set of input ROC curves
1251## returns the average ROC curve and an array of (vertical) standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_vertical_average_ROC(roc_curves, samples=10):
    """Vertically average each set of ROC curves in roc_curves.

    For every set, the TP rate is sampled at `samples` evenly spaced FP
    rates and averaged across the set's curves.  Returns (average, stdev):
    per set, a list of (FPsample, mean TP) points and a parallel list of
    vertical standard deviations.
    """
    # Linear interpolation of the TP value at X between two curve points.
    # The chained comparisons mean: P1x < X > P2x is "X above both",
    # P1x > X < P2x is "X below both" -- X must lie between distinct
    # P1x and P2x, otherwise interpolating is invalid.
    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
        if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x:
            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
        dx = float(P2x) - float(P1x)
        dy = float(P2y) - float(P1y)
        m = dy / dx
        return P1y + m * (X - P1x)

    # TP rate of curve ROC (npts points) at FP rate FPsample: take the
    # last point at or below FPsample and interpolate towards the next one.
    def TP_FOR_FP(FPsample, ROC, npts):
        i = 0
        while i < npts - 1:
            (fp, _, _) = ROC[i + 1]
            if fp <= FPsample:
                i += 1
            else:
                break
        (fp, tp, _) = ROC[i]
        if fp == FPsample:
            return tp
        elif fp < FPsample and i + 1 < len(ROC):
            return INTERPOLATE(ROC[i], ROC[i + 1], FPsample)
        elif fp < FPsample and i + 1 == len(ROC): # return the last
            return ROC[i][1]
        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
        #return 0.

    average = []
    stdev = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        TPavg = []
        TPstd = []
        for FPsample in frange(0., 1., 1. / samples):
            # Sample every curve of the set at this FP rate.
            TPsum = []
            for i in range(nrocs):
                ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
                TPsum.append(TP_FOR_FP(FPsample, ROCS[i], npts[i]))
            TPavg.append((FPsample, statc.mean(TPsum)))
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.
            TPstd.append(stdv)

        average.append(TPavg)
        stdev.append(TPstd)

    return average, stdev
1306
1307## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1308## where ROCcurvesX is a set of ROC curves,
1309## where a (one) ROC curve is a set of (FP, TP) points
1310##
1311## for each (sub)set of input ROC curves
1312## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_threshold_average_ROC(roc_curves, samples=10):
    """Threshold-average each set of ROC curves in roc_curves.

    For every set, `samples` thresholds are drawn from the pooled,
    decreasing list of all scores, and each curve contributes the point
    active at that threshold.  Returns (average, stdevV, stdevH): per set,
    a list of mean (FP, TP) points plus the vertical and horizontal
    standard deviations at each sampled threshold.
    """
    # First point of ROC whose score is at or below thresh (points are
    # ordered by decreasing score); falls back to the last point.
    def POINT_AT_THRESH(ROC, npts, thresh):
        i = 0
        while i < npts - 1:
            (px, py, pfscore) = ROC[i]
            if pfscore > thresh:
                i += 1
            else:
                break
        return ROC[i]

    average = []
    stdevV = []
    stdevH = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        # Pool all scores of the set's curves (duplicates included).
        T = []
        for c in ROCS:
            for (px, py, pfscore) in c:
##                try:
##                    T.index(pfscore)
##                except:
                T.append(pfscore)
        T.sort()
        T.reverse() ## ugly

        TPavg = []
        TPstdV = []
        TPstdH = []
        for tidx in frange(0, (len(T) - 1.), float(len(T)) / samples):
            FPsum = []
            TPsum = []
            for i in range(nrocs):
                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
                FPsum.append(fp)
                TPsum.append(tp)
            TPavg.append((statc.mean(FPsum), statc.mean(TPsum)))
            ## vertical standard deviation
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.
            TPstdV.append(stdv)
            ## horizontal standard deviation
            if len(FPsum) > 1:
                stdh = statc.std(FPsum)
            else:
                stdh = 0.
            TPstdH.append(stdh)

        average.append(TPavg)
        stdevV.append(TPstdV)
        stdevH.append(TPstdH)

    return average, stdevV, stdevH
1373
1374## Calibration Curve
1375## returns an array of (curve, yesClassPredictions, noClassPredictions)
1376## elements, where:
1377##  - curve is an array of points (x, y) on the calibration curve
1378##  - yesClassRugPoints is an array of (x, 1) points
1379##  - noClassRugPoints is an array of (x, 0) points
1380@deprecated_keywords({"classIndex": "class_index"})
1381@replace_discrete_probabilities_with_list(False)
1382def compute_calibration_curve(res, class_index= -1):
1383    ## merge multiple iterations into one
1384    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1385        res.class_values, res.weights, classifiers=res.classifiers,
1387    for te in res.results:
1388        mres.results.append(te)
1389
1390    problists, tots = corn.computeROCCumulative(mres, class_index)
1391
1392    results = []
1393
1394    bins = 10 ## divide interval between 0. and 1. into N bins
1395
1396    for plist in problists:
1397        yesClassRugPoints = []
1398        noClassRugPoints = []
1399
1400        yesBinsVals = [0] * bins
1401        noBinsVals = [0] * bins
1402        for (f, (thisNeg, thisPos)) in plist:
1403            yesClassRugPoints.append((f, thisPos)) # 1.
1404            noClassRugPoints.append((f, thisNeg)) # 1.
1405
1406            index = int(f * bins)
1407            index = min(index, bins - 1) ## just in case for value 1.
1408            yesBinsVals[index] += thisPos
1409            noBinsVals[index] += thisNeg
1410
1411        curve = []
1412        for cn in range(bins):
1413            f = float(cn * 1. / bins) + (1. / 2. / bins)
1414            yesVal = yesBinsVals[cn]
1415            noVal = noBinsVals[cn]
1416            allVal = yesVal + noVal
1417            if allVal == 0.: continue
1418            y = float(yesVal) / float(allVal)
1419            curve.append((f, y))
1420
1421        ## smooth the curve
1422        maxnPoints = 100
1423        if len(curve) >= 3:
1424#            loessCurve = statc.loess(curve, -3, 0.6)
1425            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
1426        else:
1427            loessCurve = curve
1428        clen = len(loessCurve)
1429        if clen > maxnPoints:
1430            df = clen / maxnPoints
1431            if df < 1: df = 1
1432            curve = [loessCurve[i]  for i in range(0, clen, df)]
1433        else:
1434            curve = loessCurve
1435        ## remove the third value (variance of epsilon?) that suddenly
1436        ## appeared in the output of the statc.loess function
1437        curve = [c[:2] for c in curve]
1438        results.append((curve, yesClassRugPoints, noClassRugPoints))
1439
1440    return results
1441
1442
1443## Lift Curve
1444## returns an array of curve elements, where:
1445##  - curve is an array of points ((TP + FP) / (P + N), TP / P, (th, FP / N))
1446##    on the Lift Curve
1447@deprecated_keywords({"classIndex": "class_index"})
1448@replace_discrete_probabilities_with_list(False)
1449def compute_lift_curve(res, class_index= -1):
1450    ## merge multiple iterations into one
1451    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1452        res.class_values, res.weights, classifiers=res.classifiers,
1454    for te in res.results:
1455        mres.results.append(te)
1456
1457    problists, tots = corn.computeROCCumulative(mres, class_index)
1458
1459    results = []
1460    P, N = tots[1], tots[0]
1461    for plist in problists:
1462        ## corn gives an increasing by scores list, we need a decreasing
1463        plist.reverse()
1464        TP = 0.
1465        FP = 0.
1466        curve = [(0., 0., (10e300, 0.))]
1467        for (f, (thisNeg, thisPos)) in plist:
1468            TP += thisPos
1469            FP += thisNeg
1470            curve.append(((TP + FP) / (P + N), TP, (f, FP / (N or 1))))
1471        results.append(curve)
1472
1473    return P, N, results
1474
1475
class CDT:
    """Store the number of concordant (C), discordant (D) and tied (T) pairs."""
    def __init__(self, C=0., D=0., T=0.):
        # All three counters default to zero so empty instances can be
        # accumulated into fold by fold.
        self.C = C
        self.D = D
        self.T = T
1480
def is_CDT_empty(cdt):
    """True when the CDT holds (numerically) no pairs at all."""
    pair_count = cdt.C + cdt.D + cdt.T
    return pair_count < 1e-20
1483
1484
@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
@replace_discrete_probabilities_with_list(False)
def compute_CDT(res, class_index= -1, ignore_weights=False, **argkw):
    """Obsolete, don't use."""
    if class_index < 0:
        if res.base_class >= 0:
            class_index = res.base_class
        else:
            class_index = 1

    useweights = res.weights and not ignore_weights
    # Retained for backward compatibility; currently unused.
    weightByClasses = argkw.get("weightByClasses", True)

    if res.number_of_iterations > 1:
        CDTs = [CDT() for _ in range(res.number_of_learners)]
        iterationExperiments = split_by_iterations(res)
        for exp in iterationExperiments:
            expCDTs = corn.computeCDT(exp, class_index, useweights)
            for i in range(len(CDTs)):
                CDTs[i].C += expCDTs[i].C
                CDTs[i].D += expCDTs[i].D
                CDTs[i].T += expCDTs[i].T
        # If any learner's fold-aggregated counts are empty, fall back to
        # computing over the merged results.  (Bug fix: this loop used to
        # test CDTs[0] on every pass instead of CDTs[i].)
        for i in range(res.number_of_learners):
            if is_CDT_empty(CDTs[i]):
                return corn.computeCDT(res, class_index, useweights)

        return CDTs
    else:
        return corn.computeCDT(res, class_index, useweights)
1515
1516class AUC(list):
1517    """
1518    Compute the area under ROC curve given a set of experimental results.
1519    If testing consisted of multiple folds, each fold is scored and the
1520    average score is returned. If a fold contains only instances with the
1521    same class value, folds will be merged.
1522
1523    :param test_results: test results to score
1524    :param ignore_weights: ignore instance weights when calculating score
1525    :param multiclass: tells what kind of averaging to perform if the target
1526                       class has more than 2 values.
1527    """
1528
1529    #!Compute AUC for each pair of classes (ignoring instances of all other
1530    #!classes) and average the results, weighting them by the number of
1531    #!pairs of instances from these two classes (e.g. by the product of
1532    #!probabilities of the two classes). AUC computed in this way still
1533    #!behaves as the concordance index, e.g., gives the probability that two
1534    #!randomly chosen instances from different classes will be correctly
1535    #!recognized (if the classifier knows from which two classes the
1536    #!instances came).
1537    ByWeightedPairs = 0
1538
1539    #!Similar to ByWeightedPairs, except that the average over class pairs
1540    #!is not weighted. This AUC is, like the binary version, independent of
1541    #!class distributions, but it is not related to the concordance index
1542    #!any more.
1543    ByPairs = 1
1544
1545    #!For each class, it computes AUC for this class against all others (that
1546    #!is, treating other classes as one class). The AUCs are then averaged by
1547    #!the class probabilities. This is related to the concordance index in
1548    #!which we test the classifier's (average) capability of distinguishing
1549    #!the instances from a specified class from those that come from other
1550    #!classes.
1551    #!Unlike the binary AUC, the measure is not independent of class
1552    #!distributions.
1553    WeightedOneAgainstAll = 2
1554
1555    #!Similar to weighted_one_against_all, except that the average
1556    #!is not weighted.
1557    OneAgainstAll = 3
1558
    @replace_use_weights
    @deprecated_keywords({"method": "multiclass"})
    def __init__(self, test_results=None, multiclass=ByWeightedPairs, ignore_weights=False):
        # multiclass selects the averaging strategy (ByWeightedPairs,
        # ByPairs, WeightedOneAgainstAll or OneAgainstAll) applied when
        # the class variable has more than two values.

        super(AUC, self).__init__()

        self.ignore_weights = ignore_weights
        self.method = multiclass

        # With test_results, score immediately and fill this list; an
        # AUC() with no arguments can be applied to results later.
        if test_results is not None:
            self[:] = self.__call__(test_results)
1570
1571    @replace_discrete_probabilities_with_list(method=True)
1572    def __call__(self, test_results):
1573        if len(test_results.class_values) < 2:
1574            raise ValueError("Cannot compute AUC on a single-class problem")
1575        elif len(test_results.class_values) == 2:
1576            return self._compute_for_binary_class(test_results)
1577        else:
1578            return self._compute_for_multi_value_class(test_results, self.method)
1579
    def _compute_for_binary_class(self, res):
        """AUC for binary classification problems"""
        # With multiple folds, score each fold and aggregate; -1 is passed
        # as the class index -- presumably selecting the default target
        # class (confirm in _compute_one_class_against_all).
        if res.number_of_iterations > 1:
            return self._compute_for_multiple_folds(
                self._compute_one_class_against_all,
                split_by_iterations(res),
                (-1, res, res.number_of_iterations))
        else:
            # Single iteration: the helper returns (score, ...); keep only
            # the score itself.
            return self._compute_one_class_against_all(res, -1)[0]
1589
1590    def _compute_for_multi_value_class(self, res, method=0):
1591        """AUC for multiclass classification problems"""
1592        numberOfClasses = len(res.class_values)
1593
1594        if res.number_of_iterations > 1:
1595            iterations = split_by_iterations(res)
1596            all_ite = res
1597        else:
1598            iterations = [res]
1599            all_ite = None
1600
1601        # by pairs
1602        sum_aucs = [0.] * res.number_of_learners
1603        usefulClassPairs = 0.
1604
1605        prob = None
1606        if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]:
1607            prob = class_probabilities_from_res(res)
1608
1609        if method in [self.ByWeightedPairs, self.ByPairs]:
1610            for classIndex1 in range(numberOfClasses):
1611                for classIndex2 in range(classIndex1):
1612                    subsum_aucs = self._compute_for_multiple_folds(
1613                        self._compute_one_class_against_another, iterations,
1614                        (classIndex1, classIndex2, all_ite,
1615                        res.number_of_iterations))
1616                    if subsum_aucs:
1617                        if method == self.ByWeightedPairs:
1618                            p_ij = prob[classIndex1] * prob[classIndex2]
1619                            subsum_aucs = [x * p_ij  for x in subsum_aucs]
1620                            usefulClassPairs += p_ij
1621                        else:
1622                            usefulClassPairs += 1
1623                        sum_aucs = map(add, sum_aucs, subsum_aucs)
1624        else:
1625            for classIndex in range(numberOfClasses):
1626                subsum_aucs = self._compute_for_multiple_folds(
1627                    self._compute_one_class_against_all,
1628                    iterations, (classIndex, all_ite,
1629                                 res.number_of_iterations))
1630                if subsum_aucs:
1631                    if method == self.ByWeightedPairs:
1632                        p_i = prob[classIndex]
1633                        subsum_aucs = [x * p_i  for x in subsum_aucs]
1634                        usefulClassPairs += p_i
1635                    else:
1636                        usefulClassPairs += 1
1637                    sum_aucs = map(add, sum_aucs, subsum_aucs)
1638
1639        if usefulClassPairs > 0:
1640            sum_aucs = [x / usefulClassPairs for x in sum_aucs]
1641
1642        return sum_aucs
1643
    # computes the average AUC over folds using "AUCcomputer" (AUC_i or AUC_ij)
    # it returns the sum of what is returned by the computer,
    # unless at a certain fold the computer has to resort to computing
    # over all folds or even this failed;
    # in these cases the result is returned immediately
    def _compute_for_multiple_folds(self, auc_computer, iterations,
                                 computer_args):
        """Compute the average AUC over folds using :obj:auc_computer.

        Returns the element-wise sum of the per-learner AUCs over all folds
        (the computer is expected to divide by the fold count itself), or
        None (with a warning) when some fold has instances of one class
        only.  When a fold forces the computer to fall back to the entire
        test set (second returned flag is False), self is overwritten with
        that result and it is returned immediately.
        """
        subsum_aucs = [0.] * iterations[0].number_of_learners
        for ite in iterations:
            aucs, foldsUsed = auc_computer(*(ite,) + computer_args)
            if not aucs:
                import warnings
                warnings.warn("AUC cannot be computed (all instances belong to the same class).")
                return
            if not foldsUsed:
                # Fallback result already covers all folds; store and stop.
                self[:] = aucs
                return aucs
            subsum_aucs = map(add, subsum_aucs, aucs)
        return subsum_aucs
1664
    # Computes AUC
    # in multivalued class problem, AUC is computed as one against all
    # results over folds are averages
    # if some folds examples from one class only, the folds are merged
    def _compute_for_single_class(self, res, class_index):
        """AUC of class `class_index` against all other classes combined.

        NOTE(review): unlike _compute_for_binary_class, the single-iteration
        branch returns the raw (aucs, folds_used) pair from
        _compute_one_class_against_all instead of just the AUC list --
        confirm whether callers rely on this shape.
        """
        if res.number_of_iterations > 1:
            return self._compute_for_multiple_folds(
                self._compute_one_class_against_all, split_by_iterations(res),
                (class_index, res, res.number_of_iterations))
        else:
            return self._compute_one_class_against_all(res, class_index)
1676
    # Computes AUC for a pair of classes (as if there were no other classes)
    # results over folds are averages
    # if some folds have examples from one class only, the folds are merged
    def _compute_for_pair_of_classes(self, res, class_index1, class_index2):
        """AUC between two classes, ignoring instances of all other classes.

        NOTE(review): the single-iteration branch returns the raw
        (aucs, folds_used) pair rather than just the AUC list -- confirm
        whether callers rely on this shape.
        """
        if res.number_of_iterations > 1:
            return self._compute_for_multiple_folds(
                self._compute_one_class_against_another,
                split_by_iterations(res),
                (class_index1, class_index2, res, res.number_of_iterations))
        else:
            return self._compute_one_class_against_another(res, class_index1,
                                                    class_index2)
1689
1690    # computes AUC between class i and the other classes
1691    # (treating them as the same class)
1692    @deprecated_keywords({"classIndex": "class_index",
1693                          "divideByIfIte": "divide_by_if_ite"})
1694    def _compute_one_class_against_all(self, ite, class_index, all_ite=None,
1695                                      divide_by_if_ite=1.0):
1696        """Compute AUC between class i and all the other classes)"""
1697        return self._compute_auc(corn.computeCDT, ite, all_ite,
1698            divide_by_if_ite, (class_index, not self.ignore_weights))
1699
1700
1701    # computes AUC between classes i and j as if there are no other classes
1702    def _compute_one_class_against_another(
1703        self, ite, class_index1, class_index2,
1704        all_ite=None, divide_by_if_ite=1.):
1705        """
1706        Compute AUC between classes i and j as if there are no other classes.
1707        """
1708        return self._compute_auc(corn.computeCDTPair, ite,
1709            all_ite, divide_by_if_ite,
1710            (class_index1, class_index2, not self.ignore_weights))
1711
1712    # computes AUC using a specified 'cdtComputer' function
1713    # It tries to compute AUCs from 'ite' (examples from a single iteration)
1714    # and, if C+D+T=0, from 'all_ite' (entire test set). In the former case,
1715    # the AUCs are divided by 'divideByIfIte'.
1716    # Additional flag is returned which is True in the former case,
1717    # or False in the latter.
1718    @deprecated_keywords({"cdt_computer": "cdtComputer",
1719                          "divideByIfIte": "divide_by_if_ite",
1720                          "computerArgs": "computer_args"})
1721    def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite,
1722                     computer_args):
1723        """
1724        Compute AUC using a :obj:cdt_computer.
1725        """
1726        cdts = cdt_computer(*(ite,) + computer_args)
1727        if not is_CDT_empty(cdts[0]):
1728            return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T) /
1729                    divide_by_if_ite for cdt in cdts], True
1730
1731        if all_ite:
1732            cdts = cdt_computer(*(all_ite,) + computer_args)
1733            if not is_CDT_empty(cdts[0]):
1734                return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T)
1735                        for cdt in cdts], False
1736
1737        return False, False
1738
class AUC_for_single_class(AUC):
    """
    Compute AUC where the class with the given class_index is singled
    out and all other classes are treated as a single class.
    """
    def __init__(self, test_results=None, class_index= -1, ignore_weights=False):
        # A negative index means "pick a sensible default": the base class
        # of the results when available, otherwise class 1.
        if class_index >= 0:
            self.class_index = class_index
        elif test_results and test_results.base_class >= 0:
            self.class_index = test_results.base_class
        else:
            self.class_index = 1

        super(AUC_for_single_class, self).__init__(
            test_results, ignore_weights=ignore_weights)

    @replace_discrete_probabilities_with_list(method=True)
    def __call__(self, test_results):
        """Return the one-against-all AUC for the stored class index."""
        return self._compute_for_single_class(test_results, self.class_index)
1758
1759
class AUC_for_pair_of_classes(AUC):
    """
    Computes AUC between a pair of classes, ignoring instances from all
    other classes.
    """
    def __init__(self, test_results, class_index1, class_index2, ignore_weights=False):
        # Remember the pair before the base constructor triggers __call__.
        self.class_index1 = class_index1
        self.class_index2 = class_index2
        super(AUC_for_pair_of_classes, self).__init__(
            test_results, ignore_weights=ignore_weights)

    @replace_discrete_probabilities_with_list(method=True)
    def __call__(self, test_results):
        """Return the AUC between the two stored classes."""
        return self._compute_for_pair_of_classes(
            test_results, self.class_index1, self.class_index2)
1774
1775
class AUC_matrix(AUC):
    """
    Compute a (lower diagonal) matrix with AUCs for all pairs of classes.
    If there are empty classes, the corresponding elements in the matrix
    are -1.
    """

    @replace_discrete_probabilities_with_list(method=True)
    def __call__(self, test_results):
        n_classes = len(test_results.class_values)
        n_learners = test_results.number_of_learners

        if test_results.number_of_iterations > 1:
            iterations = split_by_iterations(test_results)
            all_ite = test_results
        else:
            iterations = [test_results]
            all_ite = None

        # aucs[learner][class1] holds AUCs against classes 0..class1-1.
        aucs = [[[] for _ in range(n_classes)] for _ in range(n_learners)]
        for c1 in range(n_classes):
            for c2 in range(c1):
                pair_aucs = self._compute_for_multiple_folds(
                    self._compute_one_class_against_another, iterations,
                    (c1, c2, all_ite, test_results.number_of_iterations))
                for lrn in range(n_learners):
                    aucs[lrn][c1].append(pair_aucs[lrn] if pair_aucs else -1)
        return aucs
1806
#Backward compatibility
@replace_use_weights
@replace_discrete_probabilities_with_list(False)
def AUC_binary(res, ignore_weights=False):
    """Deprecated: compute binary AUC; use the AUC class instead."""
    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
    # BUG FIX: the computed scores used to be discarded, so an empty list
    # was returned; store them in the returned AUC list.
    auc[:] = auc._compute_for_binary_class(res)
    return auc
1814
@replace_use_weights
@replace_discrete_probabilities_with_list(False)
def AUC_multi(res, ignore_weights=False, method=0):
    """Deprecated: compute multiclass AUC; use the AUC class instead."""
    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights,
        method=method)
    # BUG FIX: the computed scores used to be discarded and the requested
    # `method` was ignored (the default was used); store the scores and
    # honour the method.
    auc[:] = auc._compute_for_multi_value_class(res, method)
    return auc
1822
1823
@deprecated_keywords({"AUCcomputer": "auc_computer",
                      "computerArgs": "computer_args"})
def AUC_iterations(auc_computer, iterations, computer_args):
    """Deprecated: average AUC over iterations; use the AUC class instead."""
    auc = deprecated_function_name(AUC)()
    # BUG FIX: the computed scores used to be discarded (only the
    # whole-test-set fallback path stored them); store them when available.
    result = auc._compute_for_multiple_folds(auc_computer, iterations,
                                             computer_args)
    if result is not None:
        auc[:] = result
    return auc
1830
def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
    """Deprecated helper: compute AUC with an explicit CDT computer."""
    helper = deprecated_function_name(AUC)()
    return helper._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite,
                               computer_args)
1836
@replace_use_weights
def AUC_i(ite, class_index, ignore_weights=False, all_ite=None,
          divide_by_if_ite=1.):
    """Deprecated: AUC of one class against all the others."""
    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
    # BUG FIX: this used to call _compute_one_class_against_another with a
    # missing second class index (a guaranteed TypeError); AUC_i is the
    # one-against-all variant.
    return auc._compute_one_class_against_all(
        ite, class_index, all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1844
1845
@replace_use_weights
def AUC_ij(ite, class_index1, class_index2, ignore_weights=False,
           all_ite=None, divide_by_if_ite=1.):
    """Deprecated: AUC between two classes, other classes ignored."""
    helper = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
    return helper._compute_one_class_against_another(
        ite, class_index1, class_index2,
        all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1853
# Deprecated aliases kept for backward compatibility: they accept the old
# camelCase keyword arguments and the old useWeights flag.
AUC_single = replace_use_weights(
             deprecated_keywords({"classIndex": "class_index"})(
             deprecated_function_name(AUC_for_single_class)))
AUC_pair = replace_use_weights(
           deprecated_keywords({"classIndex1": "class_index1",
                                "classIndex2": "class_index2"})(
           deprecated_function_name(AUC_for_pair_of_classes)))
# Rebind AUC_matrix so it also accepts the deprecated useWeights keyword.
AUC_matrix = replace_use_weights(AUC_matrix)
1862
1863
@deprecated_keywords({"unweighted": "ignore_weights"})
@replace_discrete_probabilities_with_list(False)
def McNemar(res, ignore_weights=False, **argkw):
    """
    Compute a triangular matrix with McNemar statistics for each pair of
    classifiers. The statistics is distributed by chi-square distribution with
    one degree of freedom; critical value for 5% significance is around 3.84.
    """
    nLearners = res.number_of_learners
    # mcm[l1][l2] (l1 < l2) counts instances where l1 is right and l2 wrong;
    # mcm[l2][l1] counts the opposite.
    mcm = [[0.] * nLearners for _ in range(nLearners)]

    # The weighted and unweighted loops were identical except for the
    # increment; fold them into one loop with a per-instance weight.
    use_weights = res.weights and not ignore_weights
    for i in res.results:
        actual = i.actual_class
        classes = i.classes
        w = i.weight if use_weights else 1.
        for l1 in range(nLearners):
            for l2 in range(l1, nLearners):
                if classes[l1] == actual:
                    if classes[l2] != actual:
                        mcm[l1][l2] += w
                elif classes[l2] == actual:
                    mcm[l2][l1] += w

    # Turn the counts into the (continuity-corrected) McNemar statistic,
    # stored in the lower triangle.
    for l1 in range(nLearners):
        for l2 in range(l1, nLearners):
            su = mcm[l1][l2] + mcm[l2][l1]
            if su:
                mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1) ** 2 / su
            else:
                mcm[l2][l1] = 0

    # Keep only the lower triangle.
    for l1 in range(nLearners):
        mcm[l1] = mcm[l1][:l1]

    return mcm
1912
@replace_discrete_probabilities_with_list(False)
def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False):
    """
    McNemar_of_two computes a McNemar statistics for a pair of classifier,
    specified by indices learner1 and learner2.
    """
    # BUG FIX: the two branches were inverted relative to McNemar -- the
    # unweighted branch added i.weight and the weighted branch added 1.
    # Use the weight only when weights exist and are not ignored.
    use_weights = res.weights and not ignore_weights
    tf = ft = 0.
    for i in res.results:
        actual = i.actual_class
        w = i.weight if use_weights else 1.
        if i.classes[lrn1] == actual:
            if i.classes[lrn2] != actual:
                tf += w          # lrn1 right, lrn2 wrong
        elif i.classes[lrn2] == actual:
            ft += w              # lrn1 wrong, lrn2 right

    su = tf + ft
    if su:
        # Continuity-corrected McNemar statistic.
        return (abs(tf - ft) - 1) ** 2 / su
    else:
        return 0
1942
@replace_discrete_probabilities_with_list(False)
def Friedman(res, stat=CA):
    """
    Compare classifiers with the Friedman test, treating folds as different
    examples. Returns F, p and average ranks.
    """
    per_fold = [stat(r) for r in split_by_iterations(res)]

    N = len(per_fold)
    k = len(per_fold[0])
    sums = [0.] * k
    for scores in per_fold:
        ranks = [k - x + 1 for x in statc.rankdata(scores)]
        if stat == Brier_score: # reverse ranks for Brier_score (lower better)
            ranks = [k + 1 - x for x in ranks]
        sums = [s + r for s, r in zip(sums, ranks)]

    T = sum(x * x for x in sums)
    sums = [x / N for x in sums]

    F = 12. / (N * k * (k + 1)) * T - 3 * N * (k + 1)

    return F, statc.chisqprob(F, k - 1), sums
1967
@replace_discrete_probabilities_with_list(False)
def Wilcoxon_pairs(res, avgranks, stat=CA):
    """
    Return a triangular matrix, where element[i][j] stores significance of
    difference between the i-th and the j-th classifier, as computed by the
    Wilcoxon test. The element is positive if the i-th is better than the j-th,
    negative if it is worse, and 1 if they are equal.
    Arguments are ExperimentResults, average ranks (as returned by Friedman)
    and, optionally, a statistics; greater values should mean better results.
    """
    scores = [stat(r) for r in split_by_iterations(res)]
    n_methods = len(scores[0])

    bt = []
    for m1 in range(n_methods):
        col1 = [fold[m1] for fold in scores]
        row = []
        for m2 in range(m1 + 1, n_methods):
            t, p = statc.wilcoxont(col1, [fold[m2] for fold in scores])
            # Sign of the entry encodes which method ranks better.
            if avgranks[m1] < avgranks[m2]:
                row.append(p)
            elif avgranks[m2] < avgranks[m1]:
                row.append(-p)
            else:
                row.append(1)
        bt.append(row)
    return bt
1995
1996
@deprecated_keywords({"allResults": "all_results",
                      "noConfidence": "no_confidence"})
def plot_learning_curve_learners(file, all_results, proportions, learners,
                                 no_confidence=0):
    """Like plot_learning_curve, with legend entries from learner names."""
    names = [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
             for i in range(len(learners))]
    plot_learning_curve(file, all_results, proportions, names, no_confidence)
2004
2005
@deprecated_keywords({"allResults": "all_results",
                      "noConfidence": "no_confidence"})
def plot_learning_curve(file, all_results, proportions, legend,
                        no_confidence=0):
    """Write a gnuplot script plotting CA learning curves.

    `file` may be a file name (opened and closed here) or an open
    file-like object.  `all_results` holds one ExperimentResults per
    proportion; `legend` names one curve per learner.  Unless
    `no_confidence` is set, 1.96*SE error bars are emitted as well.
    """
    import types
    fopened = 0
    if type(file) == types.StringType:
        file = open(file, "wt")
        fopened = 1

    file.write("set yrange [0:1]\n")
    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    # CAs[p][i] is (CA, SE) of learner i at proportion p.
    CAs = [CA(x, report_se=True) for x in all_results]

    # Plot command: one (optional) error-bar series plus one line series
    # per learner; the last one ends without a trailing comma.
    file.write("plot \\\n")
    for i in range(len(legend) - 1):
        if not no_confidence:
            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i + 1))
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
    if not no_confidence:
        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))

    # Inline data blocks, each terminated by 'e', in the same order as the
    # series above.
    for i in range(len(legend)):
        if not no_confidence:
            for p in range(len(proportions)):
                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96 * CAs[p][i][1]))
            file.write("e\n\n")

        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
        file.write("e\n\n")

    if fopened:
        file.close()
2042
2043
def print_single_ROC_curve_coordinates(file, curve):
    """Write the coordinates of one ROC curve, one tab-separated pair per line.

    `file` may be a file name (opened and closed here) or an open
    file-like object.
    """
    import types
    opened_here = 0
    if type(file) == types.StringType:
        file = open(file, "wt")
        opened_here = 1

    for coord in curve:
        file.write("%5.3f\t%5.3f\n" % tuple(coord))

    if opened_here:
        file.close()
2056
2057
def plot_ROC_learners(file, curves, learners):
    """Like plot_ROC, deriving legend entries from the learners' names."""
    names = [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
             for i in range(len(learners))]
    plot_ROC(file, curves, names)
2060
def plot_ROC(file, curves, legend):
    """Write a gnuplot script that draws the given ROC curves.

    `file` may be a file name (opened and closed here) or an open
    file-like object; `legend` names one curve per entry, and an untitled
    diagonal reference line is appended.
    """
    import types
    fopened = 0
    if type(file) == types.StringType:
        file = open(file, "wt")
        fopened = 1

    file.write("set yrange [0:1]\n")
    file.write("set xrange [0:1]\n")
    file.write("set multiplot\n\n")

    file.write("plot \\\n")
    for leg in legend:
        file.write("'-' title '%s' with lines,\\\n" % leg)
    file.write("'-' title '' with lines\n")

    # Inline data: one block per curve, each terminated by 'e'.
    for curve in curves:
        for coord in curve:
            file.write("%5.3f\t%5.3f\n" % tuple(coord))
        file.write("e\n\n")

    # Diagonal reference line for the untitled series.
    # BUG FIX: the 'e' terminator used to be fused onto the last data line
    # ("0.0\t0.0e"); gnuplot requires it on a line of its own.
    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")

    if fopened:
        file.close()
2086
2087
@deprecated_keywords({"allResults": "all_results"})
def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference= -1):
    """Like plot_McNemar_curve, with legend entries from learner names."""
    names = [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
             for i in range(len(learners))]
    plot_McNemar_curve(file, all_results, proportions, names, reference)
2091
2092
@deprecated_keywords({"allResults": "all_results"})
def plot_McNemar_curve(file, all_results, proportions, legend, reference= -1):
    """Write a gnuplot script of McNemar statistics vs. proportion.

    Each non-reference learner is compared against the learner at index
    `reference` (default: the last one) with McNemar_of_two at every
    proportion.  `file` may be a file name or an open file-like object.
    """
    if reference < 0:
        reference = len(legend) - 1

    import types
    fopened = 0
    if type(file) == types.StringType:
        file = open(file, "wt")
        fopened = 1

    #file.write("set yrange [0:1]\n")
    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    file.write("plot \\\n")
    # Indices of all learners except the reference one.
    tmap = range(reference) + range(reference + 1, len(legend))
    for i in tmap[:-1]:
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
    # NOTE(review): the series above use pointtype i + 1 while the last one
    # uses tmap[-1] -- looks like an off-by-one; confirm intended.
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1]))
    file.write("\n")

    for i in tmap:
        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
        file.write("e\n\n")

    if fopened:
        file.close()
2121
# Default PiCTeX marker and line styles used by the *_to_PiCTeX helpers.
# NOTE(review): range(97, 122) yields character codes 'a'..'y', so 'z' is
# excluded -- confirm whether that is intentional.
default_point_types = ("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}") + tuple([chr(x) for x in range(97, 122)])
default_line_types = ("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
2124
@deprecated_keywords({"allResults": "all_results"})
def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
    """Delegate to learning_curve_to_PiCTeX, forwarding all options."""
    return learning_curve_to_PiCTeX(file, all_results, proportions, **options)
2128
2129
@deprecated_keywords({"allResults": "all_results"})
def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
    """Write CA learning curves as PiCTeX markup.

    `file` may be a file name (opened and closed here) or an open
    file-like object.  Recognized options: graphsize (cm), yshift,
    pointtypes, linetypes, numberedx, default.

    BUG FIX: most file.write calls below were missing the opening quote of
    their string literal (a syntax error); the quotes are restored.  The
    unconditional second file.close() (which also closed a caller-supplied
    handle) and the pointless `del file` were removed as well.
    """
    import types
    fopened = 0
    if type(file) == types.StringType:
        file = open(file, "wt")
        fopened = 1

    nexamples = len(all_results[0].results)
    CAs = [CA(x, report_se=True) for x in all_results]

    graphsize = float(options.get("graphsize", 10.0)) #cm
    difprop = proportions[-1] - proportions[0]
    ntestexamples = nexamples * proportions[-1]
    xunit = graphsize / ntestexamples

    yshift = float(options.get("yshift", -ntestexamples / 20.))

    pointtypes = options.get("pointtypes", default_point_types)
    linetypes = options.get("linetypes", default_line_types)

    # numberedx selects which x positions get numbered labels: a positive
    # int picks that many proportions, a negative int picks the nearest
    # proportions to an even grid, a list of floats is scaled directly.
    if options.has_key("numberedx"):
        numberedx = options["numberedx"]
        if type(numberedx) == types.IntType:
            if numberedx > 0:
                numberedx = [nexamples * proportions[int(i / float(numberedx) * len(proportions))] for i in range(numberedx)] + [proportions[-1] * nexamples]
            elif numberedx < 0:
                numberedx = -numberedx
                newn = []
                for i in range(numberedx + 1):
                    wanted = proportions[0] + float(i) / numberedx * difprop
                    best = (10, 0)
                    for t in proportions:
                        td = abs(wanted - t)
                        if td < best[0]:
                            best = (td, t)
                    if not best[1] in newn:
                        newn.append(best[1])
                newn.sort()
                numberedx = [nexamples * x for x in newn]
        elif type(numberedx[0]) == types.FloatType:
            numberedx = [nexamples * x for x in numberedx]
    else:
        numberedx = [nexamples * x for x in proportions]

    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
    file.write("\\axis bottom invisible\n")# label {#examples}\n")
    file.write("      ticks short at %s /\n" % reduce(lambda x, y:x + " " + y, ["%i" % (x * nexamples + 0.5) for x in proportions]))
    if numberedx:
        file.write("            long numbered at %s /\n" % reduce(lambda x, y:x + y, ["%i " % int(x + 0.5) for x in numberedx]))
    file.write("  /\n")
    file.write("\\axis left invisible\n")# label {classification accuracy}\n")
    file.write("      shiftedto y=%5.3f\n" % yshift)
    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
    file.write("  /\n")
    if options.has_key("default"):
        # Dotted horizontal reference line at the given CA.
        file.write("\\setdashpattern<1pt, 1pt>\n")
        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))

    for i in range(len(CAs[0])):
        coordinates = reduce(lambda x, y:x + " " + y, ["%i %5.3f" % (proportions[p] * nexamples, CAs[p][i][0]) for p in range(len(proportions))])
        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %s /\n" % coordinates)
        if pointtypes:
            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))

    file.write("\\endpicture\n")
    file.write("}\n")
    if fopened:
        file.close()
2207
def legend_learners_to_PiCTeX(file, learners, **options):
    """Delegate to legend_to_PiCTeX with legend entries from learner names."""
    names = [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
             for i in range(len(learners))]
    return legend_to_PiCTeX(file, names, **options)
2210
def legend_to_PiCTeX(file, legend, **options):
    """Write a PiCTeX legend matching learning_curve_to_PiCTeX's styles.

    `file` may be a file name (opened and closed here) or an open
    file-like object.  Recognized options: pointtypes, linetypes.

    BUG FIX: the file.write calls below were missing the opening quote of
    their string literal (a syntax error); the quotes are restored.  The
    unconditional second file.close() (which also closed a caller-supplied
    handle) and the pointless `del file` were removed as well.
    """
    import types
    fopened = 0
    if type(file) == types.StringType:
        file = open(file, "wt")
        fopened = 1

    pointtypes = options.get("pointtypes", default_point_types)
    linetypes = options.get("linetypes", default_line_types)

    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))

    # One sample line/marker followed by its label, per legend entry.
    for i in range(len(legend)):
        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i + 0.2))
        if pointtypes:
            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i + 0.1))
        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i + 0.25))

    file.write("\\endpicture\n")
    file.write("}\n")
    if fopened:
        file.close()
2240
2241
def compute_friedman(avranks, N):
    """ Returns a tuple composed of (friedman statistic, degrees of freedom)
    and (Iman statistic - F-distribution, degrees of freedoma) given average
    ranks and a number of tested data sets N.
    """
    k = len(avranks)

    # Friedman chi-square statistic over the average ranks.
    sum_sq = sum(rank ** 2.0 for rank in avranks)
    fried = 12 * N * (sum_sq - k * (k + 1) * (k + 1) / 4.0) / (k * (k + 1))

    # Iman-Davenport correction, F-distributed.
    iman = (N - 1) * fried / (N * (k - 1) - fried)

    return (fried, k - 1), (iman, (k - 1, (k - 1) * (N - 1)))
2261
def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
    """ Returns critical difference for Nemenyi or Bonferroni-Dunn test
    according to given alpha (either alpha="0.05" or alpha="0.1") for average
    ranks and number of tested data sets N. Type can be either "nemenyi" for
    for Nemenyi two tailed test or "bonferroni-dunn" for Bonferroni-Dunn test.
    """
    k = len(avranks)

    # Tabulated critical values q_alpha, indexed by the number of methods k.
    # Nemenyi entries can be computed in R as qtukey(0.95, n, Inf)**0.5:
    # for (x in c(2:20)) print(qtukey(0.95, x, Inf)/(2**0.5)
    crit_values = {
        ("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
                              2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
                              3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
                              3.426041, 3.458425, 3.488685, 3.517073, 3.543799],
        ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
                             2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
                             2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
                             3.195743, 3.229723, 3.261461, 3.291224, 3.319233],
        ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
                                      2.638, 2.690, 2.724, 2.773],
        ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
                                     2.394, 2.450, 2.498, 2.539],
    }

    q = crit_values[(type, alpha)]
    return q[k] * (k * (k + 1) / (6.0 * N)) ** 0.5
2292
2293
def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
    """
    Draws a CD graph, which is used to display the differences in methods'
    performance.
    See Janez Demsar, Statistical Comparisons of Classifiers over
    Multiple Data Sets, 7(Jan):1--30, 2006.

    Needs matplotlib to work.

    :param filename: Output file name (with extension). Formats supported
                     by matplotlib can be used.
    :param avranks: List of average methods' ranks.
    :param names: List of methods' names.

    :param cd: Critical difference. Used for marking methods whose
               difference is not statistically significant.
    :param lowv: The lowest shown rank, if None, use 1.
    :param highv: The highest shown rank, if None, use len(avranks).
    :param width: Width of the drawn figure in inches, default 6 in.
    :param textspace: Space on figure sides left for the description
                      of methods, default 1 in.
    :param reverse:  If True, the lowest rank is on the right. Default\: False.
    :param cdmethod: None by default. It can be an index of element in avranks
                     or names which specifies the method which should be
                     marked with an interval.

    Additional keyword arguments are forwarded to matplotlib's
    ``print_figure`` when the figure is saved.
    """
    if not HAS_MATPLOTLIB:
        import sys
        print >> sys.stderr, "Function requires matplotlib. Please install it."
        return

    # All geometry below is computed in inches, then converted to axes
    # fractions via hfl/wfl just before drawing.
    width = float(width)
    textspace = float(textspace)

    def nth(l, n):
        """
        Return the n-th element of every item in l (a "column" of a
        list of tuples/lists); n may be negative (see lloc).
        """
        n = lloc(l, n)
        return [ a[n] for a in l ]

    def lloc(l, n):
        """
        List location in list of list structure.
        Enable the use of negative locations:
        -1 is the last element, -2 second last...
        """
        if n < 0:
            return len(l[0]) + n
        else:
            return n

    def mxrange(lr):
        """
        Multiple xranges. Can be used to traverse matrices.
        This function is very slow due to unknown number of
        parameters.

        >>> mxrange([3,5])
        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

        >>> mxrange([[3,5,1],[9,0,-3]])
        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]

        """
        if not len(lr):
            yield ()
        else:
            #it can work with single numbers
            index = lr[0]
            if type(1) == type(index):
                index = [ index ]
            for a in range(*index):
                for b in mxrange(lr[1:]):
                    yield tuple([a] + list(b))


    # Use the Agg backend directly so no display/GUI is required.
    from matplotlib.figure import Figure
    from matplotlib.backends.backend_agg import FigureCanvasAgg

    def print_figure(fig, *args, **kwargs):
        # Render the figure to `filename` through the Agg canvas.
        canvas = FigureCanvasAgg(fig)
        canvas.print_figure(*args, **kwargs)

    sums = avranks

    # Sort methods by average rank (descending when reverse=True) and
    # permute the names the same way.
    tempsort = sorted([ (a, i) for i, a in  enumerate(sums) ], reverse=reverse)
    ssums = nth(tempsort, 0)
    sortidx = nth(tempsort, 1)
    nnames = [ names[x] for x in sortidx ]

    # Default axis endpoints: at least span [1, len(avranks)], widened to
    # cover the observed ranks.
    if lowv is None:
        lowv = min(1, int(math.floor(min(ssums))))
    if highv is None:
        highv = max(len(avranks), int(math.ceil(max(ssums))))

    cline = 0.4  # vertical position (inches) of the main rank axis

    k = len(sums)

    lines = None  # non-significance intervals; filled only in the cd branch

    linesblank = 0  # extra vertical space reserved for those intervals
    scalewidth = width - 2 * textspace

    def rankpos(rank):
        # Map a rank value to a horizontal position in inches,
        # honouring the reverse flag.
        if not reverse:
            a = rank - lowv
        else:
            a = highv - rank
        return textspace + scalewidth / (highv - lowv) * a

    distanceh = 0.25  # gap between the CD ruler and the main axis

    if cd and cdmethod is None:

        #get pairs of non significant methods

        def get_lines(sums, hsd):

            #get all pairs
            lsums = len(sums)
            allpairs = [ (i, j) for i, j in mxrange([[lsums], [lsums]]) if j > i ]
            #remove not significant
            notSig = [ (i, j) for i, j in allpairs if abs(sums[i] - sums[j]) <= hsd ]
            #keep only longest

            def no_longer((i, j), notSig):
                # True iff (i, j) is not strictly contained in a longer
                # non-significant interval.
                for i1, j1 in notSig:
                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
                        return False
                return True

            longest = [ (i, j) for i, j in notSig if no_longer((i, j), notSig) ]

            return longest

        lines = get_lines(ssums, cd)
        linesblank = 0.2 + 0.2 + (len(lines) - 1) * 0.1

        distanceh = 0.25
        cline += distanceh

    #calculate height needed height of an image
    # (k + 1) / 2 is Python 2 integer division: one 0.2 in row per method
    # name on the taller side of the diagram.
    minnotsignificant = max(2 * 0.2, linesblank)
    height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant

    fig = Figure(figsize=(width, height))
    ax = fig.add_axes([0, 0, 1, 1]) #reverse y axis
    ax.set_axis_off()

    hf = 1. / height # height factor
    wf = 1. / width

    # Convert lists of inch coordinates to axes-fraction coordinates.
    def hfl(l):
        return [ a * hf for a in l ]

    def wfl(l):
        return [ a * wf for a in l ]


    # Upper left corner is (0,0).

    ax.plot([0, 1], [0, 1], c="w")
    ax.set_xlim(0, 1)
    ax.set_ylim(1, 0)

    def line(l, color='k', **kwargs):
        """
        Input is a list of pairs of points.
        """
        ax.plot(wfl(nth(l, 0)), hfl(nth(l, 1)), color=color, **kwargs)

    def text(x, y, s, *args, **kwargs):
        ax.text(wf * x, hf * y, s, *args, **kwargs)

    # Main horizontal rank axis.
    line([(textspace, cline), (width - textspace, cline)], linewidth=0.7)

    bigtick = 0.1
    smalltick = 0.05


    import numpy
    tick = None  # keeps the size of the last tick drawn (used for labels below)
    # Small ticks every 0.5 rank, big ticks on whole ranks.
    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
        tick = smalltick
        if a == int(a): tick = bigtick
        line([(rankpos(a), cline - tick / 2), (rankpos(a), cline)], linewidth=0.7)

    for a in range(lowv, highv + 1):
        text(rankpos(a), cline - tick / 2 - 0.05, str(a), ha="center", va="bottom")

    k = len(ssums)

    # First half of the methods: names on the left side.
    for i in range((k + 1) / 2):
        chei = cline + minnotsignificant + i * 0.2
        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace - 0.1, chei)], linewidth=0.7)
        text(textspace - 0.2, chei, nnames[i], ha="right", va="center")

    # Remaining methods: names on the right side.
    for i in range((k + 1) / 2, k):
        chei = cline + minnotsignificant + (k - i - 1) * 0.2
        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace + scalewidth + 0.1, chei)], linewidth=0.7)
        text(textspace + scalewidth + 0.2, chei, nnames[i], ha="left", va="center")

    if cd and cdmethod is None:

        #upper scale
        if not reverse:
            begin, end = rankpos(lowv), rankpos(lowv + cd)
        else:
            begin, end = rankpos(highv), rankpos(highv - cd)

        # CD ruler above the axis, with end ticks and a "CD" label.
        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
        line([(begin, distanceh + bigtick / 2), (begin, distanceh - bigtick / 2)], linewidth=0.7)
        line([(end, distanceh + bigtick / 2), (end, distanceh - bigtick / 2)], linewidth=0.7)
        text((begin + end) / 2, distanceh - 0.05, "CD", ha="center", va="bottom")

        #non significance lines
        def draw_lines(lines, side=0.05, height=0.1):
            # One thick bar per maximal group of methods whose rank
            # difference is within the critical difference.
            start = cline + 0.2
            for l, r in lines:
                line([(rankpos(ssums[l]) - side, start), (rankpos(ssums[r]) + side, start)], linewidth=2.5)
                start += height

        draw_lines(lines)

    elif cd:
        # Mark a single method (cdmethod) with a +/- cd interval instead.
        begin = rankpos(avranks[cdmethod] - cd)
        end = rankpos(avranks[cdmethod] + cd)
        line([(begin, cline), (end, cline)], linewidth=2.5)
        line([(begin, cline + bigtick / 2), (begin, cline - bigtick / 2)], linewidth=2.5)
        line([(end, cline + bigtick / 2), (end, cline - bigtick / 2)], linewidth=2.5)

    print_figure(fig, filename, **kwargs)
2529
def mlc_hamming_loss(res):
    """
    Hamming loss, as presented by Schapire and Singer (2000), defined as:

    :math:HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{Y_i \\vartriangle Z_i}{|L|}
    """
    label_num = len(res.labels)
    example_num = gettotsize(res)
    losses = [0] * res.number_of_learners

    for example in res.results:
        actual = example.actual_class
        for learner, predicted in enumerate(example.classes):
            predicted = [int(v) for v in predicted]
            if len(predicted) != len(actual):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")
            # Count label positions where prediction and truth disagree.
            losses[learner] += sum(1 for j in range(label_num)
                                   if predicted[j] != actual[j])

    denominator = float(label_num * example_num)
    return [loss / denominator for loss in losses]
2551
def mlc_accuracy(res, forgiveness_rate=1.0):
    """
    Multi-label accuracy as used by Godbole & Sarawagi (2004):

    :math:Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}

    Boutell et al. (2004) give a more generalized version using a parameter :math:\\alpha \\ge 0,
    called forgiveness rate:

    :math:Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}
    """
    n_examples = gettotsize(res)
    accuracies = [0.0] * res.number_of_learners

    for example in res.results:
        actual = example.actual_class
        for learner, predicted in enumerate(example.classes):
            predicted = [int(v) for v in predicted]
            if len(predicted) != len(actual):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # Jaccard-style ratio of the predicted and actual label sets.
            both = sum(1.0 for p, a in zip(predicted, actual) if p and a)
            either = sum(1.0 for p, a in zip(predicted, actual) if p or a)
            if either:
                accuracies[learner] += both / either

    return [math.pow(acc / n_examples, forgiveness_rate) for acc in accuracies]
2585
def mlc_precision(res):
    """
    :math:Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}
    """
    n_examples = gettotsize(res)
    precisions = [0.0] * res.number_of_learners

    for example in res.results:
        actual = example.actual_class
        for learner, predicted in enumerate(example.classes):
            predicted = [int(v) for v in predicted]
            if len(predicted) != len(actual):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # |Y ∩ Z| over the number of labels the classifier predicted.
            n_common = 0.0
            n_predicted = 0.0
            for p, a in zip(predicted, actual):
                if p and a:
                    n_common += 1
                if p:
                    n_predicted += 1
            if n_predicted:
                precisions[learner] += n_common / n_predicted

    return [p / n_examples for p in precisions]
2611
def mlc_recall(res):
    """
    :math:Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}
    """
    n_examples = gettotsize(res)
    recalls = [0.0] * res.number_of_learners

    for example in res.results:
        actual = example.actual_class
        for learner, predicted in enumerate(example.classes):
            predicted = [int(v) for v in predicted]
            if len(predicted) != len(actual):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # |Y ∩ Z| over the number of labels actually present.
            n_common = 0.0
            n_actual = 0.0
            for p, a in zip(predicted, actual):
                if p and a:
                    n_common += 1
                if a:
                    n_actual += 1
            if n_actual:
                recalls[learner] += n_common / n_actual

    return [r / n_examples for r in recalls]
2637
2638#def mlc_ranking_loss(res):
2639#    pass
2640#
2641#def mlc_average_precision(res):
2642#    pass
2643#
2644#def mlc_hierarchical_loss(res):
2645#    pass
2646
2647
def mt_average_score(res, score, weights=None):
    """
    Compute individual scores for each target and return the (weighted) average.

    One method can be used to compute scores for all targets or a list of
    scoring methods can be passed to use different methods for different
    targets. In the latter case, care has to be taken if the ranges of scoring
    methods differ.
    For example, when the first target is scored from -1 to 1 (1 best) and the
    second from 0 to 1 (0 best), using weights=[0.5,-1] would scale both
    to a span of 1, and invert the second so that higher scores are better.

    :param score: Single-target scoring method or a list of such methods
                  (one for each target).
    :param weights: List of real weights, one for each target,
                    for a weighted average.
    :raises ValueError: If there are no examples, or if the number of
                        scoring methods does not match the number of targets.
    """
    if not len(res.results):
        raise ValueError("Cannot compute the score: no examples.")
    if res.number_of_learners < 1:
        return []
    n_classes = len(res.results[0].actual_class)
    if weights is None:
        weights = [1.] * n_classes
    if not hasattr(score, '__len__'):
        score = [score] * n_classes
    elif len(score) != n_classes:
        raise ValueError("Number of scoring methods and targets do not match.")
    # Save the original multi-target predictions; the per-target passes
    # below overwrite them in place.
    clsss = [te.classes for te in res.results]
    aclsss = [te.actual_class for te in res.results]
    # compute single target scores
    single_scores = []
    try:
        for i in range(n_classes):
            # Project each tested example onto target i and score it.
            for te, clss, aclss in zip(res.results, clsss, aclsss):
                te.classes = [cls[i] for cls in clss]
                te.actual_class = aclss[i]
            single_scores.append(score[i](res))
    finally:
        # Restore the original classes even if a scoring method raises,
        # so `res` is never left in the projected single-target state.
        for te, clss, aclss in zip(res.results, clsss, aclsss):
            te.classes = clss
            te.actual_class = aclss
    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
        for scores in zip(*single_scores)]
2693
def mt_flattened_score(res, score):
    """
    Flatten (concatenate into a single list) the predictions of multiple
    targets and compute a single-target score.

    Each multi-target tested example is expanded into one single-target
    TestedExample per target, and the given score is computed over the
    resulting flattened result set.

    :param score: Single-target scoring method.
    """
    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
        res.classifier_names, class_values=res.class_values,
        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
    for te in res.results:
        for i, ac in enumerate(te.actual_class):
            te2 = Orange.evaluation.testing.TestedExample(
                iteration_number=te.iteration_number, actual_class=ac)
            for c, p in zip(te.classes, te.probabilities):
                # Record each learner's prediction and probabilities for
                # target i on the flattened single-target example.
                te2.add_result(c[i], p[i])
            res2.results.append(te2)
    return score(res2)
2713
def mt_global_accuracy(res):
    """
    :math:Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline

    :math:\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.
    """
    # An example counts as correct only when the whole predicted class
    # vector equals the actual one.
    return [float(len([r for r in res.results
                       if list(r.classes[l]) == r.actual_class]))
            / len(res.results)
            for l in xrange(res.number_of_learners)]
2731
2732
def mt_mean_accuracy(res):
    """
    :math:\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline

    :math:\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.
    """
    scores = []
    for learner in xrange(res.number_of_learners):
        n_classes = len(res.results[0].actual_class)
        n_results = len(res.results)
        # Per-label matches, averaged over both labels and examples.
        matched = 0.
        for r in res.results:
            matched += sum(1 for i in xrange(n_classes)
                           if r.classes[learner][i] == r.actual_class[i])
        scores.append(matched / n_classes / n_results)
    return scores
2751
2752
2753
2754################################################################################
2755if __name__ == "__main__":
2756    avranks = [3.143, 2.000, 2.893, 1.964]
2757    names = ["prva", "druga", "tretja", "cetrta" ]
2758    cd = compute_CD(avranks, 14)
2759    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2760    print cd
2761
2762    print compute_friedman(avranks, 14)
2763
2764    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)
# Note: See TracBrowser for help on using the repository browser.