source: orange/Orange/evaluation/scoring.py @ 10914:4863a689ae90

Revision 10914:4863a689ae90, 100.9 KB checked in by Lan Zagar <lan.zagar@…>, 22 months ago

Fixes #1198.

import math
import functools
from operator import add
from collections import Iterable

import numpy

import Orange
from Orange import statc, corn
from Orange.utils import deprecated_keywords, deprecated_function_name, \
    deprecation_warning, environ
from Orange.evaluation import testing

try:
    import matplotlib
    HAS_MATPLOTLIB = True
except ImportError:
    matplotlib = None
    HAS_MATPLOTLIB = False

#### Private stuff

def log2(x):
    """Calculate logarithm in base 2."""
    return math.log(x) / math.log(2)
def check_non_zero(x):
    """Raise a ValueError when x is 0."""
    if x == 0.:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."

def gettotweight(res):
    """Sum all the weights."""
    totweight = reduce(lambda x, y: x + y.weight, res.results, 0)
    if totweight == 0.:
        raise ValueError, "Cannot compute the score: sum of weights is 0."
    return totweight

def gettotsize(res):
    """Return the number of tested instances."""
    if len(res.results):
        return len(res.results)
    else:
        raise ValueError, "Cannot compute the score: no examples."

# Backward compatibility
def replace_use_weights(fun):
    if environ.orange_no_deprecated_members:
        return fun

    @functools.wraps(fun)
    def wrapped(*args, **kwargs):
        use_weights = kwargs.pop("useWeights", None)
        if use_weights is not None:
            deprecation_warning("useWeights", "ignore_weights")
            kwargs["ignore_weights"] = not use_weights
        return fun(*args, **kwargs)
    return wrapped

def replace_discrete_probabilities_with_list(method):
    # `method` is the positional index at which the decorated function
    # receives its test results argument.
    if environ.orange_no_deprecated_members:
        return lambda fun: fun

    def decorator(fun):
        @functools.wraps(fun)
        def wrapped(*args, **kwargs):
            res = args[method] if len(args) > method \
                else kwargs.get("res", kwargs.get("test_results", None))
            convert = res is not None

            if convert:
                old_probs = []
                for r in res.results:
                    old_probs.append(r.probabilities)
                    r.probabilities = [list(p) if type(p) is Orange.statistics.distribution.Discrete
                                       else p for p in r.probabilities]
            result = fun(*args, **kwargs)
            if convert:
                for r, old in zip(res.results, old_probs):
                    r.probabilities = old
            return result
        return wrapped
    return decorator

def split_by_iterations(res):
    """Split the ExperimentResults of a multiple-iteration test into a list
    of ExperimentResults, one for each iteration.
    """
    if res.number_of_iterations < 2:
        return [res]

    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifier_names, res.class_values,
                res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type, labels=res.labels)
            for _ in range(res.number_of_iterations)]
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress

def split_by_classifiers(res):
    """Split an instance of :obj:`ExperimentResults` into a list of
    :obj:`ExperimentResults`, one for each classifier.
    """
    split_res = []
    for i in range(len(res.classifier_names)):
        r = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
                    [res.classifier_names[i]], res.class_values,
                    weights=res.weights, base_class=res.base_class,
                    classifiers=[res.classifiers[i]] if res.classifiers else [],
                    test_type=res.test_type, labels=res.labels)
        r.results = []
        for te in res.results:
            r.results.append(Orange.evaluation.testing.TestedExample(
                te.iteration_number, te.actual_class, n=1, weight=te.weight))
            r.results[-1].classes = [te.classes[i]]
            r.results[-1].probabilities = [te.probabilities[i]]
        split_res.append(r)
    return split_res


def class_probabilities_from_res(res, **argkw):
    """Calculate class probabilities from the actual classes in res."""
    probs = [0.] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            probs[int(tex.actual_class)] += 1.
        totweight = gettotsize(res)
    else:
        totweight = 0.
        for tex in res.results:
            probs[int(tex.actual_class)] += tex.weight
            totweight += tex.weight
        check_non_zero(totweight)
    return [prob / totweight for prob in probs]


@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    # remove empty folds, turn the matrix so that learners are outer
    if iteration_is_outer:
        if not stats:
            raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."
        number_of_learners = len(stats[0])
        stats = filter(lambda (x, fN): fN > 0, zip(stats, fold_n))
        stats = [[x[lrn] / fN for x, fN in stats]
                 for lrn in range(number_of_learners)]
    else:
        stats = [[x / Fn for x, Fn in filter(lambda (x, Fn): Fn > 0,
                 zip(lrnD, fold_n))] for lrnD in stats]

    if not stats:
        raise ValueError, "Cannot compute the score: no classifiers"
    if not stats[0]:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]

def ME(res, **argkw):
    MEs = [0.] * res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            MEs = map(lambda res, cls, ac=float(tex.actual_class):
                      res + abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            MEs = map(lambda res, cls, ac=float(tex.actual_class), tw=tex.weight:
                      res + tw * abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotweight(res)

    return [x / totweight for x in MEs]

MAE = ME  # superseded by the regression_error-based MAE defined below


class ConfusionMatrix:
    """
    Classification result summary.
    """
    #: True Positive predictions
    TP = 0.
    #: True Negative predictions
    TN = 0.
    #: False Positive predictions
    FP = 0.
    #: False Negative predictions
    FN = 0.

    @deprecated_keywords({"predictedPositive": "predicted_positive",
                          "isPositive": "is_positive"})
    def addTFPosNeg(self, predicted_positive, is_positive, weight=1.0):
        """
        Update confusion matrix with result of a single classification.

        :param predicted_positive: positive class value was predicted
        :param is_positive: correct class value is positive
        :param weight: weight of the selected instance
        """
        if predicted_positive:
            if is_positive:
                self.TP += weight
            else:
                self.FP += weight
        else:
            if is_positive:
                self.FN += weight
            else:
                self.TN += weight


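# A minimal illustration of ConfusionMatrix (not part of the original source):
# counts are accumulated with addTFPosNeg, optionally weighted.
#
#     >>> cm = ConfusionMatrix()
#     >>> cm.addTFPosNeg(predicted_positive=True, is_positive=True)   # TP
#     >>> cm.addTFPosNeg(predicted_positive=True, is_positive=False)  # FP
#     >>> cm.addTFPosNeg(predicted_positive=False, is_positive=True)  # FN
#     >>> cm.TP, cm.FP, cm.FN, cm.TN
#     (1.0, 1.0, 1.0, 0.0)
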
#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of numeric predictions

def check_argkw(dct, lst):
    """Return True if any item from lst has a non-zero value in dct."""
    return reduce(lambda x, y: x or y, [dct.get(k, 0) for k in lst])

def regression_error(res, **argkw):
    """Return the regression error (default: MSE)."""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # computes the scores for each iteration, then averages
        scores = [[0.] * res.number_of_iterations
                  for _ in range(res.number_of_learners)]
        norm = None
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.] * res.number_of_iterations

        # counts examples in each iteration
        nIter = [0] * res.number_of_iterations
        # average class in each iteration
        a = [0] * res.number_of_iterations
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i] / nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # iterate across test cases
            for tex in res.results:
                ai = float(tex.actual_class)

                # compute normalization, if required
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number]) ** 2

                # iterate across results of different regressors
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai) ** 2
        else:  # weighted
            raise NotImplementedError, "weighted error scores with SE not implemented yet"

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x / n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x / ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else:  # single iteration (testing on a single test set)
        scores = [0.] * res.number_of_learners
        norm = 0.

        if argkw.get("unweighted", 0) or not res.weights:
            a = sum([tex.actual_class for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                if argkw.get("abs", 0):
                    scores = map(lambda res, cls, ac=float(tex.actual_class):
                                 res + abs(float(cls) - ac), scores, tex.classes)
                else:
                    scores = map(lambda res, cls, ac=float(tex.actual_class):
                                 res + (float(cls) - ac) ** 2, scores, tex.classes)

                if argkw.get("norm-abs", 0):
                    norm += abs(tex.actual_class - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (tex.actual_class - a) ** 2
            totweight = gettotsize(res)
        else:
            # Weighted case: accumulate weighted squared errors directly.
            # (The "abs" and "norm-*" variants are not implemented for
            # weighted results.)
            for tex in res.results:
                scores = map(lambda res, cls, ac=float(tex.actual_class),
                             tw=tex.weight:
                             res + tw * (float(cls) - ac) ** 2,
                             scores, tex.classes)
            totweight = gettotweight(res)

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s / norm for s in scores]
        else: # normalize by number of instances (or sum of weights)
            scores = [s / totweight for s in scores]

        if argkw.get("R2"):
            scores = [1. - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores

def MSE(res, **argkw):
    """Compute mean-squared error."""
    return regression_error(res, **argkw)

def RMSE(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def MAE(res, **argkw):
    """Compute mean absolute error."""
    argkw.setdefault("abs", True)
    return regression_error(res, **argkw)

def RSE(res, **argkw):
    """Compute relative squared error."""
    argkw.setdefault("norm-sqr", True)
    return regression_error(res, **argkw)

def RRSE(res, **argkw):
    """Compute root relative squared error."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def RAE(res, **argkw):
    """Compute relative absolute error."""
    argkw.setdefault("abs", True)
    argkw.setdefault("norm-abs", True)
    return regression_error(res, **argkw)

def R2(res, **argkw):
    """Compute the coefficient of determination, R-squared."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("R2", True)
    return regression_error(res, **argkw)

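# A minimal usage sketch for the regression scores above (not from the
# original source; assumes the Orange 2.x testing API, the bundled "housing"
# regression data set and Orange.regression.mean.MeanLearner):
#
#     >>> data = Orange.data.Table("housing")
#     >>> learners = [Orange.regression.mean.MeanLearner()]
#     >>> res = testing.cross_validation(learners, data, folds=5)
#     >>> print MSE(res), RMSE(res), MAE(res), R2(res)
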
def MSE_old(res, **argkw):
    """Compute mean-squared error."""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        MSEs = [[0.] * res.number_of_iterations
                for _ in range(res.number_of_learners)]
        nIter = [0] * res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac) ** 2
        else:
            raise ValueError, "weighted RMSE with SE not implemented yet"
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class):
                           res + (float(cls) - ac) ** 2, MSEs, tex.classes)
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class),
                           tw=tex.weight: res + tw * (float(cls) - ac) ** 2,
                           MSEs, tex.classes)
            totweight = gettotweight(res)

        # Normalize before taking the square root, so that "sqrt" really
        # yields the root mean-squared error.
        MSEs = [x / totweight for x in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return MSEs

def RMSE_old(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", 1)
    return MSE_old(res, **argkw)

#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of classifiers

class CA(list):
    """
    Compute percentage of matches between predicted and actual class.

    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :param report_se: include standard error in result.
    :param ignore_weights: ignore instance weights.
    :rtype: list of scores, one for each learner.

    Standard errors are estimated from deviation of CAs across folds (if
    test_results were produced by cross_validation) or approximated under
    the assumption of normal distribution otherwise.
    """
    CONFUSION_MATRIX = 0
    CONFUSION_MATRIX_LIST = 1
    CLASSIFICATION = 2
    CROSS_VALIDATION = 3

    @deprecated_keywords({"reportSE": "report_se",
                          "unweighted": "ignore_weights"})
    def __init__(self, test_results, report_se=False, ignore_weights=False):
        super(CA, self).__init__()
        self.report_se = report_se
        self.ignore_weights = ignore_weights

        input_type = self.get_input_type(test_results)
        if input_type == self.CONFUSION_MATRIX:
            self[:] = [self.from_confusion_matrix(test_results)]
        elif input_type == self.CONFUSION_MATRIX_LIST:
            self[:] = self.from_confusion_matrix_list(test_results)
        elif input_type == self.CLASSIFICATION:
            self[:] = self.from_classification_results(test_results)
        elif input_type == self.CROSS_VALIDATION:
            self[:] = self.from_crossvalidation_results(test_results)

    def from_confusion_matrix(self, cm):
        all_predictions = 0.
        correct_predictions = 0.
        if isinstance(cm, ConfusionMatrix):
            all_predictions += cm.TP + cm.FN + cm.FP + cm.TN
            correct_predictions += cm.TP + cm.TN
        else:
            for r, row in enumerate(cm):
                for c, column in enumerate(row):
                    if r == c:
                        correct_predictions += column
                    all_predictions += column

        check_non_zero(all_predictions)
        ca = correct_predictions / all_predictions

        if self.report_se:
            return ca, math.sqrt(ca * (1 - ca) / all_predictions)
        else:
            return ca

    def from_confusion_matrix_list(self, confusion_matrices):
        return [self.from_confusion_matrix(cm) for cm in confusion_matrices]

    def from_classification_results(self, test_results):
        CAs = [0.] * test_results.number_of_learners
        totweight = 0.
        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            CAs = map(lambda res, cls: res + (cls == tex.actual_class and w),
                      CAs, tex.classes)
            totweight += w
        check_non_zero(totweight)
        ca = [x / totweight for x in CAs]

        if self.report_se:
            return [(x, math.sqrt(x * (1 - x) / totweight)) for x in ca]
        else:
            return ca

    def from_crossvalidation_results(self, test_results):
        CAsByFold = [[0.] * test_results.number_of_iterations
                     for _ in range(test_results.number_of_learners)]
        foldN = [0.] * test_results.number_of_iterations

        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            for lrn in range(test_results.number_of_learners):
                CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn] ==
                    tex.actual_class) and w
            foldN[tex.iteration_number] += w

        return statistics_by_folds(CAsByFold, foldN, self.report_se, False)

    def get_input_type(self, test_results):
        if isinstance(test_results, ConfusionMatrix):
            return self.CONFUSION_MATRIX
        elif isinstance(test_results, testing.ExperimentResults):
            if test_results.number_of_iterations == 1:
                return self.CLASSIFICATION
            else:
                return self.CROSS_VALIDATION
        elif isinstance(test_results, list):
            return self.CONFUSION_MATRIX_LIST

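# A minimal usage sketch for CA (not from the original source; assumes the
# Orange 2.x testing API and the bundled "voting" data set):
#
#     >>> data = Orange.data.Table("voting")
#     >>> learners = [Orange.classification.bayes.NaiveLearner()]
#     >>> res = testing.cross_validation(learners, data, folds=10)
#     >>> print CA(res)                  # one score per learner
#     >>> print CA(res, report_se=True)  # (score, standard error) pairs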

@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def AP(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the average probability assigned to the correct class."""
    if res.number_of_iterations == 1:
        APs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class],
                          APs, tex.probabilities)
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class] *
                          tex.weight, APs, tex.probabilities)
                totweight += tex.weight
        check_non_zero(totweight)
        return [AP / totweight for AP in APs]

    APsByFold = [[0.] * res.number_of_learners
                 for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations
    if ignore_weights or not res.weights:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs:
                res + probs[tex.actual_class],
                APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs:
                res + probs[tex.actual_class] * tex.weight,
                APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(APsByFold, foldN, report_se, True)


@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def Brier_score(res, report_se=False, ignore_weights=False, **argkw):
    r"""Compute the Brier score, defined as the average (over test instances)
    of :math:`\sum_x(t(x) - p(x))^2`, where x is a class value, t(x) is 1 for
    the actual class value and 0 otherwise, and p(x) is the predicted
    probability of x.
    """
    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
    #    x is class,
    #    t(x) is 0 for 'wrong' and 1 for 'correct' class
    #    p(x) is predicted probability.
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1.
    # Since the +1 is there for each example, it adds 1 to the average;
    # we skip the +1 inside the sum and add it just at the end of the function.
    # We take max(result, 0) to avoid -0.0000x due to rounding errors.

    if res.number_of_iterations == 1:
        MSEs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            totweight = 0.
            for tex in res.results:
                MSEs = map(lambda res, probs: res + reduce(
                    lambda s, pi: s + pi ** 2, probs, 0) -
                    2 * probs[tex.actual_class], MSEs, tex.probabilities)
                totweight += 1.  # count instances; weights are ignored
        else:
            for tex in res.results:
                MSEs = map(lambda res, probs: res + tex.weight * (reduce(
                    lambda s, pi: s + pi ** 2, probs, 0) -
                    2 * probs[tex.actual_class]), MSEs, tex.probabilities)
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            ## change this, not zero!!!
            return [(max(x / totweight + 1., 0), 0) for x in MSEs]
        else:
            return [max(x / totweight + 1., 0) for x in MSEs]

    BSs = [[0.] * res.number_of_learners
           for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if ignore_weights or not res.weights:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs: rr + reduce(
                lambda s, pi: s + pi ** 2, probs, 0) -
                2 * probs[tex.actual_class], BSs[tex.iteration_number],
                tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs:
                rr + tex.weight * (reduce(lambda s, pi: s + pi ** 2, probs, 0) -
                2 * probs[tex.actual_class]), BSs[tex.iteration_number],
                tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x + 1., y) for x, y in stats]
    else:
        return [x + 1. for x in stats]

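# A small numeric check (not from the original source) of the identity used
# above: for a probability vector p and correct class c,
# sum_x (t(x) - p(x))^2 == sum_x p(x)^2 - 2*p(c) + 1.
#
#     >>> p, c = [0.7, 0.2, 0.1], 0
#     >>> t = [1., 0., 0.]
#     >>> round(sum((ti - pi) ** 2 for ti, pi in zip(t, p)), 10)
#     0.14
#     >>> round(sum(pi ** 2 for pi in p) - 2 * p[c] + 1, 10)
#     0.14
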
def BSS(res, **argkw):
    return [1 - x / 2 for x in Brier_score(res, **argkw)]

def IS_ex(Pc, P):
    """Pc is the posterior probability of the correct class, P its prior."""
    if Pc >= P:
        return -log2(P) + log2(Pc)
    else:
        return -(-log2(1 - P) + log2(1 - Pc))


@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se=False, **argkw):
    """Compute the information score as defined by
    `Kononenko and Bratko (1991) \
    <http://www.springerlink.com/content/g5p7473160476612/>`_.
    Argument :obj:`apriori` gives the apriori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:`res`.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations == 1:
        ISs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += (IS_ex(tex.probabilities[i][cls], apriori[cls]) *
                               tex.weight)
            totweight = gettotweight(res)
        if report_se:
            return [(IS / totweight, 0) for IS in ISs]
        else:
            return [IS / totweight for IS in ISs]

    ISs = [[0.] * res.number_of_iterations
           for _ in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # compute info scores for each fold
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls],
                                                      apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls],
                                                      apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)

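# A small worked example for IS_ex (not from the original source): a correct
# prediction that raises the probability of the actual class from a prior of
# 0.5 to 0.8 is worth log2(0.8) - log2(0.5), about 0.678 bits; lowering it
# to 0.2 costs the same amount.
#
#     >>> round(IS_ex(0.8, 0.5), 3)
#     0.678
#     >>> round(IS_ex(0.2, 0.5), 3)
#     -0.678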

def Wilcoxon(res, statistics, **argkw):
    res1, res2 = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError, "Wilcoxon compares two classifiers, no more, no less"
        res1.append(stats[0])
        res2.append(stats[1])
    return statc.wilcoxont(res1, res2)

def rank_difference(res, statistics, **argkw):
    if not res.results:
        raise TypeError, "no experiments"

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError, "nothing to compare (less than two classifiers given)"
    if k == 2:
        return Wilcoxon(res, statistics, **argkw)
    else:
        return Friedman(res, statistics, **argkw)


@deprecated_keywords({"res": "test_results",
                      "classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def confusion_matrices(test_results, class_index=-1,
                       ignore_weights=False, cutoff=.5):
    """
    Return confusion matrices for test_results.

    :param test_results: test results
    :param class_index: index of the class value for which the confusion
        matrices are to be computed (by default unspecified - see note below).
    :param ignore_weights: ignore instance weights.
    :param cutoff: cutoff for the predicted probability of the class.

    :rtype: list of :obj:`ConfusionMatrix` or list-of-list-of-lists (see
        note below)

    .. note:: If `class_index` is not specified and `test_results`
        contain predictions for a multi-class problem, then the return
        value is a list of 2D tables (list-of-lists) of all class
        value pairwise misclassifications.

    """
    tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)]

    if class_index < 0:
        numberOfClasses = len(test_results.class_values)
        if class_index < -1 or numberOfClasses > 2:
            cm = [[[0.] * numberOfClasses for _ in range(numberOfClasses)]
                  for _ in range(test_results.number_of_learners)]
            if ignore_weights or not test_results.weights:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += 1
            else:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += tex.weight
            return cm

        elif test_results.base_class >= 0:
            class_index = test_results.base_class
        else:
            class_index = 1

    if cutoff != .5:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] >
                                         cutoff, isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] >
                                         cutoff, isPositive, lr.weight)
    else:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive, lr.weight)
    return tfpns

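# A minimal usage sketch (not from the original source; assumes the Orange
# 2.x testing API and a binary-class data set such as "voting"):
#
#     >>> res = testing.cross_validation(
#     ...     [Orange.classification.bayes.NaiveLearner()],
#     ...     Orange.data.Table("voting"))
#     >>> cm = confusion_matrices(res, class_index=1)[0]
#     >>> print cm.TP, cm.FP, cm.FN, cm.TN
#
# Passing class_index=-2 on a multi-class problem returns plain 2D lists of
# pairwise misclassification counts instead of ConfusionMatrix objects.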

# obsolete (renamed)
compute_confusion_matrices = confusion_matrices


@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """
    Return the chi-square statistic of the confusion matrix
    (a higher value indicates that the prediction is not by chance).
    """
    if isinstance(confusion_matrix, ConfusionMatrix) or \
       not isinstance(confusion_matrix[1], list):
        return _confusion_chi_square(confusion_matrix)
    else:
        return map(_confusion_chi_square, confusion_matrix)

def _confusion_chi_square(confusion_matrix):
    if isinstance(confusion_matrix, ConfusionMatrix):
        c = confusion_matrix
        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]]
    dim = len(confusion_matrix)
    rowPriors = [sum(r) for r in confusion_matrix]
    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)]
    total = sum(rowPriors)
    rowPriors = [r / total for r in rowPriors]
    colPriors = [r / total for r in colPriors]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, o in enumerate(row):
            e = total * rowPriors[ri] * colPriors[ci]
            if not e:
                return -1, -1, -1
            ss += (o - e) ** 2 / e
    df = (dim - 1) ** 2
    return ss, df, statc.chisqprob(ss, df)

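# A minimal sketch (not from the original source): the statistic accepts
# either a ConfusionMatrix or a plain 2D list of float counts and returns a
# (chi-square, degrees of freedom, p-value) triple; for a 2x2 table df is 1.
#
#     >>> ss, df, p = _confusion_chi_square([[45., 5.], [10., 40.]])
#     >>> df
#     1
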
class CMScore(list):
    """
    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :rtype: list of scores, one for each learner."""
    def __new__(cls, test_results, **kwargs):
        self = list.__new__(cls)
        if isinstance(test_results, ConfusionMatrix):
            self.__init__(test_results, **kwargs)
            return self[0]
        return self

    @deprecated_keywords({"confm": "test_results"})
    def __init__(self, test_results=None):
        super(CMScore, self).__init__()

        if test_results is not None:
            self[:] = self.__call__(test_results)

    def __call__(self, test_results):
        if isinstance(test_results, testing.ExperimentResults):
            test_results = confusion_matrices(test_results, class_index=1)
        if isinstance(test_results, ConfusionMatrix):
            test_results = [test_results]

        return map(self.compute, test_results)


class Sensitivity(CMScore):
    __doc__ = """Compute `sensitivity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ (proportion
    of actual positives which are correctly identified as such).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.TP + confusion_matrix.FN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute sensitivity: one or both classes have no instances")
            return None

        return confusion_matrix.TP / tot


class Recall(Sensitivity):
    __doc__ = """Compute `recall
    <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (fraction of relevant instances that are retrieved).
    """ + CMScore.__doc__
    pass # Recall == Sensitivity


class Specificity(CMScore):
    __doc__ = """Compute `specificity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
    (proportion of negatives which are correctly identified).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.FP + confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute specificity: one or both classes have no instances")
            return None
        return confusion_matrix.TN / tot


class PPV(CMScore):
    __doc__ = """Compute `positive predictive value
    <http://en.wikipedia.org/wiki/Positive_predictive_value>`_ (proportion of
    subjects with positive test results who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.TP + confusion_matrix.FP
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute PPV: one or both classes have no instances")
            return None
        return confusion_matrix.TP / tot


class Precision(PPV):
    __doc__ = """Compute `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (proportion of retrieved instances that are relevant).
    """ + CMScore.__doc__
    pass # Precision == PPV


class NPV(CMScore):
    __doc__ = """Compute `negative predictive value
    <http://en.wikipedia.org/wiki/Negative_predictive_value>`_ (proportion of
    subjects with a negative test result who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.FN + confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute NPV: one or both classes have no instances")
            return None
        return confusion_matrix.TN / tot


class F1(CMScore):
    __doc__ = """Return `F1 score
    <http://en.wikipedia.org/wiki/F1_score>`_
    (harmonic mean of precision and recall).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        if p is not None and r is not None and (p + r) != 0:
            return 2. * p * r / (p + r)
        else:
            import warnings
            warnings.warn("Can't compute F1: P + R is zero or not defined")
            return None


class Falpha(CMScore):
    __doc__ = """Compute the F-measure with parameter alpha (a weighted
    harmonic mean of precision and recall) over the given confusion matrix.
    """ + CMScore.__doc__

    def __init__(self, test_results, alpha=1.):
        self.alpha = alpha
        super(Falpha, self).__init__(test_results)

    def compute(self, confusion_matrix):
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        return (1. + self.alpha) * p * r / (self.alpha * p + r)

class MCC(CMScore):
    __doc__ = """Compute `Matthews correlation coefficient
    <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_
    (correlation coefficient between the observed and predicted binary
    classifications).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, cm):
        # code by Boris Gorelik
        TP, TN, FP, FN = cm.TP, cm.TN, cm.FP, cm.FN

        try:
            return (TP * TN - FP * FN) / \
                 math.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
        except ZeroDivisionError:
            # Zero division occurs when one of the four marginal sums is
            # zero, e.g. when the data contain only one type of class.
            import warnings
            warnings.warn("Can't compute MCC: TP or TN is zero or not defined")

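# A small worked example for MCC (not from the original source): a fully
# correct, balanced matrix yields the maximal coefficient of 1.
#
#     >>> cm = ConfusionMatrix()
#     >>> cm.TP = cm.TN = 50.
#     >>> cm.FP = cm.FN = 0.
#     >>> MCC.compute(cm)
#     1.0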

@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for
    nominal raters.

    @param confusion_matrix: confusion matrix, or list of confusion matrices.
                             To obtain a non-binary confusion matrix, call
                             Orange.evaluation.scoring.confusion_matrices
                             and set the class_index parameter to -2.
    @param b_is_list_of_matrices: specifies whether confusion_matrix is a
                             list of matrices. This function needs to operate
                             on non-binary confusion matrices, which are
                             represented by python lists, therefore one needs
                             a way to distinguish between a single matrix and
                             a list of matrices.
    """

    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False)
                    for cm in confusion_matrix]
        except TypeError:
            # Never mind the parameter; maybe this is a "conventional" binary
            # confusion matrix and b_is_list_of_matrices was set by mistake.
            return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
    else:
        if isinstance(confusion_matrix, ConfusionMatrix):
            confusion_matrix = numpy.array([[confusion_matrix.TP,
                confusion_matrix.FN], [confusion_matrix.FP,
                confusion_matrix.TN]], dtype=float)
        else:
            confusion_matrix = numpy.array(confusion_matrix, dtype=float)

        marginalSumOfRows = numpy.sum(confusion_matrix, axis=0)
        marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1)
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                          (2. * numpy.sum(confusion_matrix))
        # In the eq. above, 2. is what the Wikipedia page calls
        # the number of annotators. Here we have two annotators:
        # the observed (true) labels (annotations) and the predictions
        # made by the learners.

        prExpected = numpy.sum(jointProportion ** 2)
        prActual = numpy.sum(numpy.diag(confusion_matrix)) / \
                   numpy.sum(confusion_matrix)

        ret = (prActual - prExpected) / (1.0 - prExpected)
        return ret

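# A small worked example (not from the original source): perfect agreement
# between observed and predicted labels gives Pi = 1.
#
#     >>> cm = ConfusionMatrix()
#     >>> cm.TP = cm.TN = 50.
#     >>> cm.FP = cm.FN = 0.
#     >>> scotts_pi(cm, b_is_list_of_matrices=False)
#     1.0
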
# Backward compatibility
sens = Sensitivity
spec = Specificity
precision = Precision
recall = Recall


@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
@replace_discrete_probabilities_with_list(False)
def AUCWilcoxon(res, class_index=-1, ignore_weights=False, **argkw):
    """Compute the area under ROC (AUC) and its standard error using
    Wilcoxon's approach proposed by Hanley and McNeil (1982). If
    :obj:`class_index` is not specified, the first class is used as
    "the positive" and others are negative. The result is a list of
    tuples (aROC, standard error).

    If test results consist of multiple folds, you need to split them using
    :obj:`split_by_iterations` and perform this test on each fold separately.
    """
    useweights = res.weights and not ignore_weights
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results = []

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:
        highPos, lowNeg = totPos, 0.
        W, Q1, Q2 = 0., 0., 0.
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos / 2.)
            Q2 += thisPos * (lowNeg ** 2 + lowNeg * thisNeg + thisNeg ** 2 / 3.)
            Q1 += thisNeg * (highPos ** 2 + highPos * thisPos + thisPos ** 2 / 3.)

            lowNeg += thisNeg

        W /= (totPos * totNeg)
        Q1 /= (totNeg * totPos ** 2)
        Q2 /= (totPos * totNeg ** 2)

        SE = math.sqrt((W * (1 - W) + (totPos - 1) * (Q1 - W ** 2) +
                       (totNeg - 1) * (Q2 - W ** 2)) / (totPos * totNeg))
        results.append((W, SE))
    return results

AROC = AUCWilcoxon  # for backward compatibility; AROC is obsolete


@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
@replace_discrete_probabilities_with_list(False)
def compare_2_AUCs(res, lrn1, lrn2, class_index=-1,
                   ignore_weights=False, **argkw):
    return corn.compare2ROCs(res, lrn1, lrn2, class_index,
                             res.weights and not ignore_weights)

# for backward compatibility, compare_2_AROCs is obsolete
compare_2_AROCs = compare_2_AUCs


@deprecated_keywords({"classIndex": "class_index"})
@replace_discrete_probabilities_with_list(False)
def compute_ROC(res, class_index=-1):
    """Compute a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    totPos, totNeg = tots[1], tots[0]

    for plist in problists:
        curve = [(1., 1.)]
        TP, TN = totPos, 0.
        FN, FP = 0., totNeg
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            # thisPos go from TP to FN
            TP -= thisPos
            FN += thisPos
            # thisNeg go from FP to TN
            TN += thisNeg
            FP -= thisNeg

            sens = TP / (TP + FN)
            spec = TN / (FP + TN)
            curve.append((1 - spec, sens))
        results.append(curve)

    return results

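# A minimal usage sketch (not from the original source; assumes the Orange
# 2.x testing API, its proportion_test helper with a times argument, and
# single-fold results; multi-fold results should first be split with
# split_by_iterations):
#
#     >>> res = testing.proportion_test(
#     ...     [Orange.classification.bayes.NaiveLearner()],
#     ...     Orange.data.Table("voting"), 0.7, times=1)
#     >>> curve = compute_ROC(res)[0]   # [(1 - specificity, sensitivity), ...]
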
## TC's implementation of algorithms, taken from:
## T Fawcett: ROC Graphs: Notes and Practical Considerations for
## Data Mining Researchers, submitted to KDD Journal.
def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)):
    if P1x == P2x:
        return 1e300
    return (P1y - P2y) / (P1x - P2x)


@deprecated_keywords({"keepConcavities": "keep_concavities"})
def ROC_add_point(P, R, keep_concavities=1):
    if keep_concavities:
        R.append(P)
    else:
        while True:
            if len(R) < 2:
                R.append(P)
                return R
            else:
                T = R.pop()
                T2 = R[-1]
                if ROC_slope(T2, T) > ROC_slope(T, P):
                    R.append(T)
                    R.append(P)
                    return R
    return R


@deprecated_keywords({"classIndex": "class_index",
                      "keepConcavities": "keep_concavities"})
@replace_discrete_probabilities_with_list(False)
def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]

    for plist in problists:
        ## corn gives a list increasing by score; we need a decreasing one
        plist.reverse()
        TP = 0.
        FP = 0.
        curve = []
        fPrev = 10e300 # "infinity" score at 0., 0.
        for prob in plist:
            f = prob[0]
            if f != fPrev:
                if P:
                    tpr = TP / P
                else:
                    tpr = 0.
                if N:
                    fpr = FP / N
                else:
                    fpr = 0.
                curve = ROC_add_point((fpr, tpr, fPrev), curve,
                                      keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        if P:
            tpr = TP / P
        else:
            tpr = 0.
        if N:
            fpr = FP / N
        else:
            fpr = 0.
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly
        results.append(curve)

    return results

## returns a list of points at the intersection of the tangential
## iso-performance line and the given ROC curve
## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    m = (FPcost * (1. - pval)) / (FNcost * pval)

    ## put the iso-performance line in point (0., 1.)
    x0, y0 = (0., 1.)
    x1, y1 = (1., 1. + m)
    d01 = math.sqrt((x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0))

    ## calculate and find the closest point to the line
    firstp = 1
    mind = 0.
    a = x0 * y1 - x1 * y0
    closestPoints = []
    for (x, y, fscore) in curve:
        d = ((y0 - y1) * x + (x1 - x0) * y + a) / d01
        d = abs(d)
        if firstp or d < mind:
            mind, firstp = d, 0
            closestPoints = [(x, y, fscore)]
        else:
            if abs(d - mind) <= 0.0001: ## close enough
                closestPoints.append((x, y, fscore))
    return closestPoints

def frange(start, end=None, inc=None):
    """A range function that accepts float increments."""

    if end is None:
        end = start + 0.
        start = 0.

    if inc is None or inc == 0:
        inc = 1.

    L = [start]
    while 1:
        next = start + len(L) * inc
        if inc > 0 and next >= end:
            L.append(end)
            break
        elif inc < 0 and next <= end:
            L.append(end)
            break
        L.append(next)

    return L

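# A small example of frange (not from the original source): unlike the
# built-in range, the end point is always included.
#
#     >>> frange(0., 1., 0.25)
#     [0.0, 0.25, 0.5, 0.75, 1.0]
#     >>> frange(1.)   # start defaults to 0., inc to 1.
#     [0.0, 1.0]
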
1247## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1248## where ROCcurvesX is a set of ROC curves,
1249## where a (one) ROC curve is a set of (FP, TP) points
1250##
1251## for each (sub)set of input ROC curves
1252## returns the average ROC curve and an array of (vertical) standard deviations
1253@deprecated_keywords({"ROCcurves": "roc_curves"})
1254def TC_vertical_average_ROC(roc_curves, samples=10):
1255    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
1256        if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x:
1257            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
1258        dx = float(P2x) - float(P1x)
1259        dy = float(P2y) - float(P1y)
1260        m = dy / dx
1261        return P1y + m * (X - P1x)
1262
1263    def TP_FOR_FP(FPsample, ROC, npts):
1264        i = 0
1265        while i < npts - 1:
1266            (fp, _, _) = ROC[i + 1]
1267            if fp <= FPsample:
1268                i += 1
1269            else:
1270                break
1271        (fp, tp, _) = ROC[i]
1272        if fp == FPsample:
1273            return tp
1274        elif fp < FPsample and i + 1 < len(ROC):
1275            return INTERPOLATE(ROC[i], ROC[i + 1], FPsample)
1276        elif fp < FPsample and i + 1 == len(ROC): # return the last
1277            return ROC[i][1]
1278        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
1279        #return 0.
1280
1281    average = []
1282    stdev = []
1283    for ROCS in roc_curves:
1284        npts = []
1285        for c in ROCS:
1286            npts.append(len(c))
1287        nrocs = len(ROCS)
1288
1289        TPavg = []
1290        TPstd = []
1291        for FPsample in frange(0., 1., 1. / samples):
1292            TPsum = []
1293            for i in range(nrocs):
1294                ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
1295                TPsum.append(TP_FOR_FP(FPsample, ROCS[i], npts[i]))
1296            TPavg.append((FPsample, statc.mean(TPsum)))
1297            if len(TPsum) > 1:
1298                stdv = statc.std(TPsum)
1299            else:
1300                stdv = 0.
1301            TPstd.append(stdv)
1302
1303        average.append(TPavg)
1304        stdev.append(TPstd)
1305
1306    return average, stdev
1307
1308## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1309## where ROCcurvesX is a set of ROC curves,
1310## where a (one) ROC curve is a set of (FP, TP) points
1311##
1312## for each (sub)set of input ROC curves
1313## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
1314@deprecated_keywords({"ROCcurves": "roc_curves"})
1315def TC_threshold_average_ROC(roc_curves, samples=10):
1316    def POINT_AT_THRESH(ROC, npts, thresh):
1317        i = 0
1318        while i < npts - 1:
1319            (px, py, pfscore) = ROC[i]
1320            if pfscore > thresh:
1321                i += 1
1322            else:
1323                break
1324        return ROC[i]
1325
1326    average = []
1327    stdevV = []
1328    stdevH = []
1329    for ROCS in roc_curves:
1330        npts = []
1331        for c in ROCS:
1332            npts.append(len(c))
1333        nrocs = len(ROCS)
1334
1335        T = []
1336        for c in ROCS:
1337            for (px, py, pfscore) in c:
1338##                try:
1339##                    T.index(pfscore)
1340##                except:
1341                T.append(pfscore)
1342        T.sort()
1343        T.reverse() ## ugly
1344
1345        TPavg = []
1346        TPstdV = []
1347        TPstdH = []
1348        for tidx in frange(0, (len(T) - 1.), float(len(T)) / samples):
1349            FPsum = []
1350            TPsum = []
1351            for i in range(nrocs):
1352                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
1353                FPsum.append(fp)
1354                TPsum.append(tp)
1355            TPavg.append((statc.mean(FPsum), statc.mean(TPsum)))
1356            ## vertical standard deviation
1357            if len(TPsum) > 1:
1358                stdv = statc.std(TPsum)
1359            else:
1360                stdv = 0.
1361            TPstdV.append(stdv)
1362            ## horizontal standard deviation
1363            if len(FPsum) > 1:
1364                stdh = statc.std(FPsum)
1365            else:
1366                stdh = 0.
1367            TPstdH.append(stdh)
1368
1369        average.append(TPavg)
1370        stdevV.append(TPstdV)
1371        stdevH.append(TPstdH)
1372
1373    return average, stdevV, stdevH
1374
1375## Calibration Curve
1376## returns an array of (curve, yesClassPredictions, noClassPredictions)
1377## elements, where:
1378##  - curve is an array of points (x, y) on the calibration curve
1379##  - yesClassRugPoints is an array of (x, 1) points
1380##  - noClassRugPoints is an array of (x, 0) points
1381@deprecated_keywords({"classIndex": "class_index"})
1382@replace_discrete_probabilities_with_list(False)
1383def compute_calibration_curve(res, class_index= -1):
1384    ## merge multiple iterations into one
1385    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1386        res.class_values, res.weights, classifiers=res.classifiers,
1387        loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1388    for te in res.results:
1389        mres.results.append(te)
1390
1391    problists, tots = corn.computeROCCumulative(mres, class_index)
1392
1393    results = []
1394
1395    bins = 10 ## divide interval between 0. and 1. into N bins
1396
1397    for plist in problists:
1398        yesClassRugPoints = []
1399        noClassRugPoints = []
1400
1401        yesBinsVals = [0] * bins
1402        noBinsVals = [0] * bins
1403        for (f, (thisNeg, thisPos)) in plist:
1404            yesClassRugPoints.append((f, thisPos)) # 1.
1405            noClassRugPoints.append((f, thisNeg)) # 1.
1406
1407            index = int(f * bins)
1408            index = min(index, bins - 1) ## just in case for value 1.
1409            yesBinsVals[index] += thisPos
1410            noBinsVals[index] += thisNeg
1411
1412        curve = []
1413        for cn in range(bins):
1414            f = float(cn * 1. / bins) + (1. / 2. / bins)
1415            yesVal = yesBinsVals[cn]
1416            noVal = noBinsVals[cn]
1417            allVal = yesVal + noVal
1418            if allVal == 0.: continue
1419            y = float(yesVal) / float(allVal)
1420            curve.append((f, y))
1421
1422        ## smooth the curve
1423        maxnPoints = 100
1424        if len(curve) >= 3:
1426            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
1427        else:
1428            loessCurve = curve
1429        clen = len(loessCurve)
1430        if clen > maxnPoints:
1431            df = clen / maxnPoints
1432            if df < 1: df = 1
1433            curve = [loessCurve[i]  for i in range(0, clen, df)]
1434        else:
1435            curve = loessCurve
1436        ## statc.loess returns (x, y, variance) triples; keep only the
1437        ## (x, y) coordinates
1438        curve = [c[:2] for c in curve]
1439        results.append((curve, yesClassRugPoints, noClassRugPoints))
1440
1441    return results
1442
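## Usage sketch (comment only; "res" stands for any ExperimentResults and
## the plotting is left to the caller). A well calibrated classifier keeps
## the curve close to the diagonal y = x:
##
##     for curve, yes_rug, no_rug in compute_calibration_curve(res, 1):
##         xs = [x for x, y in curve]
##         ys = [y for x, y in curve]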
1443
1444## Lift Curve
1445## returns P, N and an array of curves, where each curve is an array of
1446## points ((TP + FP) / (P + N), TP, (threshold, FP / N)) on the lift
1447## curve; note that TP is a raw count, not TP / P
1448@deprecated_keywords({"classIndex": "class_index"})
1449@replace_discrete_probabilities_with_list(False)
1450def compute_lift_curve(res, class_index= -1):
1451    ## merge multiple iterations into one
1452    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1453        res.class_values, res.weights, classifiers=res.classifiers,
1454        loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1455    for te in res.results:
1456        mres.results.append(te)
1457
1458    problists, tots = corn.computeROCCumulative(mres, class_index)
1459
1460    results = []
1461    P, N = tots[1], tots[0]
1462    for plist in problists:
1463        ## corn returns the list ordered by increasing score; we need it decreasing
1464        plist.reverse()
1465        TP = 0.
1466        FP = 0.
1467        curve = [(0., 0., (10e300, 0.))]
1468        for (f, (thisNeg, thisPos)) in plist:
1469            TP += thisPos
1470            FP += thisNeg
1471            curve.append(((TP + FP) / (P + N), TP, (f, FP / (N or 1))))
1472        results.append(curve)
1473
1474    return P, N, results
1475
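## Usage sketch (comment only). Since the second coordinate of each point
## is a raw TP count, the conventional lift-chart y axis (TP / P) has to
## be derived by the caller:
##
##     P, N, curves = compute_lift_curve(res)
##     lift = [[(x, tp / (P or 1.), thresh) for x, tp, thresh in curve]
##             for curve in curves]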
1476
1477class CDT:
1478  """Store the number of concordant (C), discordant (D) and tied (T) pairs."""
1479  def __init__(self, C=0., D=0., T=0.):
1480    self.C, self.D, self.T = C, D, T
1481
1482def is_CDT_empty(cdt):
1483    return cdt.C + cdt.D + cdt.T < 1e-20
1484
1485
1486@deprecated_keywords({"classIndex": "class_index",
1487                      "unweighted": "ignore_weights"})
1488@replace_discrete_probabilities_with_list(False)
1489def compute_CDT(res, class_index= -1, ignore_weights=False, **argkw):
1490    """Obsolete, don't use."""
1491    if class_index < 0:
1492        if res.base_class >= 0:
1493            class_index = res.base_class
1494        else:
1495            class_index = 1
1496
1497    useweights = res.weights and not ignore_weights
1498    weightByClasses = argkw.get("weightByClasses", True)
1499
1500    if res.number_of_iterations > 1:
1501        CDTs = [CDT() for _ in range(res.number_of_learners)]
1502        iterationExperiments = split_by_iterations(res)
1503        for exp in iterationExperiments:
1504            expCDTs = corn.computeCDT(exp, class_index, useweights)
1505            for i in range(len(CDTs)):
1506                CDTs[i].C += expCDTs[i].C
1507                CDTs[i].D += expCDTs[i].D
1508                CDTs[i].T += expCDTs[i].T
1509        for i in range(res.number_of_learners):
1510            if is_CDT_empty(CDTs[i]):
1511                return corn.computeCDT(res, class_index, useweights)
1512
1513        return CDTs
1514    else:
1515        return corn.computeCDT(res, class_index, useweights)
1516
1517class AUC(list):
1518    """
1519    Compute the area under ROC curve given a set of experimental results.
1520    If testing consisted of multiple folds, each fold is scored and the
1521    average score is returned. If a fold contains only instances with the
1522    same class value, folds will be merged.
1523
1524    :param test_results: test results to score
1525    :param ignore_weights: ignore instance weights when calculating score
1526    :param multiclass: tells what kind of averaging to perform if the target
1527                       class has more than 2 values.
1528    """
1529
1530    #!Compute AUC for each pair of classes (ignoring instances of all other
1531    #!classes) and average the results, weighting them by the number of
1532    #!pairs of instances from these two classes (e.g. by the product of
1533    #!probabilities of the two classes). AUC computed in this way still
1534    #!behaves as the concordance index, e.g., gives the probability that two
1535    #!randomly chosen instances from different classes will be correctly
1536    #!recognized (if the classifier knows from which two classes the
1537    #!instances came).
1538    ByWeightedPairs = 0
1539
1540    #!Similar to ByWeightedPairs, except that the average over class pairs
1541    #!is not weighted. This AUC is, like the binary version, independent of
1542    #!class distributions, but it is not related to the concordance index
1543    #!any more.
1544    ByPairs = 1
1545
1546    #!For each class, it computes AUC for this class against all others (that
1547    #!is, treating other classes as one class). The AUCs are then averaged by
1548    #!the class probabilities. This is related to the concordance index in
1549    #!which we test the classifier's (average) capability of distinguishing
1550    #!the instances from a specified class from those that come from other
1551    #!classes.
1552    #!Unlike the binary AUC, the measure is not independent of class
1553    #!distributions.
1554    WeightedOneAgainstAll = 2
1555
1556    #!Similar to WeightedOneAgainstAll, except that the average
1557    #!is not weighted.
1558    OneAgainstAll = 3
1559
1560    @replace_use_weights
1561    @deprecated_keywords({"method": "multiclass"})
1562    def __init__(self, test_results=None, multiclass=ByWeightedPairs, ignore_weights=False):
1563
1564        super(AUC, self).__init__()
1565
1566        self.ignore_weights = ignore_weights
1567        self.method = multiclass
1568
1569        if test_results is not None:
1570            self[:] = self.__call__(test_results)
1571
1572    @replace_discrete_probabilities_with_list(method=True)
1573    def __call__(self, test_results):
1574        if len(test_results.class_values) < 2:
1575            raise ValueError("Cannot compute AUC on a single-class problem")
1576        elif len(test_results.class_values) == 2:
1577            return self._compute_for_binary_class(test_results)
1578        else:
1579            return self._compute_for_multi_value_class(test_results, self.method)
1580
1581    def _compute_for_binary_class(self, res):
1582        """AUC for binary classification problems"""
1583        if res.number_of_iterations > 1:
1584            return self._compute_for_multiple_folds(
1585                self._compute_one_class_against_all,
1586                split_by_iterations(res),
1587                (-1, res, res.number_of_iterations))
1588        else:
1589            return self._compute_one_class_against_all(res, -1)[0]
1590
1591    def _compute_for_multi_value_class(self, res, method=0):
1592        """AUC for multiclass classification problems"""
1593        numberOfClasses = len(res.class_values)
1594
1595        if res.number_of_iterations > 1:
1596            iterations = split_by_iterations(res)
1597            all_ite = res
1598        else:
1599            iterations = [res]
1600            all_ite = None
1601
1602        # by pairs
1603        sum_aucs = [0.] * res.number_of_learners
1604        usefulClassPairs = 0.
1605
1606        prob = None
1607        if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]:
1608            prob = class_probabilities_from_res(res)
1609
1610        if method in [self.ByWeightedPairs, self.ByPairs]:
1611            for classIndex1 in range(numberOfClasses):
1612                for classIndex2 in range(classIndex1):
1613                    subsum_aucs = self._compute_for_multiple_folds(
1614                        self._compute_one_class_against_another, iterations,
1615                        (classIndex1, classIndex2, all_ite,
1616                        res.number_of_iterations))
1617                    if subsum_aucs:
1618                        if method == self.ByWeightedPairs:
1619                            p_ij = prob[classIndex1] * prob[classIndex2]
1620                            subsum_aucs = [x * p_ij  for x in subsum_aucs]
1621                            usefulClassPairs += p_ij
1622                        else:
1623                            usefulClassPairs += 1
1624                        sum_aucs = map(add, sum_aucs, subsum_aucs)
1625        else:
1626            for classIndex in range(numberOfClasses):
1627                subsum_aucs = self._compute_for_multiple_folds(
1628                    self._compute_one_class_against_all,
1629                    iterations, (classIndex, all_ite,
1630                                 res.number_of_iterations))
1631                if subsum_aucs:
1632                    if method == self.WeightedOneAgainstAll:
1633                        p_i = prob[classIndex]
1634                        subsum_aucs = [x * p_i  for x in subsum_aucs]
1635                        usefulClassPairs += p_i
1636                    else:
1637                        usefulClassPairs += 1
1638                    sum_aucs = map(add, sum_aucs, subsum_aucs)
1639
1640        if usefulClassPairs > 0:
1641            sum_aucs = [x / usefulClassPairs for x in sum_aucs]
1642
1643        return sum_aucs
1644
1645    # Computes the average AUC over folds using "auc_computer" (one class
1646    # against all, or one class against another). It returns the sum of
1647    # what the computer returns, unless at some fold the computer had to
1648    # resort to computing over all folds, or even that failed; in these
1649    # cases the result is returned immediately.
1650    def _compute_for_multiple_folds(self, auc_computer, iterations,
1651                                 computer_args):
1652        """Compute the average AUC over folds using :obj:`auc_computer`."""
1653        subsum_aucs = [0.] * iterations[0].number_of_learners
1654        for ite in iterations:
1655            aucs, foldsUsed = auc_computer(*(ite,) + computer_args)
1656            if not aucs:
1657                import warnings
1658                warnings.warn("AUC cannot be computed (all instances belong to the same class).")
1659                return
1660            if not foldsUsed:
1661                self[:] = aucs
1662                return aucs
1663            subsum_aucs = map(add, subsum_aucs, aucs)
1664        return subsum_aucs
1665
1666    # Computes AUC.
1667    # In a multivalued class problem, AUC is computed as one against all.
1668    # Results over folds are averaged.
1669    # If some folds have examples from one class only, the folds are merged.
1670    def _compute_for_single_class(self, res, class_index):
1671        if res.number_of_iterations > 1:
1672            return self._compute_for_multiple_folds(
1673                self._compute_one_class_against_all, split_by_iterations(res),
1674                (class_index, res, res.number_of_iterations))
1675        else:
1676            return self._compute_one_class_against_all(res, class_index)[0]
1677
1678    # Computes AUC for a pair of classes (as if there were no other classes)
1679    # results over folds are averages
1680    # if some folds have examples from one class only, the folds are merged
1681    def _compute_for_pair_of_classes(self, res, class_index1, class_index2):
1682        if res.number_of_iterations > 1:
1683            return self._compute_for_multiple_folds(
1684                self._compute_one_class_against_another,
1685                split_by_iterations(res),
1686                (class_index1, class_index2, res, res.number_of_iterations))
1687        else:
1688            return self._compute_one_class_against_another(res, class_index1,
1689                                                            class_index2)[0]
1690
1691    # computes AUC between class i and the other classes
1692    # (treating them as the same class)
1693    @deprecated_keywords({"classIndex": "class_index",
1694                          "divideByIfIte": "divide_by_if_ite"})
1695    def _compute_one_class_against_all(self, ite, class_index, all_ite=None,
1696                                      divide_by_if_ite=1.0):
1697        """Compute AUC between class i and all the other classes."""
1698        return self._compute_auc(corn.computeCDT, ite, all_ite,
1699            divide_by_if_ite, (class_index, not self.ignore_weights))
1700
1701
1702    # computes AUC between classes i and j as if there are no other classes
1703    def _compute_one_class_against_another(
1704        self, ite, class_index1, class_index2,
1705        all_ite=None, divide_by_if_ite=1.):
1706        """
1707        Compute AUC between classes i and j as if there are no other classes.
1708        """
1709        return self._compute_auc(corn.computeCDTPair, ite,
1710            all_ite, divide_by_if_ite,
1711            (class_index1, class_index2, not self.ignore_weights))
1712
1713    # computes AUC using a specified 'cdt_computer' function
1714    # It tries to compute AUCs from 'ite' (examples from a single iteration)
1715    # and, if C+D+T=0, from 'all_ite' (the entire test set). In the former
1716    # case, the AUCs are divided by 'divide_by_if_ite'.
1717    # An additional flag is returned, which is True in the former case
1718    # and False in the latter.
1719    @deprecated_keywords({"cdtComputer": "cdt_computer",
1720                          "divideByIfIte": "divide_by_if_ite",
1721                          "computerArgs": "computer_args"})
1722    def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite,
1723                     computer_args):
1724        """
1725        Compute AUC using a :obj:`cdt_computer`.
1726        """
1727        cdts = cdt_computer(*(ite,) + computer_args)
1728        if not is_CDT_empty(cdts[0]):
1729            return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T) /
1730                    divide_by_if_ite for cdt in cdts], True
1731
1732        if all_ite:
1733            cdts = cdt_computer(*(all_ite,) + computer_args)
1734            if not is_CDT_empty(cdts[0]):
1735                return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T)
1736                        for cdt in cdts], False
1737
1738        return False, False
1739
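## Usage sketch for the AUC family (comment only; the learner and data set
## names are illustrative, assuming an Orange 2.x environment):
##
##     learners = [Orange.classification.bayes.NaiveLearner(),
##                 Orange.classification.majority.MajorityLearner()]
##     res = Orange.evaluation.testing.cross_validation(
##         learners, Orange.data.Table("voting"))
##     print AUC(res)                          # one score per learner
##     print AUC(res, multiclass=AUC.ByPairs)  # relevant for >2 class values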
1740class AUC_for_single_class(AUC):
1741    """
1742    Compute AUC where the class with the given class_index is singled
1743    out and all other classes are treated as a single class.
1744    """
1745    def __init__(self, test_results=None, class_index= -1, ignore_weights=False):
1746        if class_index < 0:
1747            if test_results and test_results.base_class >= 0:
1748                self.class_index = test_results.base_class
1749            else:
1750                self.class_index = 1
1751        else:
1752            self.class_index = class_index
1753
1754        super(AUC_for_single_class, self).__init__(test_results, ignore_weights=ignore_weights)
1755
1756    @replace_discrete_probabilities_with_list(method=True)
1757    def __call__(self, test_results):
1758        return self._compute_for_single_class(test_results, self.class_index)
1759
1760
1761class AUC_for_pair_of_classes(AUC):
1762    """
1763    Computes AUC between a pair of classes, ignoring instances from all
1764    other classes.
1765    """
1766    def __init__(self, test_results, class_index1, class_index2, ignore_weights=False):
1767        self.class_index1 = class_index1
1768        self.class_index2 = class_index2
1769
1770        super(AUC_for_pair_of_classes, self).__init__(test_results, ignore_weights=ignore_weights)
1771
1772    @replace_discrete_probabilities_with_list(method=True)
1773    def __call__(self, test_results):
1774        return self._compute_for_pair_of_classes(test_results, self.class_index1, self.class_index2)
1775
1776
1777class AUC_matrix(AUC):
1778    """
1779    Compute a (lower diagonal) matrix with AUCs for all pairs of classes.
1780    If there are empty classes, the corresponding elements in the matrix
1781    are -1.
1782    """
1783
1784    @replace_discrete_probabilities_with_list(method=True)
1785    def __call__(self, test_results):
1786        numberOfClasses = len(test_results.class_values)
1787        number_of_learners = test_results.number_of_learners
1788        if test_results.number_of_iterations > 1:
1789            iterations, all_ite = split_by_iterations(test_results), test_results
1790        else:
1791            iterations, all_ite = [test_results], None
1792        aucs = [[[] for _ in range(numberOfClasses)]
1793        for _ in range(number_of_learners)]
1794        for classIndex1 in range(numberOfClasses):
1795            for classIndex2 in range(classIndex1):
1796                pair_aucs = self._compute_for_multiple_folds(
1797                    self._compute_one_class_against_another, iterations,
1798                    (classIndex1, classIndex2, all_ite,
1799                     test_results.number_of_iterations))
1800                if pair_aucs:
1801                    for lrn in range(number_of_learners):
1802                        aucs[lrn][classIndex1].append(pair_aucs[lrn])
1803                else:
1804                    for lrn in range(number_of_learners):
1805                        aucs[lrn][classIndex1].append(-1)
1806        return aucs
1807
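## Reading the result (comment sketch): for each learner, aucs[lrn] is a
## lower-triangular list of lists, so for a problem with at least three
## class values the AUC between classes i and j (i > j) sits at
## aucs[lrn][i][j]:
##
##     aucs = AUC_matrix(res)
##     print aucs[0][2][0]  # learner 0, class 2 against class 0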
1808#Backward compatibility
1809@replace_use_weights
1810@replace_discrete_probabilities_with_list(False)
1811def AUC_binary(res, ignore_weights=False):
1812    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1813    auc[:] = auc._compute_for_binary_class(res) or []
1814    return auc
1815
1816@replace_use_weights
1817@replace_discrete_probabilities_with_list(False)
1818def AUC_multi(res, ignore_weights=False, method=0):
1819    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights,
1820        method=method)
1821    auc[:] = auc._compute_for_multi_value_class(res, auc.method) or []
1822    return auc
1823
1824
1825@deprecated_keywords({"AUCcomputer": "auc_computer",
1826                      "computerArgs": "computer_args"})
1827def AUC_iterations(auc_computer, iterations, computer_args):
1828    auc = deprecated_function_name(AUC)()
1829    auc[:] = auc._compute_for_multiple_folds(auc_computer, iterations, computer_args) or []
1830    return auc
1831
1832def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
1833    auc = deprecated_function_name(AUC)()
1834    result = auc._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite,
1835                              computer_args)
1836    return result
1837
1838@replace_use_weights
1839def AUC_i(ite, class_index, ignore_weights=False, all_ite=None,
1840          divide_by_if_ite=1.):
1841    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1842    result = auc._compute_one_class_against_all(ite, class_index,
1843        all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1844    return result
1845
1846
1847@replace_use_weights
1848def AUC_ij(ite, class_index1, class_index2, ignore_weights=False,
1849           all_ite=None, divide_by_if_ite=1.):
1850    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1851    result = auc._compute_one_class_against_another(
1852        ite, class_index1, class_index2, all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1853    return result
1854
1855AUC_single = replace_use_weights(
1856             deprecated_keywords({"classIndex": "class_index"})(
1857             deprecated_function_name(AUC_for_single_class)))
1858AUC_pair = replace_use_weights(
1859           deprecated_keywords({"classIndex1": "class_index1",
1860                                "classIndex2": "class_index2"})(
1861           deprecated_function_name(AUC_for_pair_of_classes)))
1862AUC_matrix = replace_use_weights(AUC_matrix)
1863
1864
1865@deprecated_keywords({"unweighted": "ignore_weights"})
1866@replace_discrete_probabilities_with_list(False)
1867def McNemar(res, ignore_weights=False, **argkw):
1868    """
1869    Compute a triangular matrix with the McNemar statistic for each pair of
1870    classifiers. The statistic has a chi-square distribution with one degree
1871    of freedom; the critical value for 5% significance is about 3.84.
1872    """
1873    nLearners = res.number_of_learners
1874    mcm = []
1875    for i in range(nLearners):
1876       mcm.append([0.] * res.number_of_learners)
1877
1878    if not res.weights or ignore_weights:
1879        for i in res.results:
1880            actual = i.actual_class
1881            classes = i.classes
1882            for l1 in range(nLearners):
1883                for l2 in range(l1, nLearners):
1884                    if classes[l1] == actual:
1885                        if classes[l2] != actual:
1886                            mcm[l1][l2] += 1
1887                    elif classes[l2] == actual:
1888                        mcm[l2][l1] += 1
1889    else:
1890        for i in res.results:
1891            actual = i.actual_class
1892            classes = i.classes
1893            for l1 in range(nLearners):
1894                for l2 in range(l1, nLearners):
1895                    if classes[l1] == actual:
1896                        if classes[l2] != actual:
1897                            mcm[l1][l2] += i.weight
1898                    elif classes[l2] == actual:
1899                        mcm[l2][l1] += i.weight
1900
1901    for l1 in range(nLearners):
1902        for l2 in range(l1, nLearners):
1903            su = mcm[l1][l2] + mcm[l2][l1]
1904            if su:
1905                mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1) ** 2 / su
1906            else:
1907                mcm[l2][l1] = 0
1908
1909    for l1 in range(nLearners):
1910        mcm[l1] = mcm[l1][:l1]
1911
1912    return mcm
1913
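## Usage sketch (comment only). The returned matrix is triangular: row i
## holds the statistics against learners 0..i-1, and values above ~3.84
## indicate a significant difference at the 5% level:
##
##     mcm = McNemar(res)
##     for i in range(1, len(mcm)):
##         for j in range(i):
##             print i, j, mcm[i][j], mcm[i][j] > 3.84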
1914@replace_discrete_probabilities_with_list(False)
1915def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False):
1916    """
1917    Compute the McNemar statistic for a pair of classifiers, specified by
1918    indices lrn1 and lrn2.
1919    """
1920    tf = ft = 0.
1921    if not res.weights or ignore_weights:
1922        for i in res.results:
1923            actual = i.actual_class
1924            if i.classes[lrn1] == actual:
1925                if i.classes[lrn2] != actual:
1926                    tf += 1.
1927            elif i.classes[lrn2] == actual:
1928                ft += 1.
1929    else:
1930        for i in res.results:
1931            actual = i.actual_class
1932            if i.classes[lrn1] == actual:
1933                if i.classes[lrn2] != actual:
1934                    tf += i.weight
1935            elif i.classes[lrn2] == actual:
1936                ft += i.weight
1937
1938    su = tf + ft
1939    if su:
1940        return (abs(tf - ft) - 1) ** 2 / su
1941    else:
1942        return 0
1943
1944@replace_discrete_probabilities_with_list(False)
1945def Friedman(res, stat=CA):
1946    """
1947    Compare classifiers with the Friedman test, treating folds as different
1948    examples. Returns F, p and average ranks.
1949    """
1950    res_split = split_by_iterations(res)
1951    res = [stat(r) for r in res_split]
1952
1953    N = len(res)
1954    k = len(res[0])
1955    sums = [0.] * k
1956    for r in res:
1957        ranks = [k - x + 1 for x in statc.rankdata(r)]
1958        if stat == Brier_score: # reverse ranks for Brier_score (lower better)
1959            ranks = [k + 1 - x for x in ranks]
1960        sums = map(add, ranks, sums)
1961
1962    T = sum(x * x for x in sums)
1963    sums = [x / N for x in sums]
1964
1965    F = 12. / (N * k * (k + 1)) * T - 3 * N * (k + 1)
1966
1967    return F, statc.chisqprob(F, k - 1), sums
1968
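## Usage sketch (comment only). CA is the default statistic; any per-learner
## score with "greater is better" semantics can be passed instead:
##
##     F, p, avgranks = Friedman(res)
##     if p < 0.05:
##         print Wilcoxon_pairs(res, avgranks)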
1969@replace_discrete_probabilities_with_list(False)
1970def Wilcoxon_pairs(res, avgranks, stat=CA):
1971    """
1972    Return a triangular matrix where element[i][j] stores the significance
1973    of the difference between the i-th and the j-th classifier, as computed
1974    by the Wilcoxon test. The element is positive if the i-th classifier is
1975    better, negative if it is worse, and 1 if they are ranked equally.
1976    Arguments are ExperimentResults, average ranks (as returned by Friedman)
1977    and, optionally, a statistic; greater values should mean better results.
1978    """
1979    res_split = split_by_iterations(res)
1980    res = [stat(r) for r in res_split]
1981
1982    k = len(res[0])
1983    bt = []
1984    for m1 in range(k):
1985        nl = []
1986        for m2 in range(m1 + 1, k):
1987            t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res])
1988            if avgranks[m1] < avgranks[m2]:
1989                nl.append(p)
1990            elif avgranks[m2] < avgranks[m1]:
1991                nl.append(-p)
1992            else:
1993                nl.append(1)
1994        bt.append(nl)
1995    return bt
1996
1997
1998@deprecated_keywords({"allResults": "all_results",
1999                      "noConfidence": "no_confidence"})
2000def plot_learning_curve_learners(file, all_results, proportions, learners,
2001                                 no_confidence=0):
2002    plot_learning_curve(file, all_results, proportions,
2003        [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
2004        for i in range(len(learners))], no_confidence)
2005
2006
2007@deprecated_keywords({"allResults": "all_results",
2008                      "noConfidence": "no_confidence"})
2009def plot_learning_curve(file, all_results, proportions, legend,
2010                        no_confidence=0):
2011    import types
2012    fopened = 0
2013    if type(file) == types.StringType:
2014        file = open(file, "wt")
2015        fopened = 1
2016
2017    file.write("set yrange [0:1]\n")
2018    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
2019    file.write("set multiplot\n\n")
2020    CAs = [CA(x, report_se=True) for x in all_results]
2021
2022    file.write("plot \\\n")
2023    for i in range(len(legend) - 1):
2024        if not no_confidence:
2025            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i + 1))
2026        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
2027    if not no_confidence:
2028        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
2029    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))
2030
2031    for i in range(len(legend)):
2032        if not no_confidence:
2033            for p in range(len(proportions)):
2034                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96 * CAs[p][i][1]))
2035            file.write("e\n\n")
2036
2037        for p in range(len(proportions)):
2038            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
2039        file.write("e\n\n")
2040
2041    if fopened:
2042        file.close()
2043
2044
2045def print_single_ROC_curve_coordinates(file, curve):
2046    import types
2047    fopened = 0
2048    if type(file) == types.StringType:
2049        file = open(file, "wt")
2050        fopened = 1
2051
2052    for coord in curve:
2053        file.write("%5.3f\t%5.3f\n" % tuple(coord))
2054
2055    if fopened:
2056        file.close()
2057
2058
2059def plot_ROC_learners(file, curves, learners):
2060    plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))])
2061
2062def plot_ROC(file, curves, legend):
2063    import types
2064    fopened = 0
2065    if type(file) == types.StringType:
2066        file = open(file, "wt")
2067        fopened = 1
2068
2069    file.write("set yrange [0:1]\n")
2070    file.write("set xrange [0:1]\n")
2071    file.write("set multiplot\n\n")
2072
2073    file.write("plot \\\n")
2074    for leg in legend:
2075        file.write("'-' title '%s' with lines,\\\n" % leg)
2076    file.write("'-' title '' with lines\n")
2077
2078    for curve in curves:
2079        for coord in curve:
2080            file.write("%5.3f\t%5.3f\n" % tuple(coord))
2081        file.write("e\n\n")
2082
2083    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")
2084
2085    if fopened:
2086        file.close()
2087
2088
2089@deprecated_keywords({"allResults": "all_results"})
2090def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference= -1):
2091    plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference)
2092
2093
2094@deprecated_keywords({"allResults": "all_results"})
2095def plot_McNemar_curve(file, all_results, proportions, legend, reference= -1):
2096    if reference < 0:
2097        reference = len(legend) - 1
2098
2099    import types
2100    fopened = 0
2101    if type(file) == types.StringType:
2102        file = open(file, "wt")
2103        fopened = 1
2104
2105    #file.write("set yrange [0:1]\n")
2106    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
2107    file.write("set multiplot\n\n")
2108    file.write("plot \\\n")
2109    tmap = range(reference) + range(reference + 1, len(legend))
2110    for i in tmap[:-1]:
2111        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
2112    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1] + 1))
2113    file.write("\n")
2114
2115    for i in tmap:
2116        for p in range(len(proportions)):
2117            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
2118        file.write("e\n\n")
2119
2120    if fopened:
2121        file.close()
2122
2123default_point_types = ("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}") + tuple([chr(x) for x in range(97, 122)])
2124default_line_types = ("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
2125
2126@deprecated_keywords({"allResults": "all_results"})
2127def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
2128    return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options)
2129
2130
2131@deprecated_keywords({"allResults": "all_results"})
2132def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
2133    import types
2134    fopened = 0
2135    if type(file) == types.StringType:
2136        file = open(file, "wt")
2137        fopened = 1
2138
2139    nexamples = len(all_results[0].results)
2140    CAs = [CA(x, report_se=True) for x in all_results]
2141
2142    graphsize = float(options.get("graphsize", 10.0)) #cm
2143    difprop = proportions[-1] - proportions[0]
2144    ntestexamples = nexamples * proportions[-1]
2145    xunit = graphsize / ntestexamples
2146
2147    yshift = float(options.get("yshift", -ntestexamples / 20.))
2148
2149    pointtypes = options.get("pointtypes", default_point_types)
2150    linetypes = options.get("linetypes", default_line_types)
2151
2152    if options.has_key("numberedx"):
2153        numberedx = options["numberedx"]
2154        if type(numberedx) == types.IntType:
2155            if numberedx > 0:
2156                numberedx = [nexamples * proportions[int(i / float(numberedx) * len(proportions))] for i in range(numberedx)] + [proportions[-1] * nexamples]
2157            elif numberedx < 0:
2158                numberedx = -numberedx
2159                newn = []
2160                for i in range(numberedx + 1):
2161                    wanted = proportions[0] + float(i) / numberedx * difprop
2162                    best = (10, 0)
2163                    for t in proportions:
2164                        td = abs(wanted - t)
2165                        if td < best[0]:
2166                            best = (td, t)
2167                    if not best[1] in newn:
2168                        newn.append(best[1])
2169                newn.sort()
2170                numberedx = [nexamples * x for x in newn]
2171        elif type(numberedx[0]) == types.FloatType:
2172            numberedx = [nexamples * x for x in numberedx]
2173    else:
2174        numberedx = [nexamples * x for x in proportions]
2175
2176    file.write("\\mbox{\n")
2177    file.write("\\beginpicture\n")
2178    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
2179    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
2180    file.write("\\axis bottom invisible\n")  # label {#examples}
2181    file.write("      ticks short at %s /\n" % reduce(lambda x, y:x + " " + y, ["%i" % (x * nexamples + 0.5) for x in proportions]))
2182    if numberedx:
2183        file.write("            long numbered at %s /\n" % reduce(lambda x, y:x + y, ["%i " % int(x + 0.5) for x in numberedx]))
2184    file.write("  /\n")
2185    file.write("\\axis left invisible\n")  # label {classification accuracy}
2186    file.write("      shiftedto y=%5.3f\n" % yshift)
2187    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
2188    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
2189    file.write("  /\n")
2190    if options.has_key("default"):
2191        file.write("\\setdashpattern<1pt, 1pt>\n")
2192        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))
2193
2194    for i in range(len(CAs[0])):
2195        coordinates = reduce(lambda x, y:x + " " + y, ["%i %5.3f" % (proportions[p] * nexamples, CAs[p][i][0]) for p in range(len(proportions))])
2196        if linetypes:
2197            file.write("%s\n" % linetypes[i])
2198            file.write("\\plot %s /\n" % coordinates)
2199        if pointtypes:
2200            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))
2201
2202    file.write("\\endpicture\n")
2203    file.write("}\n")
2204    if fopened:
2205        file.close()
2208
2209def legend_learners_to_PiCTeX(file, learners, **options):
2210  return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options)
2211
2212def legend_to_PiCTeX(file, legend, **options):
2213    import types
2214    fopened = 0
2215    if type(file) == types.StringType:
2216        file = open(file, "wt")
2217        fopened = 1
2218
2219    pointtypes = options.get("pointtypes", default_point_types)
2220    linetypes = options.get("linetypes", default_line_types)
2221
2222    file.write("\\mbox{\n")
2223    file.write("\\beginpicture\n")
2224    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
2225    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))
2226
2227    for i in range(len(legend)):
2228        if linetypes:
2229            file.write("%s\n" % linetypes[i])
2230            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i + 0.2))
2231        if pointtypes:
2232            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i + 0.1))
2233        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i + 0.25))
2234
2235    file.write("\\endpicture\n")
2236    file.write("}\n")
2237    if fopened:
2238        file.close()
2241
2242
2243def compute_friedman(avranks, N):
2244    """ Return a tuple composed of (Friedman statistic, degrees of freedom)
2245    and (Iman-Davenport statistic, F-distribution degrees of freedom), given
2246    the average ranks and the number N of tested data sets.
2247    """
2248
2249    k = len(avranks)
2250
2251    def friedman(N, k, ranks):
2252        return 12 * N * (sum([rank ** 2.0 for rank in ranks]) - (k * (k + 1) * (k + 1) / 4.0)) / (k * (k + 1))
2253
2254    def iman(fried, N, k):
2255        return (N - 1) * fried / (N * (k - 1) - fried)
2256
2257    f = friedman(N, k, avranks)
2258    im = iman(f, N, k)
2259    fdistdof = (k - 1, (k - 1) * (N - 1))
2260
2261    return (f, k - 1), (im, fdistdof)
2262
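## Worked example (comment only), reusing the demo values from the bottom of
## this module: avranks = [3.143, 2.000, 2.893, 1.964] over N = 14 data sets
## gives k = 4, a Friedman statistic of roughly 9.28 with 3 degrees of
## freedom and an Iman-Davenport statistic of roughly 3.69 with (3, 39)
## degrees of freedom:
##
##     (f, df), (im, fdist) = compute_friedman([3.143, 2.000, 2.893, 1.964], 14)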
2263def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
2264    """ Return the critical difference for the Nemenyi or Bonferroni-Dunn
2265    test for the given alpha (either alpha="0.05" or alpha="0.1"), average
2266    ranks and number of tested data sets N. Type can be either "nemenyi"
2267    for the two-tailed Nemenyi test or "bonferroni-dunn" for the Bonferroni-Dunn test.
2268    """
2269
2270    k = len(avranks)
2271
2272    d = {("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
2273                               2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
2274                               3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
2275                               3.426041, 3.458425, 3.488685, 3.517073, 3.543799]
2276        , ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
2277                               2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
2278                               2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
2279                               3.195743, 3.229723, 3.261461, 3.291224, 3.319233]
2280        , ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
2281                                        2.638, 2.690, 2.724, 2.773],
2282         ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
2283                                      2.394, 2.450, 2.498, 2.539]}
2284
2285    #Nemenyi critical values can be computed in R as qtukey(0.95, k, Inf) / sqrt(2):
2286    #for (x in c(2:20)) print(qtukey(0.95, x, Inf) / sqrt(2))
2287
2288    q = d[(type, alpha)]
2289
2290    cd = q[k] * (k * (k + 1) / (6.0 * N)) ** 0.5
2291
2292    return cd
2293
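## Worked example (comment only): for k = 4 methods at alpha = "0.05" the
## Nemenyi table above gives q = 2.569032, so with N = 14 data sets
## cd = 2.569032 * (4 * 5 / (6.0 * 14)) ** 0.5, roughly 1.25:
##
##     cd = compute_CD([3.143, 2.000, 2.893, 1.964], 14)  # ~1.25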
2294
2295def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
2296    """
2297    Draw a CD graph, which is used to display the differences in methods'
2298    performance. See Janez Demsar, Statistical Comparisons of Classifiers
2299    over Multiple Data Sets, Journal of Machine Learning Research,
2300    7(Jan):1--30, 2006.
2301
2302    Needs matplotlib to work.
2303
2304    :param filename: Output file name (with extension). Formats supported
2305                     by matplotlib can be used.
2306    :param avranks: List of average methods' ranks.
2307    :param names: List of methods' names.
2308
2309    :param cd: Critical difference. Used for marking methods whose
2310               difference is not statistically significant.
2311    :param lowv: The lowest shown rank, if None, use 1.
2312    :param highv: The highest shown rank, if None, use len(avranks).
2313    :param width: Width of the drawn figure in inches, default 6 in.
2314    :param textspace: Space on figure sides left for the description
2315                      of methods, default 1 in.
2316    :param reverse: If True, the lowest rank is on the right. Default\: False.
2317    :param cdmethod: None by default. It can be an index of an element in
2318                     avranks or names, specifying the method that should be
2319                     marked with an interval.
2320    """
2321    if not HAS_MATPLOTLIB:
2322        import sys
2323        print >> sys.stderr, "Function requires matplotlib. Please install it."
2324        return
2325
2326    width = float(width)
2327    textspace = float(textspace)
2328
2329    def nth(l, n):
2330        """
2331        Return only the nth element from each row of a list of lists.
2332        """
2333        n = lloc(l, n)
2334        return [ a[n] for a in l ]
2335
2336    def lloc(l, n):
2337        """
2338        List location in list of list structure.
2339        Enable the use of negative locations:
2340        -1 is the last element, -2 second last...
2341        """
2342        if n < 0:
2343            return len(l[0]) + n
2344        else:
2345            return n
2346
2347    def mxrange(lr):
2348        """
2349        Multiple xranges. Can be used to traverse matrices.
2350        This function is very slow due to unknown number of
2351        parameters.
2352
2353        >>> list(mxrange([3,5]))
2354        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
2355
2356        >>> list(mxrange([[3,5,1],[9,0,-3]]))
2357        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]
2358
2359        """
2360        if not len(lr):
2361            yield ()
2362        else:
2363            #it can work with single numbers
2364            index = lr[0]
2365            if type(1) == type(index):
2366                index = [ index ]
2367            for a in range(*index):
2368                for b in mxrange(lr[1:]):
2369                    yield tuple([a] + list(b))
2370
2371
2372    from matplotlib.figure import Figure
2373    from matplotlib.backends.backend_agg import FigureCanvasAgg
2374
2375    def print_figure(fig, *args, **kwargs):
2376        canvas = FigureCanvasAgg(fig)
2377        canvas.print_figure(*args, **kwargs)
2378
2379    sums = avranks
2380
2381    tempsort = sorted([ (a, i) for i, a in  enumerate(sums) ], reverse=reverse)
2382    ssums = nth(tempsort, 0)
2383    sortidx = nth(tempsort, 1)
2384    nnames = [ names[x] for x in sortidx ]
2385
2386    if lowv is None:
2387        lowv = min(1, int(math.floor(min(ssums))))
2388    if highv is None:
2389        highv = max(len(avranks), int(math.ceil(max(ssums))))
2390
2391    cline = 0.4
2392
2393    k = len(sums)
2394
2395    lines = None
2396
2397    linesblank = 0
2398    scalewidth = width - 2 * textspace
2399
2400    def rankpos(rank):
2401        if not reverse:
2402            a = rank - lowv
2403        else:
2404            a = highv - rank
2405        return textspace + scalewidth / (highv - lowv) * a
2406
2407    distanceh = 0.25
2408
2409    if cd and cdmethod is None:
2410
2411        #get pairs of non significant methods
2412
2413        def get_lines(sums, hsd):
2414
2415            #get all pairs
2416            lsums = len(sums)
2417            allpairs = [ (i, j) for i, j in mxrange([[lsums], [lsums]]) if j > i ]
2418            #remove not significant
2419            notSig = [ (i, j) for i, j in allpairs if abs(sums[i] - sums[j]) <= hsd ]
2420            #keep only longest
2421
2422            def no_longer((i, j), notSig):
2423                for i1, j1 in notSig:
2424                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
2425                        return False
2426                return True
2427
2428            longest = [ (i, j) for i, j in notSig if no_longer((i, j), notSig) ]
2429
2430            return longest
2431
2432        lines = get_lines(ssums, cd)
2433        linesblank = 0.2 + 0.2 + (len(lines) - 1) * 0.1
2434
2435        #add scale
2436        distanceh = 0.25
2437        cline += distanceh
2438
2439    #calculate the needed height of the image
2440    minnotsignificant = max(2 * 0.2, linesblank)
2441    height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant
2442
2443    fig = Figure(figsize=(width, height))
2444    ax = fig.add_axes([0, 0, 1, 1]) #reverse y axis
2445    ax.set_axis_off()
2446
2447    hf = 1. / height # height factor
2448    wf = 1. / width
2449
2450    def hfl(l):
2451        return [ a * hf for a in l ]
2452
2453    def wfl(l):
2454        return [ a * wf for a in l ]
2455
2456
2457    # Upper left corner is (0,0).
2458
2459    ax.plot([0, 1], [0, 1], c="w")
2460    ax.set_xlim(0, 1)
2461    ax.set_ylim(1, 0)
2462
2463    def line(l, color='k', **kwargs):
2464        """
2465        Input is a list of pairs of points.
2466        """
2467        ax.plot(wfl(nth(l, 0)), hfl(nth(l, 1)), color=color, **kwargs)
2468
2469    def text(x, y, s, *args, **kwargs):
2470        ax.text(wf * x, hf * y, s, *args, **kwargs)
2471
2472    line([(textspace, cline), (width - textspace, cline)], linewidth=0.7)
2473
2474    bigtick = 0.1
2475    smalltick = 0.05
2476
2477
2478    import numpy
2479    tick = None
2480    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
2481        tick = smalltick
2482        if a == int(a): tick = bigtick
2483        line([(rankpos(a), cline - tick / 2), (rankpos(a), cline)], linewidth=0.7)
2484
2485    for a in range(lowv, highv + 1):
2486        text(rankpos(a), cline - tick / 2 - 0.05, str(a), ha="center", va="bottom")
2487
2488    k = len(ssums)
2489
2490    for i in range((k + 1) / 2):
2491        chei = cline + minnotsignificant + i * 0.2
2492        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace - 0.1, chei)], linewidth=0.7)
2493        text(textspace - 0.2, chei, nnames[i], ha="right", va="center")
2494
2495    for i in range((k + 1) / 2, k):
2496        chei = cline + minnotsignificant + (k - i - 1) * 0.2
2497        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace + scalewidth + 0.1, chei)], linewidth=0.7)
2498        text(textspace + scalewidth + 0.2, chei, nnames[i], ha="left", va="center")
2499
2500    if cd and cdmethod is None:
2501
2502        #upper scale
2503        if not reverse:
2504            begin, end = rankpos(lowv), rankpos(lowv + cd)
2505        else:
2506            begin, end = rankpos(highv), rankpos(highv - cd)
2507
2508        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
2509        line([(begin, distanceh + bigtick / 2), (begin, distanceh - bigtick / 2)], linewidth=0.7)
2510        line([(end, distanceh + bigtick / 2), (end, distanceh - bigtick / 2)], linewidth=0.7)
2511        text((begin + end) / 2, distanceh - 0.05, "CD", ha="center", va="bottom")
2512
2513        #non-significance lines
2514        def draw_lines(lines, side=0.05, height=0.1):
2515            start = cline + 0.2
2516            for l, r in lines:
2517                line([(rankpos(ssums[l]) - side, start), (rankpos(ssums[r]) + side, start)], linewidth=2.5)
2518                start += height
2519
2520        draw_lines(lines)
2521
2522    elif cd:
2523        begin = rankpos(avranks[cdmethod] - cd)
2524        end = rankpos(avranks[cdmethod] + cd)
2525        line([(begin, cline), (end, cline)], linewidth=2.5)
2526        line([(begin, cline + bigtick / 2), (begin, cline - bigtick / 2)], linewidth=2.5)
2527        line([(end, cline + bigtick / 2), (end, cline - bigtick / 2)], linewidth=2.5)
2528
2529    print_figure(fig, filename, **kwargs)
2530
2531def mlc_hamming_loss(res):
2532    """
2533    Schapire and Singer (2000) presented Hamming Loss, which is defined as:
2534
2535    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\vartriangle Z_i|}{|L|}`
2536    """
2537    losses = [0] * res.number_of_learners
2538    label_num = len(res.labels)
2539    example_num = gettotsize(res)
2540
2541    for e in res.results:
2542        aclass = e.actual_class
2543        for i, labels in enumerate(e.classes):
2544            labels = map(int, labels)
2545            if len(labels) != len(aclass):
2546                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2547            for j in range(label_num):
2548                if labels[j] != aclass[j]:
2549                    losses[i] += 1
2550
2551    return [float(x) / (label_num * example_num) for x in losses]
2552
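## Worked example of the formula (comment only): with |L| = 4 labels, an
## actual label set Y_i = [1, 0, 1, 0] and a prediction Z_i = [1, 1, 0, 0]
## disagree on two labels, so the symmetric difference has size 2 and the
## instance contributes 2 / 4 = 0.5 to the averaged Hamming loss.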
2553def mlc_accuracy(res, forgiveness_rate=1.0):
2554    """
2555    Godbole & Sarawagi (2004) use the metrics accuracy, precision and recall as follows:
2556     
2557    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`
2558   
2559    Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`,
2560    called forgiveness rate:
2561   
2562    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`
2563    """
2564    accuracies = [0.0] * res.number_of_learners
2565    example_num = gettotsize(res)
2566
2567    for e in res.results:
2568        aclass = e.actual_class
2569        for i, labels in enumerate(e.classes):
2570            labels = map(int, labels)
2571            if len(labels) != len(aclass):
2572                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2573
2574            intersection = 0.0
2575            union = 0.0
2576            for real, pred in zip(labels, aclass):
2577                if real and pred:
2578                    intersection += 1
2579                if real or pred:
2580                    union += 1
2581
2582            if union:
2583                accuracies[i] += intersection / union
2584
2585    return [math.pow(x / example_num, forgiveness_rate) for x in accuracies]
2586
2587def mlc_precision(res):
2588    """
2589    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`
2590    """
2591    precisions = [0.0] * res.number_of_learners
2592    example_num = gettotsize(res)
2593
2594    for e in res.results:
2595        aclass = e.actual_class
2596        for i, labels in enumerate(e.classes):
2597            labels = map(int, labels)
2598            if len(labels) != len(aclass):
2599                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2600
2601            intersection = 0.0
2602            predicted = 0.0
2603            for pred, real in zip(labels, aclass):
2604                if pred and real:
2605                    intersection += 1
2606                if pred:
2607                    predicted += 1
2608            if predicted:
2609                precisions[i] += intersection / predicted
2610
2611    return [x / example_num for x in precisions]
2612
2613def mlc_recall(res):
2614    """
2615    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`
2616    """
2617    recalls = [0.0] * res.number_of_learners
2618    example_num = gettotsize(res)
2619
2620    for e in res.results:
2621        aclass = e.actual_class
2622        for i, labels in enumerate(e.classes):
2623            labels = map(int, labels)
2624            if len(labels) != len(aclass):
2625                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2626
2627            intersection = 0.0
2628            actual = 0.0
2629            for pred, real in zip(labels, aclass):
2630                if pred and real:
2631                    intersection += 1
2632                if real:
2633                    actual += 1
2634            if actual:
2635                recalls[i] += intersection / actual
2636
2637    return [x / example_num for x in recalls]
2638
2639#def mlc_ranking_loss(res):
2640#    pass
2641#
2642#def mlc_average_precision(res):
2643#    pass
2644#
2645#def mlc_hierarchical_loss(res):
2646#    pass
2647
2648
2649def mt_average_score(res, score, weights=None):
2650    """
2651    Compute individual scores for each target and return the (weighted) average.
2652
2653    One method can be used to compute scores for all targets or a list of
2654    scoring methods can be passed to use different methods for different
2655    targets. In the latter case, care has to be taken if the ranges of scoring
2656    methods differ.
2657    For example, when the first target is scored from -1 to 1 (1 best) and the
2658    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
2659    to a span of 1, and invert the second so that higher scores are better.
2660
2661    :param score: Single-target scoring method or a list of such methods
2662                  (one for each target).
2663    :param weights: List of real weights, one for each target,
2664                    for a weighted average.
2665
2666    """
2667    if not len(res.results):
2668        raise ValueError, "Cannot compute the score: no examples."
2669    if res.number_of_learners < 1:
2670        return []
2671    n_classes = len(res.results[0].actual_class)
2672    if weights is None:
2673        weights = [1.] * n_classes
2674    if not isinstance(score, Iterable):
2675        score = [score] * n_classes
2676    elif len(score) != n_classes:
2677        raise ValueError, "Number of scoring methods and targets do not match."
2678    # save original classes
2679    clsss = [te.classes for te in res.results]
2680    aclsss = [te.actual_class for te in res.results]
2681    # compute single target scores
2682    single_scores = []
2683    for i in range(n_classes):
2684        for te, clss, aclss in zip(res.results, clsss, aclsss):
2685            te.classes = [cls[i] for cls in clss]
2686            te.actual_class = aclss[i]
2687        single_scores.append(score[i](res))
2688    # restore original classes
2689    for te, clss, aclss in zip(res.results, clsss, aclsss):
2690        te.classes = clss
2691        te.actual_class = aclss
2692    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
2693        for scores in zip(*single_scores)]
2694
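## Usage sketch (comment only), assuming a multi-target regression
## experiment and the single-target RMSE score defined in this module:
##
##     scores = mt_average_score(res, RMSE)
##     # different weights per target, e.g. to emphasize the second target:
##     scores = mt_average_score(res, RMSE, weights=[1., 2.])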
2695def mt_flattened_score(res, score):
2696    """
2697    Flatten (concatenate into a single list) the predictions of multiple
2698    targets and compute a single-target score.
2699   
2700    :param score: Single-target scoring method.
2701    """
2702    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
2703        res.classifier_names, class_values=res.class_values,
2704        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
2705        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
2706    for te in res.results:
2707        for i, ac in enumerate(te.actual_class):
2708            te2 = Orange.evaluation.testing.TestedExample(
2709                iteration_number=te.iteration_number, actual_class=ac)
2710            for c, p in zip(te.classes, te.probabilities):
2711                te2.add_result(c[i], p[i])
2712            res2.results.append(te2)
2713    return score(res2)
2714
2715def mt_global_accuracy(res):
2716    """
2717    :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline`
2718   
2719    :math:`\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.`
2720    """
2721    results = []
2722    for l in xrange(res.number_of_learners):
2723        n_results = len(res.results)
2724        n_correct = 0.
2725
2726        for r in res.results:
2727            if list(r.classes[l]) == r.actual_class:
2728                n_correct+=1
2729
2730        results.append(n_correct/n_results)
2731    return results
2732
2733
2734def mt_mean_accuracy(res):
2735    """
2736    :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline`
2737   
2738    :math:`\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.`
2739    """
2740    results = []
2741    for l in xrange(res.number_of_learners):
2742        n_classes = len(res.results[0].actual_class)
2743        n_results = len(res.results)
2744        n_correct = 0.
2745
2746        for r in res.results:
2747            for i in xrange(n_classes):
2748                if r.classes[l][i] == r.actual_class[i]:
2749                    n_correct+=1
2750        results.append(n_correct/n_classes/n_results)
2751    return results
2752
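## Worked contrast between the two accuracies (comment only): with two
## targets and two test instances, a learner that predicts exactly one
## target correctly on each instance has mt_global_accuracy 0.0 (no
## instance is entirely correct) but mt_mean_accuracy 0.5 (half of all
## target values match).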
2753
2754
2755################################################################################
2756if __name__ == "__main__":
2757    avranks = [3.143, 2.000, 2.893, 1.964]
2758    names = ["first", "second", "third", "fourth"]
2759    cd = compute_CD(avranks, 14)
2760    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2761    print cd
2762
2763    print compute_friedman(avranks, 14)
2764
2765    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)