source: orange/Orange/evaluation/scoring.py @ 10857:e45c9ebd2c25

Revision 10857:e45c9ebd2c25, 99.1 KB checked in by Ales Erjavec <ales.erjavec@…>, 2 years ago

Added a note to 'confusion_matrices' function for the return value if
the 'class_index' is not specified.

import math
import functools
from operator import add

import numpy

import Orange
from Orange import statc, corn
from Orange.utils import deprecated_keywords, deprecated_function_name, \
    deprecation_warning, environ
from Orange.evaluation import testing

try:
    import matplotlib
    HAS_MATPLOTLIB = True
except ImportError:
    matplotlib = None
    HAS_MATPLOTLIB = False

#### Private stuff

def log2(x):
    """Calculate logarithm in base 2."""
    return math.log(x) / math.log(2)

def check_non_zero(x):
    """Raise ValueError when x is 0."""
    if x == 0.:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."

def gettotweight(res):
    """Sum the weights of all result instances."""
    totweight = reduce(lambda x, y: x + y.weight, res.results, 0)
    if totweight == 0.:
        raise ValueError, "Cannot compute the score: sum of weights is 0."
    return totweight

def gettotsize(res):
    """Return the number of result instances."""
    if len(res.results):
        return len(res.results)
    else:
        raise ValueError, "Cannot compute the score: no examples."


def split_by_iterations(res):
    """Split ExperimentResults of a multiple-iteration test into a list
    of ExperimentResults, one for each iteration.
    """
    if res.number_of_iterations < 2:
        return [res]

    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifier_names, res.class_values,
                res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type, labels=res.labels)
            for _ in range(res.number_of_iterations)]
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress

def split_by_classifiers(res):
    """Split an instance of :obj:`ExperimentResults` into a list of
    :obj:`ExperimentResults`, one for each classifier.
    """
    split_res = []
    for i in range(len(res.classifierNames)):
        r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations,
                    [res.classifierNames[i]], res.classValues,
                    weights=res.weights, base_class=res.base_class,
                    classifiers=[res.classifiers[i]] if res.classifiers else [],
                    test_type=res.test_type, labels=res.labels)
        r.results = []
        for te in res.results:
            r.results.append(Orange.evaluation.testing.TestedExample(
                te.iterationNumber, te.actualClass, n=1, weight=te.weight))
            r.results[-1].classes = [te.classes[i]]
            r.results[-1].probabilities = [te.probabilities[i]]
        split_res.append(r)
    return split_res


def class_probabilities_from_res(res, **argkw):
    """Calculate class probabilities."""
    probs = [0.] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            probs[int(tex.actual_class)] += 1.
        totweight = gettotsize(res)
    else:
        totweight = 0.
        for tex in res.results:
            probs[int(tex.actual_class)] += tex.weight
            totweight += tex.weight
        check_non_zero(totweight)
    return [prob / totweight for prob in probs]


@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    # remove empty folds, turn the matrix so that learner is outer
    if iteration_is_outer:
        if not stats:
            raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."
        number_of_learners = len(stats[0])
        stats = filter(lambda (x, fN): fN > 0, zip(stats, fold_n))
        stats = [[x[lrn] / fN for x, fN in stats]
                 for lrn in range(number_of_learners)]
    else:
        stats = [[x / Fn for x, Fn in filter(lambda (x, Fn): Fn > 0,
                 zip(lrnD, fold_n))] for lrnD in stats]

    if not stats:
        raise ValueError, "Cannot compute the score: no classifiers"
    if not stats[0]:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]

def ME(res, **argkw):
    MEs = [0.] * res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            MEs = map(lambda res, cls, ac=float(tex.actual_class):
                      res + abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            MEs = map(lambda res, cls, ac=float(tex.actual_class), tw=tex.weight:
                       res + tw * abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotweight(res)

    return [x / totweight for x in MEs]

MAE = ME  # note: shadowed by the regression_error-based MAE defined below


class ConfusionMatrix:
    """
    Classification result summary.
    """
    #: True Positive predictions
    TP = 0.
    #: True Negative predictions
    TN = 0.
    #: False Positive predictions
    FP = 0.
    #: False Negative predictions
    FN = 0.

    @deprecated_keywords({"predictedPositive": "predicted_positive",
                          "isPositive": "is_positive"})
    def addTFPosNeg(self, predicted_positive, is_positive, weight=1.0):
        """
        Update the confusion matrix with the result of a single classification.

        :param predicted_positive: positive class value was predicted
        :param is_positive: correct class value is positive
        :param weight: weight of the selected instance
        """
        if predicted_positive:
            if is_positive:
                self.TP += weight
            else:
                self.FP += weight
        else:
            if is_positive:
                self.FN += weight
            else:
                self.TN += weight

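# A minimal usage sketch (illustrative only): tally four hand-made binary
# predictions; the positive class is predicted correctly twice, missed once,
# and falsely predicted once.
#
#     >>> cm = ConfusionMatrix()
#     >>> for predicted, actual in [(True, True), (True, True),
#     ...                           (False, True), (True, False)]:
#     ...     cm.addTFPosNeg(predicted, actual)
#     >>> cm.TP, cm.FN, cm.FP, cm.TN
#     (2.0, 1.0, 1.0, 0.0)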


#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of numeric predictions

def check_argkw(dct, lst):
    """Return True if any item from lst has a non-zero value in dct."""
    return reduce(lambda x, y: x or y, [dct.get(k, 0) for k in lst])

def regression_error(res, **argkw):
    """Return the regression error (default: MSE)."""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # compute the scores for each iteration, then average
        scores = [[0.] * res.number_of_iterations
                  for _ in range(res.number_of_learners)]
        norm = None
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.] * res.number_of_iterations

        # count examples in each iteration
        nIter = [0] * res.number_of_iterations
        # average class in each iteration
        a = [0] * res.number_of_iterations
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i] / nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # iterate across test cases
            for tex in res.results:
                ai = float(tex.actual_class)

                # compute normalization, if required
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number]) ** 2

                # iterate across results of different regressors
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai) ** 2
        else: # weighted instances
            raise NotImplementedError, "weighted error scores with SE not implemented yet"

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x / n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x / ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else: # single iteration (testing on a single test set)
        scores = [0.] * res.number_of_learners
        norm = 0.

        if argkw.get("unweighted", 0) or not res.weights:
            a = sum([tex.actual_class for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                if argkw.get("abs", 0):
                    scores = map(lambda res, cls, ac=float(tex.actual_class):
                                 res + abs(float(cls) - ac), scores, tex.classes)
                else:
                    scores = map(lambda res, cls, ac=float(tex.actual_class):
                                 res + (float(cls) - ac) ** 2, scores, tex.classes)

                if argkw.get("norm-abs", 0):
                    norm += abs(tex.actual_class - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (tex.actual_class - a) ** 2
            totweight = gettotsize(res)
        else:
            # UNFINISHED
            MSEs = [0.] * res.number_of_learners
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class),
                           tw=tex.weight:
                           res + tw * (float(cls) - ac) ** 2, MSEs, tex.classes)
            totweight = gettotweight(res)

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s / norm for s in scores]
        else: # normalize by number of instances (or sum of weights)
            scores = [s / totweight for s in scores]

        if argkw.get("R2"):
            scores = [1. - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores

def MSE(res, **argkw):
    """Compute mean-squared error."""
    return regression_error(res, **argkw)

def RMSE(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def MAE(res, **argkw):
    """Compute mean absolute error."""
    argkw.setdefault("abs", True)
    return regression_error(res, **argkw)

def RSE(res, **argkw):
    """Compute relative squared error."""
    argkw.setdefault("norm-sqr", True)
    return regression_error(res, **argkw)

def RRSE(res, **argkw):
    """Compute root relative squared error."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def RAE(res, **argkw):
    """Compute relative absolute error."""
    argkw.setdefault("abs", True)
    argkw.setdefault("norm-abs", True)
    return regression_error(res, **argkw)

def R2(res, **argkw):
    """Compute the coefficient of determination, R-squared."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("R2", True)
    return regression_error(res, **argkw)

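# A minimal sketch of how these scorers are typically used (illustrative;
# assumes the "housing" regression data set and these learner classes are
# available in this Orange installation):
#
#     >>> data = Orange.data.Table("housing")
#     >>> learners = [Orange.regression.mean.MeanLearner(),
#     ...             Orange.regression.linear.LinearRegressionLearner()]
#     >>> res = Orange.evaluation.testing.cross_validation(learners, data)
#     >>> RMSE(res)    # one score per learner
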
def MSE_old(res, **argkw):
    """Compute mean-squared error."""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        MSEs = [[0.] * res.number_of_iterations
                for _ in range(res.number_of_learners)]
        nIter = [0] * res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac) ** 2
        else:
            raise ValueError, "weighted RMSE with SE not implemented yet"
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class):
                           res + (float(cls) - ac) ** 2, MSEs, tex.classes)
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class),
                           tw=tex.weight: res + tw * (float(cls) - ac) ** 2,
                           MSEs, tex.classes)
            totweight = gettotweight(res)

        # normalize before taking the square root, so that "sqrt" really
        # yields the root of the mean squared error
        MSEs = [x / totweight for x in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return MSEs

def RMSE_old(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", 1)
    return MSE_old(res, **argkw)

#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of classifiers

class CA(list):
    """
    Compute the proportion of matches between predicted and actual classes.

    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :param report_se: include standard error in result.
    :param ignore_weights: ignore instance weights.
    :rtype: list of scores, one for each learner.

    Standard errors are estimated from the deviation of CAs across folds (if
    test_results were produced by cross_validation) or approximated under
    the assumption of a normal distribution otherwise.
    """
    CONFUSION_MATRIX = 0
    CONFUSION_MATRIX_LIST = 1
    CLASSIFICATION = 2
    CROSS_VALIDATION = 3

    @deprecated_keywords({"reportSE": "report_se",
                          "unweighted": "ignore_weights"})
    def __init__(self, test_results, report_se=False, ignore_weights=False):
        super(CA, self).__init__()
        self.report_se = report_se
        self.ignore_weights = ignore_weights

        input_type = self.get_input_type(test_results)
        if input_type == self.CONFUSION_MATRIX:
            self[:] = [self.from_confusion_matrix(test_results)]
        elif input_type == self.CONFUSION_MATRIX_LIST:
            self[:] = self.from_confusion_matrix_list(test_results)
        elif input_type == self.CLASSIFICATION:
            self[:] = self.from_classification_results(test_results)
        elif input_type == self.CROSS_VALIDATION:
            self[:] = self.from_crossvalidation_results(test_results)

    def from_confusion_matrix(self, cm):
        all_predictions = 0.
        correct_predictions = 0.
        if isinstance(cm, ConfusionMatrix):
            all_predictions += cm.TP + cm.FN + cm.FP + cm.TN
            correct_predictions += cm.TP + cm.TN
        else:
            for r, row in enumerate(cm):
                for c, column in enumerate(row):
                    if r == c:
                        correct_predictions += column
                    all_predictions += column

        check_non_zero(all_predictions)
        ca = correct_predictions / all_predictions

        if self.report_se:
            return ca, ca * (1 - ca) / math.sqrt(all_predictions)
        else:
            return ca

    def from_confusion_matrix_list(self, confusion_matrices):
        return [self.from_confusion_matrix(cm) for cm in confusion_matrices]

    def from_classification_results(self, test_results):
        CAs = [0.] * test_results.number_of_learners
        totweight = 0.
        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            CAs = map(lambda res, cls: res + (cls == tex.actual_class and w),
                      CAs, tex.classes)
            totweight += w
        check_non_zero(totweight)
        ca = [x / totweight for x in CAs]

        if self.report_se:
            return [(x, x * (1 - x) / math.sqrt(totweight)) for x in ca]
        else:
            return ca

    def from_crossvalidation_results(self, test_results):
        CAsByFold = [[0.] * test_results.number_of_iterations
                     for _ in range(test_results.number_of_learners)]
        foldN = [0.] * test_results.number_of_iterations

        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            for lrn in range(test_results.number_of_learners):
                CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn] ==
                    tex.actual_class) and w
            foldN[tex.iteration_number] += w

        return statistics_by_folds(CAsByFold, foldN, self.report_se, False)

    def get_input_type(self, test_results):
        if isinstance(test_results, ConfusionMatrix):
            return self.CONFUSION_MATRIX
        elif isinstance(test_results, testing.ExperimentResults):
            if test_results.number_of_iterations == 1:
                return self.CLASSIFICATION
            else:
                return self.CROSS_VALIDATION
        elif isinstance(test_results, list):
            return self.CONFUSION_MATRIX_LIST


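# A minimal usage sketch (illustrative; assumes the "voting" data set and
# the naive Bayes learner from this Orange installation):
#
#     >>> data = Orange.data.Table("voting")
#     >>> res = Orange.evaluation.testing.cross_validation(
#     ...     [Orange.classification.bayes.NaiveLearner()], data)
#     >>> CA(res)                    # [proportion of correct predictions]
#     >>> CA(res, report_se=True)    # [(proportion, standard error)]
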
@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def AP(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the average probability assigned to the correct class."""
    if res.number_of_iterations == 1:
        APs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class],
                          APs, tex.probabilities)
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class] *
                          tex.weight, APs, tex.probabilities)
                totweight += tex.weight
        check_non_zero(totweight)
        return [AP / totweight for AP in APs]

    APsByFold = [[0.] * res.number_of_learners
                 for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations
    if ignore_weights or not res.weights:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs:
                res + probs[tex.actual_class],
                APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs:
                res + probs[tex.actual_class] * tex.weight,
                APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(APsByFold, foldN, report_se, True)


@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def Brier_score(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the Brier score, defined as the average (over test instances)
    of :math:`\sum_x(t(x) - p(x))^2`, where x is a class value, t(x) is 1 for
    the actual class value and 0 otherwise, and p(x) is the predicted
    probability of x.
    """
    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
    #    x is class,
    #    t(x) is 0 for 'wrong' and 1 for 'correct' class
    #    p(x) is predicted probability.
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1.
    # Since +1 is there for each example, it adds 1 to the average;
    # we skip the +1 inside the sum and add it just at the end of the function.
    # We take max(result, 0) to avoid -0.0000x due to rounding errors.

    if res.number_of_iterations == 1:
        MSEs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            for tex in res.results:
                MSEs = map(lambda res, probs: res + reduce(
                    lambda s, pi: s + pi ** 2, probs, 0) -
                    2 * probs[tex.actual_class], MSEs, tex.probabilities)
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                MSEs = map(lambda res, probs: res + tex.weight * (reduce(
                    lambda s, pi: s + pi ** 2, probs, 0) -
                    2 * probs[tex.actual_class]), MSEs, tex.probabilities)
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            ## change this, not zero!!!
            return [(max(x / totweight + 1., 0), 0) for x in MSEs]
        else:
            return [max(x / totweight + 1., 0) for x in MSEs]

    BSs = [[0.] * res.number_of_learners
           for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if ignore_weights or not res.weights:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs: rr + reduce(
                lambda s, pi: s + pi ** 2, probs, 0) -
                2 * probs[tex.actual_class], BSs[tex.iteration_number],
                tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda res, probs:
                res + tex.weight * (reduce(lambda s, pi: s + pi ** 2, probs, 0) -
                2 * probs[tex.actual_class]), BSs[tex.iteration_number],
                tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x + 1., y) for x, y in stats]
    else:
        return [x + 1. for x in stats]

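# A small numeric check of the shortcut described above (illustrative): for
# one instance with predicted distribution p = [0.7, 0.3] and actual class 0,
# sum_x (t(x) - p(x))**2 = (1 - 0.7)**2 + (0 - 0.3)**2 = 0.18, and the
# shortcut gives sum_x p(x)**2 - 2*p(c) + 1 = 0.49 + 0.09 - 1.4 + 1 = 0.18.
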
def BSS(res, **argkw):
    return [1 - x / 2 for x in apply(Brier_score, (res,), argkw)]

def IS_ex(Pc, P):
    """Compute the per-example information score; Pc is the posterior
    probability of the correct class, P its prior probability."""
    if Pc >= P:
        return -log2(P) + log2(Pc)
    else:
        return -(-log2(1 - P) + log2(1 - Pc))


@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se=False, **argkw):
    """Compute the information score as defined by
    `Kononenko and Bratko (1991) \
    <http://www.springerlink.com/content/g5p7473160476612/>`_.
    Argument :obj:`apriori` gives the a priori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:`res`.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations == 1:
        ISs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += (IS_ex(tex.probabilities[i][cls], apriori[cls]) *
                               tex.weight)
            totweight = gettotweight(res)
        if report_se:
            return [(IS / totweight, 0) for IS in ISs]
        else:
            return [IS / totweight for IS in ISs]


    ISs = [[0.] * res.number_of_iterations
           for _ in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # compute info scores for each fold
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls],
                                                apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls],
                                                apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)

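# A minimal numeric sketch (illustrative): with a prior of 0.5 for the
# correct class, a confident correct prediction is worth almost one bit:
#
#     >>> IS_ex(0.9, 0.5)    # -log2(0.5) + log2(0.9) ~ 0.848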

def Wilcoxon(res, statistics, **argkw):
    res1, res2 = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError, "Wilcoxon compares two classifiers, no more, no less"
        res1.append(stats[0])
        res2.append(stats[1])
    return statc.wilcoxont(res1, res2)

def rank_difference(res, statistics, **argkw):
    if not res.results:
        raise TypeError, "no experiments"

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError, "nothing to compare (less than two classifiers given)"
    if k == 2:
        return apply(Wilcoxon, (res, statistics), argkw)
    else:
        return apply(Friedman, (res, statistics), argkw)


@deprecated_keywords({"res": "test_results",
                      "classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def confusion_matrices(test_results, class_index=-1,
                       ignore_weights=False, cutoff=.5):
    """
    Return confusion matrices for test_results.

    :param test_results: test results
    :param class_index: index of class value for which the confusion matrices
        are to be computed (by default unspecified - see note below).
    :param ignore_weights: ignore instance weights.
    :param cutoff: cutoff for probability

    :rtype: list of :obj:`ConfusionMatrix` or list-of-list-of-lists (see
        note below)

    .. note:: If `class_index` is not specified and `test_results`
        contain predictions for a multi-class problem, then the return
        value is a list of 2D tables (list-of-lists) of all pairwise
        misclassifications between class values.

    """
    tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)]

    if class_index < 0:
        numberOfClasses = len(test_results.class_values)
        if class_index < -1 or numberOfClasses > 2:
            cm = [[[0.] * numberOfClasses for _ in range(numberOfClasses)]
                  for _ in range(test_results.number_of_learners)]
            if ignore_weights or not test_results.weights:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += 1
            else:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += tex.weight
            return cm

        elif test_results.base_class >= 0:
            class_index = test_results.base_class
        else:
            class_index = 1

    if cutoff != .5:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] >
                                         cutoff, isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] >
                                         cutoff, isPositive, lr.weight)
    else:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive, lr.weight)
    return tfpns

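# A minimal usage sketch (illustrative; "res" stands for an
# ExperimentResults produced by Orange.evaluation.testing): with a class
# index, one binary ConfusionMatrix per learner; with class_index=-2 (or
# left unspecified on a multi-class problem), one 2D table per learner,
# indexed as table[actual][predicted].
#
#     >>> cms = confusion_matrices(res, class_index=1)
#     >>> cms[0].TP, cms[0].FP
#     >>> tables = confusion_matrices(res, class_index=-2)
#     >>> tables[0][0][1]    # instances of class 0 predicted as class 1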

# obsolete (renamed)
compute_confusion_matrices = confusion_matrices


@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """
    Return the chi-square statistic of the confusion matrix
    (a higher value indicates that the prediction is not by chance).
    """
    if isinstance(confusion_matrix, ConfusionMatrix) or \
       not isinstance(confusion_matrix[1], list):
        return _confusion_chi_square(confusion_matrix)
    else:
        return map(_confusion_chi_square, confusion_matrix)

def _confusion_chi_square(confusion_matrix):
    if isinstance(confusion_matrix, ConfusionMatrix):
        c = confusion_matrix
        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]]
    dim = len(confusion_matrix)
    rowPriors = [sum(r) for r in confusion_matrix]
    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)]
    total = float(sum(rowPriors))
    rowPriors = [r / total for r in rowPriors]
    colPriors = [r / total for r in colPriors]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, o in enumerate(row):
            e = total * rowPriors[ri] * colPriors[ci]
            if not e:
                return -1, -1, -1
            ss += (o - e) ** 2 / e
    df = (dim - 1) ** 2
    return ss, df, statc.chisqprob(ss, df)

class CMScore(list):
    """
    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :rtype: list of scores, one for each learner."""
    def __new__(cls, test_results, **kwargs):
        self = list.__new__(cls)
        if isinstance(test_results, ConfusionMatrix):
            self.__init__(test_results, **kwargs)
            return self[0]
        return self


    @deprecated_keywords({"confm": "test_results"})
    def __init__(self, test_results=None):
        super(CMScore, self).__init__()

        if test_results is not None:
            self[:] = self.__call__(test_results)

    def __call__(self, test_results):
        if isinstance(test_results, testing.ExperimentResults):
            test_results = confusion_matrices(test_results, class_index=1)
        if isinstance(test_results, ConfusionMatrix):
            test_results = [test_results]

        return map(self.compute, test_results)



class Sensitivity(CMScore):
    __doc__ = """Compute `sensitivity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ (proportion
    of actual positives which are correctly identified as such).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        tot = confusion_matrix.TP + confusion_matrix.FN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute sensitivity: one or both classes have no instances")
            return None

        return confusion_matrix.TP / tot


class Recall(Sensitivity):
    __doc__ = """Compute `recall
    <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (fraction of relevant instances that are retrieved).
    """ + CMScore.__doc__
    pass # Recall == Sensitivity


class Specificity(CMScore):
    __doc__ = """Compute `specificity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
    (proportion of negatives which are correctly identified).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        tot = confusion_matrix.FP + confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute specificity: one or both classes have no instances")
            return None
        return confusion_matrix.TN / tot


class PPV(CMScore):
    __doc__ = """Compute `positive predictive value
    <http://en.wikipedia.org/wiki/Positive_predictive_value>`_ (proportion of
    subjects with positive test results who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        tot = confusion_matrix.TP + confusion_matrix.FP
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute PPV: one or both classes have no instances")
            return None
        return confusion_matrix.TP / tot


class Precision(PPV):
    __doc__ = """Compute `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (proportion of retrieved instances that are relevant).
    """ + CMScore.__doc__
    pass # Precision == PPV


class NPV(CMScore):
    __doc__ = """Compute `negative predictive value
    <http://en.wikipedia.org/wiki/Negative_predictive_value>`_ (proportion of
    subjects with a negative test result who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        tot = confusion_matrix.FN + confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute NPV: one or both classes have no instances")
            return None
        return confusion_matrix.TN / tot


class F1(CMScore):
    __doc__ = """Return the `F1 score
    <http://en.wikipedia.org/wiki/F1_score>`_
    (harmonic mean of precision and recall).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, confusion_matrix):
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        if p is not None and r is not None and (p + r) != 0:
            return 2. * p * r / (p + r)
        else:
            import warnings
            warnings.warn("Can't compute F1: P + R is zero or not defined")
            return None

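# A minimal numeric sketch (illustrative): with TP=4, FP=1, FN=1, both
# precision and recall are 4/5, so F1 is 0.8.
#
#     >>> cm = ConfusionMatrix()
#     >>> cm.TP, cm.FP, cm.FN, cm.TN = 4., 1., 1., 4.
#     >>> F1(cm)
#     0.8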

class Falpha(CMScore):
    __doc__ = """Compute the alpha-mean of precision and recall over the given confusion
    matrix.
    """ + CMScore.__doc__

    def __init__(self, test_results, alpha=1.):
        self.alpha = alpha
        super(Falpha, self).__init__(test_results)

    def compute(self, confusion_matrix):
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        return (1. + self.alpha) * p * r / (self.alpha * p + r)


class MCC(CMScore):
    __doc__ = """Compute the `Matthews correlation coefficient
    <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_
    (correlation coefficient between the observed and predicted binary
    classifications).
    """ + CMScore.__doc__
    @classmethod
    def compute(cls, cm):
        # code by Boris Gorelik
        TP, TN, FP, FN = cm.TP, cm.TN, cm.FP, cm.FN

        try:
            return (TP * TN - FP * FN) / \
                 math.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
        except ZeroDivisionError:
            # Zero division occurs when any of the marginal sums is zero,
            # e.g. when the data contain instances of only one class.
            import warnings
            warnings.warn("Can't compute MCC: TP or TN is zero or not defined")


@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for nominal
    raters.

    @param confusion_matrix: confusion matrix, or list of confusion matrices.
                             To obtain a non-binary confusion matrix, call
                             Orange.evaluation.scoring.confusion_matrices
                             and set the class_index parameter to -2.
    @param b_is_list_of_matrices: specifies whether confusion_matrix is a list
                             of matrices. This function needs to operate on
                             non-binary confusion matrices, which are
                             represented by python lists, therefore one needs
                             a way to distinguish between a single matrix and
                             a list of matrices.
    """

    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False)
                    for cm in confusion_matrix]
        except TypeError:
            # Never mind the parameter; maybe this is a "conventional" binary
            # confusion matrix and b_is_list_of_matrices was set by mistake.
            return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
    else:
        if isinstance(confusion_matrix, ConfusionMatrix):
            confusion_matrix = numpy.array([[confusion_matrix.TP,
                confusion_matrix.FN], [confusion_matrix.FP,
                confusion_matrix.TN]], dtype=float)
        else:
            confusion_matrix = numpy.array(confusion_matrix, dtype=float)

        marginalSumOfRows = numpy.sum(confusion_matrix, axis=1)
        marginalSumOfColumns = numpy.sum(confusion_matrix, axis=0)
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                          (2. * numpy.sum(confusion_matrix))
        # In the eq. above, 2. is what the Wikipedia page calls
        # the number of annotators. Here we have two annotators:
        # the observed (true) labels (annotations) and the predicted by
        # the learners.

        prExpected = numpy.sum(jointProportion ** 2)
        prActual = numpy.sum(numpy.diag(confusion_matrix)) / \
                   numpy.sum(confusion_matrix)

        ret = (prActual - prExpected) / (1.0 - prExpected)
        return ret

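# A minimal numeric sketch (illustrative): perfect agreement on a balanced
# binary problem gives pi = 1.
#
#     >>> scotts_pi([[5., 0.], [0., 5.]], b_is_list_of_matrices=False)
#     1.0
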
# Backward compatibility
sens = Sensitivity
spec = Specificity
precision = Precision
recall = Recall



@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def AUCWilcoxon(res, class_index=-1, ignore_weights=False, **argkw):
    """Compute the area under ROC (AUC) and its standard error using
    Wilcoxon's approach proposed by Hanley and McNeil (1982). If
    :obj:`class_index` is not specified, the first class is used as
    "the positive" and others are negative. The result is a list of
    tuples (aROC, standard error).

    If test results consist of multiple folds, you need to split them using
    :obj:`split_by_iterations` and perform this test on each fold separately.
    """
    useweights = res.weights and not ignore_weights
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results = []

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:
        highPos, lowNeg = totPos, 0.
        W, Q1, Q2 = 0., 0., 0.
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos / 2.)
            Q2 += thisPos * (lowNeg ** 2 + lowNeg * thisNeg + thisNeg ** 2 / 3.)
            Q1 += thisNeg * (highPos ** 2 + highPos * thisPos + thisPos ** 2 / 3.)

            lowNeg += thisNeg

        W /= (totPos * totNeg)
        Q1 /= (totNeg * totPos ** 2)
        Q2 /= (totPos * totNeg ** 2)

        SE = math.sqrt((W * (1 - W) + (totPos - 1) * (Q1 - W ** 2) +
                       (totNeg - 1) * (Q2 - W ** 2)) / (totPos * totNeg))
        results.append((W, SE))
    return results

AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete


@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def compare_2_AUCs(res, lrn1, lrn2, class_index=-1,
                   ignore_weights=False, **argkw):
    return corn.compare2ROCs(res, lrn1, lrn2, class_index,
                             res.weights and not ignore_weights)

# for backward compatibility, compare_2_AROCs is obsolete
compare_2_AROCs = compare_2_AUCs


@deprecated_keywords({"classIndex": "class_index"})
def compute_ROC(res, class_index=-1):
    """Compute a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    totPos, totNeg = tots[1], tots[0]

    for plist in problists:
        curve = [(1., 1.)]
        TP, TN = totPos, 0.
        FN, FP = 0., totNeg
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            # thisPos instances move from TP to FN
            TP -= thisPos
            FN += thisPos
            # thisNeg instances move from FP to TN
            TN += thisNeg
            FP -= thisNeg

            sens = TP / (TP + FN)
            spec = TN / (FP + TN)
            curve.append((1 - spec, sens))
        results.append(curve)

    return results

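# A minimal usage sketch (illustrative; "res" holds single-iteration test
# results, e.g. from Orange.evaluation.testing.learn_and_test_on_test_data):
#
#     >>> curves = compute_ROC(res)    # one list of (1 - spec, sens) points per learner
#     >>> curves[0][0]                 # every curve starts at (1.0, 1.0)
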
## TC's implementation of algorithms, taken from:
## T Fawcett: ROC Graphs: Notes and Practical Considerations for
## Data Mining Researchers, submitted to KDD Journal.
def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)):
    if P1x == P2x:
        return 1e300
    return (P1y - P2y) / (P1x - P2x)


@deprecated_keywords({"keepConcavities": "keep_concavities"})
def ROC_add_point(P, R, keep_concavities=1):
    if keep_concavities:
        R.append(P)
    else:
        while True:
            if len(R) < 2:
                R.append(P)
                return R
            else:
                T = R.pop()
                T2 = R[-1]
                if ROC_slope(T2, T) > ROC_slope(T, P):
                    R.append(T)
                    R.append(P)
                    return R
    return R


@deprecated_keywords({"classIndex": "class_index",
                      "keepConcavities": "keep_concavities"})
def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]

    for plist in problists:
        ## corn gives a list increasing by score; we need a decreasing one
        plist.reverse()
        TP = 0.
        FP = 0.
        curve = []
        fPrev = 10e300 # "infinity" score at 0., 0.
        for prob in plist:
            f = prob[0]
            if f != fPrev:
                if P:
                    tpr = TP / P
                else:
                    tpr = 0.
                if N:
                    fpr = FP / N
                else:
                    fpr = 0.
                curve = ROC_add_point((fpr, tpr, fPrev), curve,
                                      keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        if P:
            tpr = TP / P
        else:
            tpr = 0.
        if N:
            fpr = FP / N
        else:
            fpr = 0.
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly
        results.append(curve)

    return results

## returns a list of points at the intersection of the tangential
## iso-performance line and the given ROC curve
## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    m = (FPcost * (1. - pval)) / (FNcost * pval)

    ## put the iso-performance line in point (0., 1.)
    x0, y0 = (0., 1.)
    x1, y1 = (1., 1. + m)
    d01 = math.sqrt((x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0))

    ## calculate and find the closest point to the line
    firstp = 1
    mind = 0.
    a = x0 * y1 - x1 * y0
    closestPoints = []
    for (x, y, fscore) in curve:
        d = ((y0 - y1) * x + (x1 - x0) * y + a) / d01
        d = abs(d)
        if firstp or d < mind:
            mind, firstp = d, 0
            closestPoints = [(x, y, fscore)]
        else:
            if abs(d - mind) <= 0.0001: ## close enough
                closestPoints.append((x, y, fscore))
    return closestPoints

def frange(start, end=None, inc=None):
    """A range function that accepts float increments."""

    if end is None:
        end = start + 0.
        start = 0.

    if inc is None or inc == 0:
        inc = 1.

    L = [start]
    while 1:
        next = start + len(L) * inc
        if inc > 0 and next >= end:
            L.append(end)
            break
        elif inc < 0 and next <= end:
            L.append(end)
            break
        L.append(next)

    return L

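# For example (illustrative):
#
#     >>> frange(0., 1., 0.25)
#     [0.0, 0.25, 0.5, 0.75, 1.0]
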
## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
## where ROCcurvesX is a set of ROC curves,
## where a (one) ROC curve is a set of (FP, TP) points
##
## for each (sub)set of input ROC curves
## returns the average ROC curve and an array of (vertical) standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_vertical_average_ROC(roc_curves, samples=10):
    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
        if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x:
            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
        dx = float(P2x) - float(P1x)
        dy = float(P2y) - float(P1y)
        m = dy / dx
        return P1y + m * (X - P1x)

    def TP_FOR_FP(FPsample, ROC, npts):
        i = 0
        while i < npts - 1:
            (fp, _, _) = ROC[i + 1]
            if fp <= FPsample:
                i += 1
            else:
                break
        (fp, tp, _) = ROC[i]
        if fp == FPsample:
            return tp
        elif fp < FPsample and i + 1 < len(ROC):
            return INTERPOLATE(ROC[i], ROC[i + 1], FPsample)
        elif fp < FPsample and i + 1 == len(ROC): # return the last
            return ROC[i][1]
        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
        #return 0.

    average = []
    stdev = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        TPavg = []
        TPstd = []
        for FPsample in frange(0., 1., 1. / samples):
            TPsum = []
            for i in range(nrocs):
                ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
                TPsum.append(TP_FOR_FP(FPsample, ROCS[i], npts[i]))
            TPavg.append((FPsample, statc.mean(TPsum)))
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.
            TPstd.append(stdv)

        average.append(TPavg)
        stdev.append(TPstd)

    return average, stdev

## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
## where ROCcurvesX is a set of ROC curves,
## where a (one) ROC curve is a set of (FP, TP) points
##
## for each (sub)set of input ROC curves
## returns the average ROC curve, an array of vertical standard deviations
## and an array of horizontal standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_threshold_average_ROC(roc_curves, samples=10):
    def POINT_AT_THRESH(ROC, npts, thresh):
        i = 0
        while i < npts - 1:
            (px, py, pfscore) = ROC[i]
            if pfscore > thresh:
                i += 1
            else:
                break
        return ROC[i]

    average = []
    stdevV = []
    stdevH = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        T = []
        for c in ROCS:
            for (px, py, pfscore) in c:
                T.append(pfscore)
        T.sort()
        T.reverse() ## ugly

        TPavg = []
        TPstdV = []
        TPstdH = []
        for tidx in frange(0, (len(T) - 1.), float(len(T)) / samples):
            FPsum = []
            TPsum = []
            for i in range(nrocs):
                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
                FPsum.append(fp)
                TPsum.append(tp)
            TPavg.append((statc.mean(FPsum), statc.mean(TPsum)))
            ## vertical standard deviation
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.
            TPstdV.append(stdv)
            ## horizontal standard deviation
            if len(FPsum) > 1:
                stdh = statc.std(FPsum)
            else:
                stdh = 0.
            TPstdH.append(stdh)

        average.append(TPavg)
        stdevV.append(TPstdV)
        stdevH.append(TPstdH)

    return average, stdevV, stdevH

## Calibration Curve
## returns an array of (curve, yesClassPredictions, noClassPredictions)
## elements, where:
##  - curve is an array of points (x, y) on the calibration curve
##  - yesClassRugPoints is an array of (x, 1) points
##  - noClassRugPoints is an array of (x, 0) points
@deprecated_keywords({"classIndex": "class_index"})
def compute_calibration_curve(res, class_index=-1):
    ## merge multiple iterations into one
    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
        res.class_values, res.weights, classifiers=res.classifiers,
        loaded=res.loaded, test_type=res.test_type, labels=res.labels)
    for te in res.results:
        mres.results.append(te)

    problists, tots = corn.computeROCCumulative(mres, class_index)

    results = []

    bins = 10 ## divide the interval between 0. and 1. into N bins

    for plist in problists:
        yesClassRugPoints = []
        noClassRugPoints = []

        yesBinsVals = [0] * bins
        noBinsVals = [0] * bins
        for (f, (thisNeg, thisPos)) in plist:
            yesClassRugPoints.append((f, thisPos)) # 1.
            noClassRugPoints.append((f, thisNeg)) # 1.

            index = int(f * bins)
            index = min(index, bins - 1) ## just in case, for value 1.
            yesBinsVals[index] += thisPos
            noBinsVals[index] += thisNeg

        curve = []
        for cn in range(bins):
            f = float(cn * 1. / bins) + (1. / 2. / bins)
            yesVal = yesBinsVals[cn]
            noVal = noBinsVals[cn]
            allVal = yesVal + noVal
            if allVal == 0.: continue
            y = float(yesVal) / float(allVal)
            curve.append((f, y))

        ## smooth the curve
        maxnPoints = 100
        if len(curve) >= 3:
#            loessCurve = statc.loess(curve, -3, 0.6)
            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
        else:
            loessCurve = curve
        clen = len(loessCurve)
        if clen > maxnPoints:
            df = clen / maxnPoints
            if df < 1: df = 1
            curve = [loessCurve[i] for i in range(0, clen, df)]
        else:
            curve = loessCurve
        ## remove the third value (variance of epsilon?) that suddenly
        ## appeared in the output of the statc.loess function
        curve = [c[:2] for c in curve]
        results.append((curve, yesClassRugPoints, noClassRugPoints))

    return results


1401## Lift Curve
1402## returns an array of curve elements, where:
1403##  - curve is an array of points ((TP + FP) / (P + N), TP / P, (th, FP / N))
1404##    on the Lift Curve
1405@deprecated_keywords({"classIndex": "class_index"})
1406def compute_lift_curve(res, class_index= -1):
1407    ## merge multiple iterations into one
1408    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1409        res.class_values, res.weights, classifiers=res.classifiers,
1410        loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1411    for te in res.results:
1412        mres.results.append(te)
1413
1414    problists, tots = corn.computeROCCumulative(mres, class_index)
1415
1416    results = []
1417    P, N = tots[1], tots[0]
1418    for plist in problists:
1419        ## corn returns the list in increasing order of scores; we need it decreasing
1420        plist.reverse()
1421        TP = 0.
1422        FP = 0.
1423        curve = [(0., 0., (10e300, 0.))]
1424        for (f, (thisNeg, thisPos)) in plist:
1425            TP += thisPos
1426            FP += thisNeg
1427            curve.append(((TP + FP) / (P + N), TP, (f, FP / (N or 1))))
1428        results.append(curve)
1429
1430    return P, N, results
1431
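# Usage sketch (continuing the illustrative setup above): P and N are the
# total numbers of positive and negative instances, and each curve is a list
# of ((TP + FP) / (P + N), TP, (threshold, FP / N)) triples, one per learner:
#
#     P, N, curves = compute_lift_curve(res, class_index=1)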
1432
1433class CDT:
1434    """Store the number of concordant (C), discordant (D) and tied (T) pairs."""
1435    def __init__(self, C=0., D=0., T=0.):
1436        self.C, self.D, self.T = C, D, T
1437
1438def is_CDT_empty(cdt):
1439    return cdt.C + cdt.D + cdt.T < 1e-20
1440
1441
1442@deprecated_keywords({"classIndex": "class_index",
1443                      "unweighted": "ignore_weights"})
1444def compute_CDT(res, class_index= -1, ignore_weights=False, **argkw):
1445    """Obsolete, don't use."""
1446    if class_index < 0:
1447        if res.base_class >= 0:
1448            class_index = res.base_class
1449        else:
1450            class_index = 1
1451
1452    useweights = res.weights and not ignore_weights
1453    weightByClasses = argkw.get("weightByClasses", True)
1454
1455    if res.number_of_iterations > 1:
1456        CDTs = [CDT() for _ in range(res.number_of_learners)]
1457        iterationExperiments = split_by_iterations(res)
1458        for exp in iterationExperiments:
1459            expCDTs = corn.computeCDT(exp, class_index, useweights)
1460            for i in range(len(CDTs)):
1461                CDTs[i].C += expCDTs[i].C
1462                CDTs[i].D += expCDTs[i].D
1463                CDTs[i].T += expCDTs[i].T
1464        for i in range(res.number_of_learners):
1465            if is_CDT_empty(CDTs[i]):
1466                return corn.computeCDT(res, class_index, useweights)
1467
1468        return CDTs
1469    else:
1470        return corn.computeCDT(res, class_index, useweights)
1471
1472# Backward compatibility
1473def replace_use_weights(fun):
1474    if environ.orange_no_deprecated_members:
1475        return fun
1476
1477    @functools.wraps(fun)
1478    def wrapped(*args, **kwargs):
1479        use_weights = kwargs.pop("useWeights", None)
1480        if use_weights is not None:
1481            deprecation_warning("useWeights", "ignore_weights")
1482            kwargs["ignore_weights"] = not use_weights
1483        return fun(*args, **kwargs)
1484    return wrapped
1485
1486class AUC(list):
1487    """
1488    Compute the area under ROC curve given a set of experimental results.
1489    If testing consisted of multiple folds, each fold is scored and the
1490    average score is returned. If a fold contains only instances with the
1491    same class value, folds will be merged.
1492
1493    :param test_results: test results to score
1494    :param ignore_weights: ignore instance weights when calculating score
1495    :param multiclass: tells what kind of averaging to perform if the target
1496                       class has more than 2 values.
1497    """
1498
1499    #!Compute AUC for each pair of classes (ignoring instances of all other
1500    #!classes) and average the results, weighting them by the number of
1501    #!pairs of instances from these two classes (e.g. by the product of
1502    #!probabilities of the two classes). AUC computed in this way still
1503#!behaves as the concordance index, i.e., gives the probability that two
1504    #!randomly chosen instances from different classes will be correctly
1505    #!recognized (if the classifier knows from which two classes the
1506    #!instances came).
1507    ByWeightedPairs = 0
1508
1509    #!Similar to ByWeightedPairs, except that the average over class pairs
1510    #!is not weighted. This AUC is, like the binary version, independent of
1511    #!class distributions, but it is not related to the concordance index
1512    #!any more.
1513    ByPairs = 1
1514
1515    #!For each class, it computes AUC for this class against all others (that
1516    #!is, treating other classes as one class). The AUCs are then averaged by
1517    #!the class probabilities. This is related to the concordance index in
1518    #!which we test the classifier's (average) capability of distinguishing
1519    #!the instances from a specified class from those that come from other
1520    #!classes.
1521    #!Unlike the binary AUC, the measure is not independent of class
1522    #!distributions.
1523    WeightedOneAgainstAll = 2
1524
1525    #!Similar to WeightedOneAgainstAll, except that the average
1526    #!is not weighted.
1527    OneAgainstAll = 3
1528
1529    @replace_use_weights
1530    @deprecated_keywords({"method": "multiclass"})
1531    def __init__(self, test_results=None, multiclass=ByWeightedPairs, ignore_weights=False):
1532
1533        super(AUC, self).__init__()
1534
1535        self.ignore_weights = ignore_weights
1536        self.method = multiclass
1537
1538        if test_results is not None:
1539            self[:] = self.__call__(test_results)
1540
1541    def __call__(self, test_results):
1542        if len(test_results.class_values) < 2:
1543            raise ValueError("Cannot compute AUC on a single-class problem")
1544        elif len(test_results.class_values) == 2:
1545            return self._compute_for_binary_class(test_results)
1546        else:
1547            return self._compute_for_multi_value_class(test_results, self.method)
1548
1549    def _compute_for_binary_class(self, res):
1550        """AUC for binary classification problems"""
1551        if res.number_of_iterations > 1:
1552            return self._compute_for_multiple_folds(
1553                self._compute_one_class_against_all,
1554                split_by_iterations(res),
1555                (-1, res, res.number_of_iterations))
1556        else:
1557            return self._compute_one_class_against_all(res, -1)[0]
1558
1559    def _compute_for_multi_value_class(self, res, method=0):
1560        """AUC for multiclass classification problems"""
1561        numberOfClasses = len(res.class_values)
1562
1563        if res.number_of_iterations > 1:
1564            iterations = split_by_iterations(res)
1565            all_ite = res
1566        else:
1567            iterations = [res]
1568            all_ite = None
1569
1570        # by pairs
1571        sum_aucs = [0.] * res.number_of_learners
1572        usefulClassPairs = 0.
1573
1574        prob = None
1575        if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]:
1576            prob = class_probabilities_from_res(res)
1577
1578        if method in [self.ByWeightedPairs, self.ByPairs]:
1579            for classIndex1 in range(numberOfClasses):
1580                for classIndex2 in range(classIndex1):
1581                    subsum_aucs = self._compute_for_multiple_folds(
1582                        self._compute_one_class_against_another, iterations,
1583                        (classIndex1, classIndex2, all_ite,
1584                        res.number_of_iterations))
1585                    if subsum_aucs:
1586                        if method == self.ByWeightedPairs:
1587                            p_ij = prob[classIndex1] * prob[classIndex2]
1588                            subsum_aucs = [x * p_ij  for x in subsum_aucs]
1589                            usefulClassPairs += p_ij
1590                        else:
1591                            usefulClassPairs += 1
1592                        sum_aucs = map(add, sum_aucs, subsum_aucs)
1593        else:
1594            for classIndex in range(numberOfClasses):
1595                subsum_aucs = self._compute_for_multiple_folds(
1596                    self._compute_one_class_against_all,
1597                    iterations, (classIndex, all_ite,
1598                                 res.number_of_iterations))
1599                if subsum_aucs:
1600                    if method == self.WeightedOneAgainstAll:
1601                        p_i = prob[classIndex]
1602                        subsum_aucs = [x * p_i  for x in subsum_aucs]
1603                        usefulClassPairs += p_i
1604                    else:
1605                        usefulClassPairs += 1
1606                    sum_aucs = map(add, sum_aucs, subsum_aucs)
1607
1608        if usefulClassPairs > 0:
1609            sum_aucs = [x / usefulClassPairs for x in sum_aucs]
1610
1611        return sum_aucs
1612
1613    # computes the average AUC over folds using "auc_computer" (AUC_i or AUC_ij)
1614    # it returns the sum of what the computer returns over all folds,
1615    # unless at some fold the computer has to fall back to computing
1616    # over the entire test set, or even that fails;
1617    # in those cases the result is returned immediately
1618    def _compute_for_multiple_folds(self, auc_computer, iterations,
1619                                 computer_args):
1620        """Compute the average AUC over folds using :obj:`auc_computer`."""
1621        subsum_aucs = [0.] * iterations[0].number_of_learners
1622        for ite in iterations:
1623            aucs, foldsUsed = auc_computer(*(ite,) + computer_args)
1624            if not aucs:
1625                import warnings
1626                warnings.warn("AUC cannot be computed (all instances belong to the same class).")
1627                return
1628            if not foldsUsed:
1629                self[:] = aucs
1630                return aucs
1631            subsum_aucs = map(add, subsum_aucs, aucs)
1632        return subsum_aucs
1633
1634    # Computes AUC for a single class.
1635    # In multi-valued class problems, AUC is computed as one-against-all;
1636    # results over folds are averaged, and if some folds contain examples
1637    # from one class only, the folds are merged.
1638    def _compute_for_single_class(self, res, class_index):
1639        if res.number_of_iterations > 1:
1640            return self._compute_for_multiple_folds(
1641                self._compute_one_class_against_all, split_by_iterations(res),
1642                (class_index, res, res.number_of_iterations))
1643        else:
1644            return self._compute_one_class_against_all(res, class_index)
1645
1646    # Computes AUC for a pair of classes (as if there were no other classes)
1647    # results over folds are averages
1648    # if some folds have examples from one class only, the folds are merged
1649    def _compute_for_pair_of_classes(self, res, class_index1, class_index2):
1650        if res.number_of_iterations > 1:
1651            return self._compute_for_multiple_folds(
1652                self._compute_one_class_against_another,
1653                split_by_iterations(res),
1654                (class_index1, class_index2, res, res.number_of_iterations))
1655        else:
1656            return self._compute_one_class_against_another(res, class_index1,
1657                                                    class_index2)
1658
1659    # computes AUC between class i and the other classes
1660    # (treating them as the same class)
1661    @deprecated_keywords({"classIndex": "class_index",
1662                          "divideByIfIte": "divide_by_if_ite"})
1663    def _compute_one_class_against_all(self, ite, class_index, all_ite=None,
1664                                      divide_by_if_ite=1.0):
1665        """Compute AUC between class i and all the other classes."""
1666        return self._compute_auc(corn.computeCDT, ite, all_ite,
1667            divide_by_if_ite, (class_index, not self.ignore_weights))
1668
1669
1670    # computes AUC between classes i and j as if there are no other classes
1671    def _compute_one_class_against_another(
1672        self, ite, class_index1, class_index2,
1673        all_ite=None, divide_by_if_ite=1.):
1674        """
1675        Compute AUC between classes i and j as if there are no other classes.
1676        """
1677        return self._compute_auc(corn.computeCDTPair, ite,
1678            all_ite, divide_by_if_ite,
1679            (class_index1, class_index2, not self.ignore_weights))
1680
1681    # computes AUC using a specified 'cdt_computer' function
1682    # It tries to compute AUCs from 'ite' (examples from a single iteration)
1683    # and, if C+D+T=0, from 'all_ite' (entire test set). In the former case,
1684    # the AUCs are divided by 'divide_by_if_ite'.
1685    # An additional flag is returned: True in the former case,
1686    # False in the latter.
1687    @deprecated_keywords({"cdtComputer": "cdt_computer",
1688                          "divideByIfIte": "divide_by_if_ite",
1689                          "computerArgs": "computer_args"})
1690    def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite,
1691                     computer_args):
1692        """
1693        Compute AUC using a :obj:`cdt_computer`.
1694        """
1695        cdts = cdt_computer(*(ite,) + computer_args)
1696        if not is_CDT_empty(cdts[0]):
1697            return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T) /
1698                    divide_by_if_ite for cdt in cdts], True
1699
1700        if all_ite:
1701            cdts = cdt_computer(*(all_ite,) + computer_args)
1702            if not is_CDT_empty(cdts[0]):
1703                return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T)
1704                        for cdt in cdts], False
1705
1706        return False, False
1707
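# Usage sketch for AUC (illustrative; since AUC subclasses list, an instance
# holds one score per learner):
#
#     import Orange
#     data = Orange.data.Table("iris")
#     learners = [Orange.classification.bayes.NaiveLearner(),
#                 Orange.classification.tree.TreeLearner()]
#     res = Orange.evaluation.testing.cross_validation(learners, data, folds=10)
#     print AUC(res)                          # weighted pairwise (default)
#     print AUC(res, multiclass=AUC.ByPairs)  # unweighted pairwise average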
1708class AUC_for_single_class(AUC):
1709    """
1710    Compute AUC where the class with the given class_index is singled
1711    out and all other classes are treated as a single class.
1712    """
1713    def __init__(self, test_results=None, class_index= -1, ignore_weights=False):
1714        if class_index < 0:
1715            if test_results and test_results.base_class >= 0:
1716                self.class_index = test_results.base_class
1717            else:
1718                self.class_index = 1
1719        else:
1720            self.class_index = class_index
1721
1722        super(AUC_for_single_class, self).__init__(test_results, ignore_weights=ignore_weights)
1723
1724    def __call__(self, test_results):
1725        return self._compute_for_single_class(test_results, self.class_index)
1726
1727
1728class AUC_for_pair_of_classes(AUC):
1729    """
1730    Computes AUC between a pair of classes, ignoring instances from all
1731    other classes.
1732    """
1733    def __init__(self, test_results, class_index1, class_index2, ignore_weights=False):
1734        self.class_index1 = class_index1
1735        self.class_index2 = class_index2
1736
1737        super(AUC_for_pair_of_classes, self).__init__(test_results, ignore_weights=ignore_weights)
1738
1739    def __call__(self, test_results):
1740        return self._compute_for_pair_of_classes(test_results, self.class_index1, self.class_index2)
1741
1742
1743class AUC_matrix(AUC):
1744    """
1745    Compute a lower triangular matrix with AUCs for all pairs of classes.
1746    If there are empty classes, the corresponding elements in the matrix
1747    are -1.
1748    """
1749
1750    def __call__(self, test_results):
1751        numberOfClasses = len(test_results.class_values)
1752        number_of_learners = test_results.number_of_learners
1753        if test_results.number_of_iterations > 1:
1754            iterations, all_ite = split_by_iterations(test_results), test_results
1755        else:
1756            iterations, all_ite = [test_results], None
1757        aucs = [[[] for _ in range(numberOfClasses)]
1758                for _ in range(number_of_learners)]
1759        for classIndex1 in range(numberOfClasses):
1760            for classIndex2 in range(classIndex1):
1761                pair_aucs = self._compute_for_multiple_folds(
1762                    self._compute_one_class_against_another, iterations,
1763                    (classIndex1, classIndex2, all_ite,
1764                     test_results.number_of_iterations))
1765                if pair_aucs:
1766                    for lrn in range(number_of_learners):
1767                        aucs[lrn][classIndex1].append(pair_aucs[lrn])
1768                else:
1769                    for lrn in range(number_of_learners):
1770                        aucs[lrn][classIndex1].append(-1)
1771        return aucs
1772
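# Usage sketch for the specialized variants above (class indices refer to
# positions in res.class_values; `res` as in the earlier sketches):
#
#     print AUC_for_single_class(res, class_index=0)
#     print AUC_for_pair_of_classes(res, class_index1=0, class_index2=1)
#     print AUC_matrix(res)   # lower triangular matrix, one per learner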
1773#Backward compatibility
1774@replace_use_weights
1775def AUC_binary(res, ignore_weights=False):
1776    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1777    auc[:] = auc._compute_for_binary_class(res) or []
1778    return auc
1779
1780@replace_use_weights
1781def AUC_multi(res, ignore_weights=False, method=0):
1782    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights,
1783        method=method)
1784    auc[:] = auc._compute_for_multi_value_class(res, method)
1785    return auc
1786
1787
1788@deprecated_keywords({"AUCcomputer": "auc_computer",
1789                      "computerArgs": "computer_args"})
1790def AUC_iterations(auc_computer, iterations, computer_args):
1791    auc = deprecated_function_name(AUC)()
1792    auc[:] = auc._compute_for_multiple_folds(auc_computer, iterations, computer_args) or []
1793    return auc
1794
1795def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
1796    auc = deprecated_function_name(AUC)()
1797    result = auc._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite,
1798                              computer_args)
1799    return result
1800
1801@replace_use_weights
1802def AUC_i(ite, class_index, ignore_weights=False, all_ite=None,
1803          divide_by_if_ite=1.):
1804    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1805    result = auc._compute_one_class_against_all(ite, class_index,
1806        all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1807    return result
1808
1809
1810@replace_use_weights
1811def AUC_ij(ite, class_index1, class_index2, ignore_weights=False,
1812           all_ite=None, divide_by_if_ite=1.):
1813    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1814    result = auc._compute_one_class_against_another(
1815        ite, class_index1, class_index2, all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1816    return result
1817
1818AUC_single = replace_use_weights(
1819             deprecated_keywords({"classIndex": "class_index"})(
1820             deprecated_function_name(AUC_for_single_class)))
1821AUC_pair = replace_use_weights(
1822           deprecated_keywords({"classIndex1": "class_index1",
1823                                "classIndex2": "class_index2"})(
1824           deprecated_function_name(AUC_for_pair_of_classes)))
1825AUC_matrix = replace_use_weights(AUC_matrix)
1826
1827
1828@deprecated_keywords({"unweighted": "ignore_weights"})
1829def McNemar(res, ignore_weights=False, **argkw):
1830    """
1831    Compute a triangular matrix with McNemar statistics for each pair of
1832    classifiers. The statistic follows the chi-square distribution with one
1833    degree of freedom; the critical value for 5% significance is about 3.84.
1834    """
1835    nLearners = res.number_of_learners
1836    mcm = []
1837    for i in range(nLearners):
1838        mcm.append([0.] * nLearners)
1839
1840    if not res.weights or ignore_weights:
1841        for i in res.results:
1842            actual = i.actual_class
1843            classes = i.classes
1844            for l1 in range(nLearners):
1845                for l2 in range(l1, nLearners):
1846                    if classes[l1] == actual:
1847                        if classes[l2] != actual:
1848                            mcm[l1][l2] += 1
1849                    elif classes[l2] == actual:
1850                        mcm[l2][l1] += 1
1851    else:
1852        for i in res.results:
1853            actual = i.actual_class
1854            classes = i.classes
1855            for l1 in range(nLearners):
1856                for l2 in range(l1, nLearners):
1857                    if classes[l1] == actual:
1858                        if classes[l2] != actual:
1859                            mcm[l1][l2] += i.weight
1860                    elif classes[l2] == actual:
1861                        mcm[l2][l1] += i.weight
1862
1863    for l1 in range(nLearners):
1864        for l2 in range(l1, nLearners):
1865            su = mcm[l1][l2] + mcm[l2][l1]
1866            if su:
1867                mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1) ** 2 / su
1868            else:
1869                mcm[l2][l1] = 0
1870
1871    for l1 in range(nLearners):
1872        mcm[l1] = mcm[l1][:l1]
1873
1874    return mcm
1875
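# Usage sketch (`res` as in the sketches above): mcm[i][j] (for j < i) holds
# the statistic for learners i and j; values above the 5% critical value of
# about 3.84 indicate a significant difference:
#
#     mcm = McNemar(res)
#     if mcm[1][0] > 3.84:
#         print "learners 0 and 1 differ significantly"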
1876
1877def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False):
1878    """
1879    Compute the McNemar statistic for a pair of classifiers, specified by
1880    the indices lrn1 and lrn2.
1881    """
1882    tf = ft = 0.
1883    if not res.weights or ignore_weights:
1884        for i in res.results:
1885            actual = i.actual_class
1886            if i.classes[lrn1] == actual:
1887                if i.classes[lrn2] != actual:
1888                    tf += 1.
1889            elif i.classes[lrn2] == actual:
1890                ft += 1.
1891    else:
1892        for i in res.results:
1893            actual = i.actual_class
1894            if i.classes[lrn1] == actual:
1895                if i.classes[lrn2] != actual:
1896                    tf += i.weight
1897            elif i.classes[lrn2] == actual:
1898                ft += i.weight
1899
1900    su = tf + ft
1901    if su:
1902        return (abs(tf - ft) - 1) ** 2 / su
1903    else:
1904        return 0
1905
1906
1907def Friedman(res, stat=CA):
1908    """
1909    Compare classifiers with the Friedman test, treating folds as different
1910    examples. Returns F, p and the average ranks.
1911    """
1912    res_split = split_by_iterations(res)
1913    res = [stat(r) for r in res_split]
1914
1915    N = len(res)
1916    k = len(res[0])
1917    sums = [0.] * k
1918    for r in res:
1919        ranks = [k - x + 1 for x in statc.rankdata(r)]
1920        if stat == Brier_score: # reverse ranks for Brier_score (lower better)
1921            ranks = [k + 1 - x for x in ranks]
1922        sums = map(add, ranks, sums)
1923
1924    T = sum(x * x for x in sums)
1925    sums = [x / N for x in sums]
1926
1927    F = 12. / (N * k * (k + 1)) * T - 3 * N * (k + 1)
1928
1929    return F, statc.chisqprob(F, k - 1), sums
1930
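# Usage sketch: requires results from multiple folds; F is tested against
# the chi-square distribution with k - 1 degrees of freedom:
#
#     F, p, avgranks = Friedman(res, stat=CA)
#     if p < 0.05:
#         print "classifiers differ; average ranks:", avgranks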
1931
1932def Wilcoxon_pairs(res, avgranks, stat=CA):
1933    """
1934    Return a triangular matrix, where element[i][j] stores significance of
1935    difference between the i-th and the j-th classifier, as computed by the
1936    Wilcoxon test. The element is positive if the i-th is better than the j-th,
1937    negative if it is worse, and 1 if they are equal.
1938    Arguments are ExperimentResults, average ranks (as returned by Friedman)
1939    and, optionally, a statistic; greater values should mean better results.
1940    """
1941    res_split = split_by_iterations(res)
1942    res = [stat(r) for r in res_split]
1943
1944    k = len(res[0])
1945    bt = []
1946    for m1 in range(k):
1947        nl = []
1948        for m2 in range(m1 + 1, k):
1949            t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res])
1950            if avgranks[m1] < avgranks[m2]:
1951                nl.append(p)
1952            elif avgranks[m2] < avgranks[m1]:
1953                nl.append(-p)
1954            else:
1955                nl.append(1)
1956        bt.append(nl)
1957    return bt
1958
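# Usage sketch, continuing from Friedman above: bt[i] holds the signed
# p-values for pairs (i, j) with j > i, so pair (i, j) is at bt[i][j - i - 1]:
#
#     pairs = Wilcoxon_pairs(res, avgranks, stat=CA)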
1959
1960@deprecated_keywords({"allResults": "all_results",
1961                      "noConfidence": "no_confidence"})
1962def plot_learning_curve_learners(file, all_results, proportions, learners,
1963                                 no_confidence=0):
1964    plot_learning_curve(file, all_results, proportions,
1965        [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
1966        for i in range(len(learners))], no_confidence)
1967
1968
1969@deprecated_keywords({"allResults": "all_results",
1970                      "noConfidence": "no_confidence"})
1971def plot_learning_curve(file, all_results, proportions, legend,
1972                        no_confidence=0):
1973    import types
1974    fopened = 0
1975    if type(file) == types.StringType:
1976        file = open(file, "wt")
1977        fopened = 1
1978
1979    file.write("set yrange [0:1]\n")
1980    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
1981    file.write("set multiplot\n\n")
1982    CAs = [CA(x, report_se=True) for x in all_results]
1983
1984    file.write("plot \\\n")
1985    for i in range(len(legend) - 1):
1986        if not no_confidence:
1987            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i + 1))
1988        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
1989    if not no_confidence:
1990        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
1991    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))
1992
1993    for i in range(len(legend)):
1994        if not no_confidence:
1995            for p in range(len(proportions)):
1996                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96 * CAs[p][i][1]))
1997            file.write("e\n\n")
1998
1999        for p in range(len(proportions)):
2000            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
2001        file.write("e\n\n")
2002
2003    if fopened:
2004        file.close()
2005
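# Usage sketch (illustrative): `all_results` is a list of ExperimentResults,
# one per training-set proportion (e.g. from
# Orange.evaluation.testing.learning_curve_n); the output is a gnuplot script:
#
#     plot_learning_curve_learners("curve.gnuplot", all_results,
#                                  proportions, learners)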
2006
2007def print_single_ROC_curve_coordinates(file, curve):
2008    import types
2009    fopened = 0
2010    if type(file) == types.StringType:
2011        file = open(file, "wt")
2012        fopened = 1
2013
2014    for coord in curve:
2015        file.write("%5.3f\t%5.3f\n" % tuple(coord))
2016
2017    if fopened:
2018        file.close()
2019
2020
2021def plot_ROC_learners(file, curves, learners):
2022    plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))])
2023
2024def plot_ROC(file, curves, legend):
2025    import types
2026    fopened = 0
2027    if type(file) == types.StringType:
2028        file = open(file, "wt")
2029        fopened = 1
2030
2031    file.write("set yrange [0:1]\n")
2032    file.write("set xrange [0:1]\n")
2033    file.write("set multiplot\n\n")
2034
2035    file.write("plot \\\n")
2036    for leg in legend:
2037        file.write("'-' title '%s' with lines,\\\n" % leg)
2038    file.write("'-' title '' with lines\n")
2039
2040    for curve in curves:
2041        for coord in curve:
2042            file.write("%5.3f\t%5.3f\n" % tuple(coord))
2043        file.write("e\n\n")
2044
2045    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")
2046
2047    if fopened:
2048        file.close()
2049
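# Usage sketch: `curves` are lists of (FPR, TPR) pairs, as produced by the
# ROC routines earlier in this module; the output is a gnuplot script with
# one inline data block per curve:
#
#     plot_ROC_learners("roc.gnuplot", curves, learners)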
2050
2051@deprecated_keywords({"allResults": "all_results"})
2052def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference= -1):
2053    plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference)
2054
2055
2056@deprecated_keywords({"allResults": "all_results"})
2057def plot_McNemar_curve(file, all_results, proportions, legend, reference= -1):
2058    if reference < 0:
2059        reference = len(legend) - 1
2060
2061    import types
2062    fopened = 0
2063    if type(file) == types.StringType:
2064        file = open(file, "wt")
2065        fopened = 1
2066
2067    #file.write("set yrange [0:1]\n")
2068    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
2069    file.write("set multiplot\n\n")
2070    file.write("plot \\\n")
2071    tmap = range(reference) + range(reference + 1, len(legend))
2072    for i in tmap[:-1]:
2073        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
2074    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1] + 1))
2075    file.write("\n")
2076
2077    for i in tmap:
2078        for p in range(len(proportions)):
2079            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
2080        file.write("e\n\n")
2081
2082    if fopened:
2083        file.close()
2084
2085default_point_types = ("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}") + tuple([chr(x) for x in range(97, 122)])
2086default_line_types = ("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
2087
2088@deprecated_keywords({"allResults": "all_results"})
2089def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
2090    return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options)
2091
2092
2093@deprecated_keywords({"allResults": "all_results"})
2094def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
2095    import types
2096    fopened = 0
2097    if type(file) == types.StringType:
2098        file = open(file, "wt")
2099        fopened = 1
2100
2101    nexamples = len(all_results[0].results)
2102    CAs = [CA(x, report_se=True) for x in all_results]
2103
2104    graphsize = float(options.get("graphsize", 10.0)) #cm
2105    difprop = proportions[-1] - proportions[0]
2106    ntestexamples = nexamples * proportions[-1]
2107    xunit = graphsize / ntestexamples
2108
2109    yshift = float(options.get("yshift", -ntestexamples / 20.))
2110
2111    pointtypes = options.get("pointtypes", default_point_types)
2112    linetypes = options.get("linetypes", default_line_types)
2113
2114    if options.has_key("numberedx"):
2115        numberedx = options["numberedx"]
2116        if type(numberedx) == types.IntType:
2117            if numberedx > 0:
2118                numberedx = [nexamples * proportions[int(i / float(numberedx) * len(proportions))] for i in range(numberedx)] + [proportions[-1] * nexamples]
2119            elif numberedx < 0:
2120                numberedx = -numberedx
2121                newn = []
2122                for i in range(numberedx + 1):
2123                    wanted = proportions[0] + float(i) / numberedx * difprop
2124                    best = (10, 0)
2125                    for t in proportions:
2126                        td = abs(wanted - t)
2127                        if td < best[0]:
2128                            best = (td, t)
2129                    if not best[1] in newn:
2130                        newn.append(best[1])
2131                newn.sort()
2132                numberedx = [nexamples * x for x in newn]
2133        elif type(numberedx[0]) == types.FloatType:
2134            numberedx = [nexamples * x for x in numberedx]
2135    else:
2136        numberedx = [nexamples * x for x in proportions]
2137
2138    file.write("\\mbox{\n")
2139    file.write("\\beginpicture\n")
2140    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
2141    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
2142    file.write("\\axis bottom invisible\n")  # label {#examples}
2143    file.write("      ticks short at %s /\n" % reduce(lambda x, y:x + " " + y, ["%i" % (x * nexamples + 0.5) for x in proportions]))
2144    if numberedx:
2145        file.write("            long numbered at %s /\n" % reduce(lambda x, y:x + y, ["%i " % int(x + 0.5) for x in numberedx]))
2146    file.write("  /\n")
2147    file.write("\\axis left invisible\n")  # label {classification accuracy}
2148    file.write("      shiftedto y=%5.3f\n" % yshift)
2149    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
2150    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
2151    file.write("  /\n")
2152    if options.has_key("default"):
2153        file.write("\\setdashpattern<1pt, 1pt>\n")
2154        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))
2155
2156    for i in range(len(CAs[0])):
2157        coordinates = reduce(lambda x, y:x + " " + y, ["%i %5.3f" % (proportions[p] * nexamples, CAs[p][i][0]) for p in range(len(proportions))])
2158        if linetypes:
2159            file.write("%s\n" % linetypes[i])
2160            file.write("\\plot %s /\n" % coordinates)
2161        if pointtypes:
2162            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))
2163
2164    file.write("\\endpicture\n")
2165    file.write("}\n")
2166    if fopened:
2167        file.close()
2170
2171def legend_learners_to_PiCTeX(file, learners, **options):
2172    return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options)
2173
2174def legend_to_PiCTeX(file, legend, **options):
2175    import types
2176    fopened = 0
2177    if type(file) == types.StringType:
2178        file = open(file, "wt")
2179        fopened = 1
2180
2181    pointtypes = options.get("pointtypes", default_point_types)
2182    linetypes = options.get("linetypes", default_line_types)
2183
2184    file.write("\\mbox{\n")
2185    file.write("\\beginpicture\n")
2186    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
2187    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))
2188
2189    for i in range(len(legend)):
2190        if linetypes:
2191            file.write("%s\n" % linetypes[i])
2192            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i + 0.2))
2193        if pointtypes:
2194            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i + 0.1))
2195        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i + 0.25))
2196
2197    file.write("\\endpicture\n")
2198    file.write("}\n")
2199    if fopened:
2200        file.close()
2203
2204
2205def compute_friedman(avranks, N):
2206    """ Returns a tuple composed of (Friedman statistic, degrees of freedom)
2207    and (Iman statistic - F-distribution, degrees of freedom), given the
2208    average ranks and the number N of tested data sets.
2209    """
2210
2211    k = len(avranks)
2212
2213    def friedman(N, k, ranks):
2214        return 12 * N * (sum([rank ** 2.0 for rank in ranks]) - (k * (k + 1) * (k + 1) / 4.0)) / (k * (k + 1))
2215
2216    def iman(fried, N, k):
2217        return (N - 1) * fried / (N * (k - 1) - fried)
2218
2219    f = friedman(N, k, avranks)
2220    im = iman(f, N, k)
2221    fdistdof = (k - 1, (k - 1) * (N - 1))
2222
2223    return (f, k - 1), (im, fdistdof)
2224
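# Worked example: for avranks = [3.143, 2.000, 2.893, 1.964] and N = 14
# (k = 4), the Friedman statistic is about 9.28 with 3 degrees of freedom
# and the Iman statistic is about 3.69 with (3, 39) degrees of freedom:
#
#     (f, df), (im, fdistdof) = compute_friedman([3.143, 2.000, 2.893, 1.964], 14)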
2225def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
2226    """ Returns the critical difference for the Nemenyi or Bonferroni-Dunn
2227    test for the given alpha (either alpha="0.05" or alpha="0.1"), average
2228    ranks and number of tested data sets N. Type can be either "nemenyi" for
2229    the Nemenyi two-tailed test or "bonferroni-dunn" for the Bonferroni-Dunn test.
2230    """
2231
2232    k = len(avranks)
2233
2234    d = {("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
2235                               2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
2236                               3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
2237                               3.426041, 3.458425, 3.488685, 3.517073, 3.543799]
2238        , ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
2239                               2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
2240                               2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
2241                               3.195743, 3.229723, 3.261461, 3.291224, 3.319233]
2242        , ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
2243                                        2.638, 2.690, 2.724, 2.773],
2244         ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
2245                                      2.394, 2.450, 2.498, 2.539]}
2246
2247    #critical values can be computed in R as qtukey(0.95, k, Inf) / sqrt(2):
2248    #for (x in c(2:20)) print(qtukey(0.95, x, Inf) / sqrt(2))
2249
2250    q = d[(type, alpha)]
2251
2252    cd = q[k] * (k * (k + 1) / (6.0 * N)) ** 0.5
2253
2254    return cd
2255
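# Worked example: for k = 4 methods over N = 14 data sets at alpha = "0.05",
# q[4] = 2.569032, so CD = 2.569032 * sqrt(4 * 5 / (6.0 * 14)), about 1.25:
#
#     cd = compute_CD([3.143, 2.000, 2.893, 1.964], 14)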
2256
2257def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
2258    """
2259    Draws a CD graph, which is used to display the differences in methods'
2260    performance.
2261    See Janez Demsar, Statistical Comparisons of Classifiers over
2262    Multiple Data Sets, Journal of Machine Learning Research, 7(Jan):1--30, 2006.
2263
2264    Needs matplotlib to work.
2265
2266    :param filename: Output file name (with extension). Formats supported
2267                     by matplotlib can be used.
2268    :param avranks: List of average methods' ranks.
2269    :param names: List of methods' names.
2270
2271    :param cd: Critical difference. Used for marking methods whose
2272               difference is not statistically significant.
2273    :param lowv: The lowest shown rank, if None, use 1.
2274    :param highv: The highest shown rank, if None, use len(avranks).
2275    :param width: Width of the drawn figure in inches, default 6 in.
2276    :param textspace: Space on figure sides left for the description
2277                      of methods, default 1 in.
2278    :param reverse:  If True, the lowest rank is on the right. Default\: False.
2279    :param cdmethod: None by default. It can be an index of an element in
2280                     avranks or names, specifying the method that should be
2281                     marked with an interval.
2282    """
2283    if not HAS_MATPLOTLIB:
2284        import sys
2285        print >> sys.stderr, "Function requires matplotlib. Please install it."
2286        return
2287
2288    width = float(width)
2289    textspace = float(textspace)
2290
2291    def nth(l, n):
2292        """
2293        Return the n-th element of each item in l.
2294        """
2295        n = lloc(l, n)
2296        return [ a[n] for a in l ]
2297
2298    def lloc(l, n):
2299        """
2300        Resolve a column location in a list-of-lists structure.
2301        Enables the use of negative locations:
2302        -1 is the last element, -2 the second last, ...
2303        """
2304        if n < 0:
2305            return len(l[0]) + n
2306        else:
2307            return n
2308
2309    def mxrange(lr):
2310        """
2311        Multiple xranges. Can be used to traverse matrices.
2312        This generator is slow because the number of dimensions
2313        is not known in advance.
2314
2315        >>> list(mxrange([3,5]))
2316        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
2317
2318        >>> list(mxrange([[3,5,1],[9,0,-3]]))
2319        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]
2320
2321        """
2322        if not len(lr):
2323            yield ()
2324        else:
2325            #it can work with single numbers
2326            index = lr[0]
2327            if type(1) == type(index):
2328                index = [ index ]
2329            for a in range(*index):
2330                for b in mxrange(lr[1:]):
2331                    yield tuple([a] + list(b))
2332
2333
2334    from matplotlib.figure import Figure
2335    from matplotlib.backends.backend_agg import FigureCanvasAgg
2336
2337    def print_figure(fig, *args, **kwargs):
2338        canvas = FigureCanvasAgg(fig)
2339        canvas.print_figure(*args, **kwargs)
2340
2341    sums = avranks
2342
2343    tempsort = sorted([ (a, i) for i, a in  enumerate(sums) ], reverse=reverse)
2344    ssums = nth(tempsort, 0)
2345    sortidx = nth(tempsort, 1)
2346    nnames = [ names[x] for x in sortidx ]
2347
2348    if lowv is None:
2349        lowv = min(1, int(math.floor(min(ssums))))
2350    if highv is None:
2351        highv = max(len(avranks), int(math.ceil(max(ssums))))
2352
2353    cline = 0.4
2354
2355    k = len(sums)
2356
2357    lines = None
2358
2359    linesblank = 0
2360    scalewidth = width - 2 * textspace
2361
2362    def rankpos(rank):
2363        if not reverse:
2364            a = rank - lowv
2365        else:
2366            a = highv - rank
2367        return textspace + scalewidth / (highv - lowv) * a
2368
2369    distanceh = 0.25
2370
2371    if cd and cdmethod is None:
2372
2373        #get pairs of non significant methods
2374
2375        def get_lines(sums, hsd):
2376
2377            #get all pairs
2378            lsums = len(sums)
2379            allpairs = [ (i, j) for i, j in mxrange([[lsums], [lsums]]) if j > i ]
2380            #remove not significant
2381            notSig = [ (i, j) for i, j in allpairs if abs(sums[i] - sums[j]) <= hsd ]
2382            #keep only longest
2383
2384            def no_longer((i, j), notSig):
2385                for i1, j1 in notSig:
2386                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
2387                        return False
2388                return True
2389
2390            longest = [ (i, j) for i, j in notSig if no_longer((i, j), notSig) ]
2391
2392            return longest
2393
2394        lines = get_lines(ssums, cd)
2395        linesblank = 0.2 + 0.2 + (len(lines) - 1) * 0.1
2396
2397        #add scale
2398        distanceh = 0.25
2399        cline += distanceh
2400
2401    # calculate the height needed for the image
2402    minnotsignificant = max(2 * 0.2, linesblank)
2403    height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant
2404
2405    fig = Figure(figsize=(width, height))
2406    ax = fig.add_axes([0, 0, 1, 1]) #reverse y axis
2407    ax.set_axis_off()
2408
2409    hf = 1. / height # height factor
2410    wf = 1. / width
2411
2412    def hfl(l):
2413        return [ a * hf for a in l ]
2414
2415    def wfl(l):
2416        return [ a * wf for a in l ]
2417
2418
2419    # Upper left corner is (0,0).
2420
2421    ax.plot([0, 1], [0, 1], c="w")
2422    ax.set_xlim(0, 1)
2423    ax.set_ylim(1, 0)
2424
2425    def line(l, color='k', **kwargs):
2426        """
2427        Input is a list of pairs of points.
2428        """
2429        ax.plot(wfl(nth(l, 0)), hfl(nth(l, 1)), color=color, **kwargs)
2430
2431    def text(x, y, s, *args, **kwargs):
2432        ax.text(wf * x, hf * y, s, *args, **kwargs)
2433
2434    line([(textspace, cline), (width - textspace, cline)], linewidth=0.7)
2435
2436    bigtick = 0.1
2437    smalltick = 0.05
2438
2439
2441    tick = None
2442    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
2443        tick = smalltick
2444        if a == int(a): tick = bigtick
2445        line([(rankpos(a), cline - tick / 2), (rankpos(a), cline)], linewidth=0.7)
2446
2447    for a in range(lowv, highv + 1):
2448        text(rankpos(a), cline - tick / 2 - 0.05, str(a), ha="center", va="bottom")
2449
2450    k = len(ssums)
2451
2452    for i in range((k + 1) / 2):
2453        chei = cline + minnotsignificant + i * 0.2
2454        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace - 0.1, chei)], linewidth=0.7)
2455        text(textspace - 0.2, chei, nnames[i], ha="right", va="center")
2456
2457    for i in range((k + 1) / 2, k):
2458        chei = cline + minnotsignificant + (k - i - 1) * 0.2
2459        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace + scalewidth + 0.1, chei)], linewidth=0.7)
2460        text(textspace + scalewidth + 0.2, chei, nnames[i], ha="left", va="center")
2461
2462    if cd and cdmethod is None:
2463
2464        #upper scale
2465        if not reverse:
2466            begin, end = rankpos(lowv), rankpos(lowv + cd)
2467        else:
2468            begin, end = rankpos(highv), rankpos(highv - cd)
2469
2470        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
2471        line([(begin, distanceh + bigtick / 2), (begin, distanceh - bigtick / 2)], linewidth=0.7)
2472        line([(end, distanceh + bigtick / 2), (end, distanceh - bigtick / 2)], linewidth=0.7)
2473        text((begin + end) / 2, distanceh - 0.05, "CD", ha="center", va="bottom")
2474
2475        #non significance lines   
2476        def draw_lines(lines, side=0.05, height=0.1):
2477            start = cline + 0.2
2478            for l, r in lines:
2479                line([(rankpos(ssums[l]) - side, start), (rankpos(ssums[r]) + side, start)], linewidth=2.5)
2480                start += height
2481
2482        draw_lines(lines)
2483
2484    elif cd:
2485        begin = rankpos(avranks[cdmethod] - cd)
2486        end = rankpos(avranks[cdmethod] + cd)
2487        line([(begin, cline), (end, cline)], linewidth=2.5)
2488        line([(begin, cline + bigtick / 2), (begin, cline - bigtick / 2)], linewidth=2.5)
2489        line([(end, cline + bigtick / 2), (end, cline - bigtick / 2)], linewidth=2.5)
2490
2491    print_figure(fig, filename, **kwargs)
2492
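# Usage sketch (mirrors the demo in __main__ below; needs matplotlib and
# writes the CD diagram to the given file):
#
#     avranks = [3.143, 2.000, 2.893, 1.964]
#     names = ["first", "second", "third", "fourth"]
#     graph_ranks("ranks.png", avranks, names,
#                 cd=compute_CD(avranks, 14), width=6, textspace=1.5)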
2493def mlc_hamming_loss(res):
2494    """
2495    Schapire and Singer (2000) presented Hamming Loss, which is defined as:
2496
2497    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\vartriangle Z_i|}{|L|}`
2498    """
2499    losses = [0] * res.number_of_learners
2500    label_num = len(res.labels)
2501    example_num = gettotsize(res)
2502
2503    for e in res.results:
2504        aclass = e.actual_class
2505        for i, labels in enumerate(e.classes):
2506            labels = map(int, labels)
2507            if len(labels) <> len(aclass):
2508                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2509            for j in range(label_num):
2510                if labels[j] != aclass[j]:
2511                    losses[i] += 1
2512
2513    return [float(x) / (label_num * example_num) for x in losses]
2514
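# Worked example of the formula: with |L| = 3 labels, actual labels
# Y = (1, 0, 1) and predicted labels Z = (1, 1, 0), the symmetric difference
# contains two labels, so this instance contributes 2/3 to the average over
# the |D| instances.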
2515def mlc_accuracy(res, forgiveness_rate=1.0):
2516    """
2517    Godbole & Sarawagi (2004) use the metrics accuracy, precision and recall as follows:
2518     
2519    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`
2520   
2521    Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`,
2522    called forgiveness rate:
2523   
2524    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`
2525    """
2526    accuracies = [0.0] * res.number_of_learners
2527    example_num = gettotsize(res)
2528
2529    for e in res.results:
2530        aclass = e.actual_class
2531        for i, labels in enumerate(e.classes):
2532            labels = map(int, labels)
2533            if len(labels) <> len(aclass):
2534                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2535
2536            intersection = 0.0
2537            union = 0.0
2538            for pred, act in zip(labels, aclass):  # labels are predictions
2539                if pred and act:
2540                    intersection += 1
2541                if pred or act:
2542                    union += 1
2543
2544            if union:
2545                accuracies[i] += intersection / union
2546
2547    return [math.pow(x / example_num, forgiveness_rate) for x in accuracies]
2548
2549def mlc_precision(res):
2550    """
2551    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`
2552    """
2553    precisions = [0.0] * res.number_of_learners
2554    example_num = gettotsize(res)
2555
2556    for e in res.results:
2557        aclass = e.actual_class
2558        for i, labels in enumerate(e.classes):
2559            labels = map(int, labels)
2560            if len(labels) <> len(aclass):
2561                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2562
2563            intersection = 0.0
2564            predicted = 0.0
2565            for pred, act in zip(labels, aclass):  # labels are predictions
2566                if pred and act:
2567                    intersection += 1
2568                if pred:
2569                    predicted += 1
2570            if predicted:
2571                precisions[i] += intersection / predicted
2572
2573    return [x / example_num for x in precisions]
2574
2575def mlc_recall(res):
2576    """
2577    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`
2578    """
2579    recalls = [0.0] * res.number_of_learners
2580    example_num = gettotsize(res)
2581
2582    for e in res.results:
2583        aclass = e.actual_class
2584        for i, labels in enumerate(e.classes):
2585            labels = map(int, labels)
2586            if len(labels) <> len(aclass):
2587                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2588
2589            intersection = 0.0
2590            actual = 0.0
2591            for pred, act in zip(labels, aclass):  # labels are predictions
2592                if pred and act:
2593                    intersection += 1
2594                if act:
2595                    actual += 1
2596            if actual:
2597                recalls[i] += intersection / actual
2598
2599    return [x / example_num for x in recalls]
2600
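# Usage sketch for the multi-label scores above (assumes `res` was obtained
# by testing multi-label learners, so each prediction is a label vector):
#
#     print mlc_hamming_loss(res)
#     print mlc_accuracy(res), mlc_precision(res), mlc_recall(res)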
2601#def mlc_ranking_loss(res):
2602#    pass
2603#
2604#def mlc_average_precision(res):
2605#    pass
2606#
2607#def mlc_hierarchical_loss(res):
2608#    pass
2609
2610
2611def mt_average_score(res, score, weights=None):
2612    """
2613    Compute individual scores for each target and return the (weighted) average.
2614
2615    One method can be used to compute scores for all targets or a list of
2616    scoring methods can be passed to use different methods for different
2617    targets. In the latter case, care has to be taken if the ranges of scoring
2618    methods differ.
2619    For example, when the first target is scored from -1 to 1 (1 best) and the
2620    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
2621    to a span of 1, and invert the second so that higher scores are better.
2622
2623    :param score: Single-target scoring method or a list of such methods
2624                  (one for each target).
2625    :param weights: List of real weights, one for each target,
2626                    for a weighted average.
2627
2628    """
2629    if not len(res.results):
2630        raise ValueError, "Cannot compute the score: no examples."
2631    if res.number_of_learners < 1:
2632        return []
2633    n_classes = len(res.results[0].actual_class)
2634    if weights is None:
2635        weights = [1.] * n_classes
2636    if not hasattr(score, '__len__'):
2637        score = [score] * n_classes
2638    elif len(score) != n_classes:
2639        raise ValueError, "Number of scoring methods and targets do not match."
2640    # save original classes
2641    clsss = [te.classes for te in res.results]
2642    aclsss = [te.actual_class for te in res.results]
2643    # compute single target scores
2644    single_scores = []
2645    for i in range(n_classes):
2646        for te, clss, aclss in zip(res.results, clsss, aclsss):
2647            te.classes = [cls[i] for cls in clss]
2648            te.actual_class = aclss[i]
2649        single_scores.append(score[i](res))
2650    # restore original classes
2651    for te, clss, aclss in zip(res.results, clsss, aclsss):
2652        te.classes = clss
2653        te.actual_class = aclss
2654    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
2655        for scores in zip(*single_scores)]
2656
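# Usage sketch (assumes a two-target problem and that this module's RMSE
# regression scorer suits both targets):
#
#     print mt_average_score(res, RMSE)            # one scorer for all targets
#     print mt_average_score(res, [RMSE, RMSE], weights=[1, 2])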
2657def mt_flattened_score(res, score):
2658    """
2659    Flatten (concatenate into a single list) the predictions of multiple
2660    targets and compute a single-target score.
2661   
2662    :param score: Single-target scoring method.
2663    """
2664    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
2665        res.classifier_names, class_values=res.class_values,
2666        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
2667        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
2668    for te in res.results:
2669        for i, ac in enumerate(te.actual_class):
2670            te2 = Orange.evaluation.testing.TestedExample(
2671                iteration_number=te.iteration_number, actual_class=ac)
2672            for c, p in zip(te.classes, te.probabilities):
2673                te2.add_result(c[i], p[i])
2674            res2.results.append(te2)
2675    return score(res2)
2676
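# Usage sketch: concatenate the predictions for all targets into one
# single-target problem and score it once, e.g. with classification accuracy:
#
#     print mt_flattened_score(res, CA)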
2677def mt_global_accuracy(res):
2678    """
2679    :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline`
2680   
2681    :math:`\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.`
2682    """
2683    results = []
2684    for l in xrange(res.number_of_learners):
2685        n_results = len(res.results)
2686        n_correct = 0.
2687
2688        for r in res.results:
2689            if list(r.classes[l]) == r.actual_class:
2690                n_correct+=1
2691
2692        results.append(n_correct/n_results)
2693    return results
2694
2695
2696def mt_mean_accuracy(res):
2697    """
2698    :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline`
2699   
2700    :math:`\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.`
2701    """
2702    results = []
2703    for l in xrange(res.number_of_learners):
2704        n_classes = len(res.results[0].actual_class)
2705        n_results = len(res.results)
2706        n_correct = 0.
2707
2708        for r in res.results:
2709            for i in xrange(n_classes):
2710                if r.classes[l][i] == r.actual_class[i]:
2711                    n_correct+=1
2712        results.append(n_correct/n_classes/n_results)
2713    return results
2714
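# Worked contrast of the two accuracies above: with two targets, a prediction
# (1, 0) against the actual (1, 1) counts as wrong for mt_global_accuracy
# (the whole vector must match) but contributes 1/2 to mt_mean_accuracy
# (one of the two targets is correct).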
2715
2716
2717################################################################################
2718if __name__ == "__main__":
2719    avranks = [3.143, 2.000, 2.893, 1.964]
2720    names = ["first", "second", "third", "fourth"]
2721    cd = compute_CD(avranks, 14)
2722    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2723    print cd
2724
2725    print compute_friedman(avranks, 14)
2726
2727    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)