source: orange/Orange/evaluation/scoring.py @ 10900:a07531c84486

Revision 10900:a07531c84486, 100.5 KB checked in by Matija Polajnar <matija.polajnar@…>, 23 months ago

Fix AUC and stacking to use the new TestedExample structure.

import math
import functools
from operator import add

import numpy

import Orange
from Orange import statc, corn
from Orange.utils import deprecated_keywords, deprecated_function_name, \
    deprecation_warning, environ
from Orange.evaluation import testing

try:
    import matplotlib
    HAS_MATPLOTLIB = True
except ImportError:
    matplotlib = None
    HAS_MATPLOTLIB = False

#### Private stuff

def log2(x):
    """Calculate the base-2 logarithm."""
    return math.log(x) / math.log(2)

def check_non_zero(x):
    """Raise a ValueError when x is 0."""
    if x == 0.:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."

def gettotweight(res):
    """Sum all the weights."""
    totweight = reduce(lambda x, y: x + y.weight, res.results, 0)
    if totweight == 0.:
        raise ValueError, "Cannot compute the score: sum of weights is 0."
    return totweight

def gettotsize(res):
    """Get the number of result instances."""
    if len(res.results):
        return len(res.results)
    else:
        raise ValueError, "Cannot compute the score: no examples."


def split_by_iterations(res):
    """Split ExperimentResults of a multiple-iteration test into a list
    of ExperimentResults, one for each iteration.
    """
    if res.number_of_iterations < 2:
        return [res]

    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifier_names, res.class_values,
                res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type, labels=res.labels)
            for _ in range(res.number_of_iterations)]
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress

def split_by_classifiers(res):
    """Split an instance of :obj:`ExperimentResults` into a list of
    :obj:`ExperimentResults`, one for each classifier.
    """
    split_res = []
    for i in range(len(res.classifier_names)):
        r = Orange.evaluation.testing.ExperimentResults(
                    res.number_of_iterations,
                    [res.classifier_names[i]], res.class_values,
                    weights=res.weights, base_class=res.base_class,
                    classifiers=[res.classifiers[i]] if res.classifiers else [],
                    test_type=res.test_type, labels=res.labels)
        r.results = []
        for te in res.results:
            r.results.append(Orange.evaluation.testing.TestedExample(
                te.iteration_number, te.actual_class, n=1, weight=te.weight))
            r.results[-1].classes = [te.classes[i]]
            r.results[-1].probabilities = [te.probabilities[i]]
        split_res.append(r)
    return split_res

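# A minimal sketch of the two split helpers above (assuming "res" holds the
# results of a multi-fold test, e.g. from
# Orange.evaluation.testing.cross_validation):
#
#     per_fold = split_by_iterations(res)      # one ExperimentResults per fold
#     per_learner = split_by_classifiers(res)  # one ExperimentResults per learner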

def class_probabilities_from_res(res, **argkw):
    """Calculate class probabilities."""
    probs = [0.] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            probs[int(tex.actual_class)] += 1.
        totweight = gettotsize(res)
    else:
        totweight = 0.
        for tex in res.results:
            probs[int(tex.actual_class)] += tex.weight
            totweight += tex.weight
        check_non_zero(totweight)
    return [prob / totweight for prob in probs]

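# A minimal sketch of the helper above (assuming "res" holds results of a
# test on a discrete-class data set):
#
#     apriori = class_probabilities_from_res(res)
#     # a list with one proportion per class value, summing to 1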

@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    # remove empty folds, turn the matrix so that learners are the outer dimension
    if iteration_is_outer:
        if not stats:
            raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."
        number_of_learners = len(stats[0])
        stats = filter(lambda (x, fN): fN > 0, zip(stats, fold_n))
        stats = [[x[lrn] / fN for x, fN in stats]
                 for lrn in range(number_of_learners)]
    else:
        stats = [[x / fN for x, fN in filter(lambda (x, fN): fN > 0,
                 zip(lrnD, fold_n))] for lrnD in stats]

    if not stats:
        raise ValueError, "Cannot compute the score: no classifiers"
    if not stats[0]:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0."

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]

def ME(res, **argkw):
    """Compute the mean absolute error over all test instances."""
    MEs = [0.] * res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            MEs = map(lambda res, cls, ac=float(tex.actual_class):
                      res + abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            MEs = map(lambda res, cls, ac=float(tex.actual_class), tw=tex.weight:
                       res + tw * abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotweight(res)

    return [x / totweight for x in MEs]

# Note: this alias is shadowed by the regression_error-based MAE defined below.
MAE = ME


class ConfusionMatrix:
    """
    Classification result summary.
    """
    #: True Positive predictions
    TP = 0.
    #: True Negative predictions
    TN = 0.
    #: False Positive predictions
    FP = 0.
    #: False Negative predictions
    FN = 0.

    @deprecated_keywords({"predictedPositive": "predicted_positive",
                          "isPositive": "is_positive"})
    def addTFPosNeg(self, predicted_positive, is_positive, weight=1.0):
        """
        Update the confusion matrix with the result of a single classification.

        :param predicted_positive: the positive class value was predicted
        :param is_positive: the correct class value is positive
        :param weight: weight of the selected instance
        """
        if predicted_positive:
            if is_positive:
                self.TP += weight
            else:
                self.FP += weight
        else:
            if is_positive:
                self.FN += weight
            else:
                self.TN += weight

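# A minimal sketch of filling a ConfusionMatrix by hand (the predictions
# and labels below are illustrative):
#
#     cm = ConfusionMatrix()
#     for predicted, actual in [(1, 1), (1, 0), (0, 0), (0, 0)]:
#         cm.addTFPosNeg(predicted == 1, actual == 1)
#     # now cm.TP == 1., cm.FP == 1., cm.TN == 2., cm.FN == 0.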


#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of numeric predictions

def check_argkw(dct, lst):
    """Return True if any item from lst has a non-zero value in dct."""
    return reduce(lambda x, y: x or y, [dct.get(k, 0) for k in lst])

def regression_error(res, **argkw):
    """Return the regression error (default: MSE)."""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # computes the scores for each iteration, then averages
        scores = [[0.] * res.number_of_iterations
                  for _ in range(res.number_of_learners)]
        norm = None
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.] * res.number_of_iterations

        # counts examples in each iteration
        nIter = [0] * res.number_of_iterations
        # average class in each iteration
        a = [0] * res.number_of_iterations
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i] / nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # iterate across test cases
            for tex in res.results:
                ai = float(tex.actual_class)

                # compute normalization, if required
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number]) ** 2

                # iterate across results of different regressors
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai) ** 2
        else: # weighted
            raise NotImplementedError, "weighted error scores with SE not implemented yet"

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x / n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x / ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else: # single iteration (testing on a single test set)
        scores = [0.] * res.number_of_learners
        norm = 0.

        if argkw.get("unweighted", 0) or not res.weights:
            a = sum([float(tex.actual_class) for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                if argkw.get("abs", 0):
                    scores = map(lambda res, cls, ac=float(tex.actual_class):
                                 res + abs(float(cls) - ac), scores, tex.classes)
                else:
                    scores = map(lambda res, cls, ac=float(tex.actual_class):
                                 res + (float(cls) - ac) ** 2, scores, tex.classes)

                if argkw.get("norm-abs", 0):
                    norm += abs(float(tex.actual_class) - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (float(tex.actual_class) - a) ** 2
            totweight = gettotsize(res)
        else:
            # UNFINISHED: the weighted branch supports squared errors only
            MSEs = [0.] * res.number_of_learners
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class),
                           tw=tex.weight:
                           res + tw * (float(cls) - ac) ** 2, MSEs, tex.classes)
            scores = MSEs
            totweight = gettotweight(res)

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s / norm for s in scores]
        else: # normalize by number of instances (or sum of weights)
            scores = [s / totweight for s in scores]

        if argkw.get("R2"):
            scores = [1. - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores

def MSE(res, **argkw):
    """Compute mean-squared error."""
    return regression_error(res, **argkw)

def RMSE(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def MAE(res, **argkw):
    """Compute mean absolute error."""
    argkw.setdefault("abs", True)
    return regression_error(res, **argkw)

def RSE(res, **argkw):
    """Compute relative squared error."""
    argkw.setdefault("norm-sqr", True)
    return regression_error(res, **argkw)

def RRSE(res, **argkw):
    """Compute root relative squared error."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def RAE(res, **argkw):
    """Compute relative absolute error."""
    argkw.setdefault("abs", True)
    argkw.setdefault("norm-abs", True)
    return regression_error(res, **argkw)

def R2(res, **argkw):
    """Compute the coefficient of determination, R-squared."""
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("R2", True)
    return regression_error(res, **argkw)

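# A minimal sketch of the regression scores above (assuming the "housing"
# regression data set ships with Orange; the learners are illustrative):
#
#     import Orange
#     data = Orange.data.Table("housing")
#     learners = [Orange.regression.mean.MeanLearner(),
#                 Orange.regression.linear.LinearRegressionLearner()]
#     res = Orange.evaluation.testing.cross_validation(learners, data)
#     print MSE(res)   # one score per learner
#     print RMSE(res)  # the same errors, on the original scale
#     print R2(res)    # 1.0 is perfect; 0.0 is no better than the mean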
def MSE_old(res, **argkw):
    """Compute mean-squared error."""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        MSEs = [[0.] * res.number_of_iterations
                for _ in range(res.number_of_learners)]
        nIter = [0] * res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac) ** 2
        else:
            raise ValueError, "weighted RMSE with SE not implemented yet"
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class):
                           res + (float(cls) - ac) ** 2, MSEs, tex.classes)
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac=float(tex.actual_class),
                           tw=tex.weight: res + tw * (float(cls) - ac) ** 2,
                           MSEs, tex.classes)
            totweight = gettotweight(res)

        # normalize before taking the square root, so that "sqrt" yields RMSE
        MSEs = [x / totweight for x in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return MSEs

def RMSE_old(res, **argkw):
    """Compute root mean-squared error."""
    argkw.setdefault("sqrt", 1)
    return MSE_old(res, **argkw)

#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of classifiers

class CA(list):
    """
    Compute the percentage of matches between predicted and actual classes.

    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :param report_se: include standard error in the result.
    :param ignore_weights: ignore instance weights.
    :rtype: list of scores, one for each learner.

    Standard errors are estimated from the deviation of CAs across folds (if
    test_results were produced by cross_validation) or approximated under
    the assumption of a normal distribution otherwise.
    """
    CONFUSION_MATRIX = 0
    CONFUSION_MATRIX_LIST = 1
    CLASSIFICATION = 2
    CROSS_VALIDATION = 3

    @deprecated_keywords({"reportSE": "report_se",
                          "unweighted": "ignore_weights"})
    def __init__(self, test_results, report_se=False, ignore_weights=False):
        super(CA, self).__init__()
        self.report_se = report_se
        self.ignore_weights = ignore_weights

        input_type = self.get_input_type(test_results)
        if input_type == self.CONFUSION_MATRIX:
            self[:] = [self.from_confusion_matrix(test_results)]
        elif input_type == self.CONFUSION_MATRIX_LIST:
            self[:] = self.from_confusion_matrix_list(test_results)
        elif input_type == self.CLASSIFICATION:
            self[:] = self.from_classification_results(test_results)
        elif input_type == self.CROSS_VALIDATION:
            self[:] = self.from_crossvalidation_results(test_results)

    def from_confusion_matrix(self, cm):
        all_predictions = 0.
        correct_predictions = 0.
        if isinstance(cm, ConfusionMatrix):
            all_predictions += cm.TP + cm.FN + cm.FP + cm.TN
            correct_predictions += cm.TP + cm.TN
        else:
            for r, row in enumerate(cm):
                for c, column in enumerate(row):
                    if r == c:
                        correct_predictions += column
                    all_predictions += column

        check_non_zero(all_predictions)
        ca = correct_predictions / all_predictions

        if self.report_se:
            # standard error of a proportion: sqrt(p * (1 - p) / n)
            return ca, math.sqrt(ca * (1 - ca) / all_predictions)
        else:
            return ca

    def from_confusion_matrix_list(self, confusion_matrices):
        return [self.from_confusion_matrix(cm) for cm in confusion_matrices]

    def from_classification_results(self, test_results):
        CAs = [0.] * test_results.number_of_learners
        totweight = 0.
        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            CAs = map(lambda res, cls: res + (cls == tex.actual_class and w),
                      CAs, tex.classes)
            totweight += w
        check_non_zero(totweight)
        ca = [x / totweight for x in CAs]

        if self.report_se:
            # standard error of a proportion: sqrt(p * (1 - p) / n)
            return [(x, math.sqrt(x * (1 - x) / totweight)) for x in ca]
        else:
            return ca

    def from_crossvalidation_results(self, test_results):
        CAsByFold = [[0.] * test_results.number_of_iterations
                     for _ in range(test_results.number_of_learners)]
        foldN = [0.] * test_results.number_of_iterations

        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            for lrn in range(test_results.number_of_learners):
                CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn] ==
                    tex.actual_class) and w
            foldN[tex.iteration_number] += w

        return statistics_by_folds(CAsByFold, foldN, self.report_se, False)

    def get_input_type(self, test_results):
        if isinstance(test_results, ConfusionMatrix):
            return self.CONFUSION_MATRIX
        elif isinstance(test_results, testing.ExperimentResults):
            if test_results.number_of_iterations == 1:
                return self.CLASSIFICATION
            else:
                return self.CROSS_VALIDATION
        elif isinstance(test_results, list):
            return self.CONFUSION_MATRIX_LIST

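# A minimal sketch for CA (and for the probability-based scores below),
# assuming the "voting" data set ships with Orange; the learners are
# illustrative:
#
#     import Orange
#     data = Orange.data.Table("voting")
#     learners = [Orange.classification.bayes.NaiveLearner(),
#                 Orange.classification.majority.MajorityLearner()]
#     res = Orange.evaluation.testing.cross_validation(learners, data)
#     print CA(res)                  # one accuracy per learner
#     print CA(res, report_se=True)  # (accuracy, standard error) pairs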

@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def AP(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the average probability assigned to the correct class."""
    if res.number_of_iterations == 1:
        APs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class],
                          APs, tex.probabilities)
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class] *
                          tex.weight, APs, tex.probabilities)
                totweight += tex.weight
        check_non_zero(totweight)
        return [ap / totweight for ap in APs]

    APsByFold = [[0.] * res.number_of_learners
                 for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations
    if ignore_weights or not res.weights:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs:
                res + probs[tex.actual_class],
                APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs:
                res + probs[tex.actual_class] * tex.weight,
                APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(APsByFold, foldN, report_se, True)


@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def Brier_score(res, report_se=False, ignore_weights=False, **argkw):
    """Compute the Brier score, defined as the average (over test instances)
    of :math:`\sum_x(t(x) - p(x))^2`, where x is a class value, t(x) is 1 for
    the actual class value and 0 otherwise, and p(x) is the predicted
    probability of x.
    """
    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
    #    x is a class,
    #    t(x) is 0 for the 'wrong' and 1 for the 'correct' class,
    #    p(x) is the predicted probability.
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1.
    # Since the +1 is there for each example, it adds 1 to the average,
    # so we skip it inside the sum and add it just at the end of the function.
    # We take max(result, 0) to avoid -0.0000x due to rounding errors.

    if res.number_of_iterations == 1:
        MSEs = [0.] * res.number_of_learners
        if ignore_weights or not res.weights:
            totweight = 0.
            for tex in res.results:
                MSEs = map(lambda res, probs: res + reduce(
                    lambda s, pi: s + pi ** 2, probs, 0) -
                    2 * probs[tex.actual_class], MSEs, tex.probabilities)
                totweight += 1.
        else:
            for tex in res.results:
                # the weight multiplies the whole per-instance term
                MSEs = map(lambda res, probs: res + tex.weight * (reduce(
                    lambda s, pi: s + pi ** 2, probs, 0) -
                    2 * probs[tex.actual_class]), MSEs, tex.probabilities)
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            ## the standard error is not computed here yet, so zero is reported
            return [(max(x / totweight + 1., 0), 0) for x in MSEs]
        else:
            return [max(x / totweight + 1., 0) for x in MSEs]

    BSs = [[0.] * res.number_of_learners
           for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if ignore_weights or not res.weights:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs: rr + reduce(
                lambda s, pi: s + pi ** 2, probs, 0) -
                2 * probs[tex.actual_class], BSs[tex.iteration_number],
                tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda res, probs:
                res + tex.weight * (reduce(lambda s, pi: s + pi ** 2, probs, 0) -
                2 * probs[tex.actual_class]), BSs[tex.iteration_number],
                tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x + 1., y) for x, y in stats]
    else:
        return [x + 1. for x in stats]

def BSS(res, **argkw):
    """Compute 1 - Brier_score / 2."""
    return [1 - x / 2 for x in apply(Brier_score, (res,), argkw)]

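# A minimal sketch for the probability-based scores above (assuming "res"
# comes from a cross_validation run as in the CA example):
#
#     print AP(res)           # average probability of the correct class
#     print Brier_score(res)  # 0 is perfect, 2 is the worst possible score
#     print BSS(res)          # rescaled so that 1 is perfect and 0 is worst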
def IS_ex(Pc, P):
    """Pc is the posterior probability of the correct class, P the prior."""
    if Pc >= P:
        return -log2(P) + log2(Pc)
    else:
        return -(-log2(1 - P) + log2(1 - Pc))


@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se=False, **argkw):
    """Compute the information score as defined by
    `Kononenko and Bratko (1991) \
    <http://www.springerlink.com/content/g5p7473160476612/>`_.
    Argument :obj:`apriori` gives the apriori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:`res`.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations == 1:
        ISs = [0.] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += (IS_ex(tex.probabilities[i][cls], apriori[cls]) *
                               tex.weight)
            totweight = gettotweight(res)
        if report_se:
            return [(x / totweight, 0) for x in ISs]
        else:
            return [x / totweight for x in ISs]


    ISs = [[0.] * res.number_of_iterations
           for _ in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # compute info scores for each fold
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls],
                                                apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls],
                                                apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)

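# A minimal sketch for the information score (assuming "res" comes from a
# cross_validation run as in the CA example):
#
#     print IS(res)  # average bits of information per instance
#     # an explicit prior distribution may be passed instead:
#     print IS(res, apriori=[0.5, 0.5])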

def Wilcoxon(res, statistics, **argkw):
    """Compare two classifiers with the Wilcoxon signed-rank test on
    per-iteration scores computed by the given statistics."""
    res1, res2 = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError, "Wilcoxon compares two classifiers, no more, no less"
        res1.append(stats[0])
        res2.append(stats[1])
    return statc.wilcoxont(res1, res2)

def rank_difference(res, statistics, **argkw):
    if not res.results:
        raise TypeError, "no experiments"

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError, "nothing to compare (less than two classifiers given)"
    if k == 2:
        return apply(Wilcoxon, (res, statistics), argkw)
    else:
        return apply(Friedman, (res, statistics), argkw)


@deprecated_keywords({"res": "test_results",
                      "classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def confusion_matrices(test_results, class_index=-1,
                       ignore_weights=False, cutoff=.5):
    """
    Return confusion matrices for test_results.

    :param test_results: test results
    :param class_index: index of the class value for which the confusion
        matrices are to be computed (by default unspecified - see the note
        below).
    :param ignore_weights: ignore instance weights.
    :param cutoff: cutoff for the predicted probability of the class

    :rtype: list of :obj:`ConfusionMatrix` or list-of-list-of-lists (see
        the note below)

    .. note:: If `class_index` is set to -2, or if it is not specified and
        `test_results` contain predictions for a multi-class problem, then
        the return value is a list of 2D tables (list-of-lists) of all
        pairwise misclassifications between class values.

    """
    tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)]

    if class_index < 0:
        numberOfClasses = len(test_results.class_values)
        if class_index < -1 or numberOfClasses > 2:
            cm = [[[0.] * numberOfClasses for _ in range(numberOfClasses)]
                  for _ in range(test_results.number_of_learners)]
            if ignore_weights or not test_results.weights:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += 1
            else:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += tex.weight
            return cm

        elif test_results.base_class >= 0:
            class_index = test_results.base_class
        else:
            class_index = 1

    if cutoff != .5:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] >
                                         cutoff, isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] >
                                         cutoff, isPositive, lr.weight)
    else:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive, lr.weight)
    return tfpns

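# A minimal sketch: extract binary confusion matrices from test results
# (assuming "res" comes from a cross_validation run on a binary problem,
# as in the CA example):
#
#     cms = confusion_matrices(res, class_index=1)
#     cm = cms[0]  # confusion matrix of the first learner
#     print cm.TP, cm.FP, cm.FN, cm.TN
#     # with class_index=-2, a full class-by-class matrix is returned instead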

# obsolete (renamed)
compute_confusion_matrices = confusion_matrices


@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """
    Return the chi-square statistic of the confusion matrix, together with
    the degrees of freedom and the p-value (a higher statistic indicates
    that the predictions are unlikely to be by chance).
    """
    if isinstance(confusion_matrix, ConfusionMatrix) or \
       not isinstance(confusion_matrix[1], list):
        return _confusion_chi_square(confusion_matrix)
    else:
        return map(_confusion_chi_square, confusion_matrix)

def _confusion_chi_square(confusion_matrix):
    if isinstance(confusion_matrix, ConfusionMatrix):
        c = confusion_matrix
        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]]
    dim = len(confusion_matrix)
    rowPriors = [sum(r) for r in confusion_matrix]
    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)]
    total = sum(rowPriors)
    rowPriors = [r / total for r in rowPriors]
    colPriors = [r / total for r in colPriors]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, o in enumerate(row):
            e = total * rowPriors[ri] * colPriors[ci]
            if not e:
                return -1, -1, -1
            ss += (o - e) ** 2 / e
    df = (dim - 1) ** 2
    return ss, df, statc.chisqprob(ss, df)

class CMScore(list):
    """
    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :rtype: list of scores, one for each learner."""
    def __new__(cls, test_results, **kwargs):
        self = list.__new__(cls)
        if isinstance(test_results, ConfusionMatrix):
            self.__init__(test_results, **kwargs)
            return self[0]
        return self


    @deprecated_keywords({"confm": "test_results"})
    def __init__(self, test_results=None):
        super(CMScore, self).__init__()

        if test_results is not None:
            self[:] = self.__call__(test_results)

    def __call__(self, test_results):
        if isinstance(test_results, testing.ExperimentResults):
            test_results = confusion_matrices(test_results, class_index=1)
        if isinstance(test_results, ConfusionMatrix):
            test_results = [test_results]

        return map(self.compute, test_results)



class Sensitivity(CMScore):
    __doc__ = """Compute `sensitivity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_ (proportion
    of actual positives which are correctly identified as such).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.TP + confusion_matrix.FN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute sensitivity: one or both classes have no instances")
            return None

        return confusion_matrix.TP / tot


class Recall(Sensitivity):
    __doc__ = """Compute `recall
    <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (fraction of relevant instances that are retrieved).
    """ + CMScore.__doc__
    pass # Recall == Sensitivity


class Specificity(CMScore):
    __doc__ = """Compute `specificity
    <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
    (proportion of negatives which are correctly identified).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.FP + confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute specificity: one or both classes have no instances")
            return None
        return confusion_matrix.TN / tot


class PPV(CMScore):
    __doc__ = """Compute `positive predictive value
    <http://en.wikipedia.org/wiki/Positive_predictive_value>`_ (proportion of
    subjects with positive test results who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.TP + confusion_matrix.FP
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute PPV: one or both classes have no instances")
            return None
        return confusion_matrix.TP / tot


class Precision(PPV):
    __doc__ = """Compute `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (proportion of retrieved instances that are relevant).
    """ + CMScore.__doc__
    pass # Precision == PPV


class NPV(CMScore):
    __doc__ = """Compute `negative predictive value
    <http://en.wikipedia.org/wiki/Negative_predictive_value>`_ (proportion of
    subjects with a negative test result who are correctly diagnosed).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        tot = confusion_matrix.FN + confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute NPV: one or both classes have no instances")
            return None
        return confusion_matrix.TN / tot


class F1(CMScore):
    __doc__ = """Return the `F1 score
    <http://en.wikipedia.org/wiki/F1_score>`_
    (harmonic mean of precision and recall).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, confusion_matrix):
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        if p is not None and r is not None and (p + r) != 0:
            return 2. * p * r / (p + r)
        else:
            import warnings
            warnings.warn("Can't compute F1: P + R is zero or not defined")
            return None

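# A minimal sketch for the confusion-matrix scores above (assuming "res"
# comes from a cross_validation run on a binary problem, as in the CA
# example; class value 1 is taken as "positive" by default):
#
#     print Sensitivity(res)
#     print Specificity(res)
#     print Precision(res)
#     print F1(res)
#     # each accepts an ExperimentResults, a single ConfusionMatrix or a
#     # list of confusion matrices, and returns one score per learner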

class Falpha(CMScore):
    __doc__ = """Compute the F-measure with the precision/recall trade-off
    parameter alpha over the given confusion matrix (alpha=1 gives the F1
    score).
    """ + CMScore.__doc__

    def __init__(self, test_results, alpha=1.):
        self.alpha = alpha
        super(Falpha, self).__init__(test_results)

    def compute(self, confusion_matrix):
        p = Precision.compute(confusion_matrix)
        r = Recall.compute(confusion_matrix)
        return (1. + self.alpha) * p * r / (self.alpha * p + r)


class MCC(CMScore):
    __doc__ = """Compute the `Matthews correlation coefficient
    <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_
    (correlation coefficient between the observed and predicted binary
    classifications).
    """ + CMScore.__doc__
    @classmethod
    def compute(self, cm):
        # code by Boris Gorelik
        TP, TN, FP, FN = cm.TP, cm.TN, cm.FP, cm.FN

        try:
            return (TP * TN - FP * FN) / \
                 math.sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
        except ZeroDivisionError:
            # Zero division occurs when some row or column of the confusion
            # matrix is empty, i.e. the data or the predictions contain only
            # one of the two classes.
            import warnings
            warnings.warn("Can't compute MCC: TP or TN is zero or not defined")


@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for
    nominal raters.

    :param confusion_matrix: confusion matrix, or list of confusion matrices.
        To obtain a non-binary confusion matrix, call
        Orange.evaluation.scoring.confusion_matrices and set the class_index
        parameter to -2.
    :param b_is_list_of_matrices: specifies whether confusion_matrix is a
        list of matrices. This function needs to operate on non-binary
        confusion matrices, which are represented by python lists, therefore
        one needs a way to distinguish between a single matrix and a list of
        matrices.
    """

    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False)
                    for cm in confusion_matrix]
        except TypeError:
            # Never mind the parameter; maybe this is a "conventional" binary
            # confusion matrix and b_is_list_of_matrices was specified by
            # mistake
            return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
    else:
        if isinstance(confusion_matrix, ConfusionMatrix):
            confusion_matrix = numpy.array([[confusion_matrix.TP,
                confusion_matrix.FN], [confusion_matrix.FP,
                confusion_matrix.TN]], dtype=float)
        else:
            confusion_matrix = numpy.array(confusion_matrix, dtype=float)

        marginalSumOfColumns = numpy.sum(confusion_matrix, axis=0)
        marginalSumOfRows = numpy.sum(confusion_matrix, axis=1)
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                           (2. * numpy.sum(confusion_matrix))
        # In the eq. above, 2. is what the Wikipedia page calls
        # the number of annotators. Here we have two annotators:
        # the observed (true) labels (annotations) and the ones
        # predicted by the learners.

        prExpected = numpy.sum(jointProportion ** 2)
        prActual = numpy.sum(numpy.diag(confusion_matrix)) / \
                   numpy.sum(confusion_matrix)

        return (prActual - prExpected) / (1.0 - prExpected)

# Backward compatibility
sens = Sensitivity
spec = Specificity
precision = Precision
recall = Recall



@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def AUCWilcoxon(res, class_index=-1, ignore_weights=False, **argkw):
    """Compute the area under ROC (AUC) and its standard error using
    Wilcoxon's approach as proposed by Hanley and McNeil (1982). If
    :obj:`class_index` is not specified, the first class is used as
    "the positive" and the others are negative. The result is a list of
    tuples (aROC, standard error).

    If test results consist of multiple folds, you need to split them using
    :obj:`split_by_iterations` and perform this test on each fold separately.
    """
    useweights = res.weights and not ignore_weights
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results = []

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:
        highPos, lowNeg = totPos, 0.
        W, Q1, Q2 = 0., 0., 0.
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos / 2.)
            Q2 += thisPos * (lowNeg ** 2 + lowNeg * thisNeg + thisNeg ** 2 / 3.)
            Q1 += thisNeg * (highPos ** 2 + highPos * thisPos + thisPos ** 2 / 3.)

            lowNeg += thisNeg

        W /= (totPos * totNeg)
        Q1 /= (totNeg * totPos ** 2)
        Q2 /= (totPos * totNeg ** 2)

        SE = math.sqrt((W * (1 - W) + (totPos - 1) * (Q1 - W ** 2) +
                       (totNeg - 1) * (Q2 - W ** 2)) / (totPos * totNeg))
        results.append((W, SE))
    return results

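# A minimal sketch for AUCWilcoxon (assuming "res" comes from a multi-fold
# run as in the CA example; per the docstring, folds are scored separately):
#
#     for fold in split_by_iterations(res):
#         print AUCWilcoxon(fold)  # [(AUC, SE), ...], one pair per learner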
AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete


@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def compare_2_AUCs(res, lrn1, lrn2, class_index=-1,
                   ignore_weights=False, **argkw):
    return corn.compare2ROCs(res, lrn1, lrn2, class_index,
                             res.weights and not ignore_weights)

# for backward compatibility, compare_2_AROCs is obsolete
compare_2_AROCs = compare_2_AUCs


@deprecated_keywords({"classIndex": "class_index"})
def compute_ROC(res, class_index=-1):
    """Compute a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    totPos, totNeg = tots[1], tots[0]

    for plist in problists:
        curve = [(1., 1.)]
        TP, TN = totPos, 0.
        FN, FP = 0., totNeg
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            # thisPos moves from TP to FN
            TP -= thisPos
            FN += thisPos
            # thisNeg moves from FP to TN
            TN += thisNeg
            FP -= thisNeg

            sens = TP / (TP + FN)
            spec = TN / (FP + TN)
            curve.append((1 - spec, sens))
        results.append(curve)

    return results

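# A minimal sketch: plot the ROC curves with matplotlib (guarded by the
# HAS_MATPLOTLIB flag set at the top of the module; "res" is assumed to
# come from a test run as in the CA example):
#
#     if HAS_MATPLOTLIB:
#         import matplotlib.pyplot as plt
#         for curve in compute_ROC(res):  # one curve per learner
#             plt.plot([x for x, y in curve], [y for x, y in curve])
#         plt.plot([0, 1], [0, 1], "k--")  # diagonal of a random guesser
#         plt.xlabel("1 - specificity")
#         plt.ylabel("sensitivity")
#         plt.show()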
## TC's implementation of algorithms, taken from:
## T Fawcett: ROC Graphs: Notes and Practical Considerations for
## Data Mining Researchers, submitted to KDD Journal.
def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)):
    if P1x == P2x:
        return 1e300
    return (P1y - P2y) / (P1x - P2x)


@deprecated_keywords({"keepConcavities": "keep_concavities"})
def ROC_add_point(P, R, keep_concavities=1):
    if keep_concavities:
        R.append(P)
    else:
        while True:
            if len(R) < 2:
                R.append(P)
                return R
            else:
                T = R.pop()
                T2 = R[-1]
                if ROC_slope(T2, T) > ROC_slope(T, P):
                    R.append(T)
                    R.append(P)
                    return R
    return R


@deprecated_keywords({"classIndex": "class_index",
                      "keepConcavities": "keep_concavities"})
def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]

    for plist in problists:
        ## corn gives a list increasing by scores; we need a decreasing one
        plist.reverse()
        TP = 0.
        FP = 0.
        curve = []
        fPrev = 10e300 # "infinity" score at 0., 0.
        for prob in plist:
            f = prob[0]
            if f != fPrev:
                if P:
                    tpr = TP / P
                else:
                    tpr = 0.
                if N:
                    fpr = FP / N
                else:
                    fpr = 0.
                curve = ROC_add_point((fpr, tpr, fPrev), curve,
                                      keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        if P:
            tpr = TP / P
        else:
            tpr = 0.
        if N:
            fpr = FP / N
        else:
            fpr = 0.
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly
        results.append(curve)

    return results

## returns a list of points at the intersection of the tangential
## iso-performance line and the given ROC curve
## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    m = (FPcost * (1. - pval)) / (FNcost * pval)

    ## put the iso-performance line in point (0., 1.)
    x0, y0 = (0., 1.)
    x1, y1 = (1., 1. + m)
    d01 = math.sqrt((x1 - x0) * (x1 - x0) + (y1 - y0) * (y1 - y0))

    ## calculate and find the closest point to the line
    firstp = 1
    mind = 0.
    a = x0 * y1 - x1 * y0
    closestPoints = []
    for (x, y, fscore) in curve:
        d = ((y0 - y1) * x + (x1 - x0) * y + a) / d01
        d = abs(d)
        if firstp or d < mind:
            mind, firstp = d, 0
            closestPoints = [(x, y, fscore)]
        else:
            if abs(d - mind) <= 0.0001: ## close enough
                closestPoints.append((x, y, fscore))
    return closestPoints

def frange(start, end=None, inc=None):
    """A range function that accepts float increments."""

    if end is None:
        end = start + 0.
        start = 0.

    if inc is None or inc == 0:
        inc = 1.

    L = [start]
    while 1:
        next = start + len(L) * inc
        if inc > 0 and next >= end:
            L.append(end)
            break
        elif inc < 0 and next <= end:
            L.append(end)
            break
        L.append(next)

    return L

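# A small illustration of frange (the endpoint is always included):
#
#     frange(0., 1., 0.25)  # -> [0.0, 0.25, 0.5, 0.75, 1.0]
#     frange(1., 0., -0.5)  # -> [1.0, 0.5, 0.0]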
1205## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1206## where ROCcurvesX is a set of ROC curves,
1207## where a (one) ROC curve is a set of (FP, TP) points
1208##
1209## for each (sub)set of input ROC curves
1210## returns the average ROC curve and an array of (vertical) standard deviations
1211@deprecated_keywords({"ROCcurves": "roc_curves"})
1212def TC_vertical_average_ROC(roc_curves, samples=10):
1213    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
1214        if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x:
1215            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
1216        dx = float(P2x) - float(P1x)
1217        dy = float(P2y) - float(P1y)
1218        m = dy / dx
1219        return P1y + m * (X - P1x)
1220
1221    def TP_FOR_FP(FPsample, ROC, npts):
1222        i = 0
1223        while i < npts - 1:
1224            (fp, _, _) = ROC[i + 1]
1225            if fp <= FPsample:
1226                i += 1
1227            else:
1228                break
1229        (fp, tp, _) = ROC[i]
1230        if fp == FPsample:
1231            return tp
1232        elif fp < FPsample and i + 1 < len(ROC):
1233            return INTERPOLATE(ROC[i], ROC[i + 1], FPsample)
1234        elif fp < FPsample and i + 1 == len(ROC): # return the last
1235            return ROC[i][1]
1236        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
1237        #return 0.
1238
1239    average = []
1240    stdev = []
1241    for ROCS in roc_curves:
1242        npts = []
1243        for c in ROCS:
1244            npts.append(len(c))
1245        nrocs = len(ROCS)
1246
1247        TPavg = []
1248        TPstd = []
1249        for FPsample in frange(0., 1., 1. / samples):
1250            TPsum = []
1251            for i in range(nrocs):
1252                ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
1253                TPsum.append(TP_FOR_FP(FPsample, ROCS[i], npts[i]))
1254            TPavg.append((FPsample, statc.mean(TPsum)))
1255            if len(TPsum) > 1:
1256                stdv = statc.std(TPsum)
1257            else:
1258                stdv = 0.
1259            TPstd.append(stdv)
1260
1261        average.append(TPavg)
1262        stdev.append(TPstd)
1263
1264    return average, stdev
1265
1266## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1267## where ROCcurvesX is a set of ROC curves,
1268## where a (one) ROC curve is a set of (FP, TP) points
1269##
1270## for each (sub)set of input ROC curves
1271## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
1272@deprecated_keywords({"ROCcurves": "roc_curves"})
1273def TC_threshold_average_ROC(roc_curves, samples=10):
1274    def POINT_AT_THRESH(ROC, npts, thresh):
1275        i = 0
1276        while i < npts - 1:
1277            (px, py, pfscore) = ROC[i]
1278            if pfscore > thresh:
1279                i += 1
1280            else:
1281                break
1282        return ROC[i]
1283
1284    average = []
1285    stdevV = []
1286    stdevH = []
1287    for ROCS in roc_curves:
1288        npts = []
1289        for c in ROCS:
1290            npts.append(len(c))
1291        nrocs = len(ROCS)
1292
1293        T = []
1294        for c in ROCS:
1295            for (px, py, pfscore) in c:
1296##                try:
1297##                    T.index(pfscore)
1298##                except:
1299                T.append(pfscore)
1300        T.sort()
1301        T.reverse() ## ugly
1302
1303        TPavg = []
1304        TPstdV = []
1305        TPstdH = []
1306        for tidx in frange(0, (len(T) - 1.), float(len(T)) / samples):
1307            FPsum = []
1308            TPsum = []
1309            for i in range(nrocs):
1310                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
1311                FPsum.append(fp)
1312                TPsum.append(tp)
1313            TPavg.append((statc.mean(FPsum), statc.mean(TPsum)))
1314            ## vertical standard deviation
1315            if len(TPsum) > 1:
1316                stdv = statc.std(TPsum)
1317            else:
1318                stdv = 0.
1319            TPstdV.append(stdv)
1320            ## horizontal standard deviation
1321            if len(FPsum) > 1:
1322                stdh = statc.std(FPsum)
1323            else:
1324                stdh = 0.
1325            TPstdH.append(stdh)
1326
1327        average.append(TPavg)
1328        stdevV.append(TPstdV)
1329        stdevH.append(TPstdH)
1330
1331    return average, stdevV, stdevH
1332
1333## Calibration Curve
1334## returns an array of (curve, yesClassPredictions, noClassPredictions)
1335## elements, where:
1336##  - curve is an array of points (x, y) on the calibration curve
1337##  - yesClassRugPoints is an array of (x, 1) points
1338##  - noClassRugPoints is an array of (x, 0) points
1339@deprecated_keywords({"classIndex": "class_index"})
1340def compute_calibration_curve(res, class_index= -1):
1341    ## merge multiple iterations into one
1342    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1343        res.class_values, res.weights, classifiers=res.classifiers,
1344        loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1345    for te in res.results:
1346        mres.results.append(te)
1347
1348    problists, tots = corn.computeROCCumulative(mres, class_index)
1349
1350    results = []
1351
1352    bins = 10 ## divide interval between 0. and 1. into N bins
1353
1354    for plist in problists:
1355        yesClassRugPoints = []
1356        noClassRugPoints = []
1357
1358        yesBinsVals = [0] * bins
1359        noBinsVals = [0] * bins
1360        for (f, (thisNeg, thisPos)) in plist:
1361            yesClassRugPoints.append((f, thisPos)) # 1.
1362            noClassRugPoints.append((f, thisNeg)) # 1.
1363
1364            index = int(f * bins)
1365            index = min(index, bins - 1) ## just in case for value 1.
1366            yesBinsVals[index] += thisPos
1367            noBinsVals[index] += thisNeg
1368
1369        curve = []
1370        for cn in range(bins):
1371            f = float(cn * 1. / bins) + (1. / 2. / bins)
1372            yesVal = yesBinsVals[cn]
1373            noVal = noBinsVals[cn]
1374            allVal = yesVal + noVal
1375            if allVal == 0.: continue
1376            y = float(yesVal) / float(allVal)
1377            curve.append((f, y))
1378
1379        ## smooth the curve
1380        maxnPoints = 100
1381        if len(curve) >= 3:
1382#            loessCurve = statc.loess(curve, -3, 0.6)
1383            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
1384        else:
1385            loessCurve = curve
1386        clen = len(loessCurve)
1387        if clen > maxnPoints:
1388            df = clen / maxnPoints
1389            if df < 1: df = 1
1390            curve = [loessCurve[i]  for i in range(0, clen, df)]
1391        else:
1392            curve = loessCurve
1393        ## remove the third value (variance of epsilon?) that suddenly
1394        ## appeared in the output of the statc.loess function
1395        curve = [c[:2] for c in curve]
1396        results.append((curve, yesClassRugPoints, noClassRugPoints))
1397
1398    return results
1399
1400
1401## Lift Curve
1402## returns an array of curve elements, where:
1403##  - curve is an array of points ((TP + FP) / (P + N), TP / P, (th, FP / N))
1404##    on the Lift Curve
1405@deprecated_keywords({"classIndex": "class_index"})
1406def compute_lift_curve(res, class_index= -1):
1407    ## merge multiple iterations into one
1408    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
1409        res.class_values, res.weights, classifiers=res.classifiers,
1410        loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1411    for te in res.results:
1412        mres.results.append(te)
1413
1414    problists, tots = corn.computeROCCumulative(mres, class_index)
1415
1416    results = []
1417    P, N = tots[1], tots[0]
1418    for plist in problists:
1419        ## corn gives an increasing by scores list, we need a decreasing
1420        plist.reverse()
1421        TP = 0.
1422        FP = 0.
1423        curve = [(0., 0., (10e300, 0.))]
1424        for (f, (thisNeg, thisPos)) in plist:
1425            TP += thisPos
1426            FP += thisNeg
1427            curve.append(((TP + FP) / (P + N), TP, (f, FP / (N or 1))))
1428        results.append(curve)
1429
1430    return P, N, results
1431
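## Usage sketch: TP in the returned points is an absolute count, so divide
## it by the returned P to get the y axis of a conventional lift chart
## (assumes `res` holds binary-class test results, e.g. from the
## cross-validation sketch above).
#
#   P, N, curves = compute_lift_curve(res)
#   for rate, TP, (th, fp_rate) in curves[0]:
#       print "%5.3f\t%5.3f" % (rate, TP / (P or 1.))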
1432
1433class CDT:
1434  """Store the number of concordant (C), discordant (D) and tied (T) pairs."""
1435  def __init__(self, C=0., D=0., T=0.):
1436    self.C, self.D, self.T = C, D, T
1437
1438def is_CDT_empty(cdt):
1439    return cdt.C + cdt.D + cdt.T < 1e-20
1440
1441
1442@deprecated_keywords({"classIndex": "class_index",
1443                      "unweighted": "ignore_weights"})
1444def compute_CDT(res, class_index= -1, ignore_weights=False, **argkw):
1445    """Obsolete, don't use."""
1446    if class_index < 0:
1447        if res.base_class >= 0:
1448            class_index = res.base_class
1449        else:
1450            class_index = 1
1451
1452    useweights = res.weights and not ignore_weights
1453    weightByClasses = argkw.get("weightByClasses", True)
1454
1455    if res.number_of_iterations > 1:
1456        CDTs = [CDT() for _ in range(res.number_of_learners)]
1457        iterationExperiments = split_by_iterations(res)
1458        for exp in iterationExperiments:
1459            expCDTs = corn.computeCDT(exp, class_index, useweights)
1460            for i in range(len(CDTs)):
1461                CDTs[i].C += expCDTs[i].C
1462                CDTs[i].D += expCDTs[i].D
1463                CDTs[i].T += expCDTs[i].T
1464        for i in range(res.number_of_learners):
1465            if is_CDT_empty(CDTs[i]):
1466                return corn.computeCDT(res, class_index, useweights)
1467
1468        return CDTs
1469    else:
1470        return corn.computeCDT(res, class_index, useweights)
1471
1472# Backward compatibility
1473def replace_use_weights(fun):
1474    if environ.orange_no_deprecated_members:
1475        return fun
1476
1477    @functools.wraps(fun)
1478    def wrapped(*args, **kwargs):
1479        use_weights = kwargs.pop("useWeights", None)
1480        if use_weights is not None:
1481            deprecation_warning("useWeights", "ignore_weights")
1482            kwargs["ignore_weights"] = not use_weights
1483        return fun(*args, **kwargs)
1484    return wrapped
1485
1486def replace_discrete_probabilities_with_list(method=False):
1487    if environ.orange_no_deprecated_members:
1488        return lambda fun: fun
1489
1490    def decorator(fun):
1491        @functools.wraps(fun)
1492        def wrapped(*args, **kwargs):
1493            res = args[method] if len(args)>method else kwargs.get("res", kwargs.get("test_results", None))
1494            convert = res is not None
1495
1496            if convert:
1497                old_probs = []
1498                for r in res.results:
1499                    old_probs.append(r.probabilities)
1500                    r.probabilities = [list(p) if type(p) is Orange.statistics.distribution.Discrete
1501                                       else p for p in r.probabilities]
1502            result = fun(*args, **kwargs)
1503            if convert:
1504                for r, old in zip(res.results, old_probs):
1505                    r.probabilities = old
1506            return result
1507        return wrapped
1508    return decorator
1509
1510class AUC(list):
1511    """
1512    Compute the area under ROC curve given a set of experimental results.
1513    If testing consisted of multiple folds, each fold is scored and the
1514    average score is returned. If a fold contains only instances with the
1515    same class value, folds will be merged.
1516
1517    :param test_results: test results to score
1518    :param ignore_weights: ignore instance weights when calculating score
1519    :param multiclass: tells what kind of averaging to perform if the target
1520                       class has more than 2 values.
1521    """
1522
1523    #!Compute AUC for each pair of classes (ignoring instances of all other
1524    #!classes) and average the results, weighting them by the number of
1525    #!pairs of instances from these two classes (e.g. by the product of
1526    #!probabilities of the two classes). AUC computed in this way still
1527    #!behaves as the concordance index, i.e., it gives the probability that two
1528    #!randomly chosen instances from different classes will be correctly
1529    #!recognized (if the classifier knows from which two classes the
1530    #!instances came).
1531    ByWeightedPairs = 0
1532
1533    #!Similar to ByWeightedPairs, except that the average over class pairs
1534    #!is not weighted. This AUC is, like the binary version, independent of
1535    #!class distributions, but it is not related to the concordance index
1536    #!any more.
1537    ByPairs = 1
1538
1539    #!For each class, it computes AUC for this class against all others (that
1540    #!is, treating other classes as one class). The AUCs are then averaged by
1541    #!the class probabilities. This is related to the concordance index in
1542    #!which we test the classifier's (average) capability of distinguishing
1543    #!the instances from a specified class from those that come from other
1544    #!classes.
1545    #!Unlike the binary AUC, the measure is not independent of class
1546    #!distributions.
1547    WeightedOneAgainstAll = 2
1548
1549    #!Similar to weighted_one_against_all, except that the average
1550    #!is not weighted.
1551    OneAgainstAll = 3
1552
1553    @replace_use_weights
1554    @deprecated_keywords({"method": "multiclass"})
1555    def __init__(self, test_results=None, multiclass=ByWeightedPairs, ignore_weights=False):
1556
1557        super(AUC, self).__init__()
1558
1559        self.ignore_weights = ignore_weights
1560        self.method = multiclass
1561
1562        if test_results is not None:
1563            self[:] = self.__call__(test_results)
1564
1565    @replace_discrete_probabilities_with_list(method=True)
1566    def __call__(self, test_results):
1567        if len(test_results.class_values) < 2:
1568            raise ValueError("Cannot compute AUC on a single-class problem")
1569        elif len(test_results.class_values) == 2:
1570            return self._compute_for_binary_class(test_results)
1571        else:
1572            return self._compute_for_multi_value_class(test_results, self.method)
1573
1574    def _compute_for_binary_class(self, res):
1575        """AUC for binary classification problems"""
1576        if res.number_of_iterations > 1:
1577            return self._compute_for_multiple_folds(
1578                self._compute_one_class_against_all,
1579                split_by_iterations(res),
1580                (-1, res, res.number_of_iterations))
1581        else:
1582            return self._compute_one_class_against_all(res, -1)[0]
1583
1584    def _compute_for_multi_value_class(self, res, method=0):
1585        """AUC for multiclass classification problems"""
1586        numberOfClasses = len(res.class_values)
1587
1588        if res.number_of_iterations > 1:
1589            iterations = split_by_iterations(res)
1590            all_ite = res
1591        else:
1592            iterations = [res]
1593            all_ite = None
1594
1595        # by pairs
1596        sum_aucs = [0.] * res.number_of_learners
1597        usefulClassPairs = 0.
1598
1599        prob = None
1600        if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]:
1601            prob = class_probabilities_from_res(res)
1602
1603        if method in [self.ByWeightedPairs, self.ByPairs]:
1604            for classIndex1 in range(numberOfClasses):
1605                for classIndex2 in range(classIndex1):
1606                    subsum_aucs = self._compute_for_multiple_folds(
1607                        self._compute_one_class_against_another, iterations,
1608                        (classIndex1, classIndex2, all_ite,
1609                        res.number_of_iterations))
1610                    if subsum_aucs:
1611                        if method == self.ByWeightedPairs:
1612                            p_ij = prob[classIndex1] * prob[classIndex2]
1613                            subsum_aucs = [x * p_ij  for x in subsum_aucs]
1614                            usefulClassPairs += p_ij
1615                        else:
1616                            usefulClassPairs += 1
1617                        sum_aucs = map(add, sum_aucs, subsum_aucs)
1618        else:
1619            for classIndex in range(numberOfClasses):
1620                subsum_aucs = self._compute_for_multiple_folds(
1621                    self._compute_one_class_against_all,
1622                    iterations, (classIndex, all_ite,
1623                                 res.number_of_iterations))
1624                if subsum_aucs:
1625                    if method == self.WeightedOneAgainstAll:
1626                        p_i = prob[classIndex]
1627                        subsum_aucs = [x * p_i  for x in subsum_aucs]
1628                        usefulClassPairs += p_i
1629                    else:
1630                        usefulClassPairs += 1
1631                    sum_aucs = map(add, sum_aucs, subsum_aucs)
1632
1633        if usefulClassPairs > 0:
1634            sum_aucs = [x / usefulClassPairs for x in sum_aucs]
1635
1636        return sum_aucs
1637
1638    # computes the average AUC over folds using "auc_computer" (AUC_i or AUC_ij);
1639    # it returns the sum of what is returned by the computer,
1640    # unless at some fold the computer has to resort to computing
1641    # over all folds, or even that fails;
1642    # in these cases the result is returned immediately
1643    def _compute_for_multiple_folds(self, auc_computer, iterations,
1644                                 computer_args):
1645        """Compute the average AUC over folds using :obj:`auc_computer`."""
1646        subsum_aucs = [0.] * iterations[0].number_of_learners
1647        for ite in iterations:
1648            aucs, foldsUsed = auc_computer(*(ite,) + computer_args)
1649            if not aucs:
1650                import warnings
1651                warnings.warn("AUC cannot be computed (all instances belong to the same class).")
1652                return
1653            if not foldsUsed:
1654                self[:] = aucs
1655                return aucs
1656            subsum_aucs = map(add, subsum_aucs, aucs)
1657        return subsum_aucs
1658
1659    # Computes AUC;
1660    # in a multivalued class problem, AUC is computed as one against all;
1661    # results over folds are averaged;
1662    # if some folds contain examples from one class only, the folds are merged
1663    def _compute_for_single_class(self, res, class_index):
1664        if res.number_of_iterations > 1:
1665            return self._compute_for_multiple_folds(
1666                self._compute_one_class_against_all, split_by_iterations(res),
1667                (class_index, res, res.number_of_iterations))
1668        else:
1669            return self._compute_one_class_against_all(res, class_index)[0]
1670
1671    # Computes AUC for a pair of classes (as if there were no other classes)
1672    # results over folds are averages
1673    # if some folds have examples from one class only, the folds are merged
1674    def _compute_for_pair_of_classes(self, res, class_index1, class_index2):
1675        if res.number_of_iterations > 1:
1676            return self._compute_for_multiple_folds(
1677                self._compute_one_class_against_another,
1678                split_by_iterations(res),
1679                (class_index1, class_index2, res, res.number_of_iterations))
1680        else:
1681            return self._compute_one_class_against_another(res, class_index1,
1682                                                            class_index2)[0]
1683
1684    # computes AUC between class i and the other classes
1685    # (treating them as the same class)
1686    @deprecated_keywords({"classIndex": "class_index",
1687                          "divideByIfIte": "divide_by_if_ite"})
1688    def _compute_one_class_against_all(self, ite, class_index, all_ite=None,
1689                                      divide_by_if_ite=1.0):
1690        """Compute AUC between class i and all the other classes."""
1691        return self._compute_auc(corn.computeCDT, ite, all_ite,
1692            divide_by_if_ite, (class_index, not self.ignore_weights))
1693
1694
1695    # computes AUC between classes i and j as if there are no other classes
1696    def _compute_one_class_against_another(
1697        self, ite, class_index1, class_index2,
1698        all_ite=None, divide_by_if_ite=1.):
1699        """
1700        Compute AUC between classes i and j as if there are no other classes.
1701        """
1702        return self._compute_auc(corn.computeCDTPair, ite,
1703            all_ite, divide_by_if_ite,
1704            (class_index1, class_index2, not self.ignore_weights))
1705
1706    # computes AUC using a specified 'cdtComputer' function
1707    # It tries to compute AUCs from 'ite' (examples from a single iteration)
1708    # and, if C+D+T=0, from 'all_ite' (entire test set). In the former case,
1709    # the AUCs are divided by 'divide_by_if_ite'.
1710    # Additional flag is returned which is True in the former case,
1711    # or False in the latter.
1712    @deprecated_keywords({"cdtComputer": "cdt_computer",
1713                          "divideByIfIte": "divide_by_if_ite",
1714                          "computerArgs": "computer_args"})
1715    def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite,
1716                     computer_args):
1717        """
1718        Compute AUC using a :obj:`cdt_computer`.
1719        """
1720        cdts = cdt_computer(*(ite,) + computer_args)
1721        if not is_CDT_empty(cdts[0]):
1722            return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T) /
1723                    divide_by_if_ite for cdt in cdts], True
1724
1725        if all_ite:
1726            cdts = cdt_computer(*(all_ite,) + computer_args)
1727            if not is_CDT_empty(cdts[0]):
1728                return [(cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T)
1729                        for cdt in cdts], False
1730
1731        return False, False
1732
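## Sketch of typical use (data and learners as in the sketches above): score
## cross-validated results with the default pairwise-weighted averaging, or
## select another multiclass averaging through the `multiclass` argument.
#
#   res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
#   print AUC(res)                                # one score per learner
#   print AUC(res, multiclass=AUC.OneAgainstAll)  # unweighted one-vs-all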
1733class AUC_for_single_class(AUC):
1734    """
1735    Compute AUC where the class with the given class_index is singled
1736    out and all other classes are treated as a single class.
1737    """
1738    def __init__(self, test_results=None, class_index= -1, ignore_weights=False):
1739        if class_index < 0:
1740            if test_results and test_results.base_class >= 0:
1741                self.class_index = test_results.base_class
1742            else:
1743                self.class_index = 1
1744        else:
1745            self.class_index = class_index
1746
1747        super(AUC_for_single_class, self).__init__(test_results, ignore_weights=ignore_weights)
1748
1749    @replace_discrete_probabilities_with_list(method=True)
1750    def __call__(self, test_results):
1751        return self._compute_for_single_class(test_results, self.class_index)
1752
1753
1754class AUC_for_pair_of_classes(AUC):
1755    """
1756    Computes AUC between a pair of classes, ignoring instances from all
1757    other classes.
1758    """
1759    def __init__(self, test_results, class_index1, class_index2, ignore_weights=False):
1760        self.class_index1 = class_index1
1761        self.class_index2 = class_index2
1762
1763        super(AUC_for_pair_of_classes, self).__init__(test_results, ignore_weights=ignore_weights)
1764
1765    @replace_discrete_probabilities_with_list(method=True)
1766    def __call__(self, test_results):
1767        return self._compute_for_pair_of_classes(test_results, self.class_index1, self.class_index2)
1768
1769
1770class AUC_matrix(AUC):
1771    """
1772    Compute a (lower diagonal) matrix with AUCs for all pairs of classes.
1773    If there are empty classes, the corresponding elements in the matrix
1774    are -1.
1775    """
1776
1777    @replace_discrete_probabilities_with_list(method=True)
1778    def __call__(self, test_results):
1779        numberOfClasses = len(test_results.class_values)
1780        number_of_learners = test_results.number_of_learners
1781        if test_results.number_of_iterations > 1:
1782            iterations, all_ite = split_by_iterations(test_results), test_results
1783        else:
1784            iterations, all_ite = [test_results], None
1785        aucs = [[[] for _ in range(numberOfClasses)]
1786                for _ in range(number_of_learners)]
1787        for classIndex1 in range(numberOfClasses):
1788            for classIndex2 in range(classIndex1):
1789                pair_aucs = self._compute_for_multiple_folds(
1790                    self._compute_one_class_against_another, iterations,
1791                    (classIndex1, classIndex2, all_ite,
1792                     test_results.number_of_iterations))
1793                if pair_aucs:
1794                    for lrn in range(number_of_learners):
1795                        aucs[lrn][classIndex1].append(pair_aucs[lrn])
1796                else:
1797                    for lrn in range(number_of_learners):
1798                        aucs[lrn][classIndex1].append(-1)
1799        return aucs
1800
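## Sketch: for each learner the matrix is lower-triangular; element
## [lrn][c1][c2] (with c2 < c1) holds the AUC that separates class c1 from
## class c2, or -1 when one of the classes is empty.
#
#   m = AUC_matrix(res)
#   for c1 in range(1, len(res.class_values)):
#       for c2 in range(c1):
#           print c1, c2, m[0][c1][c2]   # pairwise AUCs of the first learner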
1801#Backward compatibility
1802@replace_use_weights
1803@replace_discrete_probabilities_with_list(method=False)
1804def AUC_binary(res, ignore_weights=False):
1805    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1806    auc[:] = auc._compute_for_binary_class(res)
1807    return auc
1808
1809@replace_use_weights
1810@replace_discrete_probabilities_with_list(method=False)
1811def AUC_multi(res, ignore_weights=False, method=0):
1812    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights,
1813        method=method)
1814    auc[:] = auc._compute_for_multi_value_class(res)
1815    return auc
1816
1817
1818@deprecated_keywords({"AUCcomputer": "auc_computer",
1819                      "computerArgs": "computer_args"})
1820def AUC_iterations(auc_computer, iterations, computer_args):
1821    auc = deprecated_function_name(AUC)()
1822    auc[:] = auc._compute_for_multiple_folds(auc_computer, iterations, computer_args)
1823    return auc
1824
1825def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
1826    auc = deprecated_function_name(AUC)()
1827    result = auc._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite,
1828                              computer_args)
1829    return result
1830
1831@replace_use_weights
1832def AUC_i(ite, class_index, ignore_weights=False, all_ite=None,
1833          divide_by_if_ite=1.):
1834    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1835    result = auc._compute_one_class_against_all(ite, class_index,
1836        all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1837    return result
1838
1839
1840@replace_use_weights
1841def AUC_ij(ite, class_index1, class_index2, ignore_weights=False,
1842           all_ite=None, divide_by_if_ite=1.):
1843    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1844    result = auc._compute_one_class_against_another(
1845        ite, class_index1, class_index2, all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
1846    return result
1847
1848AUC_single = replace_use_weights(
1849             deprecated_keywords({"classIndex": "class_index"})(
1850             deprecated_function_name(AUC_for_single_class)))
1851AUC_pair = replace_use_weights(
1852           deprecated_keywords({"classIndex1": "class_index1",
1853                                "classIndex2": "class_index2"})(
1854           deprecated_function_name(AUC_for_pair_of_classes)))
1855AUC_matrix = replace_use_weights(AUC_matrix)
1856
1857
1858@deprecated_keywords({"unweighted": "ignore_weights"})
1859@replace_discrete_probabilities_with_list(method=False)
1860def McNemar(res, ignore_weights=False, **argkw):
1861    """
1862    Compute a triangular matrix with the McNemar statistic for each pair of
1863    classifiers. The statistic is distributed by the chi-square distribution with
1864    one degree of freedom; the critical value for 5% significance is about 3.84.
1865    """
1866    nLearners = res.number_of_learners
1867    mcm = []
1868    for i in range(nLearners):
1869        mcm.append([0.] * res.number_of_learners)
1870
1871    if not res.weights or ignore_weights:
1872        for i in res.results:
1873            actual = i.actual_class
1874            classes = i.classes
1875            for l1 in range(nLearners):
1876                for l2 in range(l1, nLearners):
1877                    if classes[l1] == actual:
1878                        if classes[l2] != actual:
1879                            mcm[l1][l2] += 1
1880                    elif classes[l2] == actual:
1881                        mcm[l2][l1] += 1
1882    else:
1883        for i in res.results:
1884            actual = i.actual_class
1885            classes = i.classes
1886            for l1 in range(nLearners):
1887                for l2 in range(l1, nLearners):
1888                    if classes[l1] == actual:
1889                        if classes[l2] != actual:
1890                            mcm[l1][l2] += i.weight
1891                    elif classes[l2] == actual:
1892                        mcm[l2][l1] += i.weight
1893
1894    for l1 in range(nLearners):
1895        for l2 in range(l1, nLearners):
1896            su = mcm[l1][l2] + mcm[l2][l1]
1897            if su:
1898                mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1) ** 2 / su
1899            else:
1900                mcm[l2][l1] = 0
1901
1902    for l1 in range(nLearners):
1903        mcm[l1] = mcm[l1][:l1]
1904
1905    return mcm
1906
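## Sketch: after the final truncation, mcm[i][j] (with j < i) holds the
## statistic for learners i and j; values above the critical value of 3.84
## indicate a difference significant at the 5% level.
#
#   mcm = McNemar(res)
#   if mcm[1][0] > 3.84:
#       print "learners 0 and 1 differ significantly"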
1907@replace_discrete_probabilities_with_list(method=False)
1908def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False):
1909    """
1910    McNemar_of_two computes the McNemar statistic for a pair of classifiers,
1911    specified by the indices lrn1 and lrn2.
1912    """
1913    tf = ft = 0.
1914    if not res.weights or ignore_weights:
1915        for i in res.results:
1916            actual = i.actual_class
1917            if i.classes[lrn1] == actual:
1918                if i.classes[lrn2] != actual:
1919                    tf += 1.
1920            elif i.classes[lrn2] == actual:
1921                ft += 1.
1922    else:
1923        for i in res.results:
1924            actual = i.actual_class
1925            if i.classes[lrn1] == actual:
1926                if i.classes[lrn2] != actual:
1927                    tf += i.weight
1928            elif i.classes[lrn2] == actual:
1929                ft += i.weight
1930
1931    su = tf + ft
1932    if su:
1933        return (abs(tf - ft) - 1) ** 2 / su
1934    else:
1935        return 0
1936
1937@replace_discrete_probabilities_with_list(method=False)
1938def Friedman(res, stat=CA):
1939    """
1940    Compare classifiers with the Friedman test, treating folds as different examples.
1941    Returns F, p and average ranks.
1942    """
1943    res_split = split_by_iterations(res)
1944    res = [stat(r) for r in res_split]
1945
1946    N = len(res)
1947    k = len(res[0])
1948    sums = [0.] * k
1949    for r in res:
1950        ranks = [k - x + 1 for x in statc.rankdata(r)]
1951        if stat == Brier_score: # reverse ranks for Brier_score (lower better)
1952            ranks = [k + 1 - x for x in ranks]
1953        sums = map(add, ranks, sums)
1954
1955    T = sum(x * x for x in sums)
1956    sums = [x / N for x in sums]
1957
1958    F = 12. / (N * k * (k + 1)) * T - 3 * N * (k + 1)
1959
1960    return F, statc.chisqprob(F, k - 1), sums
1961
1962@replace_discrete_probabilities_with_list(method=False)
1963def Wilcoxon_pairs(res, avgranks, stat=CA):
1964    """
1965    Return a triangular matrix, where element[i][j] stores significance of
1966    difference between the i-th and the j-th classifier, as computed by the
1967    Wilcoxon test. The element is positive if the i-th is better than the j-th,
1968    negative if it is worse, and 1 if they are equal.
1969    Arguments are ExperimentResults, average ranks (as returned by Friedman)
1970    and, optionally, a statistics; greater values should mean better results.
1971    """
1972    res_split = split_by_iterations(res)
1973    res = [stat(r) for r in res_split]
1974
1975    k = len(res[0])
1976    bt = []
1977    for m1 in range(k):
1978        nl = []
1979        for m2 in range(m1 + 1, k):
1980            t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res])
1981            if avgranks[m1] < avgranks[m2]:
1982                nl.append(p)
1983            elif avgranks[m2] < avgranks[m1]:
1984                nl.append(-p)
1985            else:
1986                nl.append(1)
1987        bt.append(nl)
1988    return bt
1989
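## Sketch: the two tests are typically combined; Friedman yields the overall
## statistic, its p-value and the average ranks, which Wilcoxon_pairs then
## uses for pairwise post-hoc comparisons.
#
#   F, p, avgranks = Friedman(res, stat=CA)
#   if p < 0.05:
#       print Wilcoxon_pairs(res, avgranks, stat=CA)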
1990
1991@deprecated_keywords({"allResults": "all_results",
1992                      "noConfidence": "no_confidence"})
1993def plot_learning_curve_learners(file, all_results, proportions, learners,
1994                                 no_confidence=0):
1995    plot_learning_curve(file, all_results, proportions,
1996        [Orange.misc.getobjectname(learners[i], "Learner %i" % i)
1997        for i in range(len(learners))], no_confidence)
1998
1999
2000@deprecated_keywords({"allResults": "all_results",
2001                      "noConfidence": "no_confidence"})
2002def plot_learning_curve(file, all_results, proportions, legend,
2003                        no_confidence=0):
2004    import types
2005    fopened = 0
2006    if type(file) == types.StringType:
2007        file = open(file, "wt")
2008        fopened = 1
2009
2010    file.write("set yrange [0:1]\n")
2011    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
2012    file.write("set multiplot\n\n")
2013    CAs = [CA(x, report_se=True) for x in all_results]
2014
2015    file.write("plot \\\n")
2016    for i in range(len(legend) - 1):
2017        if not no_confidence:
2018            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i + 1))
2019        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
2020    if not no_confidence:
2021        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
2022    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))
2023
2024    for i in range(len(legend)):
2025        if not no_confidence:
2026            for p in range(len(proportions)):
2027                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96 * CAs[p][i][1]))
2028            file.write("e\n\n")
2029
2030        for p in range(len(proportions)):
2031            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
2032        file.write("e\n\n")
2033
2034    if fopened:
2035        file.close()
2036
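## Sketch (assumes Orange.evaluation.testing.learning_curve, which returns
## one ExperimentResults per training proportion): write a gnuplot script
## with inline data for two learners.
#
#   proportions = [0.1 * i for i in range(1, 11)]
#   all_results = Orange.evaluation.testing.learning_curve(
#       learners, data, proportions=proportions)
#   plot_learning_curve("curve.gnuplot", all_results, proportions,
#                       ["bayes", "tree"])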
2037
2038def print_single_ROC_curve_coordinates(file, curve):
2039    import types
2040    fopened = 0
2041    if type(file) == types.StringType:
2042        file = open(file, "wt")
2043        fopened = 1
2044
2045    for coord in curve:
2046        file.write("%5.3f\t%5.3f\n" % tuple(coord))
2047
2048    if fopened:
2049        file.close()
2050
2051
2052def plot_ROC_learners(file, curves, learners):
2053    plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))])
2054
2055def plot_ROC(file, curves, legend):
2056    import types
2057    fopened = 0
2058    if type(file) == types.StringType:
2059        file = open(file, "wt")
2060        fopened = 1
2061
2062    file.write("set yrange [0:1]\n")
2063    file.write("set xrange [0:1]\n")
2064    file.write("set multiplot\n\n")
2065
2066    file.write("plot \\\n")
2067    for leg in legend:
2068        file.write("'-' title '%s' with lines,\\\n" % leg)
2069    file.write("'-' title '' with lines\n")
2070
2071    for curve in curves:
2072        for coord in curve:
2073            file.write("%5.3f\t%5.3f\n" % tuple(coord))
2074        file.write("e\n\n")
2075
2076    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")
2077
2078    if fopened:
2079        file.close()
2080
2081
2082@deprecated_keywords({"allResults": "all_results"})
2083def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference= -1):
2084    plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference)
2085
2086
2087@deprecated_keywords({"allResults": "all_results"})
2088def plot_McNemar_curve(file, all_results, proportions, legend, reference= -1):
2089    if reference < 0:
2090        reference = len(legend) - 1
2091
2092    import types
2093    fopened = 0
2094    if type(file) == types.StringType:
2095        file = open(file, "wt")
2096        fopened = 1
2097
2098    #file.write("set yrange [0:1]\n")
2099    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
2100    file.write("set multiplot\n\n")
2101    file.write("plot \\\n")
2102    tmap = range(reference) + range(reference + 1, len(legend))
2103    for i in tmap[:-1]:
2104        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i + 1))
2105    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1] + 1))
2106    file.write("\n")
2107
2108    for i in tmap:
2109        for p in range(len(proportions)):
2110            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
2111        file.write("e\n\n")
2112
2113    if fopened:
2114        file.close()
2115
2116default_point_types = ("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}") + tuple([chr(x) for x in range(97, 122)])
2117default_line_types = ("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
2118
2119@deprecated_keywords({"allResults": "all_results"})
2120def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
2121    return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options)
2122
2123
2124@deprecated_keywords({"allResults": "all_results"})
2125def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
2126    import types
2127    fopened = 0
2128    if type(file) == types.StringType:
2129        file = open(file, "wt")
2130        fopened = 1
2131
2132    nexamples = len(all_results[0].results)
2133    CAs = [CA(x, report_se=True) for x in all_results]
2134
2135    graphsize = float(options.get("graphsize", 10.0)) #cm
2136    difprop = proportions[-1] - proportions[0]
2137    ntestexamples = nexamples * proportions[-1]
2138    xunit = graphsize / ntestexamples
2139
2140    yshift = float(options.get("yshift", -ntestexamples / 20.))
2141
2142    pointtypes = options.get("pointtypes", default_point_types)
2143    linetypes = options.get("linetypes", default_line_types)
2144
2145    if options.has_key("numberedx"):
2146        numberedx = options["numberedx"]
2147        if type(numberedx) == types.IntType:
2148            if numberedx > 0:
2149                numberedx = [nexamples * proportions[int(i / float(numberedx) * len(proportions))] for i in range(numberedx)] + [proportions[-1] * nexamples]
2150            elif numberedx < 0:
2151                numberedx = -numberedx
2152                newn = []
2153                for i in range(numberedx + 1):
2154                    wanted = proportions[0] + float(i) / numberedx * difprop
2155                    best = (10, 0)
2156                    for t in proportions:
2157                        td = abs(wanted - t)
2158                        if td < best[0]:
2159                            best = (td, t)
2160                    if not best[1] in newn:
2161                        newn.append(best[1])
2162                newn.sort()
2163                numberedx = [nexamples * x for x in newn]
2164        elif type(numberedx[0]) == types.FloatType:
2165            numberedx = [nexamples * x for x in numberedx]
2166    else:
2167        numberedx = [nexamples * x for x in proportions]
2168
2169    file.write("\\mbox{\n")
2170    file.write("\\beginpicture\n")
2171    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
2172    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
2173    file.write("\\axis bottom invisible\n") # label {#examples}
2174    file.write("      ticks short at %s /\n" % reduce(lambda x, y:x + " " + y, ["%i" % (x * nexamples + 0.5) for x in proportions]))
2175    if numberedx:
2176        file.write("            long numbered at %s /\n" % reduce(lambda x, y:x + y, ["%i " % int(x + 0.5) for x in numberedx]))
2177    file.write("  /\n")
2178    file.write("\\axis left invisible\n") # label {classification accuracy}
2179    file.write("      shiftedto y=%5.3f\n" % yshift)
2180    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
2181    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
2182    file.write("  /\n")
2183    if options.has_key("default"):
2184        file.write("\\setdashpattern<1pt, 1pt>\n")
2185        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))
2186
2187    for i in range(len(CAs[0])):
2188        coordinates = reduce(lambda x, y:x + " " + y, ["%i %5.3f" % (proportions[p] * nexamples, CAs[p][i][0]) for p in range(len(proportions))])
2189        if linetypes:
2190            file.write("%s\n" % linetypes[i])
2191            file.write("\\plot %s /\n" % coordinates)
2192        if pointtypes:
2193            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))
2194
2195    file.write("\\endpicture\n")
2196    file.write("}\n")
2197    if fopened:
2198        file.close()
2201
2202def legend_learners_to_PiCTeX(file, learners, **options):
2203  return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options)
2204
2205def legend_to_PiCTeX(file, legend, **options):
2206    import types
2207    fopened = 0
2208    if type(file) == types.StringType:
2209        file = open(file, "wt")
2210        fopened = 1
2211
2212    pointtypes = options.get("pointtypes", default_point_types)
2213    linetypes = options.get("linetypes", default_line_types)
2214
2215    file.write("\\mbox{\n")
2216    file.write("\\beginpicture\n")
2217    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
2218    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))
2219
2220    for i in range(len(legend)):
2221        if linetypes:
2222            file.write("%s\n" % linetypes[i])
2223            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i + 0.2))
2224        if pointtypes:
2225            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i + 0.1))
2226        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i + 0.25))
2227
2228    file.write("\\endpicture\n")
2229    file.write("}\n")
2230    if fopened:
2231        file.close()
2234
2235
2236def compute_friedman(avranks, N):
2237    """ Returns a tuple composed of (Friedman statistic, degrees of freedom)
2238    and (Iman statistic - F-distribution, degrees of freedom), given the
2239    average ranks and the number N of tested data sets.
2240    """
2241
2242    k = len(avranks)
2243
2244    def friedman(N, k, ranks):
2245        return 12 * N * (sum([rank ** 2.0 for rank in ranks]) - (k * (k + 1) * (k + 1) / 4.0)) / (k * (k + 1))
2246
2247    def iman(fried, N, k):
2248        return (N - 1) * fried / (N * (k - 1) - fried)
2249
2250    f = friedman(N, k, avranks)
2251    im = iman(f, N, k)
2252    fdistdof = (k - 1, (k - 1) * (N - 1))
2253
2254    return (f, k - 1), (im, fdistdof)
2255
2256def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
2257    """ Returns the critical difference for the Nemenyi or Bonferroni-Dunn test
2258    according to the given alpha (either alpha="0.05" or alpha="0.1"), the average
2259    ranks and the number of tested data sets N. Type can be either "nemenyi"
2260    for the two-tailed Nemenyi test or "bonferroni-dunn" for the Bonferroni-Dunn test.
2261    """
2262
2263    k = len(avranks)
2264
2265    d = {("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
2266                               2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
2267                               3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
2268                               3.426041, 3.458425, 3.488685, 3.517073, 3.543799]
2269        , ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
2270                               2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
2271                               2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
2272                               3.195743, 3.229723, 3.261461, 3.291224, 3.319233]
2273        , ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
2274                                        2.638, 2.690, 2.724, 2.773],
2275         ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
2276                                      2.394, 2.450, 2.498, 2.539]}
2277
2278    #can be computed in R as qtukey(0.95, n, Inf)/sqrt(2)
2279    #for (x in c(2:20)) print(qtukey(0.95, x, Inf)/sqrt(2))
2280
2281    q = d[(type, alpha)]
2282
2283    cd = q[k] * (k * (k + 1) / (6.0 * N)) ** 0.5
2284
2285    return cd
2286
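## Worked example: for k = 4 methods on N = 14 data sets at alpha = 0.05,
## q[4] = 2.569032, so CD = 2.569032 * sqrt(4 * 5 / (6.0 * 14)) ~= 1.25.
#
#   cd = compute_CD([3.143, 2.000, 2.893, 1.964], 14)   # ~1.25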
2287
2288def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
2289    """
2290    Draws a CD graph, which is used to display the differences in methods'
2291    performance.
2292    See Janez Demsar, Statistical Comparisons of Classifiers over
2293    Multiple Data Sets, Journal of Machine Learning Research, 7(Jan):1--30, 2006.
2294
2295    Needs matplotlib to work.
2296
2297    :param filename: Output file name (with extension). Formats supported
2298                     by matplotlib can be used.
2299    :param avranks: List of average methods' ranks.
2300    :param names: List of methods' names.
2301
2302    :param cd: Critical difference. Used for marking methods whose
2303               difference is not statistically significant.
2304    :param lowv: The lowest shown rank, if None, use 1.
2305    :param highv: The highest shown rank, if None, use len(avranks).
2306    :param width: Width of the drawn figure in inches, default 6 in.
2307    :param textspace: Space on figure sides left for the description
2308                      of methods, default 1 in.
2309    :param reverse:  If True, the lowest rank is on the right. Default\: False.
2310    :param cdmethod: None by default. It can be an index of an element in
2311                     avranks or names which specifies the method that should
2312                     be marked with an interval.
2313    """
2314    if not HAS_MATPLOTLIB:
2315        import sys
2316        print >> sys.stderr, "Function requires matplotlib. Please install it."
2317        return
2318
2319    width = float(width)
2320    textspace = float(textspace)
2321
2322    def nth(l, n):
2323        """
2324        Returns the n-th element of each sublist in a list of lists.
2325        """
2326        n = lloc(l, n)
2327        return [ a[n] for a in l ]
2328
2329    def lloc(l, n):
2330        """
2331        List location in list of list structure.
2332        Enable the use of negative locations:
2333        -1 is the last element, -2 second last...
2334        """
2335        if n < 0:
2336            return len(l[0]) + n
2337        else:
2338            return n
2339
2340    def mxrange(lr):
2341        """
2342        Multiple xranges. Can be used to traverse matrices.
2343        This function is slow because the number of ranges
2344        is not known in advance.
2345
2346        >>> list(mxrange([3,5]))
2347        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
2348
2349        >>> list(mxrange([[3,5,1],[9,0,-3]]))
2350        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]
2351
2352        """
2353        if not len(lr):
2354            yield ()
2355        else:
2356            #it can work with single numbers
2357            index = lr[0]
2358            if isinstance(index, int):
2359                index = [ index ]
2360            for a in range(*index):
2361                for b in mxrange(lr[1:]):
2362                    yield tuple([a] + list(b))
2363
2364
2365    from matplotlib.figure import Figure
2366    from matplotlib.backends.backend_agg import FigureCanvasAgg
2367
2368    def print_figure(fig, *args, **kwargs):
2369        canvas = FigureCanvasAgg(fig)
2370        canvas.print_figure(*args, **kwargs)
2371
2372    sums = avranks
2373
2374    tempsort = sorted([ (a, i) for i, a in  enumerate(sums) ], reverse=reverse)
2375    ssums = nth(tempsort, 0)
2376    sortidx = nth(tempsort, 1)
2377    nnames = [ names[x] for x in sortidx ]
2378
2379    if lowv is None:
2380        lowv = min(1, int(math.floor(min(ssums))))
2381    if highv is None:
2382        highv = max(len(avranks), int(math.ceil(max(ssums))))
2383
2384    cline = 0.4
2385
2386    k = len(sums)
2387
2388    lines = None
2389
2390    linesblank = 0
2391    scalewidth = width - 2 * textspace
2392
2393    def rankpos(rank):
2394        if not reverse:
2395            a = rank - lowv
2396        else:
2397            a = highv - rank
2398        return textspace + scalewidth / (highv - lowv) * a
2399
2400    distanceh = 0.25
2401
2402    if cd and cdmethod is None:
2403
2404        #get pairs of non significant methods
2405
2406        def get_lines(sums, hsd):
2407
2408            #get all pairs
2409            lsums = len(sums)
2410            allpairs = [ (i, j) for i, j in mxrange([[lsums], [lsums]]) if j > i ]
2411            #remove not significant
2412            notSig = [ (i, j) for i, j in allpairs if abs(sums[i] - sums[j]) <= hsd ]
2413            #keep only longest
2414
2415            def no_longer((i, j), notSig):
2416                for i1, j1 in notSig:
2417                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
2418                        return False
2419                return True
2420
2421            longest = [ (i, j) for i, j in notSig if no_longer((i, j), notSig) ]
2422
2423            return longest
2424
2425        lines = get_lines(ssums, cd)
2426        linesblank = 0.2 + 0.2 + (len(lines) - 1) * 0.1
2427
2428        #add scale
2429        distanceh = 0.25
2430        cline += distanceh
2431
2432    #calculate the height needed for the image
2433    minnotsignificant = max(2 * 0.2, linesblank)
2434    height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant
2435
2436    fig = Figure(figsize=(width, height))
2437    ax = fig.add_axes([0, 0, 1, 1]) #reverse y axis
2438    ax.set_axis_off()
2439
2440    hf = 1. / height # height factor
2441    wf = 1. / width
2442
2443    def hfl(l):
2444        return [ a * hf for a in l ]
2445
2446    def wfl(l):
2447        return [ a * wf for a in l ]
2448
2449
2450    # Upper left corner is (0,0).
2451
2452    ax.plot([0, 1], [0, 1], c="w")
2453    ax.set_xlim(0, 1)
2454    ax.set_ylim(1, 0)
2455
2456    def line(l, color='k', **kwargs):
2457        """
2458        Input is a list of pairs of points.
2459        """
2460        ax.plot(wfl(nth(l, 0)), hfl(nth(l, 1)), color=color, **kwargs)
2461
2462    def text(x, y, s, *args, **kwargs):
2463        ax.text(wf * x, hf * y, s, *args, **kwargs)
2464
2465    line([(textspace, cline), (width - textspace, cline)], linewidth=0.7)
2466
2467    bigtick = 0.1
2468    smalltick = 0.05
2469
2470
2471    import numpy
2472    tick = None
2473    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
2474        tick = smalltick
2475        if a == int(a): tick = bigtick
2476        line([(rankpos(a), cline - tick / 2), (rankpos(a), cline)], linewidth=0.7)
2477
2478    for a in range(lowv, highv + 1):
2479        text(rankpos(a), cline - tick / 2 - 0.05, str(a), ha="center", va="bottom")
2480
2481    k = len(ssums)
2482
2483    for i in range((k + 1) / 2):
2484        chei = cline + minnotsignificant + i * 0.2
2485        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace - 0.1, chei)], linewidth=0.7)
2486        text(textspace - 0.2, chei, nnames[i], ha="right", va="center")
2487
2488    for i in range((k + 1) / 2, k):
2489        chei = cline + minnotsignificant + (k - i - 1) * 0.2
2490        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace + scalewidth + 0.1, chei)], linewidth=0.7)
2491        text(textspace + scalewidth + 0.2, chei, nnames[i], ha="left", va="center")
2492
2493    if cd and cdmethod is None:
2494
2495        #upper scale
2496        if not reverse:
2497            begin, end = rankpos(lowv), rankpos(lowv + cd)
2498        else:
2499            begin, end = rankpos(highv), rankpos(highv - cd)
2500
2501        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
2502        line([(begin, distanceh + bigtick / 2), (begin, distanceh - bigtick / 2)], linewidth=0.7)
2503        line([(end, distanceh + bigtick / 2), (end, distanceh - bigtick / 2)], linewidth=0.7)
2504        text((begin + end) / 2, distanceh - 0.05, "CD", ha="center", va="bottom")
2505
2506        #non significance lines   
2507        def draw_lines(lines, side=0.05, height=0.1):
2508            start = cline + 0.2
2509            for l, r in lines:
2510                line([(rankpos(ssums[l]) - side, start), (rankpos(ssums[r]) + side, start)], linewidth=2.5)
2511                start += height
2512
2513        draw_lines(lines)
2514
2515    elif cd:
2516        begin = rankpos(avranks[cdmethod] - cd)
2517        end = rankpos(avranks[cdmethod] + cd)
2518        line([(begin, cline), (end, cline)], linewidth=2.5)
2519        line([(begin, cline + bigtick / 2), (begin, cline - bigtick / 2)], linewidth=2.5)
2520        line([(end, cline + bigtick / 2), (end, cline - bigtick / 2)], linewidth=2.5)
2521
2522    print_figure(fig, filename, **kwargs)
2523
2524def mlc_hamming_loss(res):
2525    """
2526    Schapire and Singer (2000) presented Hamming Loss, which is defined as:
2527
2528    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\vartriangle Z_i|}{|L|}`
2529    """
2530    losses = [0] * res.number_of_learners
2531    label_num = len(res.labels)
2532    example_num = gettotsize(res)
2533
2534    for e in res.results:
2535        aclass = e.actual_class
2536        for i, labels in enumerate(e.classes):
2537            labels = map(int, labels)
2538            if len(labels) <> len(aclass):
2539                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2540            for j in range(label_num):
2541                if labels[j] != aclass[j]:
2542                    losses[i] += 1
2543
2544    return [float(x) / (label_num * example_num) for x in losses]
2545
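## Worked example: with |L| = 3 labels, a prediction [1, 0, 1] against the
## actual labels [1, 1, 0] disagrees on two of the three labels and thus
## contributes 2/3 for that instance; the returned loss is this contribution
## averaged over all instances, computed separately for each learner.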
2546def mlc_accuracy(res, forgiveness_rate=1.0):
2547    """
2548    Godbole & Sarawagi (2004) use the metrics accuracy, precision and recall as follows:
2549     
2550    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`
2551   
2552    Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`,
2553    called forgiveness rate:
2554   
2555    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`
2556    """
2557    accuracies = [0.0] * res.number_of_learners
2558    example_num = gettotsize(res)
2559
2560    for e in res.results:
2561        aclass = e.actual_class
2562        for i, labels in enumerate(e.classes):
2563            labels = map(int, labels)
2564            if len(labels) <> len(aclass):
2565                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2566
2567            intersection = 0.0
2568            union = 0.0
2569            for pred, real in zip(labels, aclass):
2570                if pred and real:
2571                    intersection += 1
2572                if pred or real:
2573                    union += 1
2574
2575            if union:
2576                accuracies[i] += intersection / union
2577
2578    return [math.pow(x / example_num, forgiveness_rate) for x in accuracies]
2579
2580def mlc_precision(res):
2581    """
2582    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`
2583    """
2584    precisions = [0.0] * res.number_of_learners
2585    example_num = gettotsize(res)
2586
2587    for e in res.results:
2588        aclass = e.actual_class
2589        for i, labels in enumerate(e.classes):
2590            labels = map(int, labels)
2591            if len(labels) <> len(aclass):
2592                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2593
2594            intersection = 0.0
2595            predicted = 0.0
2596            for pred, real in zip(labels, aclass):
2597                if pred and real:
2598                    intersection += 1
2599                if pred:
2600                    predicted += 1
2601            if predicted:
2602                precisions[i] += intersection / predicted
2603
2604    return [x / example_num for x in precisions]
2605
2606def mlc_recall(res):
2607    """
2608    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`
2609    """
2610    recalls = [0.0] * res.number_of_learners
2611    example_num = gettotsize(res)
2612
2613    for e in res.results:
2614        aclass = e.actual_class
2615        for i, labels in enumerate(e.classes):
2616            labels = map(int, labels)
2617            if len(labels) <> len(aclass):
2618                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2619
2620            intersection = 0.0
2621            actual = 0.0
2622            for pred, real in zip(labels, aclass):
2623                if pred and real:
2624                    intersection += 1
2625                if real:
2626                    actual += 1
2627            if actual:
2628                recalls[i] += intersection / actual
2629
2630    return [x / example_num for x in recalls]
2631
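## Worked example for a single instance: predicted Z = [1, 0, 1] and actual
## Y = [1, 1, 0] give |Y n Z| = 1, |Y u Z| = 3, |Z| = 2 and |Y| = 2, hence
## an accuracy of 1/3, a precision of 1/2 and a recall of 1/2 for that
## instance; each score is then averaged over the whole data set.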
2632#def mlc_ranking_loss(res):
2633#    pass
2634#
2635#def mlc_average_precision(res):
2636#    pass
2637#
2638#def mlc_hierarchical_loss(res):
2639#    pass
2640
2641
2642def mt_average_score(res, score, weights=None):
2643    """
2644    Compute individual scores for each target and return the (weighted) average.
2645
2646    One method can be used to compute scores for all targets or a list of
2647    scoring methods can be passed to use different methods for different
2648    targets. In the latter case, care has to be taken if the ranges of scoring
2649    methods differ.
2650    For example, when the first target is scored from -1 to 1 (1 best) and the
2651    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
2652    to a span of 1, and invert the second so that higher scores are better.
2653
2654    :param score: Single-target scoring method or a list of such methods
2655                  (one for each target).
2656    :param weights: List of real weights, one for each target,
2657                    for a weighted average.
2658
2659    """
2660    if not len(res.results):
2661        raise ValueError, "Cannot compute the score: no examples."
2662    if res.number_of_learners < 1:
2663        return []
2664    n_classes = len(res.results[0].actual_class)
2665    if weights is None:
2666        weights = [1.] * n_classes
2667    if not hasattr(score, '__len__'):
2668        score = [score] * n_classes
2669    elif len(score) != n_classes:
2670        raise ValueError, "Number of scoring methods and targets do not match."
2671    # save original classes
2672    clsss = [te.classes for te in res.results]
2673    aclsss = [te.actual_class for te in res.results]
2674    # compute single target scores
2675    single_scores = []
2676    for i in range(n_classes):
2677        for te, clss, aclss in zip(res.results, clsss, aclsss):
2678            te.classes = [cls[i] for cls in clss]
2679            te.actual_class = aclss[i]
2680        single_scores.append(score[i](res))
2681    # restore original classes
2682    for te, clss, aclss in zip(res.results, clsss, aclsss):
2683        te.classes = clss
2684        te.actual_class = aclss
2685    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
2686        for scores in zip(*single_scores)]
2687
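## Sketch (RMSE and MSE are assumed to be the regression scores defined
## earlier in this module): average the per-target scores, weighting the
## first target twice as much as the second.
#
#   print mt_average_score(res, RMSE, weights=[2, 1])
#   print mt_average_score(res, [RMSE, MSE])  # one scoring method per target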
2688def mt_flattened_score(res, score):
2689    """
2690    Flatten (concatenate into a single list) the predictions of multiple
2691    targets and compute a single-target score.
2692   
2693    :param score: Single-target scoring method.
2694    """
2695    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
2696        res.classifier_names, class_values=res.class_values,
2697        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
2698        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
2699    for te in res.results:
2700        for i, ac in enumerate(te.actual_class):
2701            te2 = Orange.evaluation.testing.TestedExample(
2702                iteration_number=te.iteration_number, actual_class=ac)
2703            for c, p in zip(te.classes, te.probabilities):
2704                te2.add_result(c[i], p[i])
2705            res2.results.append(te2)
2706    return score(res2)
2707
2708def mt_global_accuracy(res):
2709    """
2710    :math:`Acc = \\frac{1}{N}\\sum_{i=1}^{N}\\delta(\\mathbf{c_{i}'},\\mathbf{c_{i}}) \\newline`
2711   
2712    :math:`\\delta (\\mathbf{c_{i}'},\\mathbf{c_{i}} )=\\left\\{\\begin{matrix}1:\\mathbf{c_{i}'}=\\mathbf{c_{i}}\\\\ 0: otherwise\\end{matrix}\\right.`
2713    """
2714    results = []
2715    for l in xrange(res.number_of_learners):
2716        n_results = len(res.results)
2717        n_correct = 0.
2718
2719        for r in res.results:
2720            if list(r.classes[l]) == r.actual_class:
2721                n_correct+=1
2722
2723        results.append(n_correct/n_results)
2724    return results
2725
2726
2727def mt_mean_accuracy(res):
2728    """
2729    :math:`\\overline{Acc_{d}} = \\frac{1}{d}\\sum_{j=1}^{d}Acc_{j} = \\frac{1}{d}\\sum_{j=1}^{d} \\frac{1}{N}\\sum_{i=1}^{N}\\delta(c_{ij}',c_{ij} ) \\newline`
2730   
2731    :math:`\\delta (c_{ij}',c_{ij} )=\\left\\{\\begin{matrix}1:c_{ij}'=c_{ij}\\\\ 0: otherwise\\end{matrix}\\right.`
2732    """
2733    results = []
2734    for l in xrange(res.number_of_learners):
2735        n_classes = len(res.results[0].actual_class)
2736        n_results = len(res.results)
2737        n_correct = 0.
2738
2739        for r in res.results:
2740            for i in xrange(n_classes):
2741                if r.classes[l][i] == r.actual_class[i]:
2742                    n_correct+=1
2743        results.append(n_correct/n_classes/n_results)
2744    return results
2745
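## Worked example: with two instances and two targets, predictions
## ([1, 0], [1, 1]) against actual classes ([1, 1], [1, 1]) give a global
## accuracy of 1/2 (only the second instance matches on all targets) but a
## mean per-target accuracy of 3/4 (three of the four labels are correct).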
2746
2747
2748################################################################################
2749if __name__ == "__main__":
2750    avranks = [3.143, 2.000, 2.893, 1.964]
2751    names = ["first", "second", "third", "fourth"]
2752    cd = compute_CD(avranks, 14)
2753    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2754    print cd
2755
2756    print compute_friedman(avranks, 14)
2757
2758    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)