# source:orange/Orange/evaluation/scoring.py@10000:d65550cf0356

Revision 10000:d65550cf0356, 88.0 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Refactored CA

Line
1import operator, math
3import numpy
4
5import Orange
6from Orange import statc, corn
7from Orange.misc import deprecated_keywords
8from Orange.evaluation import testing
9
10#### Private stuff
11
def log2(x):
    """Return the base-2 logarithm of x."""
    # math.log(x, base) divides log(x) by log(base), matching the
    # original math.log(x)/math.log(2) bit-for-bit.
    return math.log(x, 2)
15
def check_non_zero(x):
    """Raise ValueError when x equals 0.0; otherwise do nothing."""
    if x == 0.0:
        raise ValueError("Cannot compute the score: no examples or sum of weights is 0.0.")
20
def gettotweight(res):
    """Return the sum of instance weights in res.

    :raises ValueError: when the sum of weights is 0.0.
    """
    totweight = sum(tex.weight for tex in res.results)
    if totweight == 0.0:
        raise ValueError("Cannot compute the score: sum of weights is 0.0.")
    return totweight
27
def gettotsize(res):
    """Return the number of tested instances in res.

    :raises ValueError: when there are no results.
    """
    if not res.results:
        raise ValueError("Cannot compute the score: no examples.")
    return len(res.results)
34
35
def split_by_iterations(res):
    """ Splits ExperimentResults of multiple iteratation test into a list
    of ExperimentResults, one for each iteration.

    When there are fewer than two iterations the input is returned as a
    one-element list.
    """
    if res.number_of_iterations < 2:
        return [res]

    # NOTE(review): the statement constructing the per-iteration result
    # holders was missing/garbled in this revision of the file; it is
    # reconstructed here from the upstream module.  TODO confirm the
    # ExperimentResults constructor arguments (snake_case vs camelCase
    # attribute names are mixed in this revision) against
    # Orange.evaluation.testing in this tree.
    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifier_names, res.class_values,
                weights=res.weights, base_class=res.base_class,
                classifiers=res.classifiers,
                test_type=res.test_type, labels=res.labels)
            for _ in range(res.number_of_iterations)]
    # Distribute each tested example to the results of its own iteration.
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress
48
def split_by_classifiers(res):
    """ Split an instance of :obj:ExperimentResults into a list of
    :obj:ExperimentResults, one for each classifier.
    """
    split_res = []
    for i in range(len(res.classifierNames)):
        # Build a one-classifier shell carrying the shared experiment
        # metadata.  NOTE(review): this revision mixes camelCase
        # (classifierNames, numberOfIterations, baseClass, iterationNumber,
        # actualClass) and snake_case (test_type, labels, weights) attribute
        # names -- verify both spellings exist on ExperimentResults /
        # TestedExample in this tree.
        r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations,
                    [res.classifierNames[i]], res.classValues,
                    weights=res.weights, baseClass=res.baseClass,
                    classifiers=[res.classifiers[i]] if res.classifiers else [],
                    test_type = res.test_type, labels = res.labels)
        r.results = []
        for te in res.results:
            # Copy the tested example, keeping only classifier i's
            # prediction and probability estimate (n=1 learner).
            r.results.append(Orange.evaluation.testing.TestedExample(te.iterationNumber,
                                te.actualClass, n=1, weight=te.weight))
            r.results[-1].classes = [te.classes[i]]
            r.results[-1].probabilities = [te.probabilities[i]]
        split_res.append(r)
    return split_res
68
69
def class_probabilities_from_res(res, **argkw):
    """Return the distribution of actual classes in res, normalized to sum 1."""
    counts = [0.0] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        # Unweighted: each example contributes 1.
        for tex in res.results:
            counts[int(tex.actual_class)] += 1.0
        total = gettotsize(res)
    else:
        # Weighted: each example contributes its weight.
        total = 0.0
        for tex in res.results:
            counts[tex.actual_class] += tex.weight
            total += tex.weight
        check_non_zero(total)
    return [count / total for count in counts]
84
85
@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    """Average per-fold accumulated statistics into one score per learner.

    :param stats: accumulated statistics, indexed [fold][learner] when
        iteration_is_outer is true, else [learner][fold].
    :param fold_n: per-fold example count (or weight sum) used as the
        normalizer; folds with fold_n <= 0 are dropped.
    :param report_se: when true, return (mean, standard error) pairs.
    :param iteration_is_outer: layout of stats (see above).
    :rtype: list with one entry per learner.

    NOTE(review): the tuple-unpacking lambdas below (``lambda (x, fN): ...``)
    are Python 2-only syntax.
    """
    # remove empty folds, turn the matrix so that learner is outer
    if iteration_is_outer:
        if not stats:
            raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."
        number_of_learners = len(stats[0])
        # Drop folds with no (weighted) examples, then normalize each
        # learner's fold statistic by that fold's size.
        stats = filter(lambda (x, fN): fN>0.0, zip(stats,fold_n))
        stats = [ [x[lrn]/fN for x, fN in stats] for lrn in range(number_of_learners)]
    else:
        stats = [ [x/Fn for x, Fn in filter(lambda (x, Fn): Fn > 0.0, zip(lrnD, fold_n))] for lrnD in stats]

    if not stats:
        raise ValueError, "Cannot compute the score: no classifiers"
    if not stats[0]:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]
110
def ME(res, **argkw):
    """Return the mean absolute error of each learner's predictions."""
    errors = [0.0] * res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            actual = float(tex.actual_class)
            errors = [err + abs(float(cls) - actual)
                      for err, cls in zip(errors, tex.classes)]
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            actual = float(tex.actual_class)
            tw = tex.weight
            errors = [err + tw * abs(float(cls) - actual)
                      for err, cls in zip(errors, tex.classes)]
        totweight = gettotweight(res)

    return [err / totweight for err in errors]
126
127MAE = ME
128
129
class ConfusionMatrix:
    """
    Classification result summary

    .. attribute:: TP

        True Positive predictions

    .. attribute:: TN

        True Negative predictions

    .. attribute:: FP

        False Positive predictions

    .. attribute:: FN

        False Negative predictions
    """
    def __init__(self):
        # Counters accumulate (possibly fractional) instance weights,
        # hence they start as floats.
        self.TP = self.FN = self.FP = self.TN = 0.0

    @deprecated_keywords({"predictedPositive": "predicted_positive",
                          "isPositive": "is_positive"})
    def addTFPosNeg(self, predicted_positive, is_positive, weight = 1.0):
        """
        Update confusion matrix with result of a single classification

        :param predicted_positive: positive class value was predicted
        :param is_positive: correct class value is positive
        :param weight: weight of the selected instance
        """
        # Branch first on the true class, then on the prediction.
        if is_positive:
            if predicted_positive:
                self.TP += weight
            else:
                self.FN += weight
        else:
            if predicted_positive:
                self.FP += weight
            else:
                self.TN += weight
173
174
175#########################################################################
176# PERFORMANCE MEASURES:
177# Scores for evaluation of numeric predictions
178
def check_argkw(dct, lst):
    """check_argkw(dct, lst) -> returns true if any items have non-zero value in dct"""
    # Or-fold over the looked-up values; like the original reduce, this
    # returns the first truthy value (or the last value), not a bool.
    flags = [dct.get(key, 0) for key in lst]
    return reduce(lambda acc, flag: acc or flag, flags)
182
def regression_error(res, **argkw):
    """regression_error(res) -> regression error (default: MSE)

    Keyword flags select the variant: ``abs`` (absolute instead of squared
    error), ``norm-abs``/``norm-sqr`` (normalize by deviation from the mean
    actual class), ``R2`` (1 - normalized score), ``sqrt`` (square root of
    the score), ``SE`` (with multiple iterations, return (mean, std) pairs
    over folds).
    """
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # computes the scores for each iteration, then averages
        scores = [[0.0] * res.number_of_iterations for i in range(res.number_of_learners)]
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.0] * res.number_of_iterations

        nIter = [0]*res.number_of_iterations       # counts examples in each iteration
        a = [0]*res.number_of_iterations           # average class in each iteration
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i]/nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # iterate accross test cases
            for tex in res.results:
                ai = float(tex.actual_class)
                # NOTE(review): nIter was already filled by the loop above,
                # so this second increment doubles every fold count and
                # halves the fold scores in the non-normalized branch below
                # -- looks like a bug, verify against expected output.
                nIter[tex.iteration_number] += 1

                # compute normalization, if required
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number])**2

                # iterate accross results of different regressors
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai)**2
        else: # unweighted<>0
            raise NotImplementedError, "weighted error scores with SE not implemented yet"

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x/n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x/ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else: # single iteration (testing on a single test set)
        scores = [0.0] * res.number_of_learners
        norm = 0.0

        if argkw.get("unweighted", 0) or not res.weights:
            # Mean of actual classes, used by the norm-* variants.
            a = sum([tex.actual_class for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                if argkw.get("abs", 0):
                    scores = map(lambda res, cls, ac = float(tex.actual_class):
                                 res + abs(float(cls) - ac), scores, tex.classes)
                else:
                    scores = map(lambda res, cls, ac = float(tex.actual_class):
                                 res + (float(cls) - ac)**2, scores, tex.classes)

                if argkw.get("norm-abs", 0):
                    norm += abs(tex.actual_class - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (tex.actual_class - a)**2
            totweight = gettotsize(res)
        else:
            # UNFINISHED
            # NOTE(review): this branch accumulates into MSEs, which is never
            # initialized here (NameError at runtime), and scores stays all
            # zeros -- the weighted single-iteration path is broken as-is.
            for tex in res.results:
                MSEs = map(lambda res, cls, ac = float(tex.actual_class),
                           tw = tex.weight:
                           res + tw * (float(cls) - ac)**2, MSEs, tex.classes)
            totweight = gettotweight(res)

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s/norm for s in scores]
        else: # normalize by number of instances (or sum of weights)
            scores = [s/totweight for s in scores]

        if argkw.get("R2"):
            scores = [1.0 - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores
272
def MSE(res, **argkw):
    """Return the mean-squared error for each learner in res."""
    return regression_error(res, **argkw)
276
def RMSE(res, **argkw):
    """Return the root mean-squared error for each learner in res."""
    if "sqrt" not in argkw:
        argkw["sqrt"] = True
    return regression_error(res, **argkw)
281
def MAE(res, **argkw):
    """Return the mean absolute error for each learner in res."""
    if "abs" not in argkw:
        argkw["abs"] = True
    return regression_error(res, **argkw)
286
def RSE(res, **argkw):
    """Return the relative squared error for each learner in res."""
    if "norm-sqr" not in argkw:
        argkw["norm-sqr"] = True
    return regression_error(res, **argkw)
291
def RRSE(res, **argkw):
    """Return the root relative squared error for each learner in res."""
    for flag in ("norm-sqr", "sqrt"):
        if flag not in argkw:
            argkw[flag] = True
    return regression_error(res, **argkw)
297
def RAE(res, **argkw):
    """Return the relative absolute error for each learner in res."""
    for flag in ("abs", "norm-abs"):
        if flag not in argkw:
            argkw[flag] = True
    return regression_error(res, **argkw)
303
def R2(res, **argkw):
    """Return the coefficient of determination (R-squared) per learner."""
    for flag in ("norm-sqr", "R2"):
        if flag not in argkw:
            argkw[flag] = True
    return regression_error(res, **argkw)
309
def MSE_old(res, **argkw):
    """MSE(res) -> mean-squared error (deprecated; use :obj:MSE).

    With ``SE`` set and multiple iterations, returns (mean, std) pairs over
    folds per learner; otherwise one score per learner.  ``sqrt`` turns the
    result into the root mean-squared error.
    """
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # Per-fold accumulation: MSEs[learner][fold], nIter[fold] examples.
        MSEs = [[0.0] * res.number_of_iterations for i in range(res.number_of_learners)]
        nIter = [0] * res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac) ** 2
        else:
            raise ValueError("weighted RMSE with SE not implemented yet")
        MSEs = [[x / ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.0] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                MSEs = [x + (float(cls) - ac) ** 2
                        for x, cls in zip(MSEs, tex.classes)]
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                ac = float(tex.actual_class)
                tw = tex.weight
                MSEs = [x + tw * (float(cls) - ac) ** 2
                        for x, cls in zip(MSEs, tex.classes)]
            totweight = gettotweight(res)

        # BUG FIX: normalize by the total weight *before* taking the square
        # root.  The original applied sqrt to the raw sum of squared errors
        # and divided afterwards, yielding sqrt(SSE)/N instead of
        # sqrt(SSE/N); the SE branch above already uses the correct order.
        MSEs = [x / totweight for x in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return MSEs
344
def RMSE_old(res, **argkw):
    """RMSE(res) -> root mean-squared error (deprecated; use :obj:RMSE)."""
    if "sqrt" not in argkw:
        argkw["sqrt"] = 1
    return MSE_old(res, **argkw)
349
350#########################################################################
351# PERFORMANCE MEASURES:
352# Scores for evaluation of classifiers
353
class CAClass(object):
    """Callable computing classification accuracy from several kinds of
    evaluation results (see :obj:__call__)."""

    # Input-type tags returned by get_input_type.
    CONFUSION_MATRIX = 0
    CONFUSION_MATRIX_LIST = 1
    CLASSIFICATION = 2
    CROSS_VALIDATION = 3

    @deprecated_keywords({"reportSE": "report_se"})
    def __call__(self, test_results, report_se = False, unweighted=False):
        """Return percentage of matches between predicted and actual class.

        :param test_results: :obj:~Orange.evaluation.testing.ExperimentResults
                             or :obj:ConfusionMatrix.
        :param report_se: include standard error in result.
        :rtype: list of scores, one for each learner.

        Standard errors are estimated from deviation of CAs across folds (if
        test_results were produced by cross_validation) or approximated under
        the assumption of normal distribution otherwise.
        """
        input_type = self.get_input_type(test_results)
        if input_type == self.CONFUSION_MATRIX:
            return self.from_confusion_matrix(test_results, report_se)
        elif input_type == self.CONFUSION_MATRIX_LIST:
            return self.from_confusion_matrix_list(test_results, report_se)
        elif input_type == self.CLASSIFICATION:
            return self.from_classification_results(
                                        test_results, report_se, unweighted)
        elif input_type == self.CROSS_VALIDATION:
            return self.from_crossvalidation_results(
                                        test_results, report_se, unweighted)

    def from_confusion_matrix(self, cm, report_se):
        """CA of a single binary confusion matrix: (TP+TN) / all."""
        all_predictions = cm.TP+cm.FN+cm.FP+cm.TN
        check_non_zero(all_predictions)
        ca = (cm.TP+cm.TN)/all_predictions

        if report_se:
            # NOTE(review): ca*(1-ca)/sqrt(n) -- same approximation as in
            # from_classification_results; confirm the intended SE formula.
            return ca, ca*(1-ca)/math.sqrt(all_predictions)
        else:
            return ca

    def from_confusion_matrix_list(self, confusion_matrices, report_se):
        """CA for each confusion matrix in the list."""
        # BUG FIX: the original used map(self.from_confusion_matrix, ...),
        # which dropped the mandatory report_se argument and raised
        # TypeError for every list input.
        return [self.from_confusion_matrix(cm, report_se)
                for cm in confusion_matrices]

    def from_classification_results(self, test_results, report_se, unweighted):
        """CA per learner from single-iteration test results."""
        CAs = [0.0]*test_results.number_of_learners
        totweight = 0.
        for tex in test_results.results:
            w = 1. if unweighted else tex.weight
            # (cls == actual) and w adds w for a match, 0 otherwise.
            CAs = map(lambda res, cls: res+(cls==tex.actual_class and w), CAs, tex.classes)
            totweight += w
        check_non_zero(totweight)
        ca = [x/totweight for x in CAs]

        if report_se:
            return [(x, x*(1-x)/math.sqrt(totweight)) for x in ca]
        else:
            return ca

    def from_crossvalidation_results(self, test_results, report_se, unweighted):
        """CA per learner averaged over cross-validation folds."""
        CAsByFold = [[0.0]*test_results.number_of_iterations for i in range(test_results.number_of_learners)]
        foldN = [0.0]*test_results.number_of_iterations

        for tex in test_results.results:
            w = 1. if unweighted else tex.weight
            for lrn in range(test_results.number_of_learners):
                CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class) and w
            foldN[tex.iteration_number] += w

        return statistics_by_folds(CAsByFold, foldN, report_se, False)

    def get_input_type(self, test_results):
        """Classify the argument into one of the four input-type tags."""
        if isinstance(test_results, ConfusionMatrix):
            return self.CONFUSION_MATRIX
        elif isinstance(test_results, testing.ExperimentResults):
            if test_results.number_of_iterations == 1:
                return self.CLASSIFICATION
            else:
                return self.CROSS_VALIDATION
        elif isinstance(test_results, list):
            return self.CONFUSION_MATRIX_LIST
435
436
437
438CA = CAClass
439
@deprecated_keywords({"reportSE": "report_se"})
def AP(res, report_se = False, **argkw):
    """Return the average probability assigned to the correct class,
    one score per learner."""
    if res.number_of_iterations == 1:
        APs = [0.0] * res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                APs = [ap + probs[tex.actual_class]
                       for ap, probs in zip(APs, tex.probabilities)]
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                APs = [ap + probs[tex.actual_class] * tex.weight
                       for ap, probs in zip(APs, tex.probabilities)]
                totweight += tex.weight
        check_non_zero(totweight)
        return [ap / totweight for ap in APs]

    # Multiple iterations: accumulate per fold, then average across folds.
    APsByFold = [[0.0] * res.number_of_learners for i in range(res.number_of_iterations)]
    foldN = [0.0] * res.number_of_iterations
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            fold = tex.iteration_number
            APsByFold[fold] = [ap + probs[tex.actual_class]
                               for ap, probs in zip(APsByFold[fold], tex.probabilities)]
            foldN[fold] += 1
    else:
        for tex in res.results:
            fold = tex.iteration_number
            APsByFold[fold] = [ap + probs[tex.actual_class] * tex.weight
                               for ap, probs in zip(APsByFold[fold], tex.probabilities)]
            foldN[fold] += tex.weight

    return statistics_by_folds(APsByFold, foldN, report_se, True)
469
470
@deprecated_keywords({"reportSE": "report_se"})
def Brier_score(res, report_se = False, **argkw):
    """ Computes the Brier's score, defined as the average (over test examples)
    of sumx(t(x)-p(x))2, where x is a class, t(x) is 1 for the correct class
    and 0 for the others, and p(x) is the probability that the classifier
    assigned to the class x
    """
    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
    #    x is class,
    #    t(x) is 0 for 'wrong' and 1 for 'correct' class
    #    p(x) is predicted probabilty.
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1
    # Since +1 is there for each example, it adds 1 to the average
    # We skip the +1 inside the sum and add it just at the end of the function
    # We take max(result, 0) to avoid -0.0000x due to rounding errors

    if res.number_of_iterations == 1:
        MSEs=[0.0]*res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            totweight = 0.0
            for tex in res.results:
                MSEs = map(lambda res, probs:
                           res + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities)
                # NOTE(review): this is the *unweighted* branch, yet it sums
                # tex.weight -- correct only while weights default to 1 here.
                totweight += tex.weight
        else:
            for tex in res.results:
                # NOTE(review): only the sum-of-squares term is multiplied by
                # tex.weight; the -2*p(c) term is unweighted.  Looks like a
                # precedence bug (weight*(sum - 2*p(c)) intended) -- verify.
                MSEs = map(lambda res, probs:
                           res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities)
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            return [(max(x/totweight+1.0, 0), 0) for x in MSEs]  ## change this, not zero!!!
        else:
            return [max(x/totweight+1.0, 0) for x in MSEs]

    # Multiple iterations: accumulate per fold (BSs[fold][learner]),
    # then average with statistics_by_folds and add back the skipped +1.
    BSs = [[0.0]*res.number_of_learners for i in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs:
                       rr + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            # NOTE(review): same partial weighting as in the branch above.
            BSs[tex.iteration_number] = map(lambda res, probs:
                       res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x+1.0, y) for x, y in stats]
    else:
        return [x+1.0 for x in stats]
526
def BSS(res, **argkw):
    """Return 1 - Brier_score/2 for each learner in res."""
    return [1 - score / 2 for score in Brier_score(res, **argkw)]
529
def IS_ex(Pc, P):
    """Information score of one prediction.

    Pc is the posterior probability of the correct class, P its prior.
    """
    if Pc >= P:
        # Prediction moved probability toward the correct class.
        return -log2(P) + log2(Pc)
    # Prediction moved probability away from the correct class.
    return -(-log2(1 - P) + log2(1 - Pc))
536
537
@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se = False, **argkw):
    """ Computes the information score as defined by
    Kononenko and Bratko (1991).
    Argument :obj:apriori gives the apriori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:res.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations==1:
        # Single iteration: accumulate one score per learner.
        ISs = [0.0]*res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
              for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
              for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
            totweight = gettotweight(res)
        if report_se:
            # NOTE(review): standard error is reported as 0 here.
            return [(IS/totweight,0) for IS in ISs]
        else:
            return [IS/totweight for IS in ISs]


    # Multiple iterations: accumulate per fold (ISs[learner][fold]),
    # then average across folds.
    ISs = [[0.0]*res.number_of_iterations for i in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # compute info scores for each fold
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)
588
589
def Friedman(res, statistics, **argkw):
    """Friedman rank test over iterations.

    Ranks the per-iteration scores produced by statistics and returns
    (F, p), where p is the chi-square tail probability with k-1 degrees
    of freedom (k = number of compared classifiers).
    """
    rank_sums = None
    for ri in split_by_iterations(res):
        ranks = statc.rankdata(statistics(ri, **argkw))
        if rank_sums is None:
            rank_sums = ranks
        else:
            rank_sums = [r + s for r, s in zip(ranks, rank_sums)]
    N = res.number_of_iterations
    k = len(rank_sums)
    T = sum(x * x for x in rank_sums)
    F = 12.0 / (N * k * (k + 1)) * T - 3 * N * (k + 1)
    return F, statc.chisqprob(F, k - 1)
604
605
def Wilcoxon(res, statistics, **argkw):
    """Wilcoxon signed-rank test comparing exactly two classifiers
    across iterations; returns the result of statc.wilcoxont."""
    first, second = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError("Wilcoxon compares two classifiers, no more, no less")
        first.append(stats[0])
        second.append(stats[1])
    return statc.wilcoxont(first, second)
615
def rank_difference(res, statistics, **argkw):
    """Dispatch to Wilcoxon (two classifiers) or Friedman (three or more)."""
    if not res.results:
        raise TypeError("no experiments")

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError("nothing to compare (less than two classifiers given)")
    if k == 2:
        return Wilcoxon(res, statistics, **argkw)
    return Friedman(res, statistics, **argkw)
627
628
@deprecated_keywords({"res": "test_results",
                      "classIndex": "class_index"})
def confusion_matrices(test_results, class_index=1,
                       unweighted=False, cutoff=.5):
    """
    Return confusion matrices for test_results.

    :param test_results: test results
    :param class_index: index of class value for which the confusion matrices
                        are to be computed; a negative value selects the
                        multi-class matrix when there are more than two
                        classes.
    :param unweighted: ignore instance weights.
    :param cutoff: cutoff for probability
    :rtype: list of :obj:ConfusionMatrix (or of square lists-of-lists in
            the multi-class case), one per learner.
    """
    tfpns = [ConfusionMatrix() for i in range(test_results.number_of_learners)]

    if class_index < 0:
        numberOfClasses = len(test_results.class_values)
        if class_index < -1 or numberOfClasses > 2:
            # Full multi-class matrix: cm[learner][true][predicted].
            cm = [[[0.0] * numberOfClasses for i in range(numberOfClasses)]
                  for l in range(test_results.number_of_learners)]
            if unweighted or not test_results.weights:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += 1
            else:
                # BUG FIX: this branch iterated enumerate(results) and
                # unpacked tex.classes without enumerate, which raised at
                # runtime; aligned with the unweighted branch above.
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += tex.weight
            return cm

        elif test_results.baseClass >= 0:
            class_index = test_results.baseClass
        else:
            class_index = 1

    # Binary case: count TP/FP/TN/FN per learner.
    # NOTE(review): the four inner accumulation statements were missing in
    # this revision of the file; reconstructed from the upstream module --
    # verify against it.
    if cutoff != .5:
        # Predict positive when P(class_index) exceeds the cutoff.
        if unweighted or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(
                        lr.probabilities[i][class_index] > cutoff, isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(
                        lr.probabilities[i][class_index] > cutoff, isPositive,
                        lr.weight)
    else:
        # Default cutoff: use the crisp predictions.
        if unweighted or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index,
                                         isPositive, lr.weight)
    return tfpns
694
695
# obsolete (renamed): backward-compatible alias for confusion_matrices
compute_confusion_matrices = confusion_matrices
698
699
@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """
    Return chi square statistic of the confusion matrix
    (higher value indicates that prediction is not by chance).

    Accepts a single matrix or a list of matrices, one per learner.
    """
    # A single matrix is either a ConfusionMatrix instance or a nested
    # list whose second element is itself a list.
    single = isinstance(confusion_matrix, ConfusionMatrix) or \
             not isinstance(confusion_matrix[1], list)
    if single:
        return _confusion_chi_square(confusion_matrix)
    return [_confusion_chi_square(cm) for cm in confusion_matrix]
711
def _confusion_chi_square(confusion_matrix):
    """Chi-square test on one confusion matrix.

    Accepts a ConfusionMatrix instance or a square list-of-lists; returns
    (statistic, degrees of freedom, p), or (-1, -1, -1) when an expected
    cell count is zero.
    """
    if isinstance(confusion_matrix, ConfusionMatrix):
        cm = confusion_matrix
        confusion_matrix = [[cm.TP, cm.FN], [cm.FP, cm.TN]]
    dim = len(confusion_matrix)
    row_totals = [sum(row) for row in confusion_matrix]
    col_totals = [sum(row[i] for row in confusion_matrix) for i in range(dim)]
    total = sum(row_totals)
    row_priors = [t / total for t in row_totals]
    col_priors = [t / total for t in col_totals]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, observed in enumerate(row):
            expected = total * row_priors[ri] * col_priors[ci]
            if not expected:
                return -1, -1, -1
            ss += (observed - expected) ** 2 / expected
    df = (dim - 1) ** 2
    return ss, df, statc.chisqprob(ss, df)
731
@deprecated_keywords({"confm": "confusion_matrix"})
def sens(confusion_matrix):
    """
    Return sensitivity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>_
    (proportion of actual positives which are correctly identified as such).

    :param confusion_matrix: a :obj:ConfusionMatrix or a list of them.
    :return: TP / (TP + FN); -1 (with a warning) when there are no positive
             instances; a list of scores for a list input.
    """
    # isinstance instead of type(...) == list: also accepts list subclasses.
    if isinstance(confusion_matrix, list):
        return [sens(cm) for cm in confusion_matrix]

    tot = confusion_matrix.TP + confusion_matrix.FN
    if tot < 1e-6:
        import warnings
        warnings.warn("Can't compute sensitivity: one or both classes have no instances")
        return -1

    return confusion_matrix.TP / tot
748
749
@deprecated_keywords({"confm": "confusion_matrix"})
def recall(confusion_matrix):
    """
    Return recall <http://en.wikipedia.org/wiki/Precision_and_recall>_
    (fraction of relevant instances that are retrieved); identical to
    sensitivity.
    """
    return sens(confusion_matrix)
757
758
@deprecated_keywords({"confm": "confusion_matrix"})
def spec(confusion_matrix):
    """
    Return specificity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>_
    (proportion of negatives which are correctly identified).

    :param confusion_matrix: a :obj:ConfusionMatrix or a list of them.
    :return: TN / (FP + TN); -1 (with a warning) when there are no negative
             instances; a list of scores for a list input.
    """
    # isinstance instead of type(...) == list: also accepts list subclasses.
    if isinstance(confusion_matrix, list):
        return [spec(cm) for cm in confusion_matrix]

    tot = confusion_matrix.FP + confusion_matrix.TN
    if tot < 1e-6:
        import warnings
        warnings.warn("Can't compute specificity: one or both classes have no instances")
        return -1
    return confusion_matrix.TN / tot
774
775
776@deprecated_keywords({"confm": "confusion_matrix"})
def PPV(confusion_matrix):
    """
    Return positive predictive value <http://en.wikipedia.org/wiki/Positive_predictive_value>_
    (proportion of subjects with positive test results who are correctly
    diagnosed).

    A list of confusion matrices yields a list of values; -1 is returned
    (with a warning) when there are no positive predictions.
    """
    # Vectorize over a list of per-learner confusion matrices.
    if type(confusion_matrix) == list:
        return [PPV(single_cm) for single_cm in confusion_matrix]

    predicted_positives = confusion_matrix.TP + confusion_matrix.FP
    if predicted_positives < 1e-6:
        import warnings
        warnings.warn("Can't compute PPV: one or both classes have no instances")
        return -1
    return confusion_matrix.TP / predicted_positives
790
791
@deprecated_keywords({"confm": "confusion_matrix"})
def precision(confusion_matrix):
    """
    Return precision <http://en.wikipedia.org/wiki/Precision_and_recall>_
    (fraction of retrieved instances that are relevant) -- an alias for
    :obj:`PPV`; accepts a single confusion matrix or a list of them.
    """
    return PPV(confusion_matrix)
799
800@deprecated_keywords({"confm": "confusion_matrix"})
def NPV(confusion_matrix):
    """Return negative predictive value <http://en.wikipedia.org/wiki/Negative_predictive_value>_
    (proportion of subjects with a negative test result who are correctly
    diagnosed).

    A list of confusion matrices yields a list of values; -1 is returned
    (with a warning) when there are no negative predictions.
    """
    # Vectorize over a list of per-learner confusion matrices.
    if type(confusion_matrix) == list:
        return [NPV(single_cm) for single_cm in confusion_matrix]

    predicted_negatives = confusion_matrix.FN + confusion_matrix.TN
    if predicted_negatives < 1e-6:
        import warnings
        warnings.warn("Can't compute NPV: one or both classes have no instances")
        return -1
    return confusion_matrix.TN / predicted_negatives
815
816@deprecated_keywords({"confm": "confusion_matrix"})
def F1(confusion_matrix):
    """Return F1 score <http://en.wikipedia.org/wiki/F1_score>_
    (harmonic mean of precision and recall).

    A list of confusion matrices yields a list of scores; -1 is returned
    (with a warning) when precision + recall is zero or undefined.
    """
    # Vectorize over a list of per-learner confusion matrices.
    if type(confusion_matrix) == list:
        return [F1(single_cm) for single_cm in confusion_matrix]

    p = precision(confusion_matrix)
    r = recall(confusion_matrix)
    # p or r may be -1 when undefined; the sum is then non-positive.
    if p + r <= 0:
        import warnings
        warnings.warn("Can't compute F1: P + R is zero or not defined")
        return -1
    return 2. * p * r / (p + r)
831
832
833@deprecated_keywords({"confm": "confusion_matrix"})
def Falpha(confusion_matrix, alpha=1.0):
    """Return the alpha-mean of precision and recall over the given confusion
    matrix (weighted harmonic mean; alpha=1.0 gives the F1 score).

    A list of confusion matrices yields a list of scores; -1 is returned
    (with a warning) when the denominator is zero or the inputs are
    undefined, mirroring the behavior of F1.
    """
    if type(confusion_matrix) == list:
        return [Falpha(cm, alpha=alpha) for cm in confusion_matrix]
    p = precision(confusion_matrix)
    r = recall(confusion_matrix)
    denominator = alpha * p + r
    # Guard against a zero/undefined denominator (e.g. precision and recall
    # both 0, or either reported as -1); the original raised
    # ZeroDivisionError here while its sibling F1 handled the case.
    if denominator <= 0:
        import warnings
        warnings.warn("Can't compute Falpha: alpha * P + R is zero or not defined")
        return -1
    return (1. + alpha) * p * r / denominator
842
843
844@deprecated_keywords({"confm": "confusion_matrix"})
def MCC(confusion_matrix):
    """
    Return Matthew correlation coefficient <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>_
    (correlation coefficient between the observed and predicted binary
    classifications). A list of confusion matrices yields a list of
    coefficients; None is returned (with a warning) when the coefficient
    is undefined.
    """
    # code by Boris Gorelik
    if type(confusion_matrix) == list:
        return [MCC(single_cm) for single_cm in confusion_matrix]

    tp, tn = confusion_matrix.TP, confusion_matrix.TN
    fp, fn = confusion_matrix.FP, confusion_matrix.FN

    try:
        mcc = ((tp * tn - fp * fn) /
               math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))
    except ZeroDivisionError:
        # Zero division occurs when a marginal sum is zero, i.e. the
        # problem effectively contains only one type of class.
        import warnings
        warnings.warn("Can't compute MCC: TP or TN is zero or not defined")
        mcc = None
    return mcc
875
876
877@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for
    nominal raters.

    @param confusion_matrix: confusion matrix, or list of confusion matrices.
                            To obtain a non-binary confusion matrix, call
                            Orange.evaluation.scoring.compute_confusion_matrices
                            and set the classIndex parameter to -2.
    @param b_is_list_of_matrices: specifies whether confusion_matrix is a list
                            of matrices. This function needs to operate on
                            non-binary confusion matrices, which are
                            represented by python lists, therefore one needs
                            a way to distinguish between a single matrix and
                            a list of matrices
    """

    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False)
                    for cm in confusion_matrix]
        except TypeError:
            # Nevermind the parameter, maybe this is a "conventional" binary
            # confusion matrix and b_is_list_of_matrices was specified by
            # mistake.
            # BUG FIX: the original called an undefined helper
            # (scottsPiSingle) here, raising NameError instead of
            # recovering; recurse into this function instead.
            return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
    else:
        # A binary ConfusionMatrix is expanded into a 2x2 numpy array.
        if isinstance(confusion_matrix, ConfusionMatrix):
            confusion_matrix = numpy.array(
                [[confusion_matrix.TP, confusion_matrix.FN],
                 [confusion_matrix.FP, confusion_matrix.TN]], dtype=float)
        else:
            confusion_matrix = numpy.array(confusion_matrix, dtype=float)

        marginalSumOfRows = numpy.sum(confusion_matrix, axis=0)
        marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1)
        # Joint proportion of each category over both "annotators" (the
        # observed labels and the predictions); the 2.0 is the number of
        # annotators in Wikipedia's formulation.
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                          (2.0 * numpy.sum(confusion_matrix, axis=None))

        # Chance agreement vs. observed agreement (the diagonal).
        prExpected = numpy.sum(jointProportion ** 2, axis=None)
        prActual = numpy.sum(numpy.diag(confusion_matrix), axis=None) / \
                   numpy.sum(confusion_matrix, axis=None)

        return (prActual - prExpected) / (1.0 - prExpected)
924
@deprecated_keywords({"classIndex": "class_index"})
def AUCWilcoxon(res, class_index=-1, **argkw):
    """ Computes the area under ROC (AUC) and its standard error using
    Wilcoxon's approach proposed by Hanley and McNeal (1982). If
    :obj:classIndex is not specified, the first class is used as
    "the positive" and others are negative. The result is a list of
    tuples (aROC, standard error), one tuple per learner.
    """
    # Honor instance weights unless explicitly disabled via "unweighted".
    useweights = res.weights and not argkw.get("unweighted", 0)
    # problists: per-learner lists of (score, (neg_count, pos_count));
    # tots = (negatives, positives) -- ordering inferred from the unpacking
    # below (corn is an opaque C helper) -- TODO confirm.
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results=[]

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:
        # W accumulates the Wilcoxon statistic (the AUC estimate); Q1 and
        # Q2 are the Hanley-McNeil variance components.
        highPos, lowNeg = totPos, 0.0
        W, Q1, Q2 = 0.0, 0.0, 0.0
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos/2.)
            Q2 += thisPos * (lowNeg**2  + lowNeg*thisNeg  + thisNeg**2 /3.)
            Q1 += thisNeg * (highPos**2 + highPos*thisPos + thisPos**2 /3.)

            lowNeg += thisNeg

        # Normalize to proportions of the possible positive/negative pairs.
        W  /= (totPos*totNeg)
        Q1 /= (totNeg*totPos**2)
        Q2 /= (totPos*totNeg**2)

        # Hanley-McNeil standard error of the AUC estimate.
        SE = math.sqrt( (W*(1-W) + (totPos-1)*(Q1-W**2) + (totNeg-1)*(Q2-W**2)) / (totPos*totNeg) )
        results.append((W, SE))
    return results
959
AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete
961
962
@deprecated_keywords({"classIndex": "class_index"})
def compare_2_AUCs(res, lrn1, lrn2, class_index=-1, **argkw):
    """Statistically compare the AUCs of learners `lrn1` and `lrn2` (indices
    into `res`) via the corn helper; instance weights are used unless the
    `unweighted` keyword argument is given."""
    return corn.compare2ROCs(res, lrn1, lrn2, class_index, res.weights and not argkw.get("unweighted"))
966
compare_2_AROCs = compare_2_AUCs # for backward compatibility, compare_2_AROCs is obsolete
968
969
@deprecated_keywords({"classIndex": "class_index"})
def compute_ROC(res, class_index=-1):
    """ Computes a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity; one curve per learner is returned.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    # tots[0] holds the number of negatives, tots[1] the positives
    # (inferred from the unpacking order used throughout this module).
    totPos, totNeg = tots[1], tots[0]

    for plist in problists:
        # Start at (1, 1): everything is classified as positive.
        curve=[(1., 1.)]
        TP, TN = totPos, 0.0
        FN, FP = 0., totNeg
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            # thisPos go from TP to FN
            TP -= thisPos
            FN += thisPos
            # thisNeg go from FP to TN
            TN += thisNeg
            FP -= thisNeg

            sens = TP/(TP+FN)
            spec = TN/(FP+TN)
            curve.append((1-spec, sens))
        results.append(curve)

    return results
999
1000## TC's implementation of algorithms, taken from:
1001## T Fawcett: ROC Graphs: Notes and Practical Considerations for Data Mining Researchers, submitted to KDD Journal.
def ROC_slope(point1, point2):
    """Return the slope of the line through two ROC points.

    Each point is an (x, y, fscore) triple; a vertical segment (equal x
    coordinates) yields 1e300 as an "infinite" slope sentinel.
    """
    # Tuple parameters (PEP 3113) were removed in Python 3; unpack
    # explicitly instead. Callers still pass two triples, unchanged.
    P1x, P1y, P1fscore = point1
    P2x, P2y, P2fscore = point2
    if P1x == P2x:
        return 1e300
    return (P1y - P2y) / (P1x - P2x)
1006
1007
1008@deprecated_keywords({"keepConcavities": "keep_concavities"})
1010    if keepConcavities:
1011        R.append(P)
1012    else:
1013        while (1):
1014            if len(R) < 2:
1015                R.append(P)
1016                return R
1017            else:
1018                T = R.pop()
1019                T2 = R[-1]
1020                if ROC_slope(T2, T) > ROC_slope(T, P):
1021                    R.append(T)
1022                    R.append(P)
1023                    return R
1024    return R
1025
1026
1027@deprecated_keywords({"classIndex": "class_index",
1028                      "keepConcavities": "keep_concavities"})
def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
    """Compute one ROC curve per learner as a list of (FPR, TPR, score)
    points following Fawcett's algorithm; concavities are smoothed away by
    ROC_add_point unless keep_concavities is true.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]  # total positives / negatives

    for plist in problists:
        ## corn gives an increasing by scores list, we need a decreasing by scores
        plist.reverse()
        TP = 0.0
        FP = 0.0
        curve = []
        # "infinity" score sentinel at (0.0, 0.0); f is pre-initialized so
        # the final ROC_add_point below is well-defined even when plist is
        # empty (the original left f unbound in that case).
        f = fPrev = 10e300
        for prob in plist:
            f = prob[0]
            if f != fPrev:  # score changed: emit the accumulated point
                if P:
                    tpr = TP/P
                else:
                    tpr = 0.0
                if N:
                    fpr = FP/N
                else:
                    fpr = 0.0
                curve = ROC_add_point((fpr, tpr, fPrev), curve, keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        # Emit the final point for the lowest score.
        if P:
            tpr = TP/P
        else:
            tpr = 0.0
        if N:
            fpr = FP/N
        else:
            fpr = 0.0
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities)
        results.append(curve)

    return results
1070
1071## returns a list of points at the intersection of the tangential iso-performance line and the given ROC curve
1072## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    """Return the ROC-curve point(s) closest to the tangential
    iso-performance line determined by the costs FPcost/FNcost and the
    positive-class prior pval; near-ties (within 0.0001) are all included."""
    m = (FPcost * (1.0 - pval)) / (FNcost * pval)

    ## anchor the iso-performance line at the point (0.0, 1.0)
    x0, y0 = 0.0, 1.0
    x1, y1 = 1.0, 1.0 + m
    d01 = math.sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)

    ## scan the curve for the point(s) at minimal distance from the line
    a = x0 * y1 - x1 * y0
    best_dist = None
    best_points = []
    for (x, y, fscore) in curve:
        dist = abs(((y0 - y1) * x + (x1 - x0) * y + a) / d01)
        if best_dist is None or dist < best_dist:
            best_dist = dist
            best_points = [(x, y, fscore)]
        elif abs(dist - best_dist) <= 0.0001: ## close enough
            best_points.append((x, y, fscore))
    return best_points
1096
def frange(start, end=None, inc=None):
    """A range function that accepts float increments.

    Called with a single argument, counts from 0.0 up to `start`. The end
    point is always included as the last element. `inc` defaults to 1.0;
    a zero increment is also treated as 1.0 to avoid an endless loop.
    """
    if end is None:
        # single-argument form: frange(stop)
        end = start + 0.0
        start = 0.0
    if inc is None or inc == 0:
        inc = 1.0

    values = [start]
    while True:
        # Multiply instead of repeatedly adding to avoid accumulating
        # floating-point error. ("nxt" avoids shadowing the builtin next.)
        nxt = start + len(values) * inc
        if (inc > 0 and nxt >= end) or (inc < 0 and nxt <= end):
            values.append(end)
            break
        values.append(nxt)

    return values
1119
1120## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1121## where ROCcurvesX is a set of ROC curves,
1122## where a (one) ROC curve is a set of (FP, TP) points
1123##
1124## for each (sub)set of input ROC curves
1125## returns the average ROC curve and an array of (vertical) standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_vertical_average_ROC(roc_curves, samples = 10):
    """Vertically average each set of ROC curves in roc_curves.

    Each set is sampled at `samples` evenly spaced FP rates and the TP
    rates are averaged; returns (average, stdev), one entry per input set,
    where average is a list of (FPsample, mean TP) pairs and stdev the
    corresponding vertical standard deviations.
    """
    # Linear interpolation of TP (y) between two curve points at FP = X.
    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
        if (P1x == P2x) or ((X > P1x) and (X > P2x)) or ((X < P1x) and (X < P2x)):
            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
        dx = float(P2x) - float(P1x)
        dy = float(P2y) - float(P1y)
        m = dy/dx
        return P1y + m*(X - P1x)

    # TP rate of curve ROC (npts points) at the given FP sample,
    # interpolating between the two neighbouring points when necessary.
    def TP_FOR_FP(FPsample, ROC, npts):
        i = 0
        while i < npts - 1:
            (fp, _, _) = ROC[i + 1]
            if (fp <= FPsample):
                i += 1
            else:
                break
        (fp, tp, _) = ROC[i]
        if fp == FPsample:
            return tp
        elif fp < FPsample and i + 1 < len(ROC):
            return INTERPOLATE(ROC[i], ROC[i+1], FPsample)
        elif fp < FPsample and i + 1 == len(ROC): # return the last
            return ROC[i][1]
        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
        #return 0.0

    average = []
    stdev = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        TPavg = []
        TPstd = []
        for FPsample in frange(0.0, 1.0, 1.0/samples):
            TPsum = []
            for i in range(nrocs):
                TPsum.append( TP_FOR_FP(FPsample, ROCS[i], npts[i]) ) ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
            TPavg.append( (FPsample, statc.mean(TPsum)) )
            # A single curve has no spread; report 0.0 instead of calling std.
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstd.append( stdv )

        average.append(TPavg)
        stdev.append(TPstd)

    return (average, stdev)
1179
1180## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1181## where ROCcurvesX is a set of ROC curves,
1182## where a (one) ROC curve is a set of (FP, TP) points
1183##
1184## for each (sub)set of input ROC curves
1185## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_threshold_average_ROC(roc_curves, samples = 10):
    """Threshold-average each set of ROC curves in roc_curves.

    Pools all thresholds (fscores) of a set, samples `samples` of them, and
    averages the (FP, TP) points the curves reach at each sampled
    threshold. Returns (average, stdevV, stdevH): the averaged curves plus
    the vertical (TP) and horizontal (FP) standard deviations.
    """
    # First point of ROC whose score is <= thresh; falls back to the last
    # point. Assumes points are ordered by decreasing score, as produced
    # by TC_compute_ROC.
    def POINT_AT_THRESH(ROC, npts, thresh):
        i = 0
        while i < npts - 1:
            (px, py, pfscore) = ROC[i]
            if (pfscore > thresh):
                i += 1
            else:
                break
        return ROC[i]

    average = []
    stdevV = []
    stdevH = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        # Pool every threshold of this set, sorted by decreasing score.
        T = []
        for c in ROCS:
            for (px, py, pfscore) in c:
##                try:
##                    T.index(pfscore)
##                except:
                T.append(pfscore)
        T.sort()
        T.reverse() ## ugly

        TPavg = []
        TPstdV = []
        TPstdH = []
        for tidx in frange(0, (len(T) - 1.0), float(len(T))/samples):
            FPsum = []
            TPsum = []
            for i in range(nrocs):
                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
                FPsum.append(fp)
                TPsum.append(tp)
            TPavg.append( (statc.mean(FPsum), statc.mean(TPsum)) )
            ## vertical standard deviation
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstdV.append( stdv )
            ## horizontal standard deviation
            if len(FPsum) > 1:
                stdh = statc.std(FPsum)
            else:
                stdh = 0.0
            TPstdH.append( stdh )

        average.append(TPavg)
        stdevV.append(TPstdV)
        stdevH.append(TPstdH)

    return (average, stdevV, stdevH)
1246
1247## Calibration Curve
1248## returns an array of (curve, yesClassPredictions, noClassPredictions) elements, where:
1249##  - curve is an array of points (x, y) on the calibration curve
1250##  - yesClassRugPoints is an array of (x, 1) points
1251##  - noClassRugPoints is an array of (x, 0) points
1252@deprecated_keywords({"classIndex": "class_index"})
1253def compute_calibration_curve(res, class_index=-1):
1254    ## merge multiple iterations into one
1256    for te in res.results:
1257        mres.results.append( te )
1258
1259    problists, tots = corn.computeROCCumulative(mres, class_index)
1260
1261    results = []
1262    P, N = tots[1], tots[0]
1263
1264    bins = 10 ## divide interval between 0.0 and 1.0 into N bins
1265
1266    for plist in problists:
1267        yesClassRugPoints = []
1268        noClassRugPoints = []
1269
1270        yesBinsVals = [0] * bins
1271        noBinsVals = [0] * bins
1272        for (f, (thisNeg, thisPos)) in plist:
1273            yesClassRugPoints.append( (f, thisPos) ) #1.0
1274            noClassRugPoints.append( (f, thisNeg) ) #1.0
1275
1276            index = int(f * bins )
1277            index = min(index, bins - 1) ## just in case for value 1.0
1278            yesBinsVals[index] += thisPos
1279            noBinsVals[index] += thisNeg
1280
1281        curve = []
1282        for cn in range(bins):
1283            f = float(cn * 1.0 / bins) + (1.0 / 2.0 / bins)
1284            yesVal = yesBinsVals[cn]
1285            noVal = noBinsVals[cn]
1286            allVal = yesVal + noVal
1287            if allVal == 0.0: continue
1288            y = float(yesVal)/float(allVal)
1289            curve.append( (f,  y) )
1290
1291        ## smooth the curve
1292        maxnPoints = 100
1293        if len(curve) >= 3:
1294#            loessCurve = statc.loess(curve, -3, 0.6)
1295            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
1296        else:
1297            loessCurve = curve
1298        clen = len(loessCurve)
1299        if clen > maxnPoints:
1300            df = clen / maxnPoints
1301            if df < 1: df = 1
1302            curve = [loessCurve[i]  for i in range(0, clen, df)]
1303        else:
1304            curve = loessCurve
1305        curve = [(c)[:2] for c in curve] ## remove the third value (variance of epsilon?) that suddenly appeared in the output of the statc.loess function
1306        results.append((curve, yesClassRugPoints, noClassRugPoints))
1307
1308    return results
1309
1310
1311## Lift Curve
1312## returns an array of curve elements, where:
1313##  - curve is an array of points ((TP+FP)/(P + N), TP/P, (th, FP/N)) on the Lift Curve
1314@deprecated_keywords({"classIndex": "class_index"})
1315def compute_lift_curve(res, class_index=-1):
1316    ## merge multiple iterations into one
1318    for te in res.results:
1319        mres.results.append( te )
1320
1321    problists, tots = corn.computeROCCumulative(mres, class_index)
1322
1323    results = []
1324    P, N = tots[1], tots[0]
1325    for plist in problists:
1326        ## corn gives an increasing by scores list, we need a decreasing by scores
1327        plist.reverse()
1328        TP = 0.0
1329        FP = 0.0
1330        curve = [(0.0, 0.0, (10e300, 0.0))]
1331        for (f, (thisNeg, thisPos)) in plist:
1332            TP += thisPos
1333            FP += thisNeg
1334            curve.append( ((TP+FP)/(P + N), TP, (f, FP/(N or 1))) )
1335        results.append(curve)
1336
1337    return P, N, results
1338###
1339
class CDT:
    """Holds the counts of concordant (C), discordant (D) and tied (T)
    pairs, as used by the AUC computations."""
    def __init__(self, C=0.0, D=0.0, T=0.0):
        self.C = C
        self.D = D
        self.T = T
1344
def is_CDT_empty(cdt):
    """True when the pair counts of `cdt` are (numerically) all zero."""
    total_pairs = cdt.C + cdt.D + cdt.T
    return total_pairs < 1e-20
1347
1348
1349@deprecated_keywords({"classIndex": "class_index"})
def compute_CDT(res, class_index=-1, **argkw):
    """Obsolete, don't use.

    Compute per-learner CDT (concordant/discordant/tied pair counts) for
    the given experiment results. With multiple iterations the counts are
    summed over iterations, unless some learner ends up with no usable
    pairs, in which case the counts are recomputed over the merged results.
    """
    # Default class index: the designated base class if any, else class 1.
    if class_index<0:
        if res.baseClass>=0:
            class_index = res.baseClass
        else:
            class_index = 1

    useweights = res.weights and not argkw.get("unweighted", 0)
    weightByClasses = argkw.get("weightByClasses", True)  # read but unused here

    if res.number_of_iterations <= 1:
        return corn.computeCDT(res, class_index, useweights)

    # Sum the per-iteration counts learner by learner.
    CDTs = [CDT() for i in range(res.number_of_learners)]
    for exp in split_by_iterations(res):
        expCDTs = corn.computeCDT(exp, class_index, useweights)
        for i in range(len(CDTs)):
            CDTs[i].C += expCDTs[i].C
            CDTs[i].D += expCDTs[i].D
            CDTs[i].T += expCDTs[i].T
    # BUG FIX: the original looped over learners but always tested
    # CDTs[0]; test every learner's CDT so a single degenerate learner
    # triggers the recomputation over the merged result set.
    for cdt in CDTs:
        if is_CDT_empty(cdt):
            return corn.computeCDT(res, class_index, useweights)

    return CDTs
1377
1378## THIS FUNCTION IS OBSOLETE AND ITS AVERAGING OVER FOLDS IS QUESTIONABLE
1379## DON'T USE IT
def ROCs_from_CDT(cdt, **argkw):
    """Obsolete, don't use.

    Derive rank statistics from pair counts: returns the tuple
    (concordant%, discordant%, tied%, N, Somers' D, Gamma, Tau-a, c), or a
    tuple of eight -1s (with a warning) when there are no pairs. Accepts a
    single CDT or a list of them (returning a list of tuples). Pass
    print=1 to also print a summary table.
    """
    if type(cdt) == list:
        return [ROCs_from_CDT(c) for c in cdt]

    C, D, T = cdt.C, cdt.D, cdt.T
    N = C+D+T
    if N < 1e-6:
        import warnings
        warnings.warn("Can't compute AUC: one or both classes have no instances")
        return (-1,)*8
    if N < 2:
        import warnings
        warnings.warn("Can't compute AUC: one or both classes have too few examples")

    som = (C-D)/N        # Somers' D
    c = 0.5*(1+som)      # c statistic (the AUC)

    if (C+D):
        res = (C/N*100, D/N*100, T/N*100, N, som, (C-D)/(C+D), (C-D)/(N*(N-1)/2), c)
    else:
        # No untied pairs: Gamma is undefined, flagged with -1.0.
        res = (C/N*100, D/N*100, T/N*100, N, som, -1.0, (C-D)/(N*(N-1)/2), c)

    # Parenthesized single-argument prints work identically on Python 2.
    if argkw.get("print"):
        print("Concordant  = %5.1f       Somers' D = %1.3f" % (res[0], res[4]))
        # BUG FIX: the original interpolated the string "N/A" into a %1.3f
        # slot, raising TypeError whenever Gamma was undefined or <= 0.
        print("Discordant  = %5.1f       Gamma     = %s" % (res[1], res[5] > 0 and "%1.3f" % res[5] or "N/A"))
        print("Tied        = %5.1f       Tau-a     = %1.3f" % (res[2], res[6]))
        print(" %6d pairs             c         = %1.3f"    % (res[3], res[7]))

    return res
1410
AROC_from_CDT = ROCs_from_CDT  # for backward compatibility, AROC_from_CDT is obsolete
1412
1413
1414
1415# computes AUC using a specified 'cdtComputer' function
1416# It tries to compute AUCs from 'ite' (examples from a single iteration) and,
1417# if C+D+T=0, from 'all_ite' (entire test set). In the former case, the AUCs
1418# are divided by 'divideByIfIte'. Additional flag is returned which is True in
1419# the former case, or False in the latter.
1420@deprecated_keywords({"divideByIfIte": "divide_by_if_ite",
1421                      "computerArgs": "computer_args"})
def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
    """Compute per-learner AUCs from CDT pair counts.

    First tries the single-iteration results `ite` (each AUC divided by
    divide_by_if_ite; second element of the result is True). If those
    contain no usable pairs, falls back to the entire test set `all_ite`
    (no division; second element False). Returns (False, False) when even
    the fallback yields nothing.
    """
    def _auc(cdt):
        # AUC = (concordant + ties / 2) / all pairs
        return (cdt.C + cdt.T / 2) / (cdt.C + cdt.D + cdt.T)

    cdts = cdtComputer(*(ite, ) + computer_args)
    if not is_CDT_empty(cdts[0]):
        return [_auc(cdt) / divide_by_if_ite for cdt in cdts], True

    if all_ite:
        cdts = cdtComputer(*(all_ite, ) + computer_args)
        if not is_CDT_empty(cdts[0]):
            return [_auc(cdt) for cdt in cdts], False

    return False, False
1433
1434
# computes AUC between classes i and j as if there were no other classes
@deprecated_keywords({"classIndex1": "class_index1",
                      "classIndex2": "class_index2",
                      "useWeights": "use_weights",
                      "divideByIfIte": "divide_by_if_ite"})
def AUC_ij(ite, class_index1, class_index2, use_weights = True, all_ite = None, divide_by_if_ite = 1.0):
    """Pairwise AUC between the two given classes; see AUC_x for the
    fallback-to-all_ite semantics of the returned (aucs, flag) pair."""
    return AUC_x(corn.computeCDTPair, ite, all_ite, divide_by_if_ite, (class_index1, class_index2, use_weights))
1442
1443
# computes AUC between class i and the other classes (treating them as the same class)
@deprecated_keywords({"classIndex": "class_index",
                      "useWeights": "use_weights",
                      "divideByIfIte": "divide_by_if_ite"})
def AUC_i(ite, class_index, use_weights = True, all_ite = None,
          divide_by_if_ite = 1.0):
    """One-vs-rest AUC for the given class; see AUC_x for the
    fallback-to-all_ite semantics of the returned (aucs, flag) pair."""
    return AUC_x(corn.computeCDT, ite, all_ite, divide_by_if_ite, (class_index, use_weights))
1451
1452
1453# computes the average AUC over folds using a "AUCcomputer" (AUC_i or AUC_ij)
1454# it returns the sum of what is returned by the computer, unless at a certain
1455# fold the computer has to resort to computing over all folds or even this failed;
1456# in these cases the result is returned immediately
1457
1458@deprecated_keywords({"AUCcomputer": "auc_computer",
1459                      "computerArgs": "computer_args"})
def AUC_iterations(auc_computer, iterations, computer_args):
    """Average AUC over folds via `auc_computer` (AUC_i or AUC_ij).

    Returns the element-wise sum of the per-fold results (each fold's
    computer is expected to pre-divide by the number of iterations). If a
    fold had to fall back to the merged test set, that fold's result is
    returned directly; None is returned when even the fallback failed.
    """
    subsum_aucs = [0.] * iterations[0].number_of_learners
    for ite in iterations:
        aucs, foldsUsed = auc_computer(*(ite, ) + computer_args)
        if not aucs:
            return None
        if not foldsUsed:
            # the computer resorted to the merged test set: no averaging
            return aucs
        # BUG FIX: the original called map(add, ...) with a bare `add`
        # that the visible imports do not provide (only `import operator`);
        # sum element-wise explicitly instead.
        subsum_aucs = [s + a for s, a in zip(subsum_aucs, aucs)]
    return subsum_aucs
1470
1471
# AUC for binary classification problems
@deprecated_keywords({"useWeights": "use_weights"})
def AUC_binary(res, use_weights = True):
    """AUC for a two-class problem, averaged over iterations when there are
    several; a degenerate fold falls back to the merged results (AUC_x)."""
    if res.number_of_iterations > 1:
        return AUC_iterations(AUC_i, split_by_iterations(res), (-1, use_weights, res, res.number_of_iterations))
    else:
        # AUC_i returns (aucs, flag); keep only the list of AUCs.
        return AUC_i(res, -1, use_weights)[0]
1479
1480# AUC for multiclass problems
1481@deprecated_keywords({"useWeights": "use_weights"})
def AUC_multi(res, use_weights = True, method = 0):
    """Compute multi-class AUC, averaged over class pairs (method 0:
    weighted by class-pair priors, 1: unweighted) or over one-vs-rest
    problems (method 2: weighted by class priors, 3: unweighted).
    """
    numberOfClasses = len(res.class_values)

    if res.number_of_iterations > 1:
        iterations = split_by_iterations(res)
        all_ite = res
    else:
        iterations = [res]
        all_ite = None

    sum_aucs = [0.] * res.number_of_learners
    usefulClassPairs = 0.

    if method in [0, 2]:
        # class priors are needed for the weighted averaging methods
        prob = class_probabilities_from_res(res)

    if method <= 1:
        # average AUC over all (unordered) class pairs
        for classIndex1 in range(numberOfClasses):
            for classIndex2 in range(classIndex1):
                subsum_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, use_weights, all_ite, res.number_of_iterations))
                if subsum_aucs:
                    if method == 0:
                        p_ij = prob[classIndex1] * prob[classIndex2]
                        subsum_aucs = [x * p_ij  for x in subsum_aucs]
                        usefulClassPairs += p_ij
                    else:
                        usefulClassPairs += 1
                    # element-wise sum; the original's map(add, ...) relied
                    # on a bare `add` the visible imports do not provide
                    sum_aucs = [s + x for s, x in zip(sum_aucs, subsum_aucs)]
    else:
        # average AUC of each class against all the others
        for classIndex in range(numberOfClasses):
            subsum_aucs = AUC_iterations(AUC_i, iterations, (classIndex, use_weights, all_ite, res.number_of_iterations))
            if subsum_aucs:
                # BUG FIX: the original tested `method == 0` here, which is
                # never true in this branch (method is 2 or 3), so the
                # weighted one-vs-rest method silently behaved unweighted.
                if method == 2:
                    p_i = prob[classIndex]
                    subsum_aucs = [x * p_i  for x in subsum_aucs]
                    usefulClassPairs += p_i
                else:
                    usefulClassPairs += 1
                sum_aucs = [s + x for s, x in zip(sum_aucs, subsum_aucs)]

    if usefulClassPairs > 0:
        sum_aucs = [x/usefulClassPairs for x in sum_aucs]

    return sum_aucs
1527
# Forward declaration: AUC is temporarily defined as an empty function so
# that the real definition below can reference AUC.ByWeightedPairs as the
# default value of its `method` argument.
def AUC():
    pass

AUC.ByWeightedPairs = 0
1532
1533# Computes AUC, possibly for multiple classes (the averaging method can be specified)
# Results over folds are averages; if some folds have examples from one class only, the folds are merged
1535@deprecated_keywords({"useWeights": "use_weights"})
def AUC(res, method = AUC.ByWeightedPairs, use_weights = True):
    """ Returns the area under ROC curve (AUC) given a set of experimental
    results. For multivalued class problems, it will compute some sort of
    average, as specified by the argument method.
    """
    n_classes = len(res.class_values)
    if n_classes < 2:
        raise ValueError("Cannot compute AUC on a single-class problem")
    if n_classes == 2:
        return AUC_binary(res, use_weights)
    return AUC_multi(res, use_weights, method)
1547
# Symbolic names for the `method` argument of AUC / AUC_multi.
AUC.ByWeightedPairs = 0
AUC.ByPairs = 1
AUC.WeightedOneAgainstAll = 2
AUC.OneAgainstAll = 3
1552
1553
1554# Computes AUC; in multivalued class problem, AUC is computed as one against all
# Results over folds are averages; if some folds have examples from one class only, the folds are merged
@deprecated_keywords({"classIndex": "class_index",
                      "useWeights": "use_weights"})
def AUC_single(res, class_index = -1, use_weights = True):
    """ Computes AUC where the class given by class_index is singled out,
    and all other classes are treated as a single class. To find how good
    our classifiers are in distinguishing between vans and other vehicles,
    call the function like this::

        Orange.evaluation.scoring.AUC_single(resVeh, \
class_index = vehicle.domain.classVar.values.index("van"))
    """
    # Default class index: the designated base class if any, else class 1.
    if class_index<0:
        if res.baseClass>=0:
            class_index = res.baseClass
        else:
            class_index = 1

    if res.number_of_iterations > 1:
        return AUC_iterations(AUC_i, split_by_iterations(res), (class_index, use_weights, res, res.number_of_iterations))
    else:
        # AUC_i returns (aucs, flag); keep only the list of AUCs.
        return AUC_i( res, class_index, use_weights)[0]
1577
1578# Computes AUC for a pair of classes (as if there were no other classes)
1579# Results over folds are averages; if some folds have examples from one class only, the folds are merged
@deprecated_keywords({"classIndex1": "class_index1",
                      "classIndex2": "class_index2",
                      "useWeights": "use_weights"})
def AUC_pair(res, class_index1, class_index2, use_weights = True):
    """Compute the AUC between the two given classes, ignoring instances
    that belong to any other class.
    """
    if res.number_of_iterations <= 1:
        return AUC_ij(res, class_index1, class_index2, use_weights)
    extra_args = (class_index1, class_index2, use_weights, res,
                  res.number_of_iterations)
    return AUC_iterations(AUC_ij, split_by_iterations(res), extra_args)
1591
1592
1593# AUC for multiclass problems
@deprecated_keywords({"useWeights": "use_weights"})
def AUC_matrix(res, use_weights = True):
    """ Computes a (lower diagonal) matrix with AUCs for all pairs of classes.
    If there are empty classes, the corresponding elements in the matrix
    are -1. Remember the beautiful(?) code for printing out the confusion
    matrix? Here it strikes again::

        classes = vehicle.domain.classVar.values
        AUCmatrix = Orange.evaluation.scoring.AUC_matrix(resVeh)[0]
        print "\t"+"\t".join(classes[:-1])
        for className, AUCrow in zip(classes[1:], AUCmatrix[1:]):
            print ("%s" + ("\t%5.3f" * len(AUCrow))) % ((className, ) + tuple(AUCrow))
    """
    numberOfClasses = len(res.class_values)
    number_of_learners = res.number_of_learners

    # With multiple iterations, AUCs are averaged over folds; the unsplit
    # results are kept as a fallback for folds that miss one of the classes.
    if res.number_of_iterations > 1:
        iterations, all_ite = split_by_iterations(res), res
    else:
        iterations, all_ite = [res], None

    # aucs[learner][class1] collects AUCs for pairs (class1, class2) with
    # class2 < class1, i.e. the lower triangle of the matrix.
    # NOTE(review): both comprehension variables are named `i`; only the
    # resulting shape matters, but distinct names would be clearer.
    aucs = [[[] for i in range(numberOfClasses)] for i in range(number_of_learners)]
    # NOTE(review): `prob` is never used below — presumably a leftover from
    # an older implementation; confirm class_probabilities_from_res has no
    # needed side effects before removing.
    prob = class_probabilities_from_res(res)

    for classIndex1 in range(numberOfClasses):
        for classIndex2 in range(classIndex1):
            # AUC for this pair of classes, one value per learner.
            pair_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, use_weights, all_ite, res.number_of_iterations))
            if pair_aucs:
                for lrn in range(number_of_learners):
                    aucs[lrn][classIndex1].append(pair_aucs[lrn])
            else:
                # Pair could not be scored (e.g. an empty class): mark -1.
                for lrn in range(number_of_learners):
                    aucs[lrn][classIndex1].append(-1)
    return aucs
1628
1629
def McNemar(res, **argkw):
    """Compute a triangular matrix with the McNemar statistic for each pair
    of classifiers. The statistic is distributed by the chi-square
    distribution with one degree of freedom; the critical value for 5%
    significance is around 3.84.

    :param res: experiment results.
    :param argkw: pass ``unweighted=1`` to ignore instance weights even
        when the results carry them.
    """
    nLearners = res.number_of_learners
    # mcm[l1][l2] (l1 < l2) counts cases where l1 is right and l2 wrong;
    # mcm[l2][l1] counts the opposite disagreement.
    mcm = [[0.0] * nLearners for _ in range(nLearners)]

    # The original had two copies of this loop differing only in the weight
    # used; they are merged here with a per-instance weight.
    use_weights = res.weights and not argkw.get("unweighted")
    for tex in res.results:
        w = tex.weight if use_weights else 1
        actual = tex.actual_class
        classes = tex.classes
        for l1 in range(nLearners):
            for l2 in range(l1, nLearners):
                if classes[l1] == actual:
                    if classes[l2] != actual:
                        mcm[l1][l2] += w
                elif classes[l2] == actual:
                    mcm[l2][l1] += w

    # Turn disagreement counts into the continuity-corrected statistic,
    # stored in the lower triangle.
    for l1 in range(nLearners):
        for l2 in range(l1, nLearners):
            su = mcm[l1][l2] + mcm[l2][l1]
            if su:
                mcm[l2][l1] = (abs(mcm[l1][l2] - mcm[l2][l1]) - 1) ** 2 / su
            else:
                mcm[l2][l1] = 0

    # Keep only the lower triangle of each row.
    for l1 in range(nLearners):
        mcm[l1] = mcm[l1][:l1]

    return mcm
1675
1676
def McNemar_of_two(res, lrn1, lrn2, **argkw):
    """Compute the McNemar statistic for the pair of classifiers given by
    indices ``lrn1`` and ``lrn2``.

    Fixes two defects of the original implementation: ``argkw`` was
    referenced but never declared (NameError on every call), and the
    weighted/unweighted branches were swapped relative to :func:`McNemar`
    (weights were summed in the unweighted branch and vice versa).

    :param argkw: pass ``unweighted=1`` to ignore instance weights.
    """
    tf = ft = 0.0
    use_weights = res.weights and not argkw.get("unweighted")
    for tex in res.results:
        w = tex.weight if use_weights else 1.0
        actual = tex.actual_class
        if tex.classes[lrn1] == actual:
            if tex.classes[lrn2] != actual:
                tf += w          # lrn1 right, lrn2 wrong
        elif tex.classes[lrn2] == actual:
            ft += w              # lrn2 right, lrn1 wrong

    su = tf + ft
    if su:
        # Continuity-corrected McNemar statistic.
        return (abs(tf - ft) - 1) ** 2 / su
    return 0
1704
1705
def Friedman(res, stat=CA):
    """Compare classifiers with the Friedman test, treating folds as
    different examples. Returns F, the corresponding p-value and the
    average ranks.
    """
    fold_scores = [stat(fold) for fold in split_by_iterations(res)]

    N = len(fold_scores)
    k = len(fold_scores[0])
    sums = [0.0] * k
    for scores in fold_scores:
        ranks = [k - x + 1 for x in statc.rankdata(scores)]
        if stat == Brier_score:
            # Lower Brier score is better, so flip the ranks back.
            ranks = [k + 1 - x for x in ranks]
        sums = [s + r for s, r in zip(sums, ranks)]

    T = sum(x * x for x in sums)
    sums = [x / N for x in sums]

    F = 12.0 / (N * k * (k + 1)) * T - 3 * N * (k + 1)

    return F, statc.chisqprob(F, k - 1), sums
1728
1729
def Wilcoxon_pairs(res, avgranks, stat=CA):
    """Return a triangular matrix where element [i][j] holds the
    significance of the difference between the i-th and j-th classifier,
    as computed by the Wilcoxon test. The element is positive if the i-th
    classifier is better than the j-th one, negative if it is worse, and 1
    if they are equal. Arguments are ExperimentResults, average ranks (as
    returned by Friedman) and, optionally, a statistic; greater values
    should mean better results.
    """
    fold_scores = [stat(fold) for fold in split_by_iterations(res)]
    k = len(fold_scores[0])

    matrix = []
    for m1 in range(k):
        row = []
        for m2 in range(m1 + 1, k):
            t, p = statc.wilcoxont([s[m1] for s in fold_scores],
                                   [s[m2] for s in fold_scores])
            if avgranks[m1] < avgranks[m2]:
                row.append(p)
            elif avgranks[m2] < avgranks[m1]:
                row.append(-p)
            else:
                row.append(1)
        matrix.append(row)
    return matrix
1754
1755
@deprecated_keywords({"allResults": "all_results",
                      "noConfidence": "no_confidence"})
def plot_learning_curve_learners(file, all_results, proportions, learners, no_confidence=0):
    """Label each curve with its learner's name and delegate to
    plot_learning_curve."""
    names = [Orange.misc.getobjectname(learner, "Learner %i" % i)
             for i, learner in enumerate(learners)]
    plot_learning_curve(file, all_results, proportions, names, no_confidence)
1760
1761
@deprecated_keywords({"allResults": "all_results",
                      "noConfidence": "no_confidence"})
def plot_learning_curve(file, all_results, proportions, legend, no_confidence=0):
    """Write a gnuplot script drawing one learning curve per learner.

    :param file: output file name or an already-open file-like object.
    :param all_results: experiment results, one entry per proportion.
    :param proportions: training-set proportions (the x-axis values).
    :param legend: curve titles, one per learner.
    :param no_confidence: if false, 95% confidence error bars are drawn.
    """
    import types
    fopened=0
    # Accept a file name as well as an open file (Python 2 string check).
    if type(file)==types.StringType:
        file=open(file, "wt")
        fopened=1

    file.write("set yrange [0:1]\n")
    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
    file.write("set multiplot\n\n")
    # CA_dev(x) presumably yields per-learner (mean, deviation) pairs —
    # the code below indexes [i][0] and [i][1]; confirm against CA_dev.
    CAs = [CA_dev(x) for x in all_results]

    file.write("plot \\\n")
    # One plot clause per curve; each error-bar clause precedes its curve.
    for i in range(len(legend)-1):
        if not no_confidence:
            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i+1))
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    # The last clause must not end with the continuation comma.
    if not no_confidence:
        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))

    # Inline data blocks, each terminated by "e": first the error bars
    # (1.96 * deviation is the 95% confidence half-width), then the curve.
    for i in range(len(legend)):
        if not no_confidence:
            for p in range(len(proportions)):
                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96*CAs[p][i][1]))
            file.write("e\n\n")

        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
        file.write("e\n\n")

    # Close the file only if we opened it ourselves.
    if fopened:
        file.close()
1797
1798
def print_single_ROC_curve_coordinates(file, curve):
    """Write the coordinates of one ROC curve, one tab-separated
    (x, y) pair per line."""
    import types
    opened_here = 0
    # Accept a file name as well as an open file (Python 2 string check).
    if (type(file) == types.StringType):
        file = open(file, "wt")
        opened_here = 1

    for point in curve:
        file.write("%5.3f\t%5.3f\n" % tuple(point))

    if opened_here:
        file.close()
1811
1812
def plot_ROC_learners(file, curves, learners):
    """Name each ROC curve after its learner and delegate to plot_ROC."""
    names = [Orange.misc.getobjectname(learner, "Learner %i" % i)
             for i, learner in enumerate(learners)]
    plot_ROC(file, curves, names)
1815
def plot_ROC(file, curves, legend):
    """Write a gnuplot script plotting the given ROC curves.

    :param file: output file name or an already-open file-like object.
    :param curves: list of curves, each a sequence of (x, y) pairs.
    :param legend: one title per curve.
    """
    import types
    fopened=0
    # Accept a file name as well as an open file (Python 2 string check).
    if (type(file)==types.StringType):
        file=open(file, "wt")
        fopened=1

    file.write("set yrange [0:1]\n")
    file.write("set xrange [0:1]\n")
    file.write("set multiplot\n\n")

    file.write("plot \\\n")
    for leg in legend:
        file.write("'-' title '%s' with lines,\\\n" % leg)
    # Extra untitled clause for the diagonal reference line written below.
    file.write("'-' title '' with lines\n")

    for curve in curves:
        for coord in curve:
            file.write("%5.3f\t%5.3f\n" % tuple(coord))
        file.write("e\n\n")

    # Diagonal (random-classifier) reference line. The original wrote
    # "0.0\t0.0e", fusing the last coordinate with the inline-data
    # terminator "e", which gnuplot rejects; the newline is restored here.
    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")

    if fopened:
        file.close()
1841
1842
@deprecated_keywords({"allResults": "all_results"})
def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference=-1):
    """Name each learner and delegate to plot_McNemar_curve."""
    names = [Orange.misc.getobjectname(learner, "Learner %i" % i)
             for i, learner in enumerate(learners)]
    plot_McNemar_curve(file, all_results, proportions, names, reference)
1846
1847
@deprecated_keywords({"allResults": "all_results"})
def plot_McNemar_curve(file, all_results, proportions, legend, reference=-1):
    """Write a gnuplot script of each learner's McNemar statistic against
    the reference learner (the last one by default) over training-set
    proportions.

    :param file: output file name or an already-open file-like object.
    :param all_results: experiment results, one entry per proportion.
    :param proportions: training-set proportions (the x-axis values).
    :param legend: learner names.
    :param reference: index of the reference learner; -1 means the last.
    """
    if reference<0:
        reference=len(legend)-1

    import types
    fopened=0
    # Accept a file name as well as an open file (Python 2 string check).
    if (type(file)==types.StringType):
        file=open(file, "wt")
        fopened=1

    file.write("set multiplot\n\n")
    file.write("plot \\\n")
    # All learner indices except the reference one.
    tmap=range(reference)+range(reference+1, len(legend))
    for i in tmap[:-1]:
        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    # Last clause: no trailing comma. The point type is i+1 here as well;
    # the original passed tmap[-1] without the +1, inconsistently with the
    # loop above, shifting the last curve's marker.
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1]+1))
    file.write("\n")

    for i in tmap:
        for p in range(len(proportions)):
            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
        file.write("e\n\n")

    if fopened:
        file.close()
1876
# PiCTeX marker symbols and line styles cycled through by the *_to_PiCTeX
# writers; after the five math symbols, lowercase letters chr(97)..chr(121)
# ('a'..'y') are used as point markers.
default_point_types=("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}")+tuple([chr(x) for x in range(97, 122)])
default_line_types=("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
1879
@deprecated_keywords({"allResults": "all_results"})
def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
    """Forward to learning_curve_to_PiCTeX (kept for backward
    compatibility)."""
    return learning_curve_to_PiCTeX(file, all_results, proportions, **options)
1883
1884
@deprecated_keywords({"allResults": "all_results"})
def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
    """Write a PiCTeX picture of the learning curves in `all_results`.

    :param file: output file name or an already-open file-like object.
    :param all_results: experiment results, one entry per proportion.
    :param proportions: training-set proportions (the x-axis values).
    :param options: recognized keys: ``graphsize`` (cm), ``yshift``,
        ``pointtypes``, ``linetypes``, ``numberedx`` (a positive int, a
        negative int or a list of floats) and ``default`` (y value of a
        dashed reference line).

    Two defects of the original body are fixed here: most ``file.write``
    calls were missing the opening quote of their format string (syntax
    errors), and the file was closed a second time unconditionally, which
    also closed file objects owned by the caller.
    """
    import types
    fopened = 0
    # Accept a file name as well as an open file (Python 2 string check).
    if (type(file) == types.StringType):
        file = open(file, "wt")
        fopened = 1

    nexamples = len(all_results[0].results)
    CAs = [CA_dev(x) for x in all_results]

    graphsize = float(options.get("graphsize", 10.0))  # in cm
    difprop = proportions[-1] - proportions[0]
    ntestexamples = nexamples * proportions[-1]
    xunit = graphsize / ntestexamples

    yshift = float(options.get("yshift", -ntestexamples / 20.))

    pointtypes = options.get("pointtypes", default_point_types)
    linetypes = options.get("linetypes", default_line_types)

    # Work out which x positions receive long, numbered ticks.
    if options.has_key("numberedx"):
        numberedx = options["numberedx"]
        if type(numberedx) == types.IntType:
            if numberedx > 0:
                # n proportions spread over the range, plus the last one.
                numberedx = [nexamples * proportions[int(i / float(numberedx) * len(proportions))] for i in range(numberedx)] + [proportions[-1] * nexamples]
            elif numberedx < 0:
                # -n: snap n+1 evenly spaced values to the nearest proportions.
                numberedx = -numberedx
                newn = []
                for i in range(numberedx + 1):
                    wanted = proportions[0] + float(i) / numberedx * difprop
                    best = (10, 0)
                    for t in proportions:
                        td = abs(wanted - t)
                        if td < best[0]:
                            best = (td, t)
                    if not best[1] in newn:
                        newn.append(best[1])
                newn.sort()
                numberedx = [nexamples * x for x in newn]
        elif type(numberedx[0]) == types.FloatType:
            numberedx = [nexamples * x for x in numberedx]
    else:
        numberedx = [nexamples * x for x in proportions]

    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
    file.write("\\axis bottom invisible\n")  # label {#examples}
    file.write("      ticks short at %s /\n" % reduce(lambda x, y: x + " " + y, ["%i" % (x * nexamples + 0.5) for x in proportions]))
    if numberedx:
        file.write("            long numbered at %s /\n" % reduce(lambda x, y: x + y, ["%i " % int(x + 0.5) for x in numberedx]))
    file.write("  /\n")
    file.write("\\axis left invisible\n")  # label {classification accuracy}
    file.write("      shiftedto y=%5.3f\n" % yshift)
    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
    file.write("  /\n")
    if options.has_key("default"):
        # Dashed horizontal reference line at the given accuracy.
        file.write("\\setdashpattern<1pt, 1pt>\n")
        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))

    # One curve per learner: a polyline and/or point markers.
    for i in range(len(CAs[0])):
        coordinates = reduce(lambda x, y: x + " " + y, ["%i %5.3f" % (proportions[p] * nexamples, CAs[p][i][0]) for p in range(len(proportions))])
        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %s /\n" % coordinates)
        if pointtypes:
            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))

    file.write("\\endpicture\n")
    file.write("}\n")
    # Close the file only if we opened it ourselves.
    if fopened:
        file.close()
1962
def legend_learners_to_PiCTeX(file, learners, **options):
    """Build legend labels from the learners' names and delegate to
    legend_to_PiCTeX."""
    names = [Orange.misc.getobjectname(learner, "Learner %i" % i)
             for i, learner in enumerate(learners)]
    return legend_to_PiCTeX(file, names, **options)
1965
def legend_to_PiCTeX(file, legend, **options):
    """Write a PiCTeX legend: a line/marker sample followed by each label.

    :param file: output file name or an already-open file-like object.
    :param legend: list of labels.
    :param options: ``pointtypes`` and ``linetypes`` sequences (module
        defaults are used when absent).

    The original body had the same corruption as learning_curve_to_PiCTeX:
    missing opening quotes in ``file.write`` calls and an unconditional
    second ``file.close()`` that also closed caller-owned files. Both are
    fixed here.
    """
    import types
    fopened = 0
    # Accept a file name as well as an open file (Python 2 string check).
    if (type(file) == types.StringType):
        file = open(file, "wt")
        fopened = 1

    pointtypes = options.get("pointtypes", default_point_types)
    linetypes = options.get("linetypes", default_line_types)

    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))

    for i in range(len(legend)):
        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i + 0.2))
        if pointtypes:
            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i + 0.1))
        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i + 0.25))

    file.write("\\endpicture\n")
    file.write("}\n")
    # Close the file only if we opened it ourselves.
    if fopened:
        file.close()
1995
1996
def compute_friedman(avranks, N):
    """Return ``((chi2, dof), (F, (dof1, dof2)))`` for the given average
    ranks over ``N`` tested data sets: the Friedman chi-square statistic
    with its degrees of freedom, and the Iman-Davenport F statistic with
    the degree-of-freedom pair of its F distribution.
    """
    k = len(avranks)

    # Friedman chi-square statistic.
    chi2 = 12 * N * (sum(r ** 2.0 for r in avranks)
                     - k * (k + 1) * (k + 1) / 4.0) / (k * (k + 1))
    # Iman-Davenport correction, distributed as F(k-1, (k-1)(N-1)).
    f_stat = (N - 1) * chi2 / (N * (k - 1) - chi2)

    return (chi2, k - 1), (f_stat, (k - 1, (k - 1) * (N - 1)))
2016
def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
    """Return the critical difference for the Nemenyi or Bonferroni-Dunn
    test for the given ``alpha`` ("0.05" or "0.1"), average ranks and
    number of tested data sets ``N``. ``type`` is "nemenyi" for the
    two-tailed Nemenyi test or "bonferroni-dunn" for the Bonferroni-Dunn
    test.
    """
    k = len(avranks)

    # Critical values indexed by the number of compared methods k.
    # Can be computed in R as qtukey(1 - alpha, k, Inf) / sqrt(2).
    critical_values = {
        ("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
            2.849705, 2.94832, 3.030879, 3.101730, 3.163684, 3.218654,
            3.268004, 3.312739, 3.353618, 3.39123, 3.426041, 3.458425,
            3.488685, 3.517073, 3.543799],
        ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
            2.588521, 2.692732, 2.779884, 2.854606, 2.919889, 2.977768,
            3.029694, 3.076733, 3.119693, 3.159199, 3.195743, 3.229723,
            3.261461, 3.291224, 3.319233],
        ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498,
            2.576, 2.638, 2.690, 2.724, 2.773],
        ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241,
            2.326, 2.394, 2.450, 2.498, 2.539],
    }
    q = critical_values[(type, alpha)][k]

    return q * (k * (k + 1) / (6.0 * N)) ** 0.5
2041
2042
def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
    """
    Draws a CD graph, which is used to display the differences in methods'
    performance.
    See Janez Demsar, Statistical Comparisons of Classifiers over
    Multiple Data Sets, 7(Jan):1--30, 2006.

    Needs matplotlib to work.

    :param filename: Output file name (with extension). Formats supported
                     by matplotlib can be used.
    :param avranks: List of average methods' ranks.
    :param names: List of methods' names.

    :param cd: Critical difference. Used for marking methods whose
               difference is not statistically significant.
    :param lowv: The lowest shown rank, if None, use 1.
    :param highv: The highest shown rank, if None, use len(avranks).
    :param width: Width of the drawn figure in inches, default 6 in.
    :param textspace: Space on figure sides left for the description
                      of methods, default 1 in.
    :param reverse:  If True, the lowest rank is on the right. Default: False.
    :param cdmethod: None by default. It can be an index of element in avranks
                     or names which specifies the method which should be
                     marked with an interval.
    """

    width = float(width)
    textspace = float(textspace)

    def nth(l,n):
        """
        Returns only nth elemnt in a list.
        """
        n = lloc(l,n)
        return [ a[n] for a in l ]

    def lloc(l,n):
        """
        List location in list of list structure.
        Enable the use of negative locations:
        -1 is the last element, -2 second last...
        """
        if n < 0:
            return len(l[0])+n
        else:
            return n

    def mxrange(lr):
        """
        Multiple xranges. Can be used to traverse matrices.
        This function is very slow due to unknown number of
        parameters.

        >>> mxrange([3,5])
        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

        >>> mxrange([[3,5,1],[9,0,-3]])
        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]

        """
        if len(lr) == 0:
            yield ()
        else:
            #it can work with single numbers
            index = lr[0]
            if type(1) == type(index):
                index = [ index ]
            for a in range(*index):
                for b in mxrange(lr[1:]):
                    yield tuple([a] + list(b))

    # matplotlib is an optional dependency: bail out with a message rather
    # than crash if it is not installed.
    try:
        from matplotlib.figure import Figure
        from matplotlib.patches import Polygon
        from matplotlib.backends.backend_agg import FigureCanvasAgg
    except:
        import sys
        print >> sys.stderr, "Function requires matplotlib. Please install it."
        return

    def print_figure(fig, *args, **kwargs):
        canvas = FigureCanvasAgg(fig)
        canvas.print_figure(*args, **kwargs)

    sums = avranks

    # Sort ranks (keeping original indices) so methods can be drawn in
    # rank order; `reverse` flips the axis direction.
    tempsort =  sorted([ (a,i) for i,a in  enumerate(sums) ], reverse=reverse)
    ssums = nth(tempsort, 0)
    sortidx = nth(tempsort, 1)
    nnames = [ names[x] for x in sortidx ]

    # Default axis bounds: at least the 1..k rank range, extended to cover
    # the actual ranks.
    if lowv == None:
        lowv = min(1, int(math.floor(min(ssums))))
    if highv == None:
        highv = max(len(avranks), int(math.ceil(max(ssums))))

    # Vertical position (in inches from the top) of the main rank axis.
    cline = 0.4

    k = len(sums)

    lines = None
    sums = sorted(sums)

    linesblank = 0
    scalewidth = width - 2*textspace

    def rankpos(rank):
        # Map a rank value to an x position on the figure, honouring the
        # `reverse` direction.
        if not reverse:
            a = rank - lowv
        else:
            a = highv - rank
        return textspace+scalewidth/(highv-lowv)*a

    distanceh = 0.25

    if cd and cdmethod == None:

        #get pairs of non significant methods

        def get_lines(sums, hsd):

            #get all pairs
            lsums = len(sums)
            allpairs = [ (i,j) for i,j in mxrange([[lsums], [lsums]]) if j > i ]
            #remove not significant
            notSig = [ (i,j) for i,j in allpairs if abs(sums[i]-sums[j]) <= hsd ]
            #keep only longest

            def no_longer((i,j), notSig):
                # True if (i,j) is not strictly contained in another
                # non-significant interval.
                for i1,j1 in notSig:
                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
                        return False
                return True

            longest = [ (i,j) for i,j in notSig if no_longer((i,j),notSig) ]

            return longest

        lines = get_lines(ssums, cd)
        linesblank = 0.2 + 0.2 + (len(lines)-1)*0.1

        # Reserve room above the axis for the CD ruler.
        distanceh = 0.25
        cline += distanceh

    #calculate height needed height of an image
    minnotsignificant = max(2*0.2, linesblank)
    height = cline + ((k+1)/2)*0.2 + minnotsignificant

    fig = Figure(figsize=(width, height))
    ax = fig.add_axes([0,0,1,1]) #reverse y axis
    ax.set_axis_off()

    hf = 1./height # height factor
    wf = 1./width

    def hfl(l):
        # Scale a list of y coordinates (inches) into figure fractions.
        return [ a*hf for a in l ]

    def wfl(l):
        # Scale a list of x coordinates (inches) into figure fractions.
        return [ a*wf for a in l ]

    """
    Upper left corner is (0,0).
    """

    ax.plot([0,1], [0,1], c="w")
    ax.set_xlim(0, 1)
    ax.set_ylim(1, 0)

    def line(l, color='k', **kwargs):
        """
        Input is a list of pairs of points.
        """
        ax.plot(wfl(nth(l,0)), hfl(nth(l,1)), color=color, **kwargs)

    def text(x, y, s, *args, **kwargs):
        ax.text(wf*x, hf*y, s, *args, **kwargs)

    # Main horizontal rank axis.
    line([(textspace, cline), (width-textspace, cline)], linewidth=0.7)

    bigtick = 0.1
    smalltick = 0.05


    import numpy

    # Tick marks every 0.5 rank; integer ranks get the big tick.
    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
        tick = smalltick
        if a == int(a): tick = bigtick
        line([(rankpos(a), cline-tick/2),(rankpos(a), cline)], linewidth=0.7)

    # NOTE(review): `tick` here is whatever value the loop above left it at.
    for a in range(lowv, highv+1):
        text(rankpos(a), cline-tick/2-0.05, str(a), ha="center", va="bottom")

    k = len(ssums)

    # First half of the methods is annotated on the left side...
    for i in range((k+1)/2):
        chei = cline+ minnotsignificant + (i)*0.2
        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace-0.1, chei)], linewidth=0.7)
        text(textspace-0.2, chei, nnames[i], ha="right", va="center")

    # ...and the second half on the right side.
    for i in range((k+1)/2, k):
        chei = cline + minnotsignificant + (k-i-1)*0.2
        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace+scalewidth+0.1, chei)], linewidth=0.7)
        text(textspace+scalewidth+0.2, chei, nnames[i], ha="left", va="center")

    if cd and cdmethod == None:

        #upper scale
        if not reverse:
            begin, end = rankpos(lowv), rankpos(lowv+cd)
        else:
            begin, end = rankpos(highv), rankpos(highv - cd)

        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
        line([(begin, distanceh + bigtick/2), (begin, distanceh - bigtick/2)], linewidth=0.7)
        line([(end, distanceh + bigtick/2), (end, distanceh - bigtick/2)], linewidth=0.7)
        text((begin+end)/2, distanceh - 0.05, "CD", ha="center", va="bottom")

        #non significance lines
        def draw_lines(lines, side=0.05, height=0.1):
            start = cline + 0.2
            for l,r in lines:
                line([(rankpos(ssums[l])-side, start), (rankpos(ssums[r])+side, start)], linewidth=2.5)
                start += height

        draw_lines(lines)

    elif cd:
        # Single interval of width 2*cd around the chosen method's rank.
        begin = rankpos(avranks[cdmethod]-cd)
        end = rankpos(avranks[cdmethod]+cd)
        line([(begin, cline), (end, cline)], linewidth=2.5)
        line([(begin, cline + bigtick/2), (begin, cline - bigtick/2)], linewidth=2.5)
        line([(end, cline + bigtick/2), (end, cline - bigtick/2)], linewidth=2.5)

    print_figure(fig, filename, **kwargs)
2281
def mlc_hamming_loss(res):
    """Hamming loss for multi-label classification, as presented by
    Schapire and Singer (2000):

    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{Y_i \\vartriangle Z_i}{|L|}`
    """
    n_labels = len(res.labels)
    n_examples = gettotsize(res)
    losses = [0] * res.number_of_learners

    for tex in res.results:
        true_labels = tex.actual_class
        for learner, predicted in enumerate(tex.classes):
            predicted = list(map(int, predicted))
            if len(predicted) != len(true_labels):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")
            # Count label positions where prediction and truth disagree.
            for j in range(n_labels):
                if predicted[j] != true_labels[j]:
                    losses[learner] += 1

    return [float(x) / (n_labels * n_examples) for x in losses]
2303
def mlc_accuracy(res, forgiveness_rate = 1.0):
    """Multi-label accuracy (Godbole & Sarawagi, 2004):

    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`

    generalized with the forgiveness rate :math:`\\alpha \\ge 0` of
    Boutell et al. (2004):

    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`
    """
    accuracies = [0.0] * res.number_of_learners
    n_labels = len(res.labels)
    n_examples = gettotsize(res)

    for tex in res.results:
        true_labels = tex.actual_class
        for learner, predicted in enumerate(tex.classes):
            predicted = list(map(int, predicted))
            if len(predicted) != len(true_labels):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # Per-example Jaccard overlap of predicted and true label sets.
            intersection = 0.0
            union = 0.0
            for pred_label, true_label in zip(predicted, true_labels):
                if pred_label and true_label:
                    intersection += 1
                if pred_label or true_label:
                    union += 1

            if union != 0:
                accuracies[learner] += intersection / union

    return [math.pow(x / n_examples, forgiveness_rate) for x in accuracies]
2338
def mlc_precision(res):
    """Multi-label precision:

    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`
    """
    precisions = [0.0] * res.number_of_learners
    n_labels = len(res.labels)
    n_examples = gettotsize(res)

    for tex in res.results:
        true_labels = tex.actual_class
        for learner, predicted in enumerate(tex.classes):
            predicted = list(map(int, predicted))
            if len(predicted) != len(true_labels):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # |Y ∩ Z| and |Z| (the number of predicted positive labels).
            intersection = 0.0
            n_predicted = 0.0
            for pred_label, true_label in zip(predicted, true_labels):
                if pred_label and true_label:
                    intersection += 1
                if pred_label:
                    n_predicted += 1
            if n_predicted != 0:
                precisions[learner] += intersection / n_predicted

    return [x / n_examples for x in precisions]
2365
def mlc_recall(res):
    """Multi-label recall:

    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`
    """
    recalls = [0.0] * res.number_of_learners
    n_labels = len(res.labels)
    n_examples = gettotsize(res)

    for tex in res.results:
        true_labels = tex.actual_class
        for learner, predicted in enumerate(tex.classes):
            predicted = list(map(int, predicted))
            if len(predicted) != len(true_labels):
                raise ValueError("The dimensions of the classified output and the actual class array do not match.")

            # |Y ∩ Z| and |Y| (the number of actual positive labels).
            intersection = 0.0
            n_actual = 0.0
            for pred_label, true_label in zip(predicted, true_labels):
                if pred_label and true_label:
                    intersection += 1
                if true_label:
                    n_actual += 1
            if n_actual != 0:
                recalls[learner] += intersection / n_actual

    return [x / n_examples for x in recalls]
2392
2393#def mlc_ranking_loss(res):
2394#    pass
2395#
2396#def mlc_average_precision(res):
2397#    pass
2398#
2399#def mlc_hierarchical_loss(res):
2400#    pass
2401
#########################################################################################
if __name__ == "__main__":
    # Smoke test: critical difference and Friedman statistics for a small
    # hand-made example with four methods compared over 14 data sets.
    avranks =  [3.143, 2.000, 2.893, 1.964]
    names = ["prva", "druga", "tretja", "cetrta" ]
    cd = compute_CD(avranks, 14)
    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
    print cd

    print compute_friedman(avranks, 14)

    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)
# Note: See TracBrowser for help on using the repository browser.