source: orange/Orange/evaluation/scoring.py @ 9999:3b8b4cc606c0

Revision 9999:3b8b4cc606c0, 87.1 KB checked in by anze <anze.staric@…>, 2 years ago (diff)

Improved documentation.

1import operator, math
2from operator import add
3import numpy
4
5import Orange
6from Orange import statc, corn
7from Orange.misc import deprecated_keywords
8
9#### Private stuff
10
11def log2(x):
12    """Calculate logarithm in base 2."""
13    return math.log(x)/math.log(2)
14
15def check_non_zero(x):
16    """Throw Value Error when x = 0.0."""
17    if x==0.0:
18        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."
19
20def gettotweight(res):
21    """Sum all the weights"""
22    totweight = reduce(lambda x, y: x+y.weight, res.results, 0)
23    if totweight==0.0:
24        raise ValueError, "Cannot compute the score: sum of weights is 0.0."
25    return totweight
26
27def gettotsize(res):
28    """ Get number of result instances """
29    if len(res.results):
30        return len(res.results)
31    else:
32        raise ValueError, "Cannot compute the score: no examples."
33
34
35def split_by_iterations(res):
36    """ Splits ExperimentResults of multiple iteratation test into a list
37    of ExperimentResults, one for each iteration.
38    """
39    if res.number_of_iterations < 2:
40        return [res]
41       
42    ress = [Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels)
43            for i in range(res.number_of_iterations)]
44    for te in res.results:
45        ress[te.iteration_number].results.append(te)
46    return ress
47
48def split_by_classifiers(res):
49    """ Splites an instance of :obj:`ExperimentResults` into a list of
50    :obj:`ExperimentResults`, one for each classifier.
51    """
52    split_res = []
53    for i in range(len(res.classifierNames)):
54        r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations,
55                    [res.classifierNames[i]], res.classValues,
56                    weights=res.weights, baseClass=res.baseClass,
57                    classifiers=[res.classifiers[i]] if res.classifiers else [],
58                    test_type = res.test_type, labels = res.labels)
59        r.results = []
60        for te in res.results:
61            r.results.append(Orange.evaluation.testing.TestedExample(te.iterationNumber,
62                                te.actualClass, n=1, weight=te.weight))
63            r.results[-1].classes = [te.classes[i]]
64            r.results[-1].probabilities = [te.probabilities[i]]
65        split_res.append(r)
66    return split_res
67
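# A minimal usage sketch for the two splitters above, assuming `res` holds
# results produced by the Orange 2.x testing API (e.g. cross-validation):
#
#     res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
#     per_fold = split_by_iterations(res)       # one ExperimentResults per fold
#     per_learner = split_by_classifiers(res)   # one ExperimentResults per learner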
68
69def class_probabilities_from_res(res, **argkw):
70    """Calculate class probabilities"""
71    probs = [0.0] * len(res.class_values)
72    if argkw.get("unweighted", 0) or not res.weights:
73        for tex in res.results:
74            probs[int(tex.actual_class)] += 1.0
75        totweight = gettotsize(res)
76    else:
77        totweight = 0.0
78        for tex in res.results:
79            probs[int(tex.actual_class)] += tex.weight
80            totweight += tex.weight
81        check_non_zero(totweight)
82    return [prob/totweight for prob in probs]
83
84
85@deprecated_keywords({
86    "foldN": "fold_n",
87    "reportSE": "report_se",
88    "iterationIsOuter": "iteration_is_outer"})
89def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
90    # remove empty folds, turn the matrix so that learner is outer
91    if iteration_is_outer:
92        if not stats:
93            raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."
94        number_of_learners = len(stats[0])
95        stats = filter(lambda (x, fN): fN>0.0, zip(stats,fold_n))
96        stats = [ [x[lrn]/fN for x, fN in stats] for lrn in range(number_of_learners)]
97    else:
98        stats = [ [x/Fn for x, Fn in filter(lambda (x, Fn): Fn > 0.0, zip(lrnD, fold_n))] for lrnD in stats]
99
100    if not stats:
101        raise ValueError, "Cannot compute the score: no classifiers"
102    if not stats[0]:
103        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."
104   
105    if report_se:
106        return [(statc.mean(x), statc.sterr(x)) for x in stats]
107    else:
108        return [statc.mean(x) for x in stats]
109   
110def ME(res, **argkw):
111    MEs = [0.0]*res.number_of_learners
112
113    if argkw.get("unweighted", 0) or not res.weights:
114        for tex in res.results:
115            MEs = map(lambda res, cls, ac = float(tex.actual_class):
116                      res + abs(float(cls) - ac), MEs, tex.classes)
117        totweight = gettotsize(res)
118    else:
119        for tex in res.results:
120            MEs = map(lambda res, cls, ac = float(tex.actual_class), tw = tex.weight:
121                       res + tw*abs(float(cls) - ac), MEs, tex.classes)
122        totweight = gettotweight(res)
123
124    return [x/totweight for x in MEs]
125
126MAE = ME
127
128#########################################################################
129# PERFORMANCE MEASURES:
130# Scores for evaluation of numeric predictions
131
132def check_argkw(dct, lst):
133    """check_argkw(dct, lst) -> returns true if any items have non-zero value in dct"""
134    return reduce(lambda x,y: x or y, [dct.get(k, 0) for k in lst])
135
136def regression_error(res, **argkw):
137    """regression_error(res) -> regression error (default: MSE)"""
138    if argkw.get("SE", 0) and res.number_of_iterations > 1:
139        # computes the scores for each iteration, then averages
140        scores = [[0.0] * res.number_of_iterations for i in range(res.number_of_learners)]
141        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
142            norm = [0.0] * res.number_of_iterations
143
144        nIter = [0]*res.number_of_iterations       # counts examples in each iteration
145        a = [0]*res.number_of_iterations           # average class in each iteration
146        for tex in res.results:
147            nIter[tex.iteration_number] += 1
148            a[tex.iteration_number] += float(tex.actual_class)
149        a = [a[i]/nIter[i] for i in range(res.number_of_iterations)]
150
151        if argkw.get("unweighted", 0) or not res.weights:
152            # iterate across test cases
153            for tex in res.results:
154                ai = float(tex.actual_class)
156                # nIter was already counted in the first pass above
156
157                # compute normalization, if required
158                if argkw.get("norm-abs", 0):
159                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
160                elif argkw.get("norm-sqr", 0):
161                    norm[tex.iteration_number] += (ai - a[tex.iteration_number])**2
162
163                # iterate across results of different regressors
164                for i, cls in enumerate(tex.classes):
165                    if argkw.get("abs", 0):
166                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
167                    else:
168                        scores[i][tex.iteration_number] += (float(cls) - ai)**2
169        else: # weighted
170            raise NotImplementedError, "weighted error scores with SE not implemented yet"
171
172        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
173            scores = [[x/n for x, n in zip(y, norm)] for y in scores]
174        else:
175            scores = [[x/ni for x, ni in zip(y, nIter)] for y in scores]
176
177        if argkw.get("R2"):
178            scores = [[1.0 - x for x in y] for y in scores]
179
180        if argkw.get("sqrt", 0):
181            scores = [[math.sqrt(x) for x in y] for y in scores]
182
183        return [(statc.mean(x), statc.std(x)) for x in scores]
184       
185    else: # single iteration (testing on a single test set)
186        scores = [0.0] * res.number_of_learners
187        norm = 0.0
188
189        if argkw.get("unweighted", 0) or not res.weights:
190            a = sum([tex.actual_class for tex in res.results]) \
191                / len(res.results)
192            for tex in res.results:
193                if argkw.get("abs", 0):
194                    scores = map(lambda res, cls, ac = float(tex.actual_class):
195                                 res + abs(float(cls) - ac), scores, tex.classes)
196                else:
197                    scores = map(lambda res, cls, ac = float(tex.actual_class):
198                                 res + (float(cls) - ac)**2, scores, tex.classes)
199
200                if argkw.get("norm-abs", 0):
201                    norm += abs(tex.actual_class - a)
202                elif argkw.get("norm-sqr", 0):
203                    norm += (tex.actual_class - a)**2
204            totweight = gettotsize(res)
205        else:
206            # weighted: accumulate weighted absolute or squared errors into scores
207            for tex in res.results:
208                scores = map(lambda res, cls, ac = float(tex.actual_class),
209                           tw = tex.weight: res + tw * (abs(float(cls) - ac)
210                           if argkw.get("abs", 0) else (float(cls) - ac)**2), scores, tex.classes)
211            totweight = gettotweight(res)
212
213        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
214            scores = [s/norm for s in scores]
215        else: # normalize by number of instances (or sum of weights)
216            scores = [s/totweight for s in scores]
217
218        if argkw.get("R2"):
219            scores = [1.0 - s for s in scores]
220
221        if argkw.get("sqrt", 0):
222            scores = [math.sqrt(x) for x in scores]
223
224        return scores
225
226def MSE(res, **argkw):
227    """ Computes mean-squared error. """
228    return regression_error(res, **argkw)
229   
230def RMSE(res, **argkw):
231    """ Computes root mean-squared error. """
232    argkw.setdefault("sqrt", True)
233    return regression_error(res, **argkw)
234
235def MAE(res, **argkw):
236    """ Computes mean absolute error. """
237    argkw.setdefault("abs", True)
238    return regression_error(res, **argkw)
239
240def RSE(res, **argkw):
241    """ Computes relative squared error. """
242    argkw.setdefault("norm-sqr", True)
243    return regression_error(res, **argkw)
244
245def RRSE(res, **argkw):
246    """ Computes relative squared error. """
247    argkw.setdefault("norm-sqr", True)
248    argkw.setdefault("sqrt", True)
249    return regression_error(res, **argkw)
250
251def RAE(res, **argkw):
252    """ Computes relative absolute error. """
253    argkw.setdefault("abs", True)
254    argkw.setdefault("norm-abs", True)
255    return regression_error(res, **argkw)
256
257def R2(res, **argkw):
258    """ Computes the coefficient of determination, R-squared. """
259    argkw.setdefault("norm-sqr", True)
260    argkw.setdefault("R2", True)
261    return regression_error(res, **argkw)
262
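# A minimal sketch of computing the regression scores defined above, assuming
# the Orange 2.x cross-validation API, the bundled "housing" data set and a
# regression learner such as Orange.regression.linear.LinearRegressionLearner:
#
#     data = Orange.data.Table("housing")
#     learners = [Orange.regression.linear.LinearRegressionLearner()]
#     res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
#     print "RMSE: %.3f  MAE: %.3f  R2: %.3f" % \
#           (RMSE(res)[0], MAE(res)[0], R2(res)[0])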
263def MSE_old(res, **argkw):
264    """MSE(res) -> mean-squared error"""
265    if argkw.get("SE", 0) and res.number_of_iterations > 1:
266        MSEs = [[0.0] * res.number_of_iterations for i in range(res.number_of_learners)]
267        nIter = [0]*res.number_of_iterations
268        if argkw.get("unweighted", 0) or not res.weights:
269            for tex in res.results:
270                ac = float(tex.actual_class)
271                nIter[tex.iteration_number] += 1
272                for i, cls in enumerate(tex.classes):
273                    MSEs[i][tex.iteration_number] += (float(cls) - ac)**2
274        else:
275            raise ValueError, "weighted RMSE with SE not implemented yet"
276        MSEs = [[x/ni for x, ni in zip(y, nIter)] for y in MSEs]
277        if argkw.get("sqrt", 0):
278            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
279        return [(statc.mean(x), statc.std(x)) for x in MSEs]
280       
281    else:
282        MSEs = [0.0]*res.number_of_learners
283        if argkw.get("unweighted", 0) or not res.weights:
284            for tex in res.results:
285                MSEs = map(lambda res, cls, ac = float(tex.actual_class):
286                           res + (float(cls) - ac)**2, MSEs, tex.classes)
287            totweight = gettotsize(res)
288        else:
289            for tex in res.results:
290                MSEs = map(lambda res, cls, ac = float(tex.actual_class), tw = tex.weight:
291                           res + tw * (float(cls) - ac)**2, MSEs, tex.classes)
292            totweight = gettotweight(res)
293
294        MSEs = [x/totweight for x in MSEs]
295        # take the square root only after normalization, so RMSE_old is sqrt(mean error)
296        return [math.sqrt(x) for x in MSEs] if argkw.get("sqrt", 0) else MSEs
297
298def RMSE_old(res, **argkw):
299    """RMSE(res) -> root mean-squared error"""
300    argkw.setdefault("sqrt", 1)
301    return MSE_old(res, **argkw)
302
303#########################################################################
304# PERFORMANCE MEASURES:
305# Scores for evaluation of classifiers
306
307@deprecated_keywords({"reportSE": "report_se"})
308def CA(test_results, report_se = False, **argkw):
309    """Return percentage of matches between predicted and actual class.
310
311    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
312                         or :obj:`ConfusionMatrix`.
313    :param report_se: include standard error in result.
314    :rtype: list of scores, one for each learner.
315
316    Standard errors are estimated from deviation of CAs across folds (if
317    test_results were produced by cross_validation) or approximated under
318    the assumption of normal distribution otherwise.
319    """
320    if isinstance(test_results, list) and len(test_results) > 0 \
321                             and isinstance(test_results[0], ConfusionMatrix):
322        results = []
323        for cm in test_results:
324            div = cm.TP+cm.FN+cm.FP+cm.TN
325            check_non_zero(div)
326            results.append((cm.TP+cm.TN)/div)
327        return results
328    elif test_results.number_of_iterations==1:
329        CAs = [0.0]*test_results.number_of_learners
330        if argkw.get("unweighted", 0) or not test_results.weights:
331            totweight = gettotsize(test_results)
332            for tex in test_results.results:
333                CAs = map(lambda res, cls: res+(cls==tex.actual_class), CAs, tex.classes)
334        else:
335            totweight = 0.
336            for tex in test_results.results:
337                CAs = map(lambda res, cls: res+(cls==tex.actual_class and tex.weight), CAs, tex.classes)
338                totweight += tex.weight
339        check_non_zero(totweight)
340        ca = [x/totweight for x in CAs]
341           
342        if report_se:
343            return [(x, math.sqrt(x*(1-x)/totweight)) for x in ca]  # binomial standard error
344        else:
345            return ca
346       
347    else:
348        CAsByFold = [[0.0]*test_results.number_of_iterations for i in range(test_results.number_of_learners)]
349        foldN = [0.0]*test_results.number_of_iterations
350
351        if argkw.get("unweighted", 0) or not test_results.weights:
352            for tex in test_results.results:
353                for lrn in range(test_results.number_of_learners):
354                    CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class)
355                foldN[tex.iteration_number] += 1
356        else:
357            for tex in test_results.results:
358                for lrn in range(test_results.number_of_learners):
359                    CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class) and tex.weight
360                foldN[tex.iteration_number] += tex.weight
361
362        return statistics_by_folds(CAsByFold, foldN, report_se, False)
363
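# A minimal sketch of computing classification accuracy, assuming the Orange 2.x
# cross-validation API and the bundled "voting" data set:
#
#     data = Orange.data.Table("voting")
#     learners = [Orange.classification.bayes.NaiveLearner(),
#                 Orange.classification.tree.TreeLearner()]
#     res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
#     print CA(res)                    # one score per learner
#     print CA(res, report_se=True)    # (accuracy, standard error) pairs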
364
365# Obsolete, but kept for compatibility
366def CA_se(res, **argkw):
367    return CA(res, True, **argkw)
368
369@deprecated_keywords({"reportSE": "report_se"})
370def AP(res, report_se = False, **argkw):
371    """ Computes the average probability assigned to the correct class. """
372    if res.number_of_iterations == 1:
373        APs=[0.0]*res.number_of_learners
374        if argkw.get("unweighted", 0) or not res.weights:
375            for tex in res.results:
376                APs = map(lambda res, probs: res + probs[tex.actual_class], APs, tex.probabilities)
377            totweight = gettotsize(res)
378        else:
379            totweight = 0.
380            for tex in res.results:
381                APs = map(lambda res, probs: res + probs[tex.actual_class]*tex.weight, APs, tex.probabilities)
382                totweight += tex.weight
383        check_non_zero(totweight)
384        return [AP/totweight for AP in APs]
385
386    APsByFold = [[0.0]*res.number_of_learners for i in range(res.number_of_iterations)]
387    foldN = [0.0] * res.number_of_iterations
388    if argkw.get("unweighted", 0) or not res.weights:
389        for tex in res.results:
390            APsByFold[tex.iteration_number] = map(lambda res, probs: res + probs[tex.actual_class], APsByFold[tex.iteration_number], tex.probabilities)
391            foldN[tex.iteration_number] += 1
392    else:
393        for tex in res.results:
394            APsByFold[tex.iteration_number] = map(lambda res, probs: res + probs[tex.actual_class] * tex.weight, APsByFold[tex.iteration_number], tex.probabilities)
395            foldN[tex.iteration_number] += tex.weight
396
397    return statistics_by_folds(APsByFold, foldN, report_se, True)
398
399
400@deprecated_keywords({"reportSE": "report_se"})
401def Brier_score(res, report_se = False, **argkw):
402    """ Computes the Brier's score, defined as the average (over test examples)
403    of sumx(t(x)-p(x))2, where x is a class, t(x) is 1 for the correct class
404    and 0 for the others, and p(x) is the probability that the classifier
405    assigned to the class x
406    """
407    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
408    #    x is class,
409    #    t(x) is 0 for 'wrong' and 1 for 'correct' class
410    #    p(x) is predicted probabilty.
411    # There's a trick: since t(x) is zero for all classes but the
412    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1
413    # Since +1 is there for each example, it adds 1 to the average
414    # We skip the +1 inside the sum and add it just at the end of the function
415    # We take max(result, 0) to avoid -0.0000x due to rounding errors
416
417    if res.number_of_iterations == 1:
418        MSEs=[0.0]*res.number_of_learners
419        if argkw.get("unweighted", 0) or not res.weights:
420            totweight = 0.0
421            for tex in res.results:
422                MSEs = map(lambda res, probs:
423                           res + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities)
424                totweight += 1.0  # unweighted: count instances
425        else:
426            for tex in res.results:
427                MSEs = map(lambda res, probs:
428                           res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities)
429            totweight = gettotweight(res)
430        check_non_zero(totweight)
431        if report_se:
432            return [(max(x/totweight+1.0, 0), 0) for x in MSEs]  ## change this, not zero!!!
433        else:
434            return [max(x/totweight+1.0, 0) for x in MSEs]
435
436    BSs = [[0.0]*res.number_of_learners for i in range(res.number_of_iterations)]
437    foldN = [0.] * res.number_of_iterations
438
439    if argkw.get("unweighted", 0) or not res.weights:
440        for tex in res.results:
441            BSs[tex.iteration_number] = map(lambda rr, probs:
442                       rr + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities)
443            foldN[tex.iteration_number] += 1
444    else:
445        for tex in res.results:
446            BSs[tex.iteration_number] = map(lambda res, probs:
447                       res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities)
448            foldN[tex.iteration_number] += tex.weight
449
450    stats = statistics_by_folds(BSs, foldN, report_se, True)
451    if report_se:
452        return [(x+1.0, y) for x, y in stats]
453    else:
454        return [x+1.0 for x in stats]
455
456def BSS(res, **argkw):
457    return [1-x/2 for x in apply(Brier_score, (res, ), argkw)]
458
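# A minimal sketch, assuming `res` holds classification test results as above;
# Brier_score returns one value per learner (lower is better) and BSS the
# transform 1 - Brier/2 computed from it:
#
#     print Brier_score(res)
#     print BSS(res)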
459def IS_ex(Pc, P):
460    "Pc aposterior probability, P aprior"
461    if (Pc>=P):
462        return -log2(P)+log2(Pc)
463    else:
464        return -(-log2(1-P)+log2(1-Pc))
465
466
467@deprecated_keywords({"reportSE": "report_se"})
468def IS(res, apriori=None, report_se = False, **argkw):
469    """ Computes the information score as defined by
470    `Kononenko and Bratko (1991) \
471    <http://www.springerlink.com/content/g5p7473160476612/>`_.
472    Argument :obj:`apriori` gives the prior class
473    distribution; if it is omitted, the class distribution is computed from
474    the actual classes of examples in :obj:`res`.
475    """
476    if not apriori:
477        apriori = class_probabilities_from_res(res)
478
479    if res.number_of_iterations==1:
480        ISs = [0.0]*res.number_of_learners
481        if argkw.get("unweighted", 0) or not res.weights:
482            for tex in res.results:
483            for i in range(len(tex.probabilities)):
484                cls = tex.actual_class
485                ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
486            totweight = gettotsize(res)
487        else:
488            for tex in res.results:
489            for i in range(len(tex.probabilities)):
490                cls = tex.actual_class
491                ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
492            totweight = gettotweight(res)
493        if report_se:
494            return [(IS/totweight,0) for IS in ISs]
495        else:
496            return [IS/totweight for IS in ISs]
497
498       
499    ISs = [[0.0]*res.number_of_iterations for i in range(res.number_of_learners)]
500    foldN = [0.] * res.number_of_iterations
501
502    # compute info scores for each fold   
503    if argkw.get("unweighted", 0) or not res.weights:
504        for tex in res.results:
505            for i in range(len(tex.probabilities)):
506                cls = tex.actual_class
507                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls])
508            foldN[tex.iteration_number] += 1
509    else:
510        for tex in res.results:
511            for i in range(len(tex.probabilities)):
512                cls = tex.actual_class
513                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
514            foldN[tex.iteration_number] += tex.weight
515
516    return statistics_by_folds(ISs, foldN, report_se, False)
517
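# A minimal sketch, assuming `res` holds classification test results as above;
# the prior may be given explicitly (e.g. a uniform prior for a binary problem)
# or omitted, in which case it is estimated from `res` itself:
#
#     print IS(res)
#     print IS(res, apriori=[0.5, 0.5])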
518
519def Friedman(res, statistics, **argkw):
520    sums = None
521    for ri in split_by_iterations(res):
522        ranks = statc.rankdata(apply(statistics, (ri,), argkw))
523        if sums:
524            sums = sums and [ranks[i]+sums[i] for i in range(k)]
525        else:
526            sums = ranks
527            k = len(sums)
528    N = res.number_of_iterations
529    k = len(sums)
530    T = sum([x*x for x in sums])
531    F = 12.0 / (N*k*(k+1)) * T  - 3 * N * (k+1)
532    return F, statc.chisqprob(F, k-1)
533   
534
535def Wilcoxon(res, statistics, **argkw):
536    res1, res2 = [], []
537    for ri in split_by_iterations(res):
538        stats = apply(statistics, (ri,), argkw)
539        if (len(stats) != 2):
540            raise TypeError, "Wilcoxon compares two classifiers, no more, no less"
541        res1.append(stats[0])
542        res2.append(stats[1])
543    return statc.wilcoxont(res1, res2)
544
545def rank_difference(res, statistics, **argkw):
546    if not res.results:
547        raise TypeError, "no experiments"
548
549    k = len(res.results[0].classes)
550    if (k<2):
551        raise TypeError, "nothing to compare (less than two classifiers given)"
552    if k==2:
553        return apply(Wilcoxon, (res, statistics), argkw)
554    else:
555        return apply(Friedman, (res, statistics), argkw)
556   
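# A minimal sketch, assuming `res` holds cross-validation results for two or
# more learners; rank_difference dispatches to Wilcoxon for two learners and to
# Friedman otherwise, using any per-fold score such as CA:
#
#     statistic, p_value = rank_difference(res, CA)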
557class ConfusionMatrix:
558    """
559    Classification result summary
560
561    .. attribute:: TP
562
563        True Positive predictions
564
565    .. attribute:: TN
566
567        True Negative predictions
568
569    .. attribute:: FP
570
571        False Positive predictions
572
573    .. attribute:: FN
574
575        False Negative predictions
576    """
577    def __init__(self):
578        self.TP = self.FN = self.FP = self.TN = 0.0
579
580    @deprecated_keywords({"predictedPositive": "predicted_positive",
581                          "isPositive": "is_positive"})
582    def addTFPosNeg(self, predicted_positive, is_positive, weight = 1.0):
583        """
584        Update confusion matrix with result of a single classification
585
586        :param predicted_positive: positive class value was predicted
587        :param is_positive: correct class value is positive
588        :param weight: weight of the selected instance
589         """
590        if predicted_positive:
591            if is_positive:
592                self.TP += weight
593            else:
594                self.FP += weight
595        else:
596            if is_positive:
597                self.FN += weight
598            else:
599                self.TN += weight
600
601@deprecated_keywords({"res": "test_results",
602                      "classIndex": "class_index"})
603def confusion_matrices(test_results, class_index=1,
604                       unweighted=False, cutoff=.5):
605    """
606    Return confusion matrices for test_results.
607
608    :param test_results: test results
609    :param class_index: index of class value for which the confusion matrices
610                        are to be computed.
611    :param unweighted: ignore instance weights.
612    :param cutoff: threshold for the predicted probability of the chosen class.
613
614    :rtype: list of :obj:`ConfusionMatrix`
615    """
616    tfpns = [ConfusionMatrix() for i in range(test_results.number_of_learners)]
617   
618    if class_index<0:
619        numberOfClasses = len(test_results.class_values)
620        if class_index < -1 or numberOfClasses > 2:
621            cm = [[[0.0] * numberOfClasses for i in range(numberOfClasses)] for l in range(test_results.number_of_learners)]
622            if unweighted or not test_results.weights:
623                for tex in test_results.results:
624                    trueClass = int(tex.actual_class)
625                    for li, pred in enumerate(tex.classes):
626                        predClass = int(pred)
627                        if predClass < numberOfClasses:
628                            cm[li][trueClass][predClass] += 1
629            else:
630                for tex in test_results.results:
631                    trueClass = int(tex.actual_class)
632                    for li, pred in enumerate(tex.classes):
633                        predClass = int(pred)
634                        if predClass < numberOfClasses:
635                            cm[li][trueClass][predClass] += tex.weight
636            return cm
637           
638        elif test_results.baseClass>=0:
639            class_index = test_results.baseClass
640        else:
641            class_index = 1
642
643    if cutoff != .5:
644        if unweighted or not test_results.weights:
645            for lr in test_results.results:
646                isPositive=(lr.actual_class==class_index)
647                for i in range(test_results.number_of_learners):
648                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index]>cutoff, isPositive)
649        else:
650            for lr in test_results.results:
651                isPositive=(lr.actual_class==class_index)
652                for i in range(test_results.number_of_learners):
653                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index]>cutoff, isPositive, lr.weight)
654    else:
655        if unweighted or not test_results.weights:
656            for lr in test_results.results:
657                isPositive=(lr.actual_class==class_index)
658                for i in range(test_results.number_of_learners):
659                    tfpns[i].addTFPosNeg(lr.classes[i]==class_index, isPositive)
660        else:
661            for lr in test_results.results:
662                isPositive=(lr.actual_class==class_index)
663                for i in range(test_results.number_of_learners):
664                    tfpns[i].addTFPosNeg(lr.classes[i]==class_index, isPositive, lr.weight)
665    return tfpns
666
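# A minimal sketch, assuming binary classification results in `res` (e.g. the
# "voting" data set scored by cross-validation as above):
#
#     cms = confusion_matrices(res, class_index=1)
#     print sens(cms), spec(cms), precision(cms), recall(cms), F1(cms)
#     # class_index=-2 returns one non-binary (k x k) matrix per learner instead
#     multi = confusion_matrices(res, class_index=-2)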
667
668# obsolete (renamed)
669compute_confusion_matrices = confusion_matrices
670
671
672@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
673def confusion_chi_square(confusion_matrix):
674    """
675    Return the chi-square statistic of the confusion matrix
676    (higher value indicates that prediction is not by chance).
677    """
678    if isinstance(confusion_matrix, ConfusionMatrix) or \
679       not isinstance(confusion_matrix[1], list):
680        return _confusion_chi_square(confusion_matrix)
681    else:
682        return map(_confusion_chi_square, confusion_matrix)
683
684def _confusion_chi_square(confusion_matrix):
685    if isinstance(confusion_matrix, ConfusionMatrix):
686        c = confusion_matrix
687        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]]
688    dim = len(confusion_matrix)
689    rowPriors = [sum(r) for r in confusion_matrix]
690    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)]
691    total = sum(rowPriors)
692    rowPriors = [r/total for r in rowPriors]
693    colPriors = [r/total for r in colPriors]
694    ss = 0
695    for ri, row in enumerate(confusion_matrix):
696        for ci, o in enumerate(row):
697            e = total * rowPriors[ri] * colPriors[ci]
698            if not e:
699                return -1, -1, -1
700            ss += (o-e)**2 / e
701    df = (dim-1)**2
702    return ss, df, statc.chisqprob(ss, df)
703
704@deprecated_keywords({"confm": "confusion_matrix"})
705def sens(confusion_matrix):
706    """
707    Return `sensitivity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
708    (proportion of actual positives which are correctly identified as such).
709    """
710    if type(confusion_matrix) == list:
711        return [sens(cm) for cm in confusion_matrix]
712    else:
713        tot = confusion_matrix.TP+confusion_matrix.FN
714        if tot < 1e-6:
715            import warnings
716            warnings.warn("Can't compute sensitivity: one or both classes have no instances")
717            return -1
718
719        return confusion_matrix.TP/tot
720
721
722@deprecated_keywords({"confm": "confusion_matrix"})
723def recall(confusion_matrix):
724    """
725    Return `recall <http://en.wikipedia.org/wiki/Precision_and_recall>`_
726    (fraction of relevant instances that are retrieved).
727    """
728    return sens(confusion_matrix)
729
730
731@deprecated_keywords({"confm": "confusion_matrix"})
732def spec(confusion_matrix):
733    """
734    Return `specificity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
735    (proportion of negatives which are correctly identified).
736    """
737    if type(confusion_matrix) == list:
738        return [spec(cm) for cm in confusion_matrix]
739    else:
740        tot = confusion_matrix.FP+confusion_matrix.TN
741        if tot < 1e-6:
742            import warnings
743            warnings.warn("Can't compute specificity: one or both classes have no instances")
744            return -1
745        return confusion_matrix.TN/tot
746
747
748@deprecated_keywords({"confm": "confusion_matrix"})
749def PPV(confusion_matrix):
750    """
751    Return `positive predictive value <http://en.wikipedia.org/wiki/Positive_predictive_value>`_
752    (proportion of subjects with positive test results who are correctly diagnosed)."""
753    if type(confusion_matrix) == list:
754        return [PPV(cm) for cm in confusion_matrix]
755    else:
756        tot = confusion_matrix.TP+confusion_matrix.FP
757        if tot < 1e-6:
758            import warnings
759            warnings.warn("Can't compute PPV: one or both classes have no instances")
760            return -1
761        return confusion_matrix.TP/tot
762
763
764@deprecated_keywords({"confm": "confusion_matrix"})
765def precision(confusion_matrix):
766    """
767    Return `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_
768    (retrieved instances that are relevant).
769    """
770    return PPV(confusion_matrix)
771
772@deprecated_keywords({"confm": "confusion_matrix"})
773def NPV(confusion_matrix):
774    """Return `negative predictive value <http://en.wikipedia.org/wiki/Negative_predictive_value>`_
775     (proportion of subjects with a negative test result who are correctly
776     diagnosed).
777     """
778    if type(confusion_matrix) == list:
779        return [NPV(cm) for cm in confusion_matrix]
780    else:
781        tot = confusion_matrix.FN+confusion_matrix.TN
782        if tot < 1e-6:
783            import warnings
784            warnings.warn("Can't compute NPV: one or both classes have no instances")
785            return -1
786        return confusion_matrix.TN/tot
787
788@deprecated_keywords({"confm": "confusion_matrix"})
789def F1(confusion_matrix):
790    """Return `F1 score <http://en.wikipedia.org/wiki/F1_score>`_
791    (harmonic mean of precision and recall)."""
792    if type(confusion_matrix) == list:
793        return [F1(cm) for cm in confusion_matrix]
794    else:
795        p = precision(confusion_matrix)
796        r = recall(confusion_matrix)
797        if p + r > 0:
798            return 2. * p * r / (p + r)
799        else:
800            import warnings
801            warnings.warn("Can't compute F1: P + R is zero or not defined")
802            return -1
803
804
805@deprecated_keywords({"confm": "confusion_matrix"})
806def Falpha(confusion_matrix, alpha=1.0):
807    """Return the alpha-mean of precision and recall over the given confusion matrix."""
808    if type(confusion_matrix) == list:
809        return [Falpha(cm, alpha=alpha) for cm in confusion_matrix]
810    else:
811        p = precision(confusion_matrix)
812        r = recall(confusion_matrix)
813        return (1. + alpha) * p * r / (alpha * p + r)
814
815
816@deprecated_keywords({"confm": "confusion_matrix"})
817def MCC(confusion_matrix):
818    """
819    Return the `Matthews correlation coefficient <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_
820    (correlation coefficient between the observed and predicted binary classifications)
821    """
822    # code by Boris Gorelik
823    if type(confusion_matrix) == list:
824        return [MCC(cm) for cm in confusion_matrix]
825    else:
826        truePositive = confusion_matrix.TP
827        trueNegative = confusion_matrix.TN
828        falsePositive = confusion_matrix.FP
829        falseNegative = confusion_matrix.FN
830         
831        try:   
832            r = (((truePositive * trueNegative) - (falsePositive * falseNegative))/ 
833                math.sqrt(  (truePositive + falsePositive)  * 
834                ( truePositive + falseNegative ) * 
835                ( trueNegative + falsePositive ) * 
836                ( trueNegative + falseNegative ) )
837                )
838        except ZeroDivisionError:
839            # Zero division occurs when there are either no true positives
840            # or no true negatives, i.e. the problem contains only one
841            # class.
842            import warnings
843            warnings.warn("Can't compute MCC: TP or TN is zero or not defined")
844            r = None
845
846    return r
847
848
849@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
850def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
851   """Compute Scott's Pi for measuring inter-rater agreement for nominal data
852
853   http://en.wikipedia.org/wiki/Scott%27s_Pi
854   Scott's Pi is a statistic for measuring inter-rater reliability for nominal
855   raters.
856
857   :param confusion_matrix: confusion matrix, or list of confusion matrices. To obtain
858                           a non-binary confusion matrix, call
859                           Orange.evaluation.scoring.confusion_matrices and set the
860                           class_index parameter to -2.
861   :param b_is_list_of_matrices: specifies whether confusion_matrix is a list of matrices.
862                           This function needs to operate on non-binary
863                           confusion matrices, which are represented by python
864                           lists, therefore one needs a way to distinguish
865                           between a single matrix and a list of matrices.
866   """
867
868   if b_is_list_of_matrices:
869       try:
870           return [scotts_pi(cm, b_is_list_of_matrices=False) for cm in confusion_matrix]
871       except TypeError:
872           # Nevermind the parameter, maybe this is a "conventional" binary
873           # confusion matrix and bIsListOfMatrices was specified by mistake
874           return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
875   else:
876       if isinstance(confusion_matrix, ConfusionMatrix):
877           confusion_matrix = numpy.array( [[confusion_matrix.TP, confusion_matrix.FN],
878                   [confusion_matrix.FP, confusion_matrix.TN]], dtype=float)
879       else:
880           confusion_matrix = numpy.array(confusion_matrix, dtype=float)
881
882       marginalSumOfRows = numpy.sum(confusion_matrix, axis=0)
883       marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1)
884       jointProportion = (marginalSumOfColumns + marginalSumOfRows)/ \
885                           (2.0 * numpy.sum(confusion_matrix, axis=None))
886       # In the eq. above, 2.0 is what the Wikipedia page calls
887       # the number of annotators. Here we have two annotators:
888       # the observed (true) labels (annotations) and the predicted by
889       # the learners.
890
891       prExpected = numpy.sum(jointProportion ** 2, axis=None)
892       prActual = numpy.sum(numpy.diag(confusion_matrix), axis=None)/numpy.sum(confusion_matrix, axis=None)
893
894       ret = (prActual - prExpected) / (1.0 - prExpected)
895       return ret
896
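# A minimal sketch, assuming non-binary confusion matrices obtained as above
# with class_index=-2:
#
#     print scotts_pi(confusion_matrices(res, class_index=-2))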
897@deprecated_keywords({"classIndex": "class_index"})
898def AUCWilcoxon(res, class_index=-1, **argkw):
899    """ Computes the area under ROC (AUC) and its standard error using
900    Wilcoxon's approach proposed by Hanley and McNeil (1982). If
901    :obj:`class_index` is not specified, the first class is used as
902    "the positive" and others are negative. The result is a list of
903    tuples (aROC, standard error).
904    """
905    useweights = res.weights and not argkw.get("unweighted", 0)
906    problists, tots = corn.computeROCCumulative(res, class_index, useweights)
907
908    results=[]
909
910    totPos, totNeg = tots[1], tots[0]
911    N = totPos + totNeg
912    for plist in problists:
913        highPos, lowNeg = totPos, 0.0
914        W, Q1, Q2 = 0.0, 0.0, 0.0
915        for prob in plist:
916            thisPos, thisNeg = prob[1][1], prob[1][0]
917            highPos -= thisPos
918            W += thisNeg * (highPos + thisPos/2.)
919            Q2 += thisPos * (lowNeg**2  + lowNeg*thisNeg  + thisNeg**2 /3.)
920            Q1 += thisNeg * (highPos**2 + highPos*thisPos + thisPos**2 /3.)
921
922            lowNeg += thisNeg
923
924        W  /= (totPos*totNeg)
925        Q1 /= (totNeg*totPos**2)
926        Q2 /= (totPos*totNeg**2)
927
928        SE = math.sqrt( (W*(1-W) + (totPos-1)*(Q1-W**2) + (totNeg-1)*(Q2-W**2)) / (totPos*totNeg) )
929        results.append((W, SE))
930    return results
931
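# A minimal sketch, assuming binary classification results in `res`:
#
#     for auc, se in AUCWilcoxon(res, class_index=1):
#         print "AUC = %.3f +- %.3f" % (auc, se)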
932AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete
933
934
935@deprecated_keywords({"classIndex": "class_index"})
936def compare_2_AUCs(res, lrn1, lrn2, class_index=-1, **argkw):
937    return corn.compare2ROCs(res, lrn1, lrn2, class_index, res.weights and not argkw.get("unweighted"))
938
939compare_2_AROCs = compare_2_AUCs # for backward compatibility, compare_2_AROCs is obsolete
940
941
942@deprecated_keywords({"classIndex": "class_index"})
943def compute_ROC(res, class_index=-1):
944    """ Computes a ROC curve as a list of (x, y) tuples, where x is
945    1-specificity and y is sensitivity.
946    """
947    problists, tots = corn.computeROCCumulative(res, class_index)
948
949    results = []
950    totPos, totNeg = tots[1], tots[0]
951
952    for plist in problists:
953        curve=[(1., 1.)]
954        TP, TN = totPos, 0.0
955        FN, FP = 0., totNeg
956        for prob in plist:
957            thisPos, thisNeg = prob[1][1], prob[1][0]
958            # thisPos go from TP to FN
959            TP -= thisPos
960            FN += thisPos
961            # thisNeg go from FP to TN
962            TN += thisNeg
963            FP -= thisNeg
964
965            sens = TP/(TP+FN)
966            spec = TN/(FP+TN)
967            curve.append((1-spec, sens))
968        results.append(curve)
969
970    return results   
971
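# A minimal sketch of plotting the curves, assuming matplotlib (pylab) is
# available and `res` holds binary classification results:
#
#     import pylab
#     for curve in compute_ROC(res, class_index=1):
#         pylab.plot([x for x, y in curve], [y for x, y in curve])
#     pylab.plot([0, 1], [0, 1], "k--")   # chance diagonal
#     pylab.show()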
972## TC's implementation of algorithms, taken from:
973## T Fawcett: ROC Graphs: Notes and Practical Considerations for Data Mining Researchers, submitted to KDD Journal.
974def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)):
975    if (P1x == P2x):
976        return 1e300
977    return (P1y - P2y) / (P1x - P2x)
978
979
980@deprecated_keywords({"keepConcavities": "keep_concavities"})
981def ROC_add_point(P, R, keep_concavities=1):
982    if keep_concavities:
983        R.append(P)
984    else:
985        while (1):
986            if len(R) < 2:
987                R.append(P)
988                return R
989            else:
990                T = R.pop()
991                T2 = R[-1]
992                if ROC_slope(T2, T) > ROC_slope(T, P):
993                    R.append(T)
994                    R.append(P)
995                    return R
996    return R
997
998
999@deprecated_keywords({"classIndex": "class_index",
1000                      "keepConcavities": "keep_concavities"})
1001def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
1002    problists, tots = corn.computeROCCumulative(res, class_index)
1003
1004    results = []
1005    P, N = tots[1], tots[0]
1006
1007    for plist in problists:
1008        ## corn returns the list ordered by increasing score; we need decreasing order
1009        plist.reverse()
1010        TP = 0.0
1011        FP = 0.0
1012        curve=[]
1013        fPrev = 10e300 # "infinity" score at 0.0, 0.0
1014        for prob in plist:
1015            f = prob[0]
1016            if f <> fPrev:
1017                if P:
1018                    tpr = TP/P
1019                else:
1020                    tpr = 0.0
1021                if N:
1022                    fpr = FP/N
1023                else:
1024                    fpr = 0.0
1025                curve = ROC_add_point((fpr, tpr, fPrev), curve, keep_concavities)
1026                fPrev = f
1027            thisPos, thisNeg = prob[1][1], prob[1][0]
1028            TP += thisPos
1029            FP += thisNeg
1030        if P:
1031            tpr = TP/P
1032        else:
1033            tpr = 0.0
1034        if N:
1035            fpr = FP/N
1036        else:
1037            fpr = 0.0
1038        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly
1039        results.append(curve)
1040
1041    return results
1042
1043## returns a list of points at the intersection of the tangential iso-performance line and the given ROC curve
1044## for given values of FPcost, FNcost and pval
1045def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
1046    m = (FPcost*(1.0 - pval)) / (FNcost*pval)
1047
1048    ## put the iso-performance line in point (0.0, 1.0)
1049    x0, y0 = (0.0, 1.0)
1050    x1, y1 = (1.0, 1.0 + m)
1051    d01 = math.sqrt((x1 - x0)*(x1 - x0) + (y1 - y0)*(y1 - y0))
1052
1053    ## calculate and find the closest point to the line
1054    firstp = 1
1055    mind = 0.0
1056    a = (x0*y1 - x1*y0)
1057    closestPoints = []
1058    for (x, y, fscore) in curve:
1059        d = ((y0 - y1)*x + (x1 - x0)*y + a) / d01
1060        d = abs(d)
1061        if firstp or d < mind:
1062            mind, firstp = d, 0
1063            closestPoints = [(x, y, fscore)]
1064        else:
1065            if abs(d - mind) <= 0.0001: ## close enough
1066                closestPoints.append( (x, y, fscore) )
1067    return closestPoints         
1068
1069def frange(start, end=None, inc=None):
1070    "A range function, that does accept float increments..."
1071
1072    if end == None:
1073        end = start + 0.0
1074        start = 0.0
1075
1076    if inc == None or inc == 0:
1077        inc = 1.0
1078
1079    L = [start]
1080    while 1:
1081        next = start + len(L) * inc
1082        if inc > 0 and next >= end:
1083            L.append(end)
1084            break
1085        elif inc < 0 and next <= end:
1086            L.append(end)
1087            break
1088        L.append(next)
1089       
1090    return L
1091
1092## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1093## where ROCcurvesX is a set of ROC curves,
1094## where a (one) ROC curve is a set of (FP, TP) points
1095##
1096## for each (sub)set of input ROC curves
1097## returns the average ROC curve and an array of (vertical) standard deviations
1098@deprecated_keywords({"ROCcurves": "roc_curves"})
1099def TC_vertical_average_ROC(roc_curves, samples = 10):
1100    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
1101        if (P1x == P2x) or ((X > P1x) and (X > P2x)) or ((X < P1x) and (X < P2x)):
1102            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
1103        dx = float(P2x) - float(P1x)
1104        dy = float(P2y) - float(P1y)
1105        m = dy/dx
1106        return P1y + m*(X - P1x)
1107
1108    def TP_FOR_FP(FPsample, ROC, npts):
1109        i = 0
1110        while i < npts - 1:
1111            (fp, _, _) = ROC[i + 1]
1112            if (fp <= FPsample):
1113                i += 1
1114            else:
1115                break
1116        (fp, tp, _) = ROC[i]
1117        if fp == FPsample:
1118            return tp
1119        elif fp < FPsample and i + 1 < len(ROC):
1120            return INTERPOLATE(ROC[i], ROC[i+1], FPsample)
1121        elif fp < FPsample and i + 1 == len(ROC): # return the last
1122            return ROC[i][1]
1123        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"
1124        #return 0.0
1125
1126    average = []
1127    stdev = []
1128    for ROCS in roc_curves:
1129        npts = []
1130        for c in ROCS:
1131            npts.append(len(c))
1132        nrocs = len(ROCS)
1133
1134        TPavg = []
1135        TPstd = []
1136        for FPsample in frange(0.0, 1.0, 1.0/samples):
1137            TPsum = []
1138            for i in range(nrocs):
1139                TPsum.append( TP_FOR_FP(FPsample, ROCS[i], npts[i]) ) ##TPsum = TPsum + TP_FOR_FP(FPsample, ROCS[i], npts[i])
1140            TPavg.append( (FPsample, statc.mean(TPsum)) )
1141            if len(TPsum) > 1:
1142                stdv = statc.std(TPsum)
1143            else:
1144                stdv = 0.0
1145            TPstd.append( stdv )
1146
1147        average.append(TPavg)
1148        stdev.append(TPstd)
1149
1150    return (average, stdev)
1151
1152## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
1153## where ROCcurvesX is a set of ROC curves,
1154## where a (one) ROC curve is a set of (FP, TP) points
1155##
1156## for each (sub)set of input ROC curves
1157## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
1158@deprecated_keywords({"ROCcurves": "roc_curves"})
1159def TC_threshold_average_ROC(roc_curves, samples = 10):
1160    def POINT_AT_THRESH(ROC, npts, thresh):
1161        i = 0
1162        while i < npts - 1:
1163            (px, py, pfscore) = ROC[i]
1164            if (pfscore > thresh):
1165                i += 1
1166            else:
1167                break
1168        return ROC[i]
1169
1170    average = []
1171    stdevV = []
1172    stdevH = []
1173    for ROCS in roc_curves:
1174        npts = []
1175        for c in ROCS:
1176            npts.append(len(c))
1177        nrocs = len(ROCS)
1178
1179        T = []
1180        for c in ROCS:
1181            for (px, py, pfscore) in c:
1182##                try:
1183##                    T.index(pfscore)
1184##                except:
1185                T.append(pfscore)
1186        T.sort()
1187        T.reverse() ## ugly
1188
1189        TPavg = []
1190        TPstdV = []
1191        TPstdH = []
1192        for tidx in frange(0, (len(T) - 1.0), float(len(T))/samples):
1193            FPsum = []
1194            TPsum = []
1195            for i in range(nrocs):
1196                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
1197                FPsum.append(fp)
1198                TPsum.append(tp)
1199            TPavg.append( (statc.mean(FPsum), statc.mean(TPsum)) )
1200            ## vertical standard deviation
1201            if len(TPsum) > 1:
1202                stdv = statc.std(TPsum)
1203            else:
1204                stdv = 0.0
1205            TPstdV.append( stdv )
1206            ## horizontal standard deviation
1207            if len(FPsum) > 1:
1208                stdh = statc.std(FPsum)
1209            else:
1210                stdh = 0.0
1211            TPstdH.append( stdh )
1212
1213        average.append(TPavg)
1214        stdevV.append(TPstdV)
1215        stdevH.append(TPstdH)
1216
1217    return (average, stdevV, stdevH)
1218
1219## Calibration Curve
1220## returns an array of (curve, yesClassRugPoints, noClassRugPoints) elements, where:
1221##  - curve is an array of points (x, y) on the calibration curve
1222##  - yesClassRugPoints is an array of (x, 1) points
1223##  - noClassRugPoints is an array of (x, 0) points
1224@deprecated_keywords({"classIndex": "class_index"})
1225def compute_calibration_curve(res, class_index=-1):
1226    ## merge multiple iterations into one
1227    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1228    for te in res.results:
1229        mres.results.append( te )
1230
1231    problists, tots = corn.computeROCCumulative(mres, class_index)
1232
1233    results = []
1234    P, N = tots[1], tots[0]
1235
1236    bins = 10 ## divide interval between 0.0 and 1.0 into N bins
1237
1238    for plist in problists:
1239        yesClassRugPoints = [] 
1240        noClassRugPoints = []
1241
1242        yesBinsVals = [0] * bins
1243        noBinsVals = [0] * bins
1244        for (f, (thisNeg, thisPos)) in plist:
1245            yesClassRugPoints.append( (f, thisPos) ) #1.0
1246            noClassRugPoints.append( (f, thisNeg) ) #1.0
1247
1248            index = int(f * bins )
1249            index = min(index, bins - 1) ## just in case for value 1.0
1250            yesBinsVals[index] += thisPos
1251            noBinsVals[index] += thisNeg
1252
1253        curve = []
1254        for cn in range(bins):
1255            f = float(cn * 1.0 / bins) + (1.0 / 2.0 / bins)
1256            yesVal = yesBinsVals[cn]
1257            noVal = noBinsVals[cn]
1258            allVal = yesVal + noVal
1259            if allVal == 0.0: continue
1260            y = float(yesVal)/float(allVal)
1261            curve.append( (f,  y) )
1262
1263        ## smooth the curve
1264        maxnPoints = 100
1265        if len(curve) >= 3:
1266#            loessCurve = statc.loess(curve, -3, 0.6)
1267            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
1268        else:
1269            loessCurve = curve
1270        clen = len(loessCurve)
1271        if clen > maxnPoints:
1272            df = clen / maxnPoints
1273            if df < 1: df = 1
1274            curve = [loessCurve[i]  for i in range(0, clen, df)]
1275        else:
1276            curve = loessCurve
1277        curve = [(c)[:2] for c in curve] ## remove the third value (variance of epsilon?) that suddenly appeared in the output of the statc.loess function
1278        results.append((curve, yesClassRugPoints, noClassRugPoints))
1279
1280    return results
1281
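# A minimal sketch, assuming binary classification results in `res`; each
# element pairs the calibration curve with the rug points of both classes:
#
#     for curve, yes_rug, no_rug in compute_calibration_curve(res, class_index=1):
#         print curve[:3]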
1282
1283## Lift Curve
1284## returns an array of curve elements, where:
1285##  - curve is an array of points ((TP+FP)/(P + N), TP, (threshold, FP/N)) on the Lift Curve
1286@deprecated_keywords({"classIndex": "class_index"})
1287def compute_lift_curve(res, class_index=-1):
1288    ## merge multiple iterations into one
1289    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names, res.class_values, res.weights, classifiers=res.classifiers, loaded=res.loaded, test_type=res.test_type, labels=res.labels)
1290    for te in res.results:
1291        mres.results.append( te )
1292
1293    problists, tots = corn.computeROCCumulative(mres, class_index)
1294
1295    results = []
1296    P, N = tots[1], tots[0]
1297    for plist in problists:
1298        ## corn returns the list ordered by increasing score; we need decreasing order
1299        plist.reverse()
1300        TP = 0.0
1301        FP = 0.0
1302        curve = [(0.0, 0.0, (10e300, 0.0))]
1303        for (f, (thisNeg, thisPos)) in plist:
1304            TP += thisPos
1305            FP += thisNeg
1306            curve.append( ((TP+FP)/(P + N), TP, (f, FP/(N or 1))) )
1307        results.append(curve)
1308
1309    return P, N, results
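# A minimal sketch, assuming binary classification results in `res`:
#
#     P, N, curves = compute_lift_curve(res, class_index=1)
#     for curve in curves:
#         print curve[:3]   # ((TP+FP)/(P+N), TP, (threshold, FP/N)) triples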
1310###
1311
1312class CDT:
1313  """ Stores number of concordant (C), discordant (D) and tied (T) pairs (used for AUC) """
1314  def __init__(self, C=0.0, D=0.0, T=0.0):
1315    self.C, self.D, self.T = C, D, T
1316   
1317def is_CDT_empty(cdt):
1318    return cdt.C + cdt.D + cdt.T < 1e-20
1319
1320
1321@deprecated_keywords({"classIndex": "class_index"})
1322def compute_CDT(res, class_index=-1, **argkw):
1323    """Obsolete, don't use"""
1324    if class_index<0:
1325        if res.baseClass>=0:
1326            class_index = res.baseClass
1327        else:
1328            class_index = 1
1329           
1330    useweights = res.weights and not argkw.get("unweighted", 0)
1331    weightByClasses = argkw.get("weightByClasses", True)
1332
1333    if (res.number_of_iterations>1):
1334        CDTs = [CDT() for i in range(res.number_of_learners)]
1335        iterationExperiments = split_by_iterations(res)
1336        for exp in iterationExperiments:
1337            expCDTs = corn.computeCDT(exp, class_index, useweights)
1338            for i in range(len(CDTs)):
1339                CDTs[i].C += expCDTs[i].C
1340                CDTs[i].D += expCDTs[i].D
1341                CDTs[i].T += expCDTs[i].T
1342        for i in range(res.number_of_learners):
1343            if is_CDT_empty(CDTs[0]):
1344                return corn.computeCDT(res, class_index, useweights)
1345       
1346        return CDTs
1347    else:
1348        return corn.computeCDT(res, class_index, useweights)
1349
1350## THIS FUNCTION IS OBSOLETE AND ITS AVERAGING OVER FOLDS IS QUESTIONABLE
1351## DON'T USE IT
1352def ROCs_from_CDT(cdt, **argkw):
1353    """Obsolete, don't use"""
1354    if type(cdt) == list:
1355        return [ROCs_from_CDT(c) for c in cdt]
1356
1357    C, D, T = cdt.C, cdt.D, cdt.T
1358    N = C+D+T
1359    if N < 1e-6:
1360        import warnings
1361        warnings.warn("Can't compute AUC: one or both classes have no instances")
1362        return (-1,)*8
1363    if N < 2:
1364        import warnings
1365        warnings.warn("Can't compute AUC: one or both classes have too few examples")
1366
1367    som = (C-D)/N
1368    c = 0.5*(1+som)
1369 
1370    if (C+D):
1371        res = (C/N*100, D/N*100, T/N*100, N, som, (C-D)/(C+D), (C-D)/(N*(N-1)/2), 0.5*(1+som))
1372    else:
1373        res = (C/N*100, D/N*100, T/N*100, N, som, -1.0, (C-D)/(N*(N-1)/2), 0.5*(1+som))
1374
1375    if argkw.get("print"):
1376        print "Concordant  = %5.1f       Somers' D = %1.3f" % (res[0], res[4])
1377        print "Discordant  = %5.1f       Gamma     = %1.3f" % (res[1], res[5]>0 and res[5] or "N/A")
1378        print "Tied        = %5.1f       Tau-a     = %1.3f" % (res[2], res[6])
1379        print " %6d pairs             c         = %1.3f"    % (res[3], res[7])
1380
1381    return res
1382
1383AROC_from_CDT = ROCs_from_CDT  # for backward compatibility, AROC_from_CDT is obsolete
1384
1385
1386
1387# Computes AUC using the specified 'cdtComputer' function.
1388# It first tries to compute the AUCs from 'ite' (examples from a single iteration) and,
1389# if C+D+T=0, falls back to 'all_ite' (the entire test set). In the former case, the
1390# AUCs are divided by 'divide_by_if_ite'. An additional flag is returned, which is True
1391# in the former case and False in the latter.
1392@deprecated_keywords({"divideByIfIte": "divide_by_if_ite",
1393                      "computerArgs": "computer_args"})
1394def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
1395    cdts = cdtComputer(*(ite, ) + computer_args)
1396    if not is_CDT_empty(cdts[0]):
1397        return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T)/divide_by_if_ite for cdt in cdts], True
1398       
1399    if all_ite:
1400        cdts = cdtComputer(*(all_ite, ) + computer_args)
1401        if not is_CDT_empty(cdts[0]):
1402            return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T) for cdt in cdts], False
1403
1404    return False, False
1405
1406   
1407# computes AUC between classes i and j as if there were no other classes
1408@deprecated_keywords({"classIndex1": "class_index1",
1409                      "classIndex2": "class_index2",
1410                      "useWeights": "use_weights",
1411                      "divideByIfIte": "divide_by_if_ite"})
1412def AUC_ij(ite, class_index1, class_index2, use_weights = True, all_ite = None, divide_by_if_ite = 1.0):
1413    return AUC_x(corn.computeCDTPair, ite, all_ite, divide_by_if_ite, (class_index1, class_index2, use_weights))
1414
1415
1416# computes AUC between class i and the other classes (treating them as the same class)
1417@deprecated_keywords({"classIndex": "class_index",
1418                      "useWeights": "use_weights",
1419                      "divideByIfIte": "divide_by_if_ite"})
1420def AUC_i(ite, class_index, use_weights = True, all_ite = None,
1421          divide_by_if_ite = 1.0):
1422    return AUC_x(corn.computeCDT, ite, all_ite, divide_by_if_ite, (class_index, use_weights))
1423
1424
1425# Computes the average AUC over folds using an "AUCcomputer" (AUC_i or AUC_ij).
1426# It returns the sum of what the computer returns, unless at some fold the computer
1427# has to resort to computing over all folds, or even that fails;
1428# in those cases the result is returned immediately.
1429
1430@deprecated_keywords({"AUCcomputer": "auc_computer",
1431                      "computerArgs": "computer_args"})
1432def AUC_iterations(auc_computer, iterations, computer_args):
1433    subsum_aucs = [0.] * iterations[0].number_of_learners
1434    for ite in iterations:
1435        aucs, foldsUsed = auc_computer(*(ite, ) + computer_args)
1436        if not aucs:
1437            return None
1438        if not foldsUsed:
1439            return aucs
1440        subsum_aucs = map(add, subsum_aucs, aucs)
1441    return subsum_aucs
1442
1443
1444# AUC for binary classification problems
1445@deprecated_keywords({"useWeights": "use_weights"})
1446def AUC_binary(res, use_weights = True):
1447    if res.number_of_iterations > 1:
1448        return AUC_iterations(AUC_i, split_by_iterations(res), (-1, use_weights, res, res.number_of_iterations))
1449    else:
1450        return AUC_i(res, -1, use_weights)[0]
1451
1452# AUC for multiclass problems
1453@deprecated_keywords({"useWeights": "use_weights"})
1454def AUC_multi(res, use_weights = True, method = 0):
1455    numberOfClasses = len(res.class_values)
1456   
1457    if res.number_of_iterations > 1:
1458        iterations = split_by_iterations(res)
1459        all_ite = res
1460    else:
1461        iterations = [res]
1462        all_ite = None
1463   
1464    # by pairs
1465    sum_aucs = [0.] * res.number_of_learners
1466    usefulClassPairs = 0.
1467
1468    if method in [0, 2]:
1469        prob = class_probabilities_from_res(res)
1470       
1471    if method <= 1:
1472        for classIndex1 in range(numberOfClasses):
1473            for classIndex2 in range(classIndex1):
1474                subsum_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, use_weights, all_ite, res.number_of_iterations))
1475                if subsum_aucs:
1476                    if method == 0:
1477                        p_ij = prob[classIndex1] * prob[classIndex2]
1478                        subsum_aucs = [x * p_ij  for x in subsum_aucs]
1479                        usefulClassPairs += p_ij
1480                    else:
1481                        usefulClassPairs += 1
1482                    sum_aucs = map(add, sum_aucs, subsum_aucs)
1483    else:
1484        for classIndex in range(numberOfClasses):
1485            subsum_aucs = AUC_iterations(AUC_i, iterations, (classIndex, use_weights, all_ite, res.number_of_iterations))
1486            if subsum_aucs:
1487                if method == 2:
1488                    p_i = prob[classIndex]
1489                    subsum_aucs = [x * p_i  for x in subsum_aucs]
1490                    usefulClassPairs += p_i
1491                else:
1492                    usefulClassPairs += 1
1493                sum_aucs = map(add, sum_aucs, subsum_aucs)
1494                   
1495    if usefulClassPairs > 0:
1496        sum_aucs = [x/usefulClassPairs for x in sum_aucs]
1497
1498    return sum_aucs
1499
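# The empty AUC defined just below is only a forward declaration: it lets the
# real AUC, defined a few lines further down, use AUC.ByWeightedPairs as the
# default value of its 'method' argument.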
1500def AUC():
1501    pass
1502
1503AUC.ByWeightedPairs = 0
1504
1505# Computes AUC, possibly for multiple classes (the averaging method can be specified)
1506# Results over folds are averages; if some folds have examples from one class only, the folds are merged
1507@deprecated_keywords({"useWeights": "use_weights"})
1508def AUC(res, method = AUC.ByWeightedPairs, use_weights = True):
1509    """ Returns the area under ROC curve (AUC) given a set of experimental
1510    results. For multivalued class problems, it computes an average over
1511    class pairs or one-against-all comparisons, as specified by the method argument.
1512    """
1513    if len(res.class_values) < 2:
1514        raise ValueError("Cannot compute AUC on a single-class problem")
1515    elif len(res.class_values) == 2:
1516        return AUC_binary(res, use_weights)
1517    else:
1518        return AUC_multi(res, use_weights, method)
1519
1520AUC.ByWeightedPairs = 0
1521AUC.ByPairs = 1
1522AUC.WeightedOneAgainstAll = 2
1523AUC.OneAgainstAll = 3
1524
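# Usage sketch (illustrative only, not part of the module's API): score
# cross-validated results with AUC. The data set name, the learners and the
# Orange.evaluation.testing call below are assumptions made for this example;
# any discrete-class data and learners will do.
def _AUC_usage_example():
    import Orange
    data = Orange.data.Table("vehicle")
    learners = [Orange.classification.bayes.NaiveLearner(),
                Orange.classification.tree.TreeLearner()]
    res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
    # returns one AUC per learner; for multi-class data the pairwise AUCs are
    # averaged, weighted by class probabilities (AUC.ByWeightedPairs)
    return AUC(res, method=AUC.ByWeightedPairs)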
1525
1526# Computes AUC; in multivalued class problems, AUC is computed as one against all
1527# Results over folds are averages; if some folds have examples from one class only, the folds are merged
1528@deprecated_keywords({"classIndex": "class_index",
1529                      "useWeights": "use_weights"})
1530def AUC_single(res, class_index = -1, use_weights = True):
1531    """ Computes AUC where the class given by class_index is singled out and
1532    all other classes are treated as a single class. To find how good our
1533    classifiers are at distinguishing between vans and other vehicles, call
1534    the function like this::
1535   
1536        Orange.evaluation.scoring.AUC_single(resVeh, \
1537classIndex = vehicle.domain.classVar.values.index("van"))
1538    """
1539    if class_index<0:
1540        if res.baseClass>=0:
1541            class_index = res.baseClass
1542        else:
1543            class_index = 1
1544
1545    if res.number_of_iterations > 1:
1546        return AUC_iterations(AUC_i, split_by_iterations(res), (class_index, use_weights, res, res.number_of_iterations))
1547    else:
1548        return AUC_i( res, class_index, use_weights)[0]
1549
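# Usage sketch (illustrative): single out one class, here "van" from the
# vehicle data, and measure how well a classifier separates it from all the
# remaining classes taken together. The data set and learner are assumptions
# made only for this example.
def _AUC_single_usage_example():
    import Orange
    vehicle = Orange.data.Table("vehicle")
    learners = [Orange.classification.bayes.NaiveLearner()]
    res_veh = Orange.evaluation.testing.cross_validation(learners, vehicle, folds=5)
    van = vehicle.domain.classVar.values.index("van")
    return AUC_single(res_veh, class_index=van)
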
1550# Computes AUC for a pair of classes (as if there were no other classes)
1551# Results over folds are averages; if some folds have examples from one class only, the folds are merged
1552@deprecated_keywords({"classIndex1": "class_index1",
1553                      "classIndex2": "class_index2",
1554                      "useWeights": "use_weights"})
1555def AUC_pair(res, class_index1, class_index2, use_weights = True):
1556    """ Computes AUC between a pair of classes, ignoring instances from all
1557    other classes.
1558    """
1559    if res.number_of_iterations > 1:
1560        return AUC_iterations(AUC_ij, split_by_iterations(res), (class_index1, class_index2, use_weights, res, res.number_of_iterations))
1561    else:
1562        return AUC_ij(res, class_index1, class_index2, use_weights)
1563 
1564
1565# Matrix of pairwise AUCs for multiclass problems
1566@deprecated_keywords({"useWeights": "use_weights"})
1567def AUC_matrix(res, use_weights = True):
1568    """ Computes a (lower triangular) matrix with AUCs for all pairs of classes.
1569    If there are empty classes, the corresponding elements in the matrix
1570    are -1. Remember the beautiful(?) code for printing out the confusion
1571    matrix? Here it strikes again::
1572   
1573        classes = vehicle.domain.classVar.values
1574        AUCmatrix = Orange.evaluation.scoring.AUC_matrix(resVeh)[0]
1575        print "\t"+"\t".join(classes[:-1])
1576        for className, AUCrow in zip(classes[1:], AUCmatrix[1:]):
1577            print ("%s" + ("\t%5.3f" * len(AUCrow))) % ((className, ) + tuple(AUCrow))
1578    """
1579    numberOfClasses = len(res.class_values)
1580    number_of_learners = res.number_of_learners
1581   
1582    if res.number_of_iterations > 1:
1583        iterations, all_ite = split_by_iterations(res), res
1584    else:
1585        iterations, all_ite = [res], None
1586   
1587    aucs = [[[] for i in range(numberOfClasses)] for i in range(number_of_learners)]
1588    prob = class_probabilities_from_res(res)
1589       
1590    for classIndex1 in range(numberOfClasses):
1591        for classIndex2 in range(classIndex1):
1592            pair_aucs = AUC_iterations(AUC_ij, iterations, (classIndex1, classIndex2, use_weights, all_ite, res.number_of_iterations))
1593            if pair_aucs:
1594                for lrn in range(number_of_learners):
1595                    aucs[lrn][classIndex1].append(pair_aucs[lrn])
1596            else:
1597                for lrn in range(number_of_learners):
1598                    aucs[lrn][classIndex1].append(-1)
1599    return aucs
1600               
1601
1602def McNemar(res, **argkw):
1603    """ Computes a triangular matrix with McNemar statistics for each pair of
1604    classifiers. The statistic follows the chi-square distribution with
1605    one degree of freedom; the critical value for 5% significance is about 3.84.
1606    """
1607    nLearners = res.number_of_learners
1608    mcm = []
1609    for i in range(nLearners):
1610       mcm.append([0.0]*res.number_of_learners)
1611
1612    if not res.weights or argkw.get("unweighted"):
1613        for i in res.results:
1614            actual = i.actual_class
1615            classes = i.classes
1616            for l1 in range(nLearners):
1617                for l2 in range(l1, nLearners):
1618                    if classes[l1]==actual:
1619                        if classes[l2]!=actual:
1620                            mcm[l1][l2] += 1
1621                    elif classes[l2]==actual:
1622                        mcm[l2][l1] += 1
1623    else:
1624        for i in res.results:
1625            actual = i.actual_class
1626            classes = i.classes
1627            for l1 in range(nLearners):
1628                for l2 in range(l1, nLearners):
1629                    if classes[l1]==actual:
1630                        if classes[l2]!=actual:
1631                            mcm[l1][l2] += i.weight
1632                    elif classes[l2]==actual:
1633                        mcm[l2][l1] += i.weight
1634
1635    for l1 in range(nLearners):
1636        for l2 in range(l1, nLearners):
1637            su=mcm[l1][l2] + mcm[l2][l1]
1638            if su:
1639                mcm[l2][l1] = (abs(mcm[l1][l2]-mcm[l2][l1])-1)**2 / su
1640            else:
1641                mcm[l2][l1] = 0
1642
1643    for l1 in range(nLearners):
1644        mcm[l1]=mcm[l1][:l1]
1645
1646    return mcm
1647
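# Usage sketch (illustrative): compare classifiers pairwise with McNemar's test
# on cross-validated results; values above roughly 3.84 indicate a significant
# difference at the 5% level. The data set and learners are placeholders used
# only for this example.
def _McNemar_usage_example():
    import Orange
    data = Orange.data.Table("voting")
    learners = [Orange.classification.bayes.NaiveLearner(),
                Orange.classification.majority.MajorityLearner()]
    res = Orange.evaluation.testing.cross_validation(learners, data, folds=10)
    return McNemar(res)  # triangular matrix of McNemar statistics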
1648
1649def McNemar_of_two(res, lrn1, lrn2, **argkw):
1650    """ McNemar_of_two computes the McNemar statistic for a pair of classifiers,
1651    specified by the indices lrn1 and lrn2.
1652    """
1653    tf = ft = 0.0
1654    if not res.weights or argkw.get("unweighted"):
1655        for i in res.results:
1656            actual=i.actual_class
1657            if i.classes[lrn1]==actual:
1658                if i.classes[lrn2]!=actual:
1659                    tf += 1.0
1660            elif i.classes[lrn2]==actual:
1661                    ft += 1.0
1662    else:
1663        for i in res.results:
1664            actual=i.actual_class
1665            if i.classes[lrn1]==actual:
1666                if i.classes[lrn2]!=actual:
1667                    tf += i.weight
1668            elif i.classes[lrn2]==actual:
1669                    ft += i.weight
1670
1671    su = tf + ft
1672    if su:
1673        return (abs(tf-ft)-1)**2 / su
1674    else:
1675        return 0
1676
1677
1678def Friedman(res, stat=CA):
1679    """ Compares classifiers by the Friedman test, treating folds as different examples.
1680        Returns F, p and the average ranks.
1681    """
1682    res_split = split_by_iterations(res)
1683    res = [stat(r) for r in res_split]
1684   
1685    N = len(res)
1686    k = len(res[0])
1687    sums = [0.0]*k
1688    for r in res:
1689        ranks = [k-x+1 for x in statc.rankdata(r)]
1690        if stat==Brier_score: # reverse ranks for Brier_score (lower better)
1691            ranks = [k+1-x for x in ranks]
1692        sums = [ranks[i]+sums[i] for i in range(k)]
1693
1694    T = sum([x*x for x in sums])
1695    sums = [x/N for x in sums]
1696
1697    F = 12.0 / (N*k*(k+1)) * T  - 3 * N * (k+1)
1698
1699    return F, statc.chisqprob(F, k-1), sums
1700
1701
1702def Wilcoxon_pairs(res, avgranks, stat=CA):
1703    """ Returns a triangular matrix, where element[i][j] stores the significance of the difference
1704        between the i-th and j-th classifier, as computed by the Wilcoxon test. The element is positive
1705        if the i-th is better than the j-th, negative if it is worse, and 1 if they are equal.
1706        Arguments are ExperimentResults, average ranks (as returned by Friedman)
1707        and, optionally, a statistic; greater values should mean better results.
1708    """
1709    res_split = split_by_iterations(res)
1710    res = [stat(r) for r in res_split]
1711
1712    k = len(res[0])
1713    bt = []
1714    for m1 in range(k):
1715        nl = []
1716        for m2 in range(m1+1, k):
1717            t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res])
1718            if avgranks[m1]<avgranks[m2]:
1719                nl.append(p)
1720            elif avgranks[m2]<avgranks[m1]:
1721                nl.append(-p)
1722            else:
1723                nl.append(1)
1724        bt.append(nl)
1725    return bt
1726
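# Usage sketch (illustrative): rank learners across cross-validation folds with
# the Friedman test and then inspect pairwise differences with Wilcoxon_pairs.
# The data set and learners are assumptions made only for this example.
def _Friedman_usage_example():
    import Orange
    data = Orange.data.Table("voting")
    learners = [Orange.classification.bayes.NaiveLearner(),
                Orange.classification.tree.TreeLearner(),
                Orange.classification.majority.MajorityLearner()]
    res = Orange.evaluation.testing.cross_validation(learners, data, folds=10)
    F, p, avgranks = Friedman(res, stat=CA)
    pairs = Wilcoxon_pairs(res, avgranks, stat=CA)
    return F, p, avgranks, pairs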
1727
1728@deprecated_keywords({"allResults": "all_results",
1729                      "noConfidence": "no_confidence"})
1730def plot_learning_curve_learners(file, all_results, proportions, learners, no_confidence=0):
1731    plot_learning_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], no_confidence)
1732
1733
1734@deprecated_keywords({"allResults": "all_results",
1735                      "noConfidence": "no_confidence"})
1736def plot_learning_curve(file, all_results, proportions, legend, no_confidence=0):
1737    import types
1738    fopened=0
1739    if type(file)==types.StringType:
1740        file=open(file, "wt")
1741        fopened=1
1742       
1743    file.write("set yrange [0:1]\n")
1744    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
1745    file.write("set multiplot\n\n")
1746    CAs = [CA_dev(x) for x in all_results]
1747
1748    file.write("plot \\\n")
1749    for i in range(len(legend)-1):
1750        if not no_confidence:
1751            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i+1))
1752        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
1753    if not no_confidence:
1754        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
1755    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))
1756
1757    for i in range(len(legend)):
1758        if not no_confidence:
1759            for p in range(len(proportions)):
1760                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96*CAs[p][i][1]))
1761            file.write("e\n\n")
1762
1763        for p in range(len(proportions)):
1764            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
1765        file.write("e\n\n")
1766
1767    if fopened:
1768        file.close()
1769
1770
1771def print_single_ROC_curve_coordinates(file, curve):
1772    import types
1773    fopened=0
1774    if (type(file)==types.StringType):
1775        file=open(file, "wt")
1776        fopened=1
1777
1778    for coord in curve:
1779        file.write("%5.3f\t%5.3f\n" % tuple(coord))
1780
1781    if fopened:
1782        file.close()
1783
1784
1785def plot_ROC_learners(file, curves, learners):
1786    plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))])
1787   
1788def plot_ROC(file, curves, legend):
1789    import types
1790    fopened=0
1791    if (type(file)==types.StringType):
1792        file=open(file, "wt")
1793        fopened=1
1794
1795    file.write("set yrange [0:1]\n")
1796    file.write("set xrange [0:1]\n")
1797    file.write("set multiplot\n\n")
1798
1799    file.write("plot \\\n")
1800    for leg in legend:
1801        file.write("'-' title '%s' with lines,\\\n" % leg)
1802    file.write("'-' title '' with lines\n")
1803
1804    for curve in curves:
1805        for coord in curve:
1806            file.write("%5.3f\t%5.3f\n" % tuple(coord))
1807        file.write("e\n\n")
1808
1809    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")
1810
1811    if fopened:
1812        file.close()
1813
1814
1815@deprecated_keywords({"allResults": "all_results"})
1816def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference=-1):
1817    plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference)
1818
1819
1820@deprecated_keywords({"allResults": "all_results"})
1821def plot_McNemar_curve(file, all_results, proportions, legend, reference=-1):
1822    if reference<0:
1823        reference=len(legend)-1
1824       
1825    import types
1826    fopened=0
1827    if (type(file)==types.StringType):
1828        file=open(file, "wt")
1829        fopened=1
1830       
1831    #file.write("set yrange [0:1]\n")
1832    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
1833    file.write("set multiplot\n\n")
1834    file.write("plot \\\n")
1835    tmap=range(reference)+range(reference+1, len(legend))
1836    for i in tmap[:-1]:
1837        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
1838    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1]+1))
1839    file.write("\n")
1840
1841    for i in tmap:
1842        for p in range(len(proportions)):
1843            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
1844        file.write("e\n\n")
1845
1846    if fopened:
1847        file.close()
1848
1849default_point_types=("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}")+tuple([chr(x) for x in range(97, 122)])
1850default_line_types=("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
1851
1852@deprecated_keywords({"allResults": "all_results"})
1853def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
1854    return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options)
1855
1856
1857@deprecated_keywords({"allResults": "all_results"})
1858def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
1859    import types
1860    fopened=0
1861    if (type(file)==types.StringType):
1862        file=open(file, "wt")
1863        fopened=1
1864
1865    nexamples=len(all_results[0].results)
1866    CAs = [CA_dev(x) for x in all_results]
1867
1868    graphsize=float(options.get("graphsize", 10.0)) #cm
1869    difprop=proportions[-1]-proportions[0]
1870    ntestexamples=nexamples*proportions[-1]
1871    xunit=graphsize/ntestexamples
1872
1873    yshift=float(options.get("yshift", -ntestexamples/20.))
1874   
1875    pointtypes=options.get("pointtypes", default_point_types)
1876    linetypes=options.get("linetypes", default_line_types)
1877
1878    if options.has_key("numberedx"):
1879        numberedx=options["numberedx"]
1880        if type(numberedx)==types.IntType:
1881            if numberedx>0:
1882                numberedx=[nexamples*proportions[int(i/float(numberedx)*len(proportions))] for i in range(numberedx)]+[proportions[-1]*nexamples]
1883            elif numberedx<0:
1884                numberedx = -numberedx
1885                newn=[]
1886                for i in range(numberedx+1):
1887                    wanted=proportions[0]+float(i)/numberedx*difprop
1888                    best=(10, 0)
1889                    for t in proportions:
1890                        td=abs(wanted-t)
1891                        if td<best[0]:
1892                            best=(td, t)
1893                    if not best[1] in newn:
1894                        newn.append(best[1])
1895                newn.sort()
1896                numberedx=[nexamples*x for x in newn]
1897        elif type(numberedx[0])==types.FloatType:
1898            numberedx=[nexamples*x for x in numberedx]
1899    else:
1900        numberedx=[nexamples*x for x in proportions]
1901
1902    file.write("\\mbox{\n")
1903    file.write("\\beginpicture\n")
1904    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
1905    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
1906    file.write("\\axis bottom invisible\n")  # label {#examples}
1907    file.write("      ticks short at %s /\n" % reduce(lambda x,y:x+" "+y, ["%i"%(x*nexamples+0.5) for x in proportions]))
1908    if numberedx:
1909        file.write("            long numbered at %s /\n" % reduce(lambda x,y:x+y, ["%i " % int(x+0.5) for x in numberedx]))
1910    file.write("  /\n")
1911    file.write("\\axis left invisible\n")  # label {classification accuracy}
1912    file.write("      shiftedto y=%5.3f\n" % yshift)
1913    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
1914    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
1915    file.write("  /\n")
1916    if options.has_key("default"):
1917        file.write("\\setdashpattern<1pt, 1pt>\n")
1918        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))
1919   
1920    for i in range(len(CAs[0])):
1921        coordinates=reduce(lambda x,y:x+" "+y, ["%i %5.3f" % (proportions[p]*nexamples, CAs[p][i][0]) for p in range(len(proportions))])
1922        if linetypes:
1923            file.write("%s\n" % linetypes[i])
1924            file.write("\\plot %s /\n" % coordinates)
1925        if pointtypes:
1926            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))
1927
1928    file.write("\\endpicture\n")
1929    file.write("}\n")
1930    if fopened:
1931        file.close()
1934
1935def legend_learners_to_PiCTeX(file, learners, **options):
1936  return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options)
1937   
1938def legend_to_PiCTeX(file, legend, **options):
1939    import types
1940    fopened=0
1941    if (type(file)==types.StringType):
1942        file=open(file, "wt")
1943        fopened=1
1944
1945    pointtypes=options.get("pointtypes", default_point_types)
1946    linetypes=options.get("linetypes", default_line_types)
1947
1948    file.write("\\mbox{\n")
1949    file.write("\\beginpicture\n")
1950    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
1951    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))
1952
1953    for i in range(len(legend)):
1954        if linetypes:
1955            file.write("%s\n" % linetypes[i])
1956            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i+0.2))
1957        if pointtypes:
1958            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i+0.1))
1959        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i+0.25))
1960
1961    file.write("\\endpicture\n")
1962    file.write("}\n")
1963    if fopened:
1964        file.close()
1967
1968
1969def compute_friedman(avranks, N):
1970    """ Returns a tuple composed of (Friedman statistic, degrees of freedom)
1971    and (Iman statistic - F-distribution, degrees of freedom) given average
1972    ranks and the number of tested data sets N.
1973    """
1974
1975    k = len(avranks)
1976
1977    def friedman(N, k, ranks):
1978        return 12*N*(sum([rank**2.0 for rank in ranks]) - (k*(k+1)*(k+1)/4.0) )/(k*(k+1))
1979
1980    def iman(fried, N, k):
1981        return (N-1)*fried/(N*(k-1) - fried)
1982
1983    f = friedman(N, k, avranks)
1984    im = iman(f, N, k)
1985    fdistdof = (k-1, (k-1)*(N-1))
1986
1987    return (f, k-1), (im, fdistdof)
1988
1989def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
1990    """ Returns the critical difference for the Nemenyi or Bonferroni-Dunn test
1991    according to the given alpha (either alpha="0.05" or alpha="0.1") for average
1992    ranks and the number of tested data sets N. Type can be either "nemenyi"
1993    for the Nemenyi two-tailed test or "bonferroni-dunn" for the Bonferroni-Dunn test.
1994    """
1995
1996    k = len(avranks)
1997   
1998    d = {}
1999
2000    # critical values can be computed in R as qtukey(0.95, n, Inf)/sqrt(2), e.g.
2001    # for (x in c(2:20)) print(qtukey(0.95, x, Inf)/sqrt(2))
2002    d[("nemenyi", "0.05")] = [0, 0, 1.959964, 2.343701, 2.569032, 2.727774, 2.849705, 2.94832, 3.030879, 3.101730, 3.163684, 3.218654, 3.268004, 3.312739, 3.353618, 3.39123, 3.426041, 3.458425, 3.488685, 3.517073, 3.543799 ]
2003    d[("nemenyi", "0.1")] = [0, 0, 1.644854, 2.052293, 2.291341, 2.459516, 2.588521, 2.692732, 2.779884, 2.854606, 2.919889, 2.977768, 3.029694, 3.076733, 3.119693, 3.159199, 3.195743, 3.229723, 3.261461, 3.291224, 3.319233 ]
2004
2005    d[("bonferroni-dunn", "0.05")] =  [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576, 2.638, 2.690, 2.724, 2.773 ]
2006    d[("bonferroni-dunn", "0.1")] = [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326, 2.394, 2.450, 2.498, 2.539 ]
2007
2008    q = d[(type, alpha)]
2009
2010    cd = q[k]*(k*(k+1)/(6.0*N))**0.5
2011
2012    return cd
2013 
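# The returned value follows Demsar (2006): CD = q_alpha * sqrt(k*(k+1)/(6*N)),
# where q_alpha is the critical value tabulated above (the Studentized range
# statistic divided by sqrt(2)). For example, with k=4 methods, N=14 data sets
# and alpha="0.05", q=2.569 and CD = 2.569 * sqrt(4*5/(6*14)) ~= 1.25.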
2014
2015def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
2016    """
2017    Draws a CD graph, which is used to display the differences in methods'
2018    performance.
2019    See Janez Demsar, Statistical Comparisons of Classifiers over
2020    Multiple Data Sets, Journal of Machine Learning Research 7(Jan):1--30, 2006.
2021
2022    Needs matplotlib to work.
2023
2024    :param filename: Output file name (with extension). Formats supported
2025                     by matplotlib can be used.
2026    :param avranks: List of average methods' ranks.
2027    :param names: List of methods' names.
2028
2029    :param cd: Critical difference. Used for marking methods whose
2030               difference is not statistically significant.
2031    :param lowv: The lowest shown rank, if None, use 1.
2032    :param highv: The highest shown rank, if None, use len(avranks).
2033    :param width: Width of the drawn figure in inches, default 6 in.
2034    :param textspace: Space on figure sides left for the description
2035                      of methods, default 1 in.
2036    :param reverse:  If True, the lowest rank is on the right. Default: False.
2037    :param cdmethod: None by default. It can be an index of an element in
2038                     avranks or names, specifying the method that should be
2039                     marked with an interval.
2040    """
2041
2042    width = float(width)
2043    textspace = float(textspace)
2044
2045    def nth(l,n):
2046        """
2047        Returns the n-th element of each sublist in a list of lists.
2048        """
2049        n = lloc(l,n)
2050        return [ a[n] for a in l ]
2051
2052    def lloc(l,n):
2053        """
2054        List location in list of list structure.
2055        Enable the use of negative locations:
2056        -1 is the last element, -2 second last...
2057        """
2058        if n < 0:
2059            return len(l[0])+n
2060        else:
2061            return n
2062
2063    def mxrange(lr):
2064        """
2065        Multiple xranges. Can be used to traverse matrices.
2066        This function is very slow because the number of ranges
2067        is not known in advance.
2068
2069        >>> list(mxrange([2, 3]))
2070        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
2071
2072        >>> list(mxrange([[3, 5, 1], [9, 0, -3]]))
2073        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]
2074
2075        """
2076        if len(lr) == 0:
2077            yield ()
2078        else:
2079            #it can work with single numbers
2080            index = lr[0]
2081            if type(1) == type(index):
2082                index = [ index ]
2083            for a in range(*index):
2084                for b in mxrange(lr[1:]):
2085                    yield tuple([a] + list(b))
2086
2087    try:
2088        from matplotlib.figure import Figure
2089        from matplotlib.patches import Polygon
2090        from matplotlib.backends.backend_agg import FigureCanvasAgg
2091    except ImportError:
2092        import sys
2093        print >> sys.stderr, "Function requires matplotlib. Please install it."
2094        return
2095
2096    def print_figure(fig, *args, **kwargs):
2097        canvas = FigureCanvasAgg(fig)
2098        canvas.print_figure(*args, **kwargs)
2099
2100    sums = avranks
2101
2102    tempsort =  sorted([ (a,i) for i,a in  enumerate(sums) ], reverse=reverse)
2103    ssums = nth(tempsort, 0)
2104    sortidx = nth(tempsort, 1)
2105    nnames = [ names[x] for x in sortidx ]
2106   
2107    if lowv == None:
2108        lowv = min(1, int(math.floor(min(ssums))))
2109    if highv == None:
2110        highv = max(len(avranks), int(math.ceil(max(ssums))))
2111
2112    cline = 0.4
2113
2114    k = len(sums)
2115
2116    lines = None
2117    sums = sorted(sums)
2118
2119    linesblank = 0
2120    scalewidth = width - 2*textspace
2121
2122    def rankpos(rank):
2123        if not reverse:
2124            a = rank - lowv
2125        else:
2126            a = highv - rank
2127        return textspace+scalewidth/(highv-lowv)*a
2128
2129    distanceh = 0.25
2130
2131    if cd and cdmethod == None:
2132   
2133        #get pairs of non significant methods
2134
2135        def get_lines(sums, hsd):
2136
2137            #get all pairs
2138            lsums = len(sums)
2139            allpairs = [ (i,j) for i,j in mxrange([[lsums], [lsums]]) if j > i ]
2140            #remove not significant
2141            notSig = [ (i,j) for i,j in allpairs if abs(sums[i]-sums[j]) <= hsd ]
2142            #keep only longest
2143           
2144            def no_longer((i,j), notSig):
2145                for i1,j1 in notSig:
2146                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
2147                        return False
2148                return True
2149
2150            longest = [ (i,j) for i,j in notSig if no_longer((i,j),notSig) ]
2151           
2152            return longest
2153
2154        lines = get_lines(ssums, cd)
2155        linesblank = 0.2 + 0.2 + (len(lines)-1)*0.1
2156
2157        #add scale
2158        distanceh = 0.25
2159        cline += distanceh
2160
2161    #calculate the height needed for the image
2162    minnotsignificant = max(2*0.2, linesblank)
2163    height = cline + ((k+1)/2)*0.2 + minnotsignificant
2164
2165    fig = Figure(figsize=(width, height))
2166    ax = fig.add_axes([0,0,1,1]) #reverse y axis
2167    ax.set_axis_off()
2168
2169    hf = 1./height # height factor
2170    wf = 1./width
2171
2172    def hfl(l): 
2173        return [ a*hf for a in l ]
2174
2175    def wfl(l): 
2176        return [ a*wf for a in l ]
2177
2178    """
2179    Upper left corner is (0,0).
2180    """
2181
2182    ax.plot([0,1], [0,1], c="w")
2183    ax.set_xlim(0, 1)
2184    ax.set_ylim(1, 0)
2185
2186    def line(l, color='k', **kwargs):
2187        """
2188        Input is a list of pairs of points.
2189        """
2190        ax.plot(wfl(nth(l,0)), hfl(nth(l,1)), color=color, **kwargs)
2191
2192    def text(x, y, s, *args, **kwargs):
2193        ax.text(wf*x, hf*y, s, *args, **kwargs)
2194
2195    line([(textspace, cline), (width-textspace, cline)], linewidth=0.7)
2196   
2197    bigtick = 0.1
2198    smalltick = 0.05
2199
2200
2201    import numpy
2202
2203    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
2204        tick = smalltick
2205        if a == int(a): tick = bigtick
2206        line([(rankpos(a), cline-tick/2),(rankpos(a), cline)], linewidth=0.7)
2207
2208    for a in range(lowv, highv+1):
2209        text(rankpos(a), cline-tick/2-0.05, str(a), ha="center", va="bottom")
2210
2211    k = len(ssums)
2212
2213    for i in range((k+1)/2):
2214        chei = cline+ minnotsignificant + (i)*0.2
2215        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace-0.1, chei)], linewidth=0.7)
2216        text(textspace-0.2, chei, nnames[i], ha="right", va="center")
2217
2218    for i in range((k+1)/2, k):
2219        chei = cline + minnotsignificant + (k-i-1)*0.2
2220        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace+scalewidth+0.1, chei)], linewidth=0.7)
2221        text(textspace+scalewidth+0.2, chei, nnames[i], ha="left", va="center")
2222
2223    if cd and cdmethod == None:
2224
2225        #upper scale
2226        if not reverse:
2227            begin, end = rankpos(lowv), rankpos(lowv+cd)
2228        else:
2229            begin, end = rankpos(highv), rankpos(highv - cd)
2230           
2231        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
2232        line([(begin, distanceh + bigtick/2), (begin, distanceh - bigtick/2)], linewidth=0.7)
2233        line([(end, distanceh + bigtick/2), (end, distanceh - bigtick/2)], linewidth=0.7)
2234        text((begin+end)/2, distanceh - 0.05, "CD", ha="center", va="bottom")
2235
2236        #non significance lines   
2237        def draw_lines(lines, side=0.05, height=0.1):
2238            start = cline + 0.2
2239            for l,r in lines: 
2240                line([(rankpos(ssums[l])-side, start), (rankpos(ssums[r])+side, start)], linewidth=2.5) 
2241                start += height
2242
2243        draw_lines(lines)
2244
2245    elif cd:
2246        begin = rankpos(avranks[cdmethod]-cd)
2247        end = rankpos(avranks[cdmethod]+cd)
2248        line([(begin, cline), (end, cline)], linewidth=2.5) 
2249        line([(begin, cline + bigtick/2), (begin, cline - bigtick/2)], linewidth=2.5)
2250        line([(end, cline + bigtick/2), (end, cline - bigtick/2)], linewidth=2.5)
2251 
2252    print_figure(fig, filename, **kwargs)
2253
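# Usage sketch (illustrative): draw a CD diagram for four methods compared on
# 14 data sets; the output file name and method names are placeholders.
#
#     avranks = [3.143, 2.000, 2.893, 1.964]
#     names = ["first", "second", "third", "fourth"]
#     cd = compute_CD(avranks, 14)
#     graph_ranks("cd_diagram.png", avranks, names, cd=cd, width=6, textspace=1.5)
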
2254def mlc_hamming_loss(res):
2255    """
2256    Schapire and Singer (2000) presented Hamming Loss, which is defined as:
2257
2258    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\vartriangle Z_i|}{|L|}`
2259    """
2260    losses = [0]*res.number_of_learners
2261    label_num = len(res.labels)
2262    example_num = gettotsize(res)
2263
2264    for e in res.results:
2265        aclass = e.actual_class
2266        for i, labels in enumerate(e.classes):
2267            labels = map(int, labels)
2268            if len(labels) <> len(aclass):
2269                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2270            for j in range(label_num):
2271                if labels[j] != aclass[j]:
2272                    losses[i] += 1
2273           
2274    return [float(x)/(label_num*example_num) for x in losses]
2275
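# Worked example: with three labels, an actual label set Y = (1, 0, 1) and a
# prediction Z = (1, 1, 0) disagree on two of the three labels, so the instance
# contributes 2/3; the returned score averages such terms over all instances
# (0 is the best possible value, 1 the worst).
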
2276def mlc_accuracy(res, forgiveness_rate = 1.0):
2277    """
2278    Godbole & Sarawagi (2004) define the metrics accuracy, precision and recall as follows:
2279     
2280    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`
2281   
2282    Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`,
2283    called forgiveness rate:
2284   
2285    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`
2286    """
2287    accuracies = [0.0]*res.number_of_learners
2288    label_num = len(res.labels)
2289    example_num = gettotsize(res)
2290   
2291    for e in res.results:
2292        aclass = e.actual_class
2293        for i, labels in enumerate(e.classes):
2294            labels = map(int, labels)
2295            if len(labels) <> len(aclass):
2296                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2297           
2298            intersection = 0.0
2299            union = 0.0
2300            for pred, act in zip(labels, aclass):
2301                if pred and act:
2302                    intersection = intersection+1
2303                if pred or act:
2304                    union = union+1
2305
2306            if union != 0:
2307                accuracies[i] = accuracies[i] + math.pow(intersection/union, forgiveness_rate)
2308
2309    return [x/example_num for x in accuracies]
2310
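# Worked example: for an actual label set Y = {l1, l3} and a prediction
# Z = {l1, l2}, the intersection holds one label and the union three, so the
# instance's term is |Y n Z|/|Y u Z| = 1/3; with the default forgiveness_rate
# of 1.0 the score is simply the average of such terms over all instances.
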
2311def mlc_precision(res):
2312    """
2313    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`
2314    """
2315    precisions = [0.0]*res.number_of_learners
2316    label_num = len(res.labels)
2317    example_num = gettotsize(res)
2318   
2319    for e in res.results:
2320        aclass = e.actual_class
2321        for i, labels in enumerate(e.classes):
2322            labels = map(int, labels)
2323            if len(labels) <> len(aclass):
2324                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2325           
2326            intersection = 0.0
2327            predicted = 0.0
2328            for pred, act in zip(labels, aclass):
2329                if pred and act:
2330                    intersection = intersection+1
2331                if pred:
2332                    predicted = predicted + 1
2333            if predicted <> 0:
2334                precisions[i] = precisions[i] + intersection/predicted
2335           
2336    return [x/example_num for x in precisions]
2337
2338def mlc_recall(res):
2339    """
2340    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`
2341    """
2342    recalls = [0.0]*res.number_of_learners
2343    label_num = len(res.labels)
2344    example_num = gettotsize(res)
2345   
2346    for e in res.results:
2347        aclass = e.actual_class
2348        for i, labels in enumerate(e.classes):
2349            labels = map(int, labels)
2350            if len(labels) <> len(aclass):
2351                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2352           
2353            intersection = 0.0
2354            actual = 0.0
2355            for pred, act in zip(labels, aclass):
2356                if pred and act:
2357                    intersection = intersection+1
2358                if act:
2359                    actual = actual + 1
2360            if actual <> 0:
2361                recalls[i] = recalls[i] + intersection/actual
2362           
2363    return [x/example_num for x in recalls]
2364
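# Worked example: for an actual label set Y = {l1, l3} and a prediction
# Z = {l1, l2}, the intersection holds one label, so the instance contributes
# |Y n Z|/|Z| = 1/2 to precision and |Y n Z|/|Y| = 1/2 to recall; both scores
# are then averaged over all instances.
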
2365#def mlc_ranking_loss(res):
2366#    pass
2367#
2368#def mlc_average_precision(res):
2369#    pass
2370#
2371#def mlc_hierarchical_loss(res):
2372#    pass
2373
2374#########################################################################################
2375if __name__ == "__main__":
2376    avranks =  [3.143, 2.000, 2.893, 1.964]
2377    names = ["prva", "druga", "tretja", "cetrta" ]
2378    cd = compute_CD(avranks, 14)
2379    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2380    print cd
2381
2382    print compute_friedman(avranks, 14)
2383
2384    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)