source: orange/Orange/evaluation/scoring.py @ 10343:528f672373f5

Revision 10343:528f672373f5, checked in by Lan Zagar: Improved multitarget scoring.

import math, functools
from operator import add
import numpy

import Orange
from Orange import statc, corn
from Orange.misc import deprecated_keywords, deprecated_function_name, \
    deprecation_warning, environ
from Orange.evaluation import testing

try:
    import matplotlib
    HAS_MATPLOTLIB = True
except ImportError:
    matplotlib = None
    HAS_MATPLOTLIB = False

#### Private stuff

def log2(x):
    """Calculate the logarithm in base 2."""
    return math.log(x)/math.log(2)

def check_non_zero(x):
    """Raise a ValueError when x is 0.0."""
    if x==0.0:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."

def gettotweight(res):
    """Sum all the weights."""
    totweight = reduce(lambda x, y: x+y.weight, res.results, 0)
    if totweight==0.0:
        raise ValueError, "Cannot compute the score: sum of weights is 0.0."
    return totweight

def gettotsize(res):
    """Return the number of result instances."""
    if len(res.results):
        return len(res.results)
    else:
        raise ValueError, "Cannot compute the score: no examples."


def split_by_iterations(res):
    """ Splits ExperimentResults of a multiple-iteration test into a list
    of ExperimentResults, one for each iteration.
    """
    if res.number_of_iterations < 2:
        return [res]

    ress = [Orange.evaluation.testing.ExperimentResults(
                1, res.classifier_names, res.class_values,
                res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type, labels=res.labels)
            for _ in range(res.number_of_iterations)]
    for te in res.results:
        ress[te.iteration_number].results.append(te)
    return ress

def split_by_classifiers(res):
    """ Splits an instance of :obj:`ExperimentResults` into a list of
    :obj:`ExperimentResults`, one for each classifier.
    """
    split_res = []
    for i in range(len(res.classifierNames)):
        r = Orange.evaluation.testing.ExperimentResults(res.numberOfIterations,
                    [res.classifierNames[i]], res.classValues,
                    weights=res.weights, baseClass=res.baseClass,
                    classifiers=[res.classifiers[i]] if res.classifiers else [],
                    test_type=res.test_type, labels=res.labels)
        r.results = []
        for te in res.results:
            r.results.append(Orange.evaluation.testing.TestedExample(te.iterationNumber,
                                te.actualClass, n=1, weight=te.weight))
            r.results[-1].classes = [te.classes[i]]
            r.results[-1].probabilities = [te.probabilities[i]]
        split_res.append(r)
    return split_res


def class_probabilities_from_res(res, **argkw):
    """Calculate class probabilities."""
    probs = [0.0] * len(res.class_values)
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            probs[int(tex.actual_class)] += 1.0
        totweight = gettotsize(res)
    else:
        totweight = 0.0
        for tex in res.results:
            probs[int(tex.actual_class)] += tex.weight
            totweight += tex.weight
        check_non_zero(totweight)
    return [prob/totweight for prob in probs]


@deprecated_keywords({
    "foldN": "fold_n",
    "reportSE": "report_se",
    "iterationIsOuter": "iteration_is_outer"})
def statistics_by_folds(stats, fold_n, report_se, iteration_is_outer):
    # remove empty folds, turn the matrix so that learner is outer
    if iteration_is_outer:
        if not stats:
            raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."
        number_of_learners = len(stats[0])
        stats = filter(lambda (x, fN): fN > 0.0, zip(stats, fold_n))
        stats = [[x[lrn]/fN for x, fN in stats] for lrn in range(number_of_learners)]
    else:
        stats = [[x/fN for x, fN in filter(lambda (x, fN): fN > 0.0, zip(lrnD, fold_n))] for lrnD in stats]

    if not stats:
        raise ValueError, "Cannot compute the score: no classifiers"
    if not stats[0]:
        raise ValueError, "Cannot compute the score: no examples or sum of weights is 0.0."

    if report_se:
        return [(statc.mean(x), statc.sterr(x)) for x in stats]
    else:
        return [statc.mean(x) for x in stats]

def ME(res, **argkw):
    MEs = [0.0]*res.number_of_learners

    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            MEs = map(lambda res, cls, ac = float(tex.actual_class):
                      res + abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotsize(res)
    else:
        for tex in res.results:
            MEs = map(lambda res, cls, ac = float(tex.actual_class), tw = tex.weight:
                       res + tw*abs(float(cls) - ac), MEs, tex.classes)
        totweight = gettotweight(res)

    return [x/totweight for x in MEs]

MAE = ME  # note: redefined by the regression_error-based MAE below

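# For instance, with actual classes [1.0, 2.0] and a single learner that
# predicts [1.5, 1.0], ME gives (|1.5 - 1.0| + |1.0 - 2.0|) / 2 = 0.75.
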
class ConfusionMatrix:
    """
    Classification result summary.
    """
    #: True Positive predictions
    TP = 0.
    #: True Negative predictions
    TN = 0.
    #: False Positive predictions
    FP = 0.
    #: False Negative predictions
    FN = 0.

    @deprecated_keywords({"predictedPositive": "predicted_positive",
                          "isPositive": "is_positive"})
    def addTFPosNeg(self, predicted_positive, is_positive, weight=1.0):
        """
        Update the confusion matrix with the result of a single classification.

        :param predicted_positive: positive class value was predicted
        :param is_positive: correct class value is positive
        :param weight: weight of the selected instance
        """
        if predicted_positive:
            if is_positive:
                self.TP += weight
            else:
                self.FP += weight
        else:
            if is_positive:
                self.FN += weight
            else:
                self.TN += weight

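# A minimal usage sketch (illustrative values only): tallying one weighted
# true positive and one unweighted false negative by hand.
#
#   cm = ConfusionMatrix()
#   cm.addTFPosNeg(predicted_positive=True, is_positive=True, weight=2.0)
#   cm.addTFPosNeg(predicted_positive=False, is_positive=True)
#   # now cm.TP == 2.0, cm.FN == 1.0, cm.FP == cm.TN == 0.0
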
#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of numeric predictions

def check_argkw(dct, lst):
    """check_argkw(dct, lst) -> returns True if any of the given keys has a non-zero value in dct"""
    return reduce(lambda x, y: x or y, [dct.get(k, 0) for k in lst])

def regression_error(res, **argkw):
    """regression_error(res) -> regression error (default: MSE)"""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        # computes the scores for each iteration, then averages
        scores = [[0.0] * res.number_of_iterations for _ in range(res.number_of_learners)]
        norm = None
        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            norm = [0.0] * res.number_of_iterations

        nIter = [0]*res.number_of_iterations       # counts examples in each iteration
        a = [0]*res.number_of_iterations           # average class in each iteration
        for tex in res.results:
            nIter[tex.iteration_number] += 1
            a[tex.iteration_number] += float(tex.actual_class)
        a = [a[i]/nIter[i] for i in range(res.number_of_iterations)]

        if argkw.get("unweighted", 0) or not res.weights:
            # iterate across test cases
            for tex in res.results:
                ai = float(tex.actual_class)

                # compute normalization, if required
                if argkw.get("norm-abs", 0):
                    norm[tex.iteration_number] += abs(ai - a[tex.iteration_number])
                elif argkw.get("norm-sqr", 0):
                    norm[tex.iteration_number] += (ai - a[tex.iteration_number])**2

                # iterate across results of different regressors
                for i, cls in enumerate(tex.classes):
                    if argkw.get("abs", 0):
                        scores[i][tex.iteration_number] += abs(float(cls) - ai)
                    else:
                        scores[i][tex.iteration_number] += (float(cls) - ai)**2
        else: # weighted
            raise NotImplementedError, "weighted error scores with SE not implemented yet"

        if argkw.get("norm-abs") or argkw.get("norm-sqr"):
            scores = [[x/n for x, n in zip(y, norm)] for y in scores]
        else:
            scores = [[x/ni for x, ni in zip(y, nIter)] for y in scores]

        if argkw.get("R2"):
            scores = [[1.0 - x for x in y] for y in scores]

        if argkw.get("sqrt", 0):
            scores = [[math.sqrt(x) for x in y] for y in scores]

        return [(statc.mean(x), statc.std(x)) for x in scores]

    else: # single iteration (testing on a single test set)
        scores = [0.0] * res.number_of_learners
        norm = 0.0

        if argkw.get("unweighted", 0) or not res.weights:
            a = sum([tex.actual_class for tex in res.results]) \
                / len(res.results)
            for tex in res.results:
                if argkw.get("abs", 0):
                    scores = map(lambda res, cls, ac = float(tex.actual_class):
                                 res + abs(float(cls) - ac), scores, tex.classes)
                else:
                    scores = map(lambda res, cls, ac = float(tex.actual_class):
                                 res + (float(cls) - ac)**2, scores, tex.classes)

                if argkw.get("norm-abs", 0):
                    norm += abs(tex.actual_class - a)
                elif argkw.get("norm-sqr", 0):
                    norm += (tex.actual_class - a)**2
            totweight = gettotsize(res)
        else:
            # UNFINISHED
            MSEs = [0.]*res.number_of_learners
            for tex in res.results:
                MSEs = map(lambda res, cls, ac = float(tex.actual_class),
                           tw = tex.weight:
                           res + tw * (float(cls) - ac)**2, MSEs, tex.classes)
            totweight = gettotweight(res)

        if argkw.get("norm-abs", 0) or argkw.get("norm-sqr", 0):
            scores = [s/norm for s in scores]
        else: # normalize by number of instances (or sum of weights)
            scores = [s/totweight for s in scores]

        if argkw.get("R2"):
            scores = [1.0 - s for s in scores]

        if argkw.get("sqrt", 0):
            scores = [math.sqrt(x) for x in scores]

        return scores

def MSE(res, **argkw):
    """ Computes mean-squared error. """
    return regression_error(res, **argkw)

def RMSE(res, **argkw):
    """ Computes root mean-squared error. """
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def MAE(res, **argkw):
    """ Computes mean absolute error. """
    argkw.setdefault("abs", True)
    return regression_error(res, **argkw)

def RSE(res, **argkw):
    """ Computes relative squared error. """
    argkw.setdefault("norm-sqr", True)
    return regression_error(res, **argkw)

def RRSE(res, **argkw):
    """ Computes root relative squared error. """
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("sqrt", True)
    return regression_error(res, **argkw)

def RAE(res, **argkw):
    """ Computes relative absolute error. """
    argkw.setdefault("abs", True)
    argkw.setdefault("norm-abs", True)
    return regression_error(res, **argkw)

def R2(res, **argkw):
    """ Computes the coefficient of determination, R-squared. """
    argkw.setdefault("norm-sqr", True)
    argkw.setdefault("R2", True)
    return regression_error(res, **argkw)

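# A minimal usage sketch for the wrappers above (assumes `learners` and a
# regression data table `data`; printed values are illustrative):
#
#   res = Orange.evaluation.testing.cross_validation(learners, data, folds=10)
#   print "RMSE: %.3f  R2: %.3f" % (RMSE(res)[0], R2(res)[0])
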
def MSE_old(res, **argkw):
    """MSE(res) -> mean-squared error"""
    if argkw.get("SE", 0) and res.number_of_iterations > 1:
        MSEs = [[0.0] * res.number_of_iterations for _ in range(res.number_of_learners)]
        nIter = [0]*res.number_of_iterations
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                ac = float(tex.actual_class)
                nIter[tex.iteration_number] += 1
                for i, cls in enumerate(tex.classes):
                    MSEs[i][tex.iteration_number] += (float(cls) - ac)**2
        else:
            raise ValueError, "weighted RMSE with SE not implemented yet"
        MSEs = [[x/ni for x, ni in zip(y, nIter)] for y in MSEs]
        if argkw.get("sqrt", 0):
            MSEs = [[math.sqrt(x) for x in y] for y in MSEs]
        return [(statc.mean(x), statc.std(x)) for x in MSEs]

    else:
        MSEs = [0.0]*res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac = float(tex.actual_class):
                           res + (float(cls) - ac)**2, MSEs, tex.classes)
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                MSEs = map(lambda res, cls, ac = float(tex.actual_class), tw = tex.weight:
                           res + tw * (float(cls) - ac)**2, MSEs, tex.classes)
            totweight = gettotweight(res)

        if argkw.get("sqrt", 0):
            MSEs = [math.sqrt(x) for x in MSEs]
        return [x/totweight for x in MSEs]

def RMSE_old(res, **argkw):
    """RMSE(res) -> root mean-squared error"""
    argkw.setdefault("sqrt", 1)
    return MSE_old(res, **argkw)

#########################################################################
# PERFORMANCE MEASURES:
# Scores for evaluation of classifiers

class CA(list):
    """
    Compute the percentage of matches between predicted and actual class.

    :param test_results: :obj:`~Orange.evaluation.testing.ExperimentResults`
                         or list of :obj:`ConfusionMatrix`.
    :param report_se: include standard error in the result.
    :param ignore_weights: ignore instance weights.
    :rtype: list of scores, one for each learner.

    Standard errors are estimated from the deviation of CAs across folds (if
    test_results were produced by cross_validation) or approximated under
    the assumption of normal distribution otherwise.
    """
    CONFUSION_MATRIX = 0
    CONFUSION_MATRIX_LIST = 1
    CLASSIFICATION = 2
    CROSS_VALIDATION = 3

    @deprecated_keywords({"reportSE": "report_se",
                          "unweighted": "ignore_weights"})
    def __init__(self, test_results, report_se=False, ignore_weights=False):
        super(CA, self).__init__()
        self.report_se = report_se
        self.ignore_weights = ignore_weights

        input_type = self.get_input_type(test_results)
        if input_type == self.CONFUSION_MATRIX:
            self[:] = [self.from_confusion_matrix(test_results)]
        elif input_type == self.CONFUSION_MATRIX_LIST:
            self[:] = self.from_confusion_matrix_list(test_results)
        elif input_type == self.CLASSIFICATION:
            self[:] = self.from_classification_results(test_results)
        elif input_type == self.CROSS_VALIDATION:
            self[:] = self.from_crossvalidation_results(test_results)

    def from_confusion_matrix(self, cm):
        all_predictions = 0.
        correct_predictions = 0.
        if isinstance(cm, ConfusionMatrix):
            all_predictions += cm.TP+cm.FN+cm.FP+cm.TN
            correct_predictions += cm.TP+cm.TN
        else:
            for r, row in enumerate(cm):
                for c, column in enumerate(row):
                    if r == c:
                        correct_predictions += column
                    all_predictions += column

        check_non_zero(all_predictions)
        ca = correct_predictions/all_predictions

        if self.report_se:
            return ca, ca*(1-ca)/math.sqrt(all_predictions)
        else:
            return ca

    def from_confusion_matrix_list(self, confusion_matrices):
        return [self.from_confusion_matrix(cm) for cm in confusion_matrices]

    def from_classification_results(self, test_results):
        CAs = [0.0]*test_results.number_of_learners
        totweight = 0.
        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            CAs = map(lambda res, cls: res+(cls==tex.actual_class and w), CAs, tex.classes)
            totweight += w
        check_non_zero(totweight)
        ca = [x/totweight for x in CAs]

        if self.report_se:
            return [(x, x*(1-x)/math.sqrt(totweight)) for x in ca]
        else:
            return ca

    def from_crossvalidation_results(self, test_results):
        CAsByFold = [[0.0]*test_results.number_of_iterations for _ in range(test_results.number_of_learners)]
        foldN = [0.0]*test_results.number_of_iterations

        for tex in test_results.results:
            w = 1. if self.ignore_weights else tex.weight
            for lrn in range(test_results.number_of_learners):
                CAsByFold[lrn][tex.iteration_number] += (tex.classes[lrn]==tex.actual_class) and w
            foldN[tex.iteration_number] += w

        return statistics_by_folds(CAsByFold, foldN, self.report_se, False)

    def get_input_type(self, test_results):
        if isinstance(test_results, ConfusionMatrix):
            return self.CONFUSION_MATRIX
        elif isinstance(test_results, testing.ExperimentResults):
            if test_results.number_of_iterations == 1:
                return self.CLASSIFICATION
            else:
                return self.CROSS_VALIDATION
        elif isinstance(test_results, list):
            return self.CONFUSION_MATRIX_LIST


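# A minimal usage sketch for CA (assumes a classification data table `data`;
# output values are illustrative):
#
#   learners = [Orange.classification.bayes.NaiveLearner()]
#   res = Orange.evaluation.testing.cross_validation(learners, data)
#   print CA(res)                   # e.g. [0.953]
#   print CA(res, report_se=True)   # e.g. [(0.953, 0.011)]
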
@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def AP(res, report_se=False, ignore_weights=False, **argkw):
    """ Computes the average probability assigned to the correct class. """
    if res.number_of_iterations == 1:
        APs = [0.0]*res.number_of_learners
        if ignore_weights or not res.weights:
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class], APs, tex.probabilities)
            totweight = gettotsize(res)
        else:
            totweight = 0.
            for tex in res.results:
                APs = map(lambda res, probs: res + probs[tex.actual_class]*tex.weight, APs, tex.probabilities)
                totweight += tex.weight
        check_non_zero(totweight)
        return [AP/totweight for AP in APs]

    APsByFold = [[0.0]*res.number_of_learners for _ in range(res.number_of_iterations)]
    foldN = [0.0] * res.number_of_iterations
    if ignore_weights or not res.weights:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs: res + probs[tex.actual_class], APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            APsByFold[tex.iteration_number] = map(lambda res, probs: res + probs[tex.actual_class] * tex.weight, APsByFold[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(APsByFold, foldN, report_se, True)


@deprecated_keywords({"reportSE": "report_se",
                      "unweighted": "ignore_weights"})
def Brier_score(res, report_se=False, ignore_weights=False, **argkw):
    """ Computes the Brier score, defined as the average (over test examples)
    of sum_x (t(x) - p(x))^2, where x is a class, t(x) is 1 for the correct
    class and 0 for the others, and p(x) is the probability that the
    classifier assigned to class x.
    """
    # Computes an average (over examples) of sum_x(t(x) - p(x))^2, where
    #    x is class,
    #    t(x) is 0 for 'wrong' and 1 for 'correct' class
    #    p(x) is predicted probability.
    # There's a trick: since t(x) is zero for all classes but the
    # correct one (c), we compute the sum as sum_x(p(x)^2) - 2*p(c) + 1.
    # Since the +1 is there for each example, it adds 1 to the average;
    # we skip the +1 inside the sum and add it just at the end of the function.
    # We take max(result, 0) to avoid -0.0000x due to rounding errors.
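    # For instance, with predicted probabilities p = (0.7, 0.2, 0.1) and
    # correct class c = 0: sum_x(t(x) - p(x))^2 = 0.09 + 0.04 + 0.01 = 0.14,
    # and the shortcut gives (0.49 + 0.04 + 0.01) - 2*0.7 + 1 = 0.14 as well.
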
    if res.number_of_iterations == 1:
        MSEs = [0.0]*res.number_of_learners
        if ignore_weights or not res.weights:
            totweight = 0.0
            for tex in res.results:
                MSEs = map(lambda res, probs:
                           res + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities)
                totweight += tex.weight
        else:
            for tex in res.results:
                MSEs = map(lambda res, probs:
                           res + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], MSEs, tex.probabilities)
            totweight = gettotweight(res)
        check_non_zero(totweight)
        if report_se:
            return [(max(x/totweight+1.0, 0), 0) for x in MSEs]  ## change this, not zero!!!
        else:
            return [max(x/totweight+1.0, 0) for x in MSEs]

    BSs = [[0.0]*res.number_of_learners for _ in range(res.number_of_iterations)]
    foldN = [0.] * res.number_of_iterations

    if ignore_weights or not res.weights:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs:
                       rr + reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            BSs[tex.iteration_number] = map(lambda rr, probs:
                       rr + tex.weight*reduce(lambda s, pi: s+pi**2, probs, 0) - 2*probs[tex.actual_class], BSs[tex.iteration_number], tex.probabilities)
            foldN[tex.iteration_number] += tex.weight

    stats = statistics_by_folds(BSs, foldN, report_se, True)
    if report_se:
        return [(x+1.0, y) for x, y in stats]
    else:
        return [x+1.0 for x in stats]

def BSS(res, **argkw):
    return [1-x/2 for x in apply(Brier_score, (res, ), argkw)]

def IS_ex(Pc, P):
    """Pc is the posterior probability, P the prior."""
    if Pc >= P:
        return -log2(P)+log2(Pc)
    else:
        return -(-log2(1-P)+log2(1-Pc))

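# For instance, with prior P = 0.5 and posterior Pc = 0.8, IS_ex returns
# -log2(0.5) + log2(0.8), about 1 - 0.322 = 0.678 bits; with Pc = 0.3 < P
# it returns -(-log2(0.5) + log2(0.7)), about -(1 - 0.515) = -0.485 bits.
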
@deprecated_keywords({"reportSE": "report_se"})
def IS(res, apriori=None, report_se=False, **argkw):
    """ Computes the information score as defined by
    `Kononenko and Bratko (1991) \
    <http://www.springerlink.com/content/g5p7473160476612/>`_.
    Argument :obj:`apriori` gives the a priori class
    distribution; if it is omitted, the class distribution is computed from
    the actual classes of examples in :obj:`res`.
    """
    if not apriori:
        apriori = class_probabilities_from_res(res)

    if res.number_of_iterations == 1:
        ISs = [0.0]*res.number_of_learners
        if argkw.get("unweighted", 0) or not res.weights:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            totweight = gettotsize(res)
        else:
            for tex in res.results:
                for i in range(len(tex.probabilities)):
                    cls = tex.actual_class
                    ISs[i] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
            totweight = gettotweight(res)
        if report_se:
            return [(IS/totweight, 0) for IS in ISs]
        else:
            return [IS/totweight for IS in ISs]

    ISs = [[0.0]*res.number_of_iterations for _ in range(res.number_of_learners)]
    foldN = [0.] * res.number_of_iterations

    # compute info scores for each fold
    if argkw.get("unweighted", 0) or not res.weights:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls])
            foldN[tex.iteration_number] += 1
    else:
        for tex in res.results:
            for i in range(len(tex.probabilities)):
                cls = tex.actual_class
                ISs[i][tex.iteration_number] += IS_ex(tex.probabilities[i][cls], apriori[cls]) * tex.weight
            foldN[tex.iteration_number] += tex.weight

    return statistics_by_folds(ISs, foldN, report_se, False)

def Wilcoxon(res, statistics, **argkw):
    res1, res2 = [], []
    for ri in split_by_iterations(res):
        stats = statistics(ri, **argkw)
        if len(stats) != 2:
            raise TypeError, "Wilcoxon compares two classifiers, no more, no less"
        res1.append(stats[0])
        res2.append(stats[1])
    return statc.wilcoxont(res1, res2)

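# A minimal sketch (assumes `res` holds cross-validation results for exactly
# two learners): Wilcoxon(res, CA) pairs the two learners' per-fold
# classification accuracies and returns whatever statc.wilcoxont returns
# (the test statistic and p-value).
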
def rank_difference(res, statistics, **argkw):
    if not res.results:
        raise TypeError, "no experiments"

    k = len(res.results[0].classes)
    if k < 2:
        raise TypeError, "nothing to compare (less than two classifiers given)"
    if k == 2:
        return apply(Wilcoxon, (res, statistics), argkw)
    else:
        return apply(Friedman, (res, statistics), argkw)


@deprecated_keywords({"res": "test_results",
                      "classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def confusion_matrices(test_results, class_index=-1,
                       ignore_weights=False, cutoff=.5):
    """
    Return confusion matrices for test_results.

    :param test_results: test results
    :param class_index: index of the class value for which the confusion
                        matrices are to be computed.
    :param ignore_weights: ignore instance weights.
    :param cutoff: predicted probability above which an instance is counted
                   as positive.

    :rtype: list of :obj:`ConfusionMatrix`
    """
    tfpns = [ConfusionMatrix() for _ in range(test_results.number_of_learners)]

    if class_index < 0:
        numberOfClasses = len(test_results.class_values)
        if class_index < -1 or numberOfClasses > 2:
            cm = [[[0.0] * numberOfClasses for _ in range(numberOfClasses)] for _ in range(test_results.number_of_learners)]
            if ignore_weights or not test_results.weights:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += 1
            else:
                for tex in test_results.results:
                    trueClass = int(tex.actual_class)
                    for li, pred in enumerate(tex.classes):
                        predClass = int(pred)
                        if predClass < numberOfClasses:
                            cm[li][trueClass][predClass] += tex.weight
            return cm

        elif test_results.baseClass >= 0:
            class_index = test_results.baseClass
        else:
            class_index = 1

    if cutoff != .5:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] > cutoff, isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.probabilities[i][class_index] > cutoff, isPositive, lr.weight)
    else:
        if ignore_weights or not test_results.weights:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index, isPositive)
        else:
            for lr in test_results.results:
                isPositive = (lr.actual_class == class_index)
                for i in range(test_results.number_of_learners):
                    tfpns[i].addTFPosNeg(lr.classes[i] == class_index, isPositive, lr.weight)
    return tfpns


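# A minimal usage sketch (assumes binary-class test results `res`; output is
# illustrative):
#
#   cm = confusion_matrices(res, class_index=1)[0]
#   print "TP=%g FP=%g FN=%g TN=%g" % (cm.TP, cm.FP, cm.FN, cm.TN)
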
# obsolete (renamed)
compute_confusion_matrices = confusion_matrices


@deprecated_keywords({"confusionMatrix": "confusion_matrix"})
def confusion_chi_square(confusion_matrix):
    """
    Return the chi-square statistic of the confusion matrix
    (a higher value indicates that the prediction is not by chance).
    """
    if isinstance(confusion_matrix, ConfusionMatrix) or \
       not isinstance(confusion_matrix[1], list):
        return _confusion_chi_square(confusion_matrix)
    else:
        return map(_confusion_chi_square, confusion_matrix)

def _confusion_chi_square(confusion_matrix):
    if isinstance(confusion_matrix, ConfusionMatrix):
        c = confusion_matrix
        confusion_matrix = [[c.TP, c.FN], [c.FP, c.TN]]
    dim = len(confusion_matrix)
    rowPriors = [sum(r) for r in confusion_matrix]
    colPriors = [sum(r[i] for r in confusion_matrix) for i in range(dim)]
    total = sum(rowPriors)
    rowPriors = [r/total for r in rowPriors]
    colPriors = [r/total for r in colPriors]
    ss = 0
    for ri, row in enumerate(confusion_matrix):
        for ci, o in enumerate(row):
            e = total * rowPriors[ri] * colPriors[ci]
            if not e:
                return -1, -1, -1
            ss += (o-e)**2 / e
    df = (dim-1)**2
    return ss, df, statc.chisqprob(ss, df)

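# For instance, the (float-valued) matrix [[10., 0.], [0., 10.]] gives
# ss = 20.0 with df = 1, i.e. a prediction far better than chance.
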
@deprecated_keywords({"confm": "confusion_matrix"})
def sens(confusion_matrix):
    """
    Return `sensitivity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
    (proportion of actual positives which are correctly identified as such).
    """
    if type(confusion_matrix) == list:
        return [sens(cm) for cm in confusion_matrix]
    else:
        tot = confusion_matrix.TP+confusion_matrix.FN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute sensitivity: one or both classes have no instances")
            return -1

        return confusion_matrix.TP/tot


@deprecated_keywords({"confm": "confusion_matrix"})
def recall(confusion_matrix):
    """
    Return `recall <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (fraction of relevant instances that are retrieved).
    """
    return sens(confusion_matrix)


@deprecated_keywords({"confm": "confusion_matrix"})
def spec(confusion_matrix):
    """
    Return `specificity <http://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_
    (proportion of negatives which are correctly identified).
    """
    if type(confusion_matrix) == list:
        return [spec(cm) for cm in confusion_matrix]
    else:
        tot = confusion_matrix.FP+confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute specificity: one or both classes have no instances")
            return -1
        return confusion_matrix.TN/tot


@deprecated_keywords({"confm": "confusion_matrix"})
def PPV(confusion_matrix):
    """
    Return `positive predictive value <http://en.wikipedia.org/wiki/Positive_predictive_value>`_
    (proportion of subjects with positive test results who are correctly diagnosed)."""
    if type(confusion_matrix) == list:
        return [PPV(cm) for cm in confusion_matrix]
    else:
        tot = confusion_matrix.TP+confusion_matrix.FP
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute PPV: one or both classes have no instances")
            return -1
        return confusion_matrix.TP/tot


@deprecated_keywords({"confm": "confusion_matrix"})
def precision(confusion_matrix):
    """
    Return `precision <http://en.wikipedia.org/wiki/Precision_and_recall>`_
    (proportion of retrieved instances that are relevant).
    """
    return PPV(confusion_matrix)

@deprecated_keywords({"confm": "confusion_matrix"})
def NPV(confusion_matrix):
    """Return `negative predictive value <http://en.wikipedia.org/wiki/Negative_predictive_value>`_
    (proportion of subjects with a negative test result who are correctly
    diagnosed).
    """
    if type(confusion_matrix) == list:
        return [NPV(cm) for cm in confusion_matrix]
    else:
        tot = confusion_matrix.FN+confusion_matrix.TN
        if tot < 1e-6:
            import warnings
            warnings.warn("Can't compute NPV: one or both classes have no instances")
            return -1
        return confusion_matrix.TN/tot

@deprecated_keywords({"confm": "confusion_matrix"})
def F1(confusion_matrix):
    """Return the `F1 score <http://en.wikipedia.org/wiki/F1_score>`_
    (harmonic mean of precision and recall)."""
    if type(confusion_matrix) == list:
        return [F1(cm) for cm in confusion_matrix]
    else:
        p = precision(confusion_matrix)
        r = recall(confusion_matrix)
        if p + r > 0:
            return 2. * p * r / (p + r)
        else:
            import warnings
            warnings.warn("Can't compute F1: P + R is zero or not defined")
            return -1

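# For instance, with TP = 40, FP = 10, FN = 20: precision = 40/50 = 0.8,
# recall = 40/60, about 0.667, so F1 = 2*0.8*0.667/(0.8 + 0.667), about 0.727.
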

@deprecated_keywords({"confm": "confusion_matrix"})
def Falpha(confusion_matrix, alpha=1.0):
    """Return the alpha-mean of precision and recall over the given confusion matrix."""
    if type(confusion_matrix) == list:
        return [Falpha(cm, alpha=alpha) for cm in confusion_matrix]
    else:
        p = precision(confusion_matrix)
        r = recall(confusion_matrix)
        return (1. + alpha) * p * r / (alpha * p + r)


@deprecated_keywords({"confm": "confusion_matrix"})
def MCC(confusion_matrix):
    """
    Return the `Matthews correlation coefficient <http://en.wikipedia.org/wiki/Matthews_correlation_coefficient>`_
    (correlation coefficient between the observed and predicted binary classifications).
    """
    # code by Boris Gorelik
    if type(confusion_matrix) == list:
        return [MCC(cm) for cm in confusion_matrix]
    else:
        truePositive = confusion_matrix.TP
        trueNegative = confusion_matrix.TN
        falsePositive = confusion_matrix.FP
        falseNegative = confusion_matrix.FN

        try:
            r = (((truePositive * trueNegative) - (falsePositive * falseNegative)) /
                math.sqrt((truePositive + falsePositive) *
                (truePositive + falseNegative) *
                (trueNegative + falsePositive) *
                (trueNegative + falseNegative))
                )
        except ZeroDivisionError:
            # Zero division occurs when there are either no true positives
            # or no true negatives, i.e. the data contain only one class.
            import warnings
            warnings.warn("Can't compute MCC: TP or TN is zero or not defined")
            r = None

    return r


@deprecated_keywords({"bIsListOfMatrices": "b_is_list_of_matrices"})
def scotts_pi(confusion_matrix, b_is_list_of_matrices=True):
    """Compute Scott's Pi for measuring inter-rater agreement for nominal data

    http://en.wikipedia.org/wiki/Scott%27s_Pi
    Scott's Pi is a statistic for measuring inter-rater reliability for
    nominal raters.

    @param confusion_matrix: confusion matrix, or list of confusion matrices.
                            To obtain a non-binary confusion matrix, call
                            Orange.evaluation.scoring.confusion_matrices and
                            set the class_index parameter to -2.
    @param b_is_list_of_matrices: specifies whether confusion_matrix is a
                            list of matrices. This function needs to operate
                            on non-binary confusion matrices, which are
                            represented by python lists, therefore one needs
                            a way to distinguish between a single matrix and
                            a list of matrices.
    """

    if b_is_list_of_matrices:
        try:
            return [scotts_pi(cm, b_is_list_of_matrices=False) for cm in confusion_matrix]
        except TypeError:
            # Never mind the parameter, maybe this is a "conventional" binary
            # confusion matrix and b_is_list_of_matrices was specified by mistake
            return scotts_pi(confusion_matrix, b_is_list_of_matrices=False)
    else:
        if isinstance(confusion_matrix, ConfusionMatrix):
            confusion_matrix = numpy.array([[confusion_matrix.TP, confusion_matrix.FN],
                    [confusion_matrix.FP, confusion_matrix.TN]], dtype=float)
        else:
            confusion_matrix = numpy.array(confusion_matrix, dtype=float)

        marginalSumOfRows = numpy.sum(confusion_matrix, axis=0)
        marginalSumOfColumns = numpy.sum(confusion_matrix, axis=1)
        jointProportion = (marginalSumOfColumns + marginalSumOfRows) / \
                            (2.0 * numpy.sum(confusion_matrix))
        # In the eq. above, 2.0 is what the Wikipedia page calls
        # the number of annotators. Here we have two annotators:
        # the observed (true) labels (annotations) and those predicted by
        # the learners.

        prExpected = numpy.sum(jointProportion ** 2)
        prActual = numpy.sum(numpy.diag(confusion_matrix)) / numpy.sum(confusion_matrix)

        ret = (prActual - prExpected) / (1.0 - prExpected)
        return ret

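# A minimal sketch for a single binary matrix (illustrative numbers):
#
#   cm = ConfusionMatrix()
#   cm.TP, cm.FN, cm.FP, cm.TN = 45., 5., 10., 40.
#   print scotts_pi(cm, b_is_list_of_matrices=False)   # about 0.699
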
@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def AUCWilcoxon(res, class_index=-1, ignore_weights=False, **argkw):
    """ Computes the area under ROC (AUC) and its standard error using
    Wilcoxon's approach proposed by Hanley and McNeil (1982). If
    :obj:`class_index` is not specified, the first class is used as
    "the positive" and the others are negative. The result is a list of
    tuples (aROC, standard error).

    If test results consist of multiple folds, you need to split them using
    :obj:`split_by_iterations` and perform this test on each fold separately.
    """
    useweights = res.weights and not ignore_weights
    problists, tots = corn.computeROCCumulative(res, class_index, useweights)

    results = []

    totPos, totNeg = tots[1], tots[0]
    N = totPos + totNeg
    for plist in problists:
        highPos, lowNeg = totPos, 0.0
        W, Q1, Q2 = 0.0, 0.0, 0.0
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            highPos -= thisPos
            W += thisNeg * (highPos + thisPos/2.)
            Q2 += thisPos * (lowNeg**2  + lowNeg*thisNeg  + thisNeg**2 /3.)
            Q1 += thisNeg * (highPos**2 + highPos*thisPos + thisPos**2 /3.)

            lowNeg += thisNeg

        W  /= (totPos*totNeg)
        Q1 /= (totNeg*totPos**2)
        Q2 /= (totPos*totNeg**2)

        SE = math.sqrt((W*(1-W) + (totPos-1)*(Q1-W**2) + (totNeg-1)*(Q2-W**2)) / (totPos*totNeg))
        results.append((W, SE))
    return results

AROC = AUCWilcoxon # for backward compatibility, AROC is obsolete


@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def compare_2_AUCs(res, lrn1, lrn2, class_index=-1,
                   ignore_weights=False, **argkw):
    return corn.compare2ROCs(res, lrn1, lrn2, class_index, res.weights and not ignore_weights)

compare_2_AROCs = compare_2_AUCs # for backward compatibility, compare_2_AROCs is obsolete


@deprecated_keywords({"classIndex": "class_index"})
def compute_ROC(res, class_index=-1):
    """ Computes a ROC curve as a list of (x, y) tuples, where x is
    1-specificity and y is sensitivity.
    """
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    totPos, totNeg = tots[1], tots[0]

    for plist in problists:
        curve = [(1., 1.)]
        TP, TN = totPos, 0.0
        FN, FP = 0., totNeg
        for prob in plist:
            thisPos, thisNeg = prob[1][1], prob[1][0]
            # thisPos instances move from TP to FN
            TP -= thisPos
            FN += thisPos
            # thisNeg instances move from FP to TN
            TN += thisNeg
            FP -= thisNeg

            sens = TP/(TP+FN)
            spec = TN/(FP+TN)
            curve.append((1-spec, sens))
        results.append(curve)

    return results

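# A minimal plotting sketch (uses the optional matplotlib import above;
# assumes test results `res`; illustrative only):
#
#   if HAS_MATPLOTLIB:
#       import matplotlib.pyplot as plt
#       for curve in compute_ROC(res):
#           plt.plot([x for x, y in curve], [y for x, y in curve])
#       plt.xlabel("1 - specificity"); plt.ylabel("sensitivity")
#       plt.show()
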
## TC's implementation of algorithms, taken from:
## T Fawcett: ROC Graphs: Notes and Practical Considerations for Data Mining Researchers, submitted to KDD Journal.
def ROC_slope((P1x, P1y, P1fscore), (P2x, P2y, P2fscore)):
    if P1x == P2x:
        return 1e300
    return (P1y - P2y) / (P1x - P2x)


@deprecated_keywords({"keepConcavities": "keep_concavities"})
def ROC_add_point(P, R, keep_concavities=1):
    if keep_concavities:
        R.append(P)
    else:
        while True:
            if len(R) < 2:
                R.append(P)
                return R
            else:
                T = R.pop()
                T2 = R[-1]
                if ROC_slope(T2, T) > ROC_slope(T, P):
                    R.append(T)
                    R.append(P)
                    return R
    return R


@deprecated_keywords({"classIndex": "class_index",
                      "keepConcavities": "keep_concavities"})
def TC_compute_ROC(res, class_index=-1, keep_concavities=1):
    problists, tots = corn.computeROCCumulative(res, class_index)

    results = []
    P, N = tots[1], tots[0]

    for plist in problists:
        ## corn gives a list increasing by scores; we need one decreasing by scores
        plist.reverse()
        TP = 0.0
        FP = 0.0
        curve = []
        fPrev = 10e300 # "infinity" score at 0.0, 0.0
        for prob in plist:
            f = prob[0]
            if f != fPrev:
                if P:
                    tpr = TP/P
                else:
                    tpr = 0.0
                if N:
                    fpr = FP/N
                else:
                    fpr = 0.0
                curve = ROC_add_point((fpr, tpr, fPrev), curve, keep_concavities)
                fPrev = f
            thisPos, thisNeg = prob[1][1], prob[1][0]
            TP += thisPos
            FP += thisNeg
        if P:
            tpr = TP/P
        else:
            tpr = 0.0
        if N:
            fpr = FP/N
        else:
            fpr = 0.0
        curve = ROC_add_point((fpr, tpr, f), curve, keep_concavities) ## ugly
        results.append(curve)

    return results

## returns a list of points at the intersection of the tangential iso-performance line and the given ROC curve
## for given values of FPcost, FNcost and pval
def TC_best_thresholds_on_ROC_curve(FPcost, FNcost, pval, curve):
    m = (FPcost*(1.0 - pval)) / (FNcost*pval)

    ## put the iso-performance line through point (0.0, 1.0)
    x0, y0 = (0.0, 1.0)
    x1, y1 = (1.0, 1.0 + m)
    d01 = math.sqrt((x1 - x0)*(x1 - x0) + (y1 - y0)*(y1 - y0))

    ## calculate and find the closest point to the line
    firstp = 1
    mind = 0.0
    a = (x0*y1 - x1*y0)
    closestPoints = []
    for (x, y, fscore) in curve:
        d = ((y0 - y1)*x + (x1 - x0)*y + a) / d01
        d = abs(d)
        if firstp or d < mind:
            mind, firstp = d, 0
            closestPoints = [(x, y, fscore)]
        else:
            if abs(d - mind) <= 0.0001: ## close enough
                closestPoints.append((x, y, fscore))
    return closestPoints

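# For instance, with FPcost=1.0, FNcost=4.0 and pval=0.2 the iso-performance
# slope is m = (1.0*0.8)/(4.0*0.2) = 1.0, so the best operating points are
# the ROC points closest to the slope-1 line through (0.0, 1.0).
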
def frange(start, end=None, inc=None):
    """A range function that accepts float increments."""

    if end is None:
        end = start + 0.0
        start = 0.0

    if inc is None or inc == 0:
        inc = 1.0

    L = [start]
    while 1:
        next = start + len(L) * inc
        if inc > 0 and next >= end:
            L.append(end)
            break
        elif inc < 0 and next <= end:
            L.append(end)
            break
        L.append(next)

    return L

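# For instance, frange(0.0, 1.0, 0.25) returns [0.0, 0.25, 0.5, 0.75, 1.0];
# the end point is always included, unlike the builtin range.
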
## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
## where ROCcurvesX is a set of ROC curves,
## where a (one) ROC curve is a set of (FP, TP) points
##
## for each (sub)set of input ROC curves
## returns the average ROC curve and an array of (vertical) standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_vertical_average_ROC(roc_curves, samples = 10):
    def INTERPOLATE((P1x, P1y, P1fscore), (P2x, P2y, P2fscore), X):
        if (P1x == P2x) or P1x < X > P2x or P1x > X < P2x:
            raise ValueError, "assumptions for interpolation are not met: P1 = %f,%f P2 = %f,%f X = %f" % (P1x, P1y, P2x, P2y, X)
        dx = float(P2x) - float(P1x)
        dy = float(P2y) - float(P1y)
        m = dy/dx
        return P1y + m*(X - P1x)

    def TP_FOR_FP(FPsample, ROC, npts):
        i = 0
        while i < npts - 1:
            (fp, _, _) = ROC[i + 1]
            if fp <= FPsample:
                i += 1
            else:
                break
        (fp, tp, _) = ROC[i]
        if fp == FPsample:
            return tp
        elif fp < FPsample and i + 1 < len(ROC):
            return INTERPOLATE(ROC[i], ROC[i+1], FPsample)
        elif fp < FPsample and i + 1 == len(ROC): # return the last
            return ROC[i][1]
        raise ValueError, "cannot compute: TP_FOR_FP in TC_vertical_average_ROC"

    average = []
    stdev = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        TPavg = []
        TPstd = []
        for FPsample in frange(0.0, 1.0, 1.0/samples):
            TPsum = []
            for i in range(nrocs):
                TPsum.append(TP_FOR_FP(FPsample, ROCS[i], npts[i]))
            TPavg.append((FPsample, statc.mean(TPsum)))
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstd.append(stdv)

        average.append(TPavg)
        stdev.append(TPstd)

    return average, stdev

## input ROCcurves are of form [ROCcurves1, ROCcurves2, ... ROCcurvesN],
## where ROCcurvesX is a set of ROC curves,
## where a (one) ROC curve is a set of (FP, TP) points
##
## for each (sub)set of input ROC curves
## returns the average ROC curve, an array of vertical standard deviations and an array of horizontal standard deviations
@deprecated_keywords({"ROCcurves": "roc_curves"})
def TC_threshold_average_ROC(roc_curves, samples = 10):
    def POINT_AT_THRESH(ROC, npts, thresh):
        i = 0
        while i < npts - 1:
            (px, py, pfscore) = ROC[i]
            if pfscore > thresh:
                i += 1
            else:
                break
        return ROC[i]

    average = []
    stdevV = []
    stdevH = []
    for ROCS in roc_curves:
        npts = []
        for c in ROCS:
            npts.append(len(c))
        nrocs = len(ROCS)

        T = []
        for c in ROCS:
            for (px, py, pfscore) in c:
                T.append(pfscore)
        T.sort()
        T.reverse() ## ugly

        TPavg = []
        TPstdV = []
        TPstdH = []
        for tidx in frange(0, (len(T) - 1.0), float(len(T))/samples):
            FPsum = []
            TPsum = []
            for i in range(nrocs):
                (fp, tp, _) = POINT_AT_THRESH(ROCS[i], npts[i], T[int(tidx)])
                FPsum.append(fp)
                TPsum.append(tp)
            TPavg.append((statc.mean(FPsum), statc.mean(TPsum)))
            ## vertical standard deviation
            if len(TPsum) > 1:
                stdv = statc.std(TPsum)
            else:
                stdv = 0.0
            TPstdV.append(stdv)
            ## horizontal standard deviation
            if len(FPsum) > 1:
                stdh = statc.std(FPsum)
            else:
                stdh = 0.0
            TPstdH.append(stdh)

        average.append(TPavg)
        stdevV.append(TPstdV)
        stdevH.append(TPstdH)

    return average, stdevV, stdevH

## Calibration Curve
## returns an array of (curve, yesClassPredictions, noClassPredictions) elements, where:
##  - curve is an array of points (x, y) on the calibration curve
##  - yesClassRugPoints is an array of (x, 1) points
##  - noClassRugPoints is an array of (x, 0) points
@deprecated_keywords({"classIndex": "class_index"})
def compute_calibration_curve(res, class_index=-1):
    ## merge multiple iterations into one
    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
                res.class_values, res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type, labels=res.labels)
    for te in res.results:
        mres.results.append(te)

    problists, tots = corn.computeROCCumulative(mres, class_index)

    results = []

    bins = 10 ## divide the interval between 0.0 and 1.0 into N bins

    for plist in problists:
        yesClassRugPoints = []
        noClassRugPoints = []

        yesBinsVals = [0] * bins
        noBinsVals = [0] * bins
        for (f, (thisNeg, thisPos)) in plist:
            yesClassRugPoints.append((f, thisPos)) #1.0
            noClassRugPoints.append((f, thisNeg)) #1.0

            index = int(f * bins)
            index = min(index, bins - 1) ## just in case, for value 1.0
            yesBinsVals[index] += thisPos
            noBinsVals[index] += thisNeg

        curve = []
        for cn in range(bins):
            f = float(cn * 1.0 / bins) + (1.0 / 2.0 / bins)
            yesVal = yesBinsVals[cn]
            noVal = noBinsVals[cn]
            allVal = yesVal + noVal
            if allVal == 0.0: continue
            y = float(yesVal)/float(allVal)
            curve.append((f, y))

        ## smooth the curve
        maxnPoints = 100
        if len(curve) >= 3:
            loessCurve = statc.loess(curve, maxnPoints, 0.5, 3)
        else:
            loessCurve = curve
        clen = len(loessCurve)
        if clen > maxnPoints:
            df = clen / maxnPoints
            if df < 1: df = 1
            curve = [loessCurve[i] for i in range(0, clen, df)]
        else:
            curve = loessCurve
        curve = [c[:2] for c in curve] ## drop the third value (variance of epsilon?) that statc.loess appends to each point
        results.append((curve, yesClassRugPoints, noClassRugPoints))

    return results


## Lift Curve
## returns an array of curve elements, where:
##  - curve is an array of points ((TP+FP)/(P + N), TP, (threshold, FP/N)) on the lift curve
@deprecated_keywords({"classIndex": "class_index"})
def compute_lift_curve(res, class_index=-1):
    ## merge multiple iterations into one
    mres = Orange.evaluation.testing.ExperimentResults(1, res.classifier_names,
                res.class_values, res.weights, classifiers=res.classifiers,
                loaded=res.loaded, test_type=res.test_type, labels=res.labels)
    for te in res.results:
        mres.results.append(te)

    problists, tots = corn.computeROCCumulative(mres, class_index)

    results = []
    P, N = tots[1], tots[0]
    for plist in problists:
        ## corn gives a list increasing by scores; we need one decreasing by scores
        plist.reverse()
        TP = 0.0
        FP = 0.0
        curve = [(0.0, 0.0, (10e300, 0.0))]
        for (f, (thisNeg, thisPos)) in plist:
            TP += thisPos
            FP += thisNeg
            curve.append(((TP+FP)/(P + N), TP, (f, FP/(N or 1))))
        results.append(curve)

    return P, N, results
###

class CDT:
    """ Stores the number of concordant (C), discordant (D) and tied (T) pairs (used for AUC) """
    def __init__(self, C=0.0, D=0.0, T=0.0):
        self.C, self.D, self.T = C, D, T

def is_CDT_empty(cdt):
    return cdt.C + cdt.D + cdt.T < 1e-20


@deprecated_keywords({"classIndex": "class_index",
                      "unweighted": "ignore_weights"})
def compute_CDT(res, class_index=-1, ignore_weights=False, **argkw):
    """Obsolete, don't use."""
    if class_index < 0:
        if res.baseClass >= 0:
            class_index = res.baseClass
        else:
            class_index = 1

    useweights = res.weights and not ignore_weights
    weightByClasses = argkw.get("weightByClasses", True)

    if res.number_of_iterations > 1:
        CDTs = [CDT() for _ in range(res.number_of_learners)]
        iterationExperiments = split_by_iterations(res)
        for exp in iterationExperiments:
            expCDTs = corn.computeCDT(exp, class_index, useweights)
            for i in range(len(CDTs)):
                CDTs[i].C += expCDTs[i].C
                CDTs[i].D += expCDTs[i].D
                CDTs[i].T += expCDTs[i].T
        for i in range(res.number_of_learners):
            if is_CDT_empty(CDTs[0]):
                return corn.computeCDT(res, class_index, useweights)

        return CDTs
    else:
        return corn.computeCDT(res, class_index, useweights)

1388# Backward compatibility
1389def replace_use_weights(fun):
1390    if environ.orange_no_deprecated_members:
1391        return fun
1392
1393    @functools.wraps(fun)
1394    def wrapped(*args, **kwargs):
1395        use_weights = kwargs.pop("useWeights", None)
1396        if use_weights is not None:
1397            deprecation_warning("useWeights", "ignore_weights")
1398            kwargs["ignore_weights"] = not use_weights
1399        return fun(*args, **kwargs)
1400    return wrapped
1401
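# Sketch of the mapping this decorator performs (illustrative): a call that
# still uses the deprecated keyword, such as
#
#     AUC(res, useWeights=False)
#
# emits a deprecation warning and is forwarded as
#
#     AUC(res, ignore_weights=True)
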
1402class AUC(list):
1403    """
    Compute the area under the ROC curve given a set of experimental results.
    For multivalued class problems, return the result of :obj:`by_weighted_pairs`.
    If testing consisted of multiple folds, each fold is scored and the
    average score is returned. If a fold contains only instances with the
    same class value, the folds are merged.
1409
1410    :param test_results: test results to score
1411    :param ignore_weights: ignore instance weights when calculating score
1412    :param method: DEPRECATED, call the appropriate method directly.
1413    """
1414
1415    @replace_use_weights
1416    def __init__(self, test_results=None, method=0, ignore_weights=False):
1417
1418        super(AUC, self).__init__()
1419
1420        self.ignore_weights=ignore_weights
1421        self.method=method
1422
1423        if test_results is not None:
1424            self.__call__(test_results)
1425
1426    def __call__(self, test_results):
1427        if len(test_results.class_values) < 2:
1428            raise ValueError("Cannot compute AUC on a single-class problem")
1429        elif len(test_results.class_values) == 2:
1430            self._compute_for_binary_class(test_results)
1431        else:
1432            self._compute_for_multi_value_class(test_results, self.method)
1433
1434    @classmethod
1435    def by_weighted_pairs(cls, res, ignore_weights=False):
1436        """
        Compute AUC for each pair of classes (ignoring instances of all other
        classes) and average the results, weighting them by the number of
        pairs of instances from these two classes (i.e. by the product of
        probabilities of the two classes). AUC computed in this way still
        behaves as a concordance index, i.e., it gives the probability that two
        randomly chosen instances from different classes will be correctly
        recognized (if the classifier knows from which two classes the
        instances came).
1445        """
1446        auc = AUC(ignore_weights=ignore_weights)
1447        auc._compute_for_multi_value_class(res, method=cls.ByWeightedPairs)
1448        return auc
1449
1450    @classmethod
1451    def by_pairs(cls, res, ignore_weights=False):
1452        """
        Similar to the above, except that the average over class pairs is not
        weighted. This AUC is, like the binary AUC, independent of class
        distributions, but it is no longer related to the concordance index.
1456        """
1457        auc = AUC(ignore_weights=ignore_weights)
1458        auc._compute_for_multi_value_class(res, method=cls.ByPairs)
1459        return auc
1460
1461    @classmethod
1462    def weighted_one_against_all(cls, res, ignore_weights=False):
1463        """
        For each class, compute AUC for this class against all others (that
        is, treating the other classes as one). The AUCs are then averaged,
        weighted by the class probabilities. This is related to the
        concordance index in which we test the classifier's (average)
        capability of distinguishing instances from a specified class from
        those that come from other classes. Unlike the binary AUC, this
        measure is not independent of class distributions.
1471        """
1472        auc = AUC(ignore_weights=ignore_weights)
1473        auc._compute_for_multi_value_class(res,
1474            method=cls.WeightedOneAgainstAll)
1475        return auc
1476
1477    @classmethod
1478    def one_against_all(cls, res, ignore_weights=False):
1479        """As above, except that the average is not weighted."""
1480        auc = AUC(ignore_weights=ignore_weights)
1481        auc._compute_for_multi_value_class(res, method=cls.OneAgainstAll)
1482        return auc
1483
1484    @classmethod
1485    def single_class(cls, res, class_index=-1, ignore_weights=False):
1486        """
1487        Compute AUC where the class with the given class_index is singled
1488        out and all other classes are treated as a single class.
1489        """
1490        if class_index<0:
1491            if res.base_class>=0:
1492                class_index = res.base_class
1493            else:
1494                class_index = 1
1495
1496        auc = AUC(ignore_weights=ignore_weights)
1497        auc._compute_for_single_class(res, class_index)
1498        return auc
1499
1500    @classmethod
1501    def pair(cls, res, class_index1, class_index2, ignore_weights=False):
1502        """
        Compute AUC between a pair of classes, ignoring instances from all
        other classes.
1505        """
1506        auc = AUC(ignore_weights=ignore_weights)
1507        auc._compute_for_pair_of_classes(res, class_index1, class_index2)
1508        return auc
1509
1510    @classmethod
1511    def matrix(cls, res, ignore_weights=False):
1512        """
1513        Compute a (lower diagonal) matrix with AUCs for all pairs of classes.
1514        If there are empty classes, the corresponding elements in the matrix
1515        are -1.
1516        """
1517        auc = AUC(ignore_weights=ignore_weights)
1518        auc._compute_matrix(res)
1519        return auc
1520
1521    def _compute_for_binary_class(self, res):
1522        """AUC for binary classification problems"""
1523        if res.number_of_iterations > 1:
1524            return self._compute_for_multiple_folds(
1525                self._compute_one_class_against_all,
1526                split_by_iterations(res),
1527                (-1, res, res.number_of_iterations))
1528        else:
1529            auc, _ = self._compute_one_class_against_all(res, -1)
1530            self[:] = auc
1531            return self
1532
1533    def _compute_for_multi_value_class(self, res, method=0):
1534        """AUC for multiclass classification problems"""
1535        numberOfClasses = len(res.class_values)
1536
1537        if res.number_of_iterations > 1:
1538            iterations = split_by_iterations(res)
1539            all_ite = res
1540        else:
1541            iterations = [res]
1542            all_ite = None
1543
1544        # by pairs
1545        sum_aucs = [0.] * res.number_of_learners
1546        usefulClassPairs = 0.
1547
1548        prob = None
1549        if method in [self.ByWeightedPairs, self.WeightedOneAgainstAll]:
1550            prob = class_probabilities_from_res(res)
1551
1552        if method in [self.ByWeightedPairs, self.ByPairs]:
1553            for classIndex1 in range(numberOfClasses):
1554                for classIndex2 in range(classIndex1):
1555                    subsum_aucs = self._compute_for_multiple_folds(
1556                                             self._compute_one_class_against_another,
1557                                             iterations,
1558                                             (classIndex1, classIndex2,
1559                                              all_ite,
1560                                              res.number_of_iterations))
1561                    if subsum_aucs:
1562                        if method == self.ByWeightedPairs:
1563                            p_ij = prob[classIndex1] * prob[classIndex2]
1564                            subsum_aucs = [x * p_ij  for x in subsum_aucs]
1565                            usefulClassPairs += p_ij
1566                        else:
1567                            usefulClassPairs += 1
1568                        sum_aucs = map(add, sum_aucs, subsum_aucs)
1569        else:
1570            for classIndex in range(numberOfClasses):
1571                subsum_aucs = self._compute_for_multiple_folds(
1572                    self._compute_one_class_against_all,
1573                    iterations, (classIndex, all_ite,
1574                                 res.number_of_iterations))
1575                if subsum_aucs:
                    if method == self.WeightedOneAgainstAll:
1577                        p_i = prob[classIndex]
1578                        subsum_aucs = [x * p_i  for x in subsum_aucs]
1579                        usefulClassPairs += p_i
1580                    else:
1581                        usefulClassPairs += 1
1582                    sum_aucs = map(add, sum_aucs, subsum_aucs)
1583
1584        if usefulClassPairs > 0:
1585            sum_aucs = [x/usefulClassPairs for x in sum_aucs]
1586
1587        self[:] = sum_aucs
1588        return self
1589
    # computes the average AUC over folds using an "auc_computer" (one class
    # against all, or one class against another); it returns the sum of what
    # the computer returns, unless at some fold the computer has to resort to
    # computing over all folds or even that fails; in these cases the result
    # is returned immediately
1594    @deprecated_keywords({"AUCcomputer": "auc_computer",
1595                          "computerArgs": "computer_args"})
1596    def _compute_for_multiple_folds(self, auc_computer, iterations,
1597                                 computer_args):
1598        """Compute the average AUC over folds using :obj:`auc_computer`."""
1599        subsum_aucs = [0.] * iterations[0].number_of_learners
1600        for ite in iterations:
1601            aucs, foldsUsed = auc_computer(*(ite, ) + computer_args)
1602            if not aucs:
1603                return None
1604            if not foldsUsed:
1605                return aucs
1606            subsum_aucs = map(add, subsum_aucs, aucs)
1607        self[:] = subsum_aucs
1608        return self
1609
    # Computes AUC; in a multivalued class problem, AUC is computed as one against all.
    # Results over folds are averaged; if some folds contain examples from one class only, the folds are merged.
1612    def _compute_for_single_class(self, res, class_index):
1613        if res.number_of_iterations > 1:
1614            self._compute_for_multiple_folds(
1615                self._compute_one_class_against_all, split_by_iterations(res),
1616                (class_index, res, res.number_of_iterations))
1617        else:
1618            self._compute_one_class_against_all(res, class_index)
1619
1620    # Computes AUC for a pair of classes (as if there were no other classes)
    # Results over folds are averaged; if some folds have examples from one class only, the folds are merged
1622    def _compute_for_pair_of_classes(self, res, class_index1, class_index2):
1623        if res.number_of_iterations > 1:
1624            self._compute_for_multiple_folds(self._compute_one_class_against_another,
1625                split_by_iterations(res),
1626                (class_index1, class_index2, res, res.number_of_iterations))
1627        else:
1628            self._compute_one_class_against_another(res, class_index1, class_index2)
1629
1630    # computes AUC between class i and the other classes (treating them as the same class)
1631    @deprecated_keywords({"classIndex": "class_index",
1632                          "divideByIfIte": "divide_by_if_ite"})
1633    def _compute_one_class_against_all(self, ite, class_index, all_ite=None,
1634                                      divide_by_if_ite=1.0):
        """Compute AUC between class i and all the other classes."""
1636        return self._compute_auc(corn.computeCDT, ite, all_ite, divide_by_if_ite,
1637            (class_index, not self.ignore_weights))
1638
1639
1640    # computes AUC between classes i and j as if there are no other classes
1641    def _compute_one_class_against_another(self, ite, class_index1,
1642            class_index2, all_ite=None,
1643            divide_by_if_ite=1.0):
1644        """
1645        Compute AUC between classes i and j as if there are no other classes.
1646        """
1647        return self._compute_auc(corn.computeCDTPair, ite, all_ite, divide_by_if_ite,
1648            (class_index1, class_index2, not self.ignore_weights))
1649
    # computes AUC using a specified 'cdt_computer' function.
    # It tries to compute AUCs from 'ite' (examples from a single iteration) and,
    # if C+D+T=0, from 'all_ite' (the entire test set). In the former case, the AUCs
    # are divided by 'divide_by_if_ite'. An additional flag is returned, True in
    # the former case and False in the latter.
    @deprecated_keywords({"cdtComputer": "cdt_computer",
1656                          "divideByIfIte": "divide_by_if_ite",
1657                          "computerArgs": "computer_args"})
1658    def _compute_auc(self, cdt_computer, ite, all_ite, divide_by_if_ite,
1659              computer_args):
1660        """
1661        Compute AUC using a :obj:`cdt_computer`.
1662        """
1663        cdts = cdt_computer(*(ite, ) + computer_args)
1664        if not is_CDT_empty(cdts[0]):
1665            return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T)/divide_by_if_ite for cdt in cdts], True
1666
1667        if all_ite:
1668            cdts = cdt_computer(*(all_ite, ) + computer_args)
1669            if not is_CDT_empty(cdts[0]):
1670                return [(cdt.C+cdt.T/2)/(cdt.C+cdt.D+cdt.T) for cdt in cdts], False
1671
1672        return False, False
1673
1674    def _compute_matrix(self, res):
1675        numberOfClasses = len(res.class_values)
1676        number_of_learners = res.number_of_learners
1677        if res.number_of_iterations > 1:
1678            iterations, all_ite = split_by_iterations(res), res
1679        else:
1680            iterations, all_ite = [res], None
1681        aucs = [[[] for _ in range(numberOfClasses)] for _ in
1682                                                     range(number_of_learners)]
1683        for classIndex1 in range(numberOfClasses):
1684            for classIndex2 in range(classIndex1):
1685                pair_aucs = self._compute_for_multiple_folds(
1686                    self._compute_one_class_against_another,
1687                    iterations, (classIndex1,
1688                                                  classIndex2,
1689                                                  all_ite,
1690                                                  res.number_of_iterations))
1691                if pair_aucs:
1692                    for lrn in range(number_of_learners):
1693                        aucs[lrn][classIndex1].append(pair_aucs[lrn])
1694                else:
1695                    for lrn in range(number_of_learners):
1696                        aucs[lrn][classIndex1].append(-1)
1697        self[:] = aucs
1698        return aucs
1699
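# Usage sketch (illustrative; the learners and the data table are assumed,
# not part of this module):
#
#     learners = [Orange.classification.bayes.NaiveLearner(),
#                 Orange.classification.tree.TreeLearner()]
#     res = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
#     print AUC(res)                   # multiclass default: by weighted pairs
#     print AUC.by_pairs(res)          # unweighted average over class pairs
#     print AUC.single_class(res, class_index=0)
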
1700#Backward compatibility
1701AUC.ByWeightedPairs = 0
1702AUC.ByPairs = 1
1703AUC.WeightedOneAgainstAll = 2
1704AUC.OneAgainstAll = 3
1705
1706@replace_use_weights
1707def AUC_binary(res, ignore_weights=False):
1708    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1709    auc._compute_for_binary_class(res)
1710    return auc
1711
1712@replace_use_weights
1713def AUC_multi(res, ignore_weights=False, method = 0):
1714    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights,
1715        method=method)
    auc._compute_for_multi_value_class(res, method)
1717    return auc
1718
1719def AUC_iterations(auc_computer, iterations, computer_args):
1720    auc = deprecated_function_name(AUC)()
1721    auc._compute_for_multiple_folds(auc_computer, iterations, computer_args)
1722    return auc
1723
1724def AUC_x(cdtComputer, ite, all_ite, divide_by_if_ite, computer_args):
1725    auc = deprecated_function_name(AUC)()
1726    result = auc._compute_auc(cdtComputer, ite, all_ite, divide_by_if_ite,
1727        computer_args)
1728    return result
1729
1730@replace_use_weights
1731def AUC_i(ite, class_index, ignore_weights=False, all_ite=None,
1732          divide_by_if_ite=1.0):
    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
    result = auc._compute_one_class_against_all(ite, class_index,
        all_ite=all_ite, divide_by_if_ite=divide_by_if_ite)
    return result
1736    return result
1737
1738
1739@replace_use_weights
1740def AUC_ij(ite, class_index1, class_index2, ignore_weights=False,
1741           all_ite = None, divide_by_if_ite = 1.0):
1742    auc = deprecated_function_name(AUC)(ignore_weights=ignore_weights)
1743    result = auc._compute_one_class_against_another(
        ite, class_index1, class_index2, all_ite=all_ite,
        divide_by_if_ite=divide_by_if_ite)
1745    return result
1746
1747
1748
1749
1750#AUC_binary = replace_use_weights(deprecated_function_name(AUC()._compute_for_binary_class))
1751#AUC_multi = replace_use_weights(deprecated_function_name(AUC._compute_for_multi_value_class))
1752#AUC_iterations = replace_use_weights(deprecated_function_name(AUC._compute_for_multiple_folds))
1753#AUC_x = replace_use_weights(deprecated_function_name(AUC._compute_auc))
1754#AUC_i = replace_use_weights(deprecated_function_name(AUC._compute_one_class_against_all))
1755#AUC_ij = replace_use_weights(deprecated_function_name(AUC._compute_one_class_against_another))
1756
1757AUC_single = replace_use_weights(
1758             deprecated_keywords({"classIndex": "class_index"})(
1759             deprecated_function_name(AUC.single_class)))
1760AUC_pair = replace_use_weights(
1761           deprecated_keywords({"classIndex1": "class_index1",
1762                                "classIndex2": "class_index2"})(
1763           deprecated_function_name(AUC.pair)))
1764AUC_matrix = replace_use_weights(deprecated_function_name(AUC.matrix))
1765
1766
1767@deprecated_keywords({"unweighted": "ignore_weights"})
1768def McNemar(res, ignore_weights=False, **argkw):
    """ Compute a triangular matrix with the McNemar statistic for each pair
    of classifiers. The statistic follows the chi-square distribution with
    one degree of freedom; the critical value for significance at 5% is about 3.84.
1772    """
1773    nLearners = res.number_of_learners
1774    mcm = []
1775    for i in range(nLearners):
1776       mcm.append([0.0]*res.number_of_learners)
1777
1778    if not res.weights or ignore_weights:
1779        for i in res.results:
1780            actual = i.actual_class
1781            classes = i.classes
1782            for l1 in range(nLearners):
1783                for l2 in range(l1, nLearners):
1784                    if classes[l1]==actual:
1785                        if classes[l2]!=actual:
1786                            mcm[l1][l2] += 1
1787                    elif classes[l2]==actual:
1788                        mcm[l2][l1] += 1
1789    else:
1790        for i in res.results:
1791            actual = i.actual_class
1792            classes = i.classes
1793            for l1 in range(nLearners):
1794                for l2 in range(l1, nLearners):
1795                    if classes[l1]==actual:
1796                        if classes[l2]!=actual:
1797                            mcm[l1][l2] += i.weight
1798                    elif classes[l2]==actual:
1799                        mcm[l2][l1] += i.weight
1800
1801    for l1 in range(nLearners):
1802        for l2 in range(l1, nLearners):
1803            su=mcm[l1][l2] + mcm[l2][l1]
1804            if su:
1805                mcm[l2][l1] = (abs(mcm[l1][l2]-mcm[l2][l1])-1)**2 / su
1806            else:
1807                mcm[l2][l1] = 0
1808
1809    for l1 in range(nLearners):
1810        mcm[l1]=mcm[l1][:l1]
1811
1812    return mcm
1813
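# Usage sketch (illustrative): entries of the returned triangular matrix that
# exceed roughly 3.84 mark pairs of classifiers that differ significantly at
# the 5% level.
#
#     mcm = McNemar(res)
#     for i in range(len(mcm)):
#         for j in range(i):
#             if mcm[i][j] > 3.84:
#                 print "classifiers %i and %i differ (p < 0.05)" % (i, j)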
1814
1815def McNemar_of_two(res, lrn1, lrn2, ignore_weights=False):
    """ Compute the McNemar statistic for a pair of classifiers, specified
    by the indices lrn1 and lrn2.
    """
1819    tf = ft = 0.0
    if not res.weights or ignore_weights:
        for i in res.results:
            actual = i.actual_class
            if i.classes[lrn1] == actual:
                if i.classes[lrn2] != actual:
                    tf += 1.0
            elif i.classes[lrn2] == actual:
                ft += 1.0
    else:
        for i in res.results:
            actual = i.actual_class
            if i.classes[lrn1] == actual:
                if i.classes[lrn2] != actual:
                    tf += i.weight
            elif i.classes[lrn2] == actual:
                ft += i.weight
1836
1837    su = tf + ft
1838    if su:
1839        return (abs(tf-ft)-1)**2 / su
1840    else:
1841        return 0
1842
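# A quick numeric check (hypothetical counts): if the first classifier is
# right where the second is wrong on 15 instances (tf) and the reverse holds
# on 5 instances (ft), the statistic is (|15 - 5| - 1)**2 / 20 = 4.05, which
# exceeds 3.84 and is thus significant at the 5% level.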
1843
1844def Friedman(res, stat=CA):
    """ Compare classifiers with the Friedman test, treating folds as
        different examples. Return F, p and the average ranks.
    """
1848    res_split = split_by_iterations(res)
1849    res = [stat(r) for r in res_split]
1850   
1851    N = len(res)
1852    k = len(res[0])
1853    sums = [0.0]*k
1854    for r in res:
1855        ranks = [k-x+1 for x in statc.rankdata(r)]
1856        if stat==Brier_score: # reverse ranks for Brier_score (lower better)
1857            ranks = [k+1-x for x in ranks]
1858        sums = map(add, ranks, sums)
1859
1860    T = sum(x*x for x in sums)
1861    sums = [x/N for x in sums]
1862
1863    F = 12.0 / (N*k*(k+1)) * T  - 3 * N * (k+1)
1864
1865    return F, statc.chisqprob(F, k-1), sums
1866
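# Usage sketch (illustrative):
#
#     F, p, avgranks = Friedman(res, stat=CA)
#     if p < 0.05:
#         print "classifiers differ; average ranks:", avgranks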
1867
1868def Wilcoxon_pairs(res, avgranks, stat=CA):
    """ Return a triangular matrix in which element [i][j] stores the
        significance of the difference between the i-th and j-th classifier,
        as computed by the Wilcoxon test. The element is positive if the
        i-th classifier is better than the j-th, negative if it is worse,
        and 1 if they are equal. The arguments are ExperimentResults, the
        average ranks (as returned by Friedman) and, optionally, a statistic;
        greater values should mean better results.
    """
1875    res_split = split_by_iterations(res)
1876    res = [stat(r) for r in res_split]
1877
1878    k = len(res[0])
1879    bt = []
1880    for m1 in range(k):
1881        nl = []
1882        for m2 in range(m1+1, k):
1883            t, p = statc.wilcoxont([r[m1] for r in res], [r[m2] for r in res])
1884            if avgranks[m1]<avgranks[m2]:
1885                nl.append(p)
1886            elif avgranks[m2]<avgranks[m1]:
1887                nl.append(-p)
1888            else:
1889                nl.append(1)
1890        bt.append(nl)
1891    return bt
1892
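# Usage sketch (illustrative), continuing from the Friedman example above:
#
#     pairs = Wilcoxon_pairs(res, avgranks)
#     # pairs[i][j - i - 1] holds the signed p-value for classifiers i < j;
#     # a positive sign means classifier i had the better average rank.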
1893
1894@deprecated_keywords({"allResults": "all_results",
1895                      "noConfidence": "no_confidence"})
1896def plot_learning_curve_learners(file, all_results, proportions, learners, no_confidence=0):
1897    plot_learning_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], no_confidence)
1898
1899
1900@deprecated_keywords({"allResults": "all_results",
1901                      "noConfidence": "no_confidence"})
1902def plot_learning_curve(file, all_results, proportions, legend, no_confidence=0):
1903    import types
1904    fopened=0
1905    if type(file)==types.StringType:
1906        file=open(file, "wt")
1907        fopened=1
1908       
1909    file.write("set yrange [0:1]\n")
1910    file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
1911    file.write("set multiplot\n\n")
1912    CAs = [CA(x, report_se=True) for x in all_results]
1913
1914    file.write("plot \\\n")
1915    for i in range(len(legend)-1):
1916        if not no_confidence:
1917            file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (i+1))
1918        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
1919    if not no_confidence:
1920        file.write("'-' title '' with yerrorbars pointtype %i,\\\n" % (len(legend)))
1921    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[-1], len(legend)))
1922
1923    for i in range(len(legend)):
1924        if not no_confidence:
1925            for p in range(len(proportions)):
1926                file.write("%f\t%f\t%f\n" % (proportions[p], CAs[p][i][0], 1.96*CAs[p][i][1]))
1927            file.write("e\n\n")
1928
1929        for p in range(len(proportions)):
1930            file.write("%f\t%f\n" % (proportions[p], CAs[p][i][0]))
1931        file.write("e\n\n")
1932
1933    if fopened:
1934        file.close()
1935
1936
1937def print_single_ROC_curve_coordinates(file, curve):
1938    import types
1939    fopened=0
1940    if type(file)==types.StringType:
1941        file=open(file, "wt")
1942        fopened=1
1943
1944    for coord in curve:
1945        file.write("%5.3f\t%5.3f\n" % tuple(coord))
1946
1947    if fopened:
1948        file.close()
1949
1950
1951def plot_ROC_learners(file, curves, learners):
1952    plot_ROC(file, curves, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))])
1953   
1954def plot_ROC(file, curves, legend):
1955    import types
1956    fopened=0
1957    if type(file)==types.StringType:
1958        file=open(file, "wt")
1959        fopened=1
1960
1961    file.write("set yrange [0:1]\n")
1962    file.write("set xrange [0:1]\n")
1963    file.write("set multiplot\n\n")
1964
1965    file.write("plot \\\n")
1966    for leg in legend:
1967        file.write("'-' title '%s' with lines,\\\n" % leg)
1968    file.write("'-' title '' with lines\n")
1969
1970    for curve in curves:
1971        for coord in curve:
1972            file.write("%5.3f\t%5.3f\n" % tuple(coord))
1973        file.write("e\n\n")
1974
    file.write("1.0\t1.0\n0.0\t0.0\ne\n\n")
1976
1977    if fopened:
1978        file.close()
1979
1980
1981@deprecated_keywords({"allResults": "all_results"})
1982def plot_McNemar_curve_learners(file, all_results, proportions, learners, reference=-1):
1983    plot_McNemar_curve(file, all_results, proportions, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))], reference)
1984
1985
1986@deprecated_keywords({"allResults": "all_results"})
1987def plot_McNemar_curve(file, all_results, proportions, legend, reference=-1):
1988    if reference<0:
1989        reference=len(legend)-1
1990       
1991    import types
1992    fopened=0
1993    if type(file)==types.StringType:
1994        file=open(file, "wt")
1995        fopened=1
1996       
1997    #file.write("set yrange [0:1]\n")
1998    #file.write("set xrange [%f:%f]\n" % (proportions[0], proportions[-1]))
1999    file.write("set multiplot\n\n")
2000    file.write("plot \\\n")
2001    tmap=range(reference)+range(reference+1, len(legend))
2002    for i in tmap[:-1]:
2003        file.write("'-' title '%s' with linespoints pointtype %i,\\\n" % (legend[i], i+1))
    file.write("'-' title '%s' with linespoints pointtype %i\n" % (legend[tmap[-1]], tmap[-1]+1))
2005    file.write("\n")
2006
2007    for i in tmap:
2008        for p in range(len(proportions)):
2009            file.write("%f\t%f\n" % (proportions[p], McNemar_of_two(all_results[p], i, reference)))
2010        file.write("e\n\n")
2011
2012    if fopened:
2013        file.close()
2014
2015default_point_types=("{$\\circ$}", "{$\\diamond$}", "{$+$}", "{$\\times$}", "{$|$}")+tuple([chr(x) for x in range(97, 122)])
2016default_line_types=("\\setsolid", "\\setdashpattern <4pt, 2pt>", "\\setdashpattern <8pt, 2pt>", "\\setdashes", "\\setdots")
2017
2018@deprecated_keywords({"allResults": "all_results"})
2019def learning_curve_learners_to_PiCTeX(file, all_results, proportions, **options):
2020    return apply(learning_curve_to_PiCTeX, (file, all_results, proportions), options)
2021
2022
2023@deprecated_keywords({"allResults": "all_results"})
2024def learning_curve_to_PiCTeX(file, all_results, proportions, **options):
2025    import types
2026    fopened=0
2027    if type(file)==types.StringType:
2028        file=open(file, "wt")
2029        fopened=1
2030
2031    nexamples=len(all_results[0].results)
2032    CAs = [CA(x, report_se=True) for x in all_results]
2033
2034    graphsize=float(options.get("graphsize", 10.0)) #cm
2035    difprop=proportions[-1]-proportions[0]
2036    ntestexamples=nexamples*proportions[-1]
2037    xunit=graphsize/ntestexamples
2038
2039    yshift=float(options.get("yshift", -ntestexamples/20.))
2040   
2041    pointtypes=options.get("pointtypes", default_point_types)
2042    linetypes=options.get("linetypes", default_line_types)
2043
2044    if options.has_key("numberedx"):
2045        numberedx=options["numberedx"]
2046        if type(numberedx)==types.IntType:
2047            if numberedx>0:
2048                numberedx=[nexamples*proportions[int(i/float(numberedx)*len(proportions))] for i in range(numberedx)]+[proportions[-1]*nexamples]
2049            elif numberedx<0:
2050                numberedx = -numberedx
2051                newn=[]
2052                for i in range(numberedx+1):
2053                    wanted=proportions[0]+float(i)/numberedx*difprop
2054                    best=(10, 0)
2055                    for t in proportions:
2056                        td=abs(wanted-t)
2057                        if td<best[0]:
2058                            best=(td, t)
2059                    if not best[1] in newn:
2060                        newn.append(best[1])
2061                newn.sort()
2062                numberedx=[nexamples*x for x in newn]
2063        elif type(numberedx[0])==types.FloatType:
2064            numberedx=[nexamples*x for x in numberedx]
2065    else:
2066        numberedx=[nexamples*x for x in proportions]
2067
2068    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <%10.8fcm, %5.3fcm>\n\n" % (xunit, graphsize))
    file.write("\\setplotarea x from %5.3f to %5.3f, y from 0 to 1\n" % (0, ntestexamples))
    file.write("\\axis bottom invisible\n")# label {#examples}
2073    file.write("      ticks short at %s /\n" % reduce(lambda x,y:x+" "+y, ["%i"%(x*nexamples+0.5) for x in proportions]))
2074    if numberedx:
2075        file.write("            long numbered at %s /\n" % reduce(lambda x,y:x+y, ["%i " % int(x+0.5) for x in numberedx]))
2076    file.write("  /\n")
    file.write("\\axis left invisible\n")# label {classification accuracy}
2078    file.write("      shiftedto y=%5.3f\n" % yshift)
2079    file.write("      ticks short from 0.0 to 1.0 by 0.05\n")
2080    file.write("            long numbered from 0.0 to 1.0 by 0.25\n")
2081    file.write("  /\n")
2082    if options.has_key("default"):
        file.write("\\setdashpattern<1pt, 1pt>\n")
        file.write("\\plot %5.3f %5.3f %5.3f %5.3f /\n" % (0., options["default"], ntestexamples, options["default"]))
2085   
2086    for i in range(len(CAs[0])):
2087        coordinates=reduce(lambda x,y:x+" "+y, ["%i %5.3f" % (proportions[p]*nexamples, CAs[p][i][0]) for p in range(len(proportions))])
2088        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %s /\n" % coordinates)
        if pointtypes:
            file.write("\\multiput %s at %s /\n" % (pointtypes[i], coordinates))
2093
    file.write("\\endpicture\n")
2095    file.write("}\n")
    if fopened:
        file.close()
2100
2101def legend_learners_to_PiCTeX(file, learners, **options):
2102  return apply(legend_to_PiCTeX, (file, [Orange.misc.getobjectname(learners[i], "Learner %i" % i) for i in range(len(learners))]), options)
2103   
2104def legend_to_PiCTeX(file, legend, **options):
2105    import types
2106    fopened=0
2107    if type(file)==types.StringType:
2108        file=open(file, "wt")
2109        fopened=1
2110
2111    pointtypes=options.get("pointtypes", default_point_types)
2112    linetypes=options.get("linetypes", default_line_types)
2113
2114    file.write("\\mbox{\n")
    file.write("\\beginpicture\n")
    file.write("\\setcoordinatesystem units <5cm, 1pt>\n\n")
    file.write("\\setplotarea x from 0.000 to %5.3f, y from 0 to 12\n" % len(legend))
2118
2119    for i in range(len(legend)):
2120        if linetypes:
            file.write("%s\n" % linetypes[i])
            file.write("\\plot %5.3f 6 %5.3f 6 /\n" % (i, i+0.2))
        if pointtypes:
            file.write("\\put {%s} at %5.3f 6\n" % (pointtypes[i], i+0.1))
        file.write("\\put {%s} [lb] at %5.3f 0\n" % (legend[i], i+0.25))
2126
    file.write("\\endpicture\n")
2128    file.write("}\n")
    if fopened:
        file.close()
2133
2134
2135def compute_friedman(avranks, N):
    """ Return a tuple composed of (Friedman statistic, degrees of freedom)
    and (Iman-Davenport statistic, F-distribution degrees of freedom), given
    the average ranks and the number N of tested data sets.
    """
2140
2141    k = len(avranks)
2142
2143    def friedman(N, k, ranks):
2144        return 12*N*(sum([rank**2.0 for rank in ranks]) - (k*(k+1)*(k+1)/4.0) )/(k*(k+1))
2145
2146    def iman(fried, N, k):
2147        return (N-1)*fried/(N*(k-1) - fried)
2148
2149    f = friedman(N, k, avranks)
2150    im = iman(f, N, k)
2151    fdistdof = (k-1, (k-1)*(N-1))
2152
2153    return (f, k-1), (im, fdistdof)
2154
2155def compute_CD(avranks, N, alpha="0.05", type="nemenyi"):
    """ Return the critical difference for the Nemenyi or Bonferroni-Dunn test
    according to the given alpha (either alpha="0.05" or alpha="0.1"), the
    average ranks and the number of tested data sets N. Type can be either
    "nemenyi" for the two-tailed Nemenyi test or "bonferroni-dunn" for the
    Bonferroni-Dunn test.
2160    """
2161
2162    k = len(avranks)
2163   
2164    d = {("nemenyi", "0.05"): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
2165                               2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
2166                               3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
2167                               3.426041, 3.458425, 3.488685, 3.517073, 3.543799]
2168        , ("nemenyi", "0.1"): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
2169                               2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
2170                               2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
2171                               3.195743, 3.229723, 3.261461, 3.291224, 3.319233]
2172        , ("bonferroni-dunn", "0.05"): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
2173                                        2.638, 2.690, 2.724, 2.773],
2174         ("bonferroni-dunn", "0.1"): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
2175                                      2.394, 2.450, 2.498, 2.539]}
2176
    # The Nemenyi q values can be computed in R as qtukey(0.95, k, Inf)/sqrt(2):
    # for (x in c(2:20)) print(qtukey(0.95, x, Inf)/sqrt(2))
2179
2180    q = d[(type, alpha)]
2181
2182    cd = q[k]*(k*(k+1)/(6.0*N))**0.5
2183
2184    return cd
2185 
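# Worked example: for k = 4 methods on N = 14 data sets, the Nemenyi q value
# at alpha = 0.05 is 2.569032, so
# CD = 2.569032 * (4 * 5 / (6.0 * 14)) ** 0.5, i.e. about 1.25:
#
#     cd = compute_CD([3.143, 2.000, 2.893, 1.964], 14)   # ~1.25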
2186
2187def graph_ranks(filename, avranks, names, cd=None, cdmethod=None, lowv=None, highv=None, width=6, textspace=1, reverse=False, **kwargs):
2188    """
    Draws a CD graph, which is used to display the differences in methods'
    performance.
    See Janez Demsar, Statistical Comparisons of Classifiers over
    Multiple Data Sets, Journal of Machine Learning Research, 7(Jan):1--30, 2006.
2193
2194    Needs matplotlib to work.
2195
2196    :param filename: Output file name (with extension). Formats supported
2197                     by matplotlib can be used.
2198    :param avranks: List of average methods' ranks.
2199    :param names: List of methods' names.
2200
    :param cd: Critical difference. Used for marking methods whose
               difference is not statistically significant.
2203    :param lowv: The lowest shown rank, if None, use 1.
2204    :param highv: The highest shown rank, if None, use len(avranks).
2205    :param width: Width of the drawn figure in inches, default 6 in.
2206    :param textspace: Space on figure sides left for the description
2207                      of methods, default 1 in.
2208    :param reverse:  If True, the lowest rank is on the right. Default\: False.
    :param cdmethod: None by default. It can be an index of an element in
                     avranks or names, specifying the method that should be
                     marked with an interval.
2212    """
2213    if not HAS_MATPLOTLIB:
2214        import sys
2215        print >> sys.stderr, "Function requires matplotlib. Please install it."
2216        return
2217
2218    width = float(width)
2219    textspace = float(textspace)
2220
2221    def nth(l,n):
2222        """
        Return the n-th element of each sublist in l.
2224        """
2225        n = lloc(l,n)
2226        return [ a[n] for a in l ]
2227
2228    def lloc(l,n):
2229        """
2230        List location in list of list structure.
2231        Enable the use of negative locations:
2232        -1 is the last element, -2 second last...
2233        """
2234        if n < 0:
2235            return len(l[0])+n
2236        else:
2237            return n
2238
2239    def mxrange(lr):
2240        """
        Multiple xranges. Can be used to traverse matrices.
        This function is very slow due to the unknown number of
        parameters.

        >>> list(mxrange([2,3]))
        [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]

        >>> list(mxrange([[3,5,1],[9,0,-3]]))
        [(3, 9), (3, 6), (3, 3), (4, 9), (4, 6), (4, 3)]
2250
2251        """
2252        if not len(lr):
2253            yield ()
2254        else:
2255            #it can work with single numbers
2256            index = lr[0]
2257            if type(1) == type(index):
2258                index = [ index ]
2259            for a in range(*index):
2260                for b in mxrange(lr[1:]):
2261                    yield tuple([a] + list(b))
2262
2263
2264    from matplotlib.figure import Figure
2265    from matplotlib.backends.backend_agg import FigureCanvasAgg
2266
2267    def print_figure(fig, *args, **kwargs):
2268        canvas = FigureCanvasAgg(fig)
2269        canvas.print_figure(*args, **kwargs)
2270
2271    sums = avranks
2272
2273    tempsort =  sorted([ (a,i) for i,a in  enumerate(sums) ], reverse=reverse)
2274    ssums = nth(tempsort, 0)
2275    sortidx = nth(tempsort, 1)
2276    nnames = [ names[x] for x in sortidx ]
2277   
2278    if lowv is None:
2279        lowv = min(1, int(math.floor(min(ssums))))
2280    if highv is None:
2281        highv = max(len(avranks), int(math.ceil(max(ssums))))
2282
2283    cline = 0.4
2284
2285    k = len(sums)
2286
2287    lines = None
2288
2289    linesblank = 0
2290    scalewidth = width - 2*textspace
2291
2292    def rankpos(rank):
2293        if not reverse:
2294            a = rank - lowv
2295        else:
2296            a = highv - rank
2297        return textspace+scalewidth/(highv-lowv)*a
2298
2299    distanceh = 0.25
2300
2301    if cd and cdmethod is None:
2302   
2303        #get pairs of non significant methods
2304
2305        def get_lines(sums, hsd):
2306
2307            #get all pairs
2308            lsums = len(sums)
2309            allpairs = [ (i,j) for i,j in mxrange([[lsums], [lsums]]) if j > i ]
2310            #remove not significant
2311            notSig = [ (i,j) for i,j in allpairs if abs(sums[i]-sums[j]) <= hsd ]
2312            #keep only longest
2313           
2314            def no_longer((i,j), notSig):
2315                for i1,j1 in notSig:
2316                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
2317                        return False
2318                return True
2319
2320            longest = [ (i,j) for i,j in notSig if no_longer((i,j),notSig) ]
2321           
2322            return longest
2323
2324        lines = get_lines(ssums, cd)
2325        linesblank = 0.2 + 0.2 + (len(lines)-1)*0.1
2326
2327        #add scale
2328        distanceh = 0.25
2329        cline += distanceh
2330
    # calculate the height needed for the image
2332    minnotsignificant = max(2*0.2, linesblank)
2333    height = cline + ((k+1)/2)*0.2 + minnotsignificant
2334
2335    fig = Figure(figsize=(width, height))
2336    ax = fig.add_axes([0,0,1,1]) #reverse y axis
2337    ax.set_axis_off()
2338
2339    hf = 1./height # height factor
2340    wf = 1./width
2341
2342    def hfl(l): 
2343        return [ a*hf for a in l ]
2344
2345    def wfl(l): 
2346        return [ a*wf for a in l ]
2347
2348
2349    # Upper left corner is (0,0).
2350
2351    ax.plot([0,1], [0,1], c="w")
2352    ax.set_xlim(0, 1)
2353    ax.set_ylim(1, 0)
2354
2355    def line(l, color='k', **kwargs):
2356        """
2357        Input is a list of pairs of points.
2358        """
2359        ax.plot(wfl(nth(l,0)), hfl(nth(l,1)), color=color, **kwargs)
2360
2361    def text(x, y, s, *args, **kwargs):
2362        ax.text(wf*x, hf*y, s, *args, **kwargs)
2363
2364    line([(textspace, cline), (width-textspace, cline)], linewidth=0.7)
2365   
2366    bigtick = 0.1
2367    smalltick = 0.05
2368
2369
2370    import numpy
2371    tick = None
2372    for a in list(numpy.arange(lowv, highv, 0.5)) + [highv]:
2373        tick = smalltick
2374        if a == int(a): tick = bigtick
2375        line([(rankpos(a), cline-tick/2),(rankpos(a), cline)], linewidth=0.7)
2376
2377    for a in range(lowv, highv+1):
2378        text(rankpos(a), cline-tick/2-0.05, str(a), ha="center", va="bottom")
2379
2380    k = len(ssums)
2381
2382    for i in range((k+1)/2):
2383        chei = cline+ minnotsignificant + i *0.2
2384        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace-0.1, chei)], linewidth=0.7)
2385        text(textspace-0.2, chei, nnames[i], ha="right", va="center")
2386
2387    for i in range((k+1)/2, k):
2388        chei = cline + minnotsignificant + (k-i-1)*0.2
2389        line([(rankpos(ssums[i]), cline), (rankpos(ssums[i]), chei), (textspace+scalewidth+0.1, chei)], linewidth=0.7)
2390        text(textspace+scalewidth+0.2, chei, nnames[i], ha="left", va="center")
2391
2392    if cd and cdmethod is None:
2393
2394        #upper scale
2395        if not reverse:
2396            begin, end = rankpos(lowv), rankpos(lowv+cd)
2397        else:
2398            begin, end = rankpos(highv), rankpos(highv - cd)
2399           
2400        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
2401        line([(begin, distanceh + bigtick/2), (begin, distanceh - bigtick/2)], linewidth=0.7)
2402        line([(end, distanceh + bigtick/2), (end, distanceh - bigtick/2)], linewidth=0.7)
2403        text((begin+end)/2, distanceh - 0.05, "CD", ha="center", va="bottom")
2404
2405        #non significance lines   
2406        def draw_lines(lines, side=0.05, height=0.1):
2407            start = cline + 0.2
2408            for l,r in lines: 
2409                line([(rankpos(ssums[l])-side, start), (rankpos(ssums[r])+side, start)], linewidth=2.5) 
2410                start += height
2411
2412        draw_lines(lines)
2413
2414    elif cd:
2415        begin = rankpos(avranks[cdmethod]-cd)
2416        end = rankpos(avranks[cdmethod]+cd)
2417        line([(begin, cline), (end, cline)], linewidth=2.5) 
2418        line([(begin, cline + bigtick/2), (begin, cline - bigtick/2)], linewidth=2.5)
2419        line([(end, cline + bigtick/2), (end, cline - bigtick/2)], linewidth=2.5)
2420 
2421    print_figure(fig, filename, **kwargs)
2422
2423def mlc_hamming_loss(res):
2424    """
    Schapire and Singer (2000) presented Hamming Loss, which is defined as:

    :math:`HammingLoss(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\vartriangle Z_i|}{|L|}`
2428    """
2429    losses = [0]*res.number_of_learners
2430    label_num = len(res.labels)
2431    example_num = gettotsize(res)
2432
2433    for e in res.results:
2434        aclass = e.actual_class
2435        for i, labels in enumerate(e.classes):
2436            labels = map(int, labels)
2437            if len(labels) <> len(aclass):
2438                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2439            for j in range(label_num):
2440                if labels[j] != aclass[j]:
2441                    losses[i] += 1
2442           
2443    return [float(x)/(label_num*example_num) for x in losses]
2444
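# A small numeric sketch (hypothetical labels): with actual labels [1, 0, 1]
# and predicted labels [1, 1, 1], one of the three labels is wrong, so that
# instance contributes a loss of 1/3 to the average.
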
2445def mlc_accuracy(res, forgiveness_rate = 1.0):
2446    """
    Godbole & Sarawagi (2004) use the metrics accuracy, precision and recall as follows:
2448     
2449    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|}`
2450   
2451    Boutell et al. (2004) give a more generalized version using a parameter :math:`\\alpha \\ge 0`,
2452    called forgiveness rate:
2453   
2454    :math:`Accuracy(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} (\\frac{|Y_i \\cap Z_i|}{|Y_i \\cup Z_i|})^{\\alpha}`
2455    """
2456    accuracies = [0.0]*res.number_of_learners
2457    example_num = gettotsize(res)
2458   
2459    for e in res.results:
2460        aclass = e.actual_class
2461        for i, labels in enumerate(e.classes):
2462            labels = map(int, labels)
2463            if len(labels) <> len(aclass):
2464                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2465           
2466            intersection = 0.0
2467            union = 0.0
            # `labels` holds the predicted labels, `aclass` the actual ones
            for pred, actual in zip(labels, aclass):
                if pred and actual:
                    intersection += 1
                if pred or actual:
                    union += 1
2473
2474            if union:
2475                accuracies[i] += intersection / union
2476           
2477    return [math.pow(x/example_num,forgiveness_rate) for x in accuracies]
2478
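# A small numeric sketch (hypothetical labels): with actual labels [1, 0, 1]
# and predicted labels [1, 1, 0], the intersection contains one label and the
# union three, so the instance scores 1/3 before the forgiveness exponent is
# applied.
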
2479def mlc_precision(res):
2480    """
2481    :math:`Precision(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Z_i|}`
2482    """
2483    precisions = [0.0]*res.number_of_learners
2484    example_num = gettotsize(res)
2485   
2486    for e in res.results:
2487        aclass = e.actual_class
2488        for i, labels in enumerate(e.classes):
2489            labels = map(int, labels)
2490            if len(labels) <> len(aclass):
2491                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2492           
2493            intersection = 0.0
2494            predicted = 0.0
            # `labels` holds the predicted labels, `aclass` the actual ones
            for pred, actual in zip(labels, aclass):
                if pred and actual:
                    intersection += 1
                if pred:
                    predicted += 1
2500            if predicted:
2501                precisions[i] += intersection / predicted
2502           
2503    return [x/example_num for x in precisions]
2504
2505def mlc_recall(res):
2506    """
2507    :math:`Recall(H,D)=\\frac{1}{|D|} \\sum_{i=1}^{|D|} \\frac{|Y_i \\cap Z_i|}{|Y_i|}`
2508    """
2509    recalls = [0.0]*res.number_of_learners
2510    example_num = gettotsize(res)
2511   
2512    for e in res.results:
2513        aclass = e.actual_class
2514        for i, labels in enumerate(e.classes):
2515            labels = map(int, labels)
2516            if len(labels) <> len(aclass):
2517                raise ValueError, "The dimensions of the classified output and the actual class array do not match."
2518           
2519            intersection = 0.0
2520            actual = 0.0
            # `labels` holds the predicted labels, `aclass` the actual ones
            for pred, act in zip(labels, aclass):
                if pred and act:
                    intersection += 1
                if act:
                    actual += 1
2526            if actual:
2527                recalls[i] += intersection / actual
2528           
2529    return [x/example_num for x in recalls]
2530
2531#def mlc_ranking_loss(res):
2532#    pass
2533#
2534#def mlc_average_precision(res):
2535#    pass
2536#
2537#def mlc_hierarchical_loss(res):
2538#    pass
2539
2540
2541def mt_average_score(res, score, weights=None):
2542    """
2543    Compute individual scores for each target and return the (weighted) average.
2544
2545    One method can be used to compute scores for all targets or a list of
2546    scoring methods can be passed to use different methods for different
2547    targets. In the latter case, care has to be taken if the ranges of scoring
2548    methods differ.
2549    For example, when the first target is scored from -1 to 1 (1 best) and the
2550    second from 0 to 1 (0 best), using `weights=[0.5,-1]` would scale both
2551    to a span of 1, and invert the second so that higher scores are better.
2552
2553    :param score: Single-target scoring method or a list of such methods
2554                  (one for each target).
2555    :param weights: List of real weights, one for each target,
2556                    for a weighted average.
2557
2558    """
2559    if not len(res.results):
2560        raise ValueError, "Cannot compute the score: no examples."
2561    if res.number_of_learners < 1:
2562        return []
2563    n_classes = len(res.results[0].actual_class)
2564    if weights is None:
2565        weights = [1.] * n_classes
2566    if not hasattr(score, '__len__'):
2567        score = [score] * n_classes
2568    elif len(score) != n_classes:
2569        raise ValueError, "Number of scoring methods and targets do not match."
2570    # save original classes
2571    clsss = [te.classes for te in res.results]
2572    aclsss = [te.actual_class for te in res.results]
2573    # compute single target scores
2574    single_scores = []
2575    for i in range(n_classes):
2576        for te, clss, aclss in zip(res.results, clsss, aclsss):
2577            te.classes = [cls[i] for cls in clss]
2578            te.actual_class = aclss[i]
2579        single_scores.append(score[i](res))
2580    # restore original classes
2581    for te, clss, aclss in zip(res.results, clsss, aclsss):
2582        te.classes = clss
2583        te.actual_class = aclss
2584    return [sum(w * s for w, s in zip(weights, scores)) / sum(weights)
2585        for scores in zip(*single_scores)]
2586
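# Usage sketch (illustrative; assumes the single-target RMSE scorer defined
# earlier in this module and multitarget test results `res`):
#
#     print mt_average_score(res, RMSE)                   # plain average
#     print mt_average_score(res, RMSE, weights=[2., 1.]) # weighted average
#     print mt_average_score(res, [RMSE, RMSE])           # scorer per target
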
2587def mt_flattened_score(res, score):
2588    """
2589    Flatten (concatenate into a single list) the predictions of multiple
2590    targets and compute a single-target score.
2591   
2592    :param score: Single-target scoring method.
2593    """
2594    res2 = Orange.evaluation.testing.ExperimentResults(res.number_of_iterations,
2595        res.classifier_names, class_values=res.class_values,
2596        weights=res.weights, classifiers=res.classifiers, loaded=res.loaded,
2597        test_type=Orange.evaluation.testing.TEST_TYPE_SINGLE, labels=res.labels)
2598    for te in res.results:
2599        for i, ac in enumerate(te.actual_class):
2600            te2 = Orange.evaluation.testing.TestedExample(
2601                iteration_number=te.iteration_number, actual_class=ac)
2602            for c, p in zip(te.classes, te.probabilities):
2603                te2.add_result(c[i], p[i])
2604            res2.results.append(te2)
2605    return score(res2)
2606
2607
2608#########################################################################################
2609if __name__ == "__main__":
2610    avranks =  [3.143, 2.000, 2.893, 1.964]
2611    names = ["prva", "druga", "tretja", "cetrta" ]
2612    cd = compute_CD(avranks, 14)
2613    #cd = compute_CD(avranks, 10, type="bonferroni-dunn")
2614    print cd
2615
2616    print compute_friedman(avranks, 14)
2617
2618    #graph_ranks("test.eps", avranks, names, cd=cd, cdmethod=0, width=6, textspace=1.5)